Backport Integer#odd? to older Ruby versions
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Integer#odd? only exists from Ruby 1.8.7 onwards: supply an equivalent
# definition when the running interpreter does not provide one
if !2.respond_to?(:odd?)
        class Integer
                # true when the remainder of the division by two is one
                def odd?
                        remainder = self % 2
                        remainder == 1
                end
        end
end
14
# Print the command-line help (synopsis, options and the matching git
# config keys) to standard error.
def usage
        help_text = <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more file.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev

EOM
        STDERR.puts help_text
end
32
# Report on standard error that +arg+ (a description of what was being
# looked for) could not be found.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts message
end
36
# Returns a hash that maps usernames to author names & emails.
#
# +fn+ is the path of a file made of "username = Full Name <email>" lines;
# it is passed through File.expand_path before being opened. Blank lines
# are ignored, and lines without an '=' are reported on standard error and
# skipped, so that a single bad line does not discard the rest of the file.
# When a username appears more than once the last definition wins and a
# notice is printed.
#
# If the file cannot be opened or read, the problem is reported with
# not_found and whatever was loaded so far (possibly an empty hash) is
# returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                uname, author = line.split('=', 2)
                                if author.nil?
                                        # no '=' on this line: nothing to map
                                        unless line.strip.empty?
                                                STDERR.puts "Skipping malformed authors line #{line.chomp.inspect} in #{fn}"
                                        end
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError, ArgumentError
                # only failures to locate/open/read the file end up here;
                # ArgumentError covers File.expand_path rejecting the path
                not_found(fn)
        end
        hash
end
55
class Time
        # Build a UTC Time from an RCS date string.
        #
        # +string+ holds six dot-separated numeric fields: year, month, day,
        # hour, minute and second. RCS writes years from 1900 through 1999
        # with only their last two digits, so a year field of one or two
        # digits is taken to be in the 1900s.
        #
        # Raises ArgumentError if the number of fields is not six or if a
        # field is not made of digits only.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                unless fields.all? { |f| f =~ /\A\d+\z/ }
                        raise ArgumentError, "non-numeric field in RCS date #{string}"
                end
                numbers = fields.map { |f| f.to_i }
                # Time.utc would otherwise read '99' as the year 99 AD
                numbers[0] += 1900 if fields.first.length <= 2
                Time.utc(*numbers)
        end
end
63
# Reader for RCS ,v files. RCS.parse reads one such file, prints a blob
# for every revision text it reconstructs and then the commit/reset
# commands (git fast-import syntax) for the whole history on standard
# output; progress and diagnostics go to standard error.
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # +arg+ is either a String, or an Array of lines whose first element
        # begins with @ and whose last element ends with @ (anything else
        # raises). A new object is returned: neither the argument nor the
        # strings inside an Array argument are modified.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' unless arg.first && arg.first[0,1] == '@'
                        raise 'malformed last line' unless arg.last[-1,1] == '@'
                        # work on copies of the lines, the caller keeps its own
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/^@/,'')
                        ret.last.sub!(/@$/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/^@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize a #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # mark used for the blob of revision +arg+: the revision number with
        # its dots turned into zeros, followed by '90' repeated five times
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # mark used for the commit of revision +arg+: the revision number
        # with its dots turned into zeros, followed by '09' repeated five
        # times
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # size of +str+ in bytes, for the count of a `data` command; falls
        # back to String#length on interpreters without String#bytesize
        def RCS.bytesize(str)
                str.respond_to?(:bytesize) ? str.bytesize : str.length
        end

        # Apply an RCS delta to the lines of a base text, and return the
        # lines of the resulting text.
        #
        # +base+ is an array of lines. +delta+ is an array of lines made of
        # `dN M` commands (delete M lines starting from line N, 1-based) and
        # `aN M` commands (insert the M lines that follow the command after
        # line N). Line numbers always refer to +base+. Neither array is
        # modified.
        #
        # Raises RuntimeError on a malformed command or on a line number
        # outside of the base text.
        def RCS.apply_delta(base, delta)
                # one slot per base line: deleting empties a slot without
                # shifting the others, inserting prepends to a slot
                buffer = base.map { |l| [l.dup] }

                adding = false
                index = nil
                count = nil

                delta.each do |l|
                        if adding
                                raise 'negative index during insertion' if index < 0
                                raise 'negative count during insertion' if count < 0
                                adding << l
                                count -= 1
                                # collected all the lines, put them in place
                                unless count > 0
                                        raise 'insertion past the end of the base text' if index > buffer.length
                                        # appending after the last line needs a new slot
                                        buffer[index] ||= []
                                        buffer[index].unshift(*adding)
                                        adding = false
                                end
                                next
                        end

                        raise 'malformed diff' unless l.chomp =~ /^([ad])(\d+) (\d+)$/
                        diff_cmd = $1.intern
                        index = $2.to_i
                        count = $3.to_i
                        case diff_cmd
                        when :d
                                # for deletion, index 1 is the first index, so the Ruby
                                # index is one less than the diff one
                                index -= 1
                                # we empty the slots instead of removing them so that
                                # 'a' commands referring to the same line work properly
                                while count > 0
                                        raise 'deletion outside of the base text' if index < 0 or buffer[index].nil?
                                        buffer[index].clear
                                        index += 1
                                        count -= 1
                                end
                        when :a
                                # addition will prepend the appropriate lines
                                # to the given index, and in this case Ruby
                                # and diff indices are the same
                                adding = []
                        end
                end

                # turn the buffer into an array of lines, dropping the emptied slots
                buffer.delete_if { |slot| slot.empty? }
                buffer.flatten
        end

        # The history of a single file: the head revision number, the
        # comment and desc fields, and the revisions indexed by revision
        # number (looking up an unknown number creates an empty Revision).
        class File
                attr_accessor :head, :comment, :desc, :revision
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # true if revision +rev+ is known and its author has been set
                def has_revision?(rev)
                        @revision.has_key?(rev) and not @revision[rev].author.nil?
                end

                # Print the commit for every revision on standard output, each
                # one after its parent, followed by the resets for the branch
                # names and symbols attached to it. Exported revisions are
                # removed from the revision table.
                #
                # Recognized keys of +opts+:
                # :authors::      hash mapping RCS usernames to "Name <email>";
                #                 unknown authors become "username <empty>"
                # :tag_each_rev:: when true, also create a tag named after
                #                 each revision number
                #
                # Raises RuntimeError if some revisions can never be exported
                # because their parent is not among the revisions.
                def export_commits(opts={})
                        authors = opts[:authors] || {}
                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                exported_before = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from and not exported.include? from

                                        branch = rev.branch || 'master'
                                        author = authors[rev.author] || "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is kept as an array of lines
                                        log = rev.log.join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        puts "data #{RCS.bytesize(log)}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        # TODO FIXME this *should* be safe, in
                                        # that it should not unduly move
                                        # branches back in time, but I'm not
                                        # 100% sure ...
                                        rev.branches.each do |sym|
                                                puts "reset refs/heads/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing would repeat forever
                                if exported.length == exported_before
                                        raise "cannot export #{keys.join(', ')}: parent revision never exported"
                                end

                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # A single revision: its metadata (author, date, state), its links
        # to other revisions (next, branches, diff_base, branch_point), the
        # branch it belongs to (nil for the trunk), the symbols naming it,
        # and its log and text, both kept as arrays of lines.
        class Revision
                attr_accessor :rev, :author, :date, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # set the date from an RCS date string, converted by Time.rcs
                def date=(str)
                        @date = Time.rcs(str)
                end

                # the blob command (mark, byte count and content) for the
                # text of this revision, as a string
                def blob
                        str = @text.join('')
                        "blob\nmark :#{RCS.blob @rev}\ndata #{RCS.bytesize(str)}\n#{str}\n"
                end
        end

        # Read the RCS file +rcsfile+ as the history of +fname+ (the name
        # used for the file in the exported commits), printing the blob of
        # each revision as soon as its text is known, then export the
        # commits with RCS::File#export_commits, which receives +opts+.
        #
        # The file is read line by line by a state machine: +status+ is a
        # stack of states whose top decides how the current line is
        # interpreted. @-quoted strings are collected by the :read_lines
        # state, which on their last line pops itself and re-runs that line
        # in the state underneath, the one that consumes the collected lines.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'r') do |file|
                        status = [:basic]
                        rev = nil
                        lines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        command, args = line.split(' ', 2)
                                        next if command.nil? or command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                rev = command.dup
                                                # the first time a revision number is met it
                                                # introduces its metadata, later on its log
                                                # and text
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        sym, rev = line.strip.split(':',2);
                                        # the last symbol is followed by a ;
                                        status.pop if rev.chomp!(';')
                                        rcs.revision[rev].symbols << sym
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                raise 'malformed line' unless line =~ /^@+/
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        nomore = (ats && ats[0].length.odd?)
                                        if nomore
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
                                                date, author, state = $1, $2, $3
                                                rcs.revision[rev].date = date
                                                rcs.revision[rev].author = author
                                                rcs.revision[rev].state = state
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        # a ; on the line ends the list of branches
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # the head revision is stored in full, all
                                                # the others as a delta from their diff base
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        delta = lines.dup
                                        # what follows the last newline of the delta is
                                        # not a line of it
                                        delta.pop if delta.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        raise 'no diff base!' if base.nil?

                                        rcs.revision[rev].text = RCS.apply_delta(rcs.revision[base].text, delta)
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end

                # clean up the symbols/branches: look for revisions that have
                # one or more symbols but no dates, and make them into
                # branches, pointing to the highest commit with that key
                branches = []
                keys = rcs.revision.keys
                rcs.revision.each do |key, rev|
                        next unless rev.date.nil? && !rev.symbols.empty?
                        # plain prefix comparison: the dots of a revision number
                        # must not act as regexp wildcards
                        prefix = key + '.'
                        top = keys.select { |k| k[0, prefix.length] == prefix }.sort.last
                        # no lookup when nothing matched, it would add an entry to
                        # the table being iterated
                        tr = top ? rcs.revision[top] : nil
                        raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.nil? or tr.date.nil?
                        tr.branches |= rev.symbols
                        branches << key
                end
                branches.each { |k| rcs.revision.delete k }

                # export the commits
                rcs.export_commits(opts)
        end
end
420
require 'getoptlong'

# Command-line options understood by the script
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# Options are read in the order they are given, but every one of them
# applies to all the files on the command line.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = { :authors => {} }

# Start from the git configuration: every rcs.authorsfile entry is loaded,
# later files overriding earlier ones
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge!(load_authors_file(fn.chomp))
end

# rcs.tageachrev counts as set only if git reports it as 'true'
parse_options[:tag_each_rev] = (`git config --bool rcs.tageachrev`.chomp == 'true')

# The command line overrides the configuration; with RETURN_IN_ORDER the
# non-option arguments (the files) come with an empty option name
opts.each do |opt, arg|
        if opt == '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        elsif opt == '--rcs-suffixes'
                # TODO
        elsif opt == '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        elsif opt == '--no-tag-each-rev'
                # overrides a true value coming from the configuration
                parse_options[:tag_each_rev] = false
        elsif opt == ''
                file_list << arg
        elsif opt == '--help'
                usage
                exit
        end
end
476
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information: the username field of ~/.hgrc and the changelog_username
# setting of ~/.vimrc and ~/.gvimrc, in this order. The value may be
# enclosed in single or double quotes.
#
# Returns the first non-empty value found, or nil. When both +authors+ (a
# hash) and +user+ are given, the value is also stored as authors[user].
# Files that are missing, unreadable or not matchable are skipped.
def steal_username(authors = nil, user = nil)
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2]
        ].each do |fn, rx, idx|
                found = nil
                begin
                        file = File.expand_path fn
                        if File.readable?(file) && File.read(file) =~ rx
                                found = Regexp.last_match(idx).strip
                        end
                rescue ArgumentError, SystemCallError
                        # no home directory to expand ~ against, or a file that
                        # cannot be read or matched: try the next candidate
                        next
                end
                next if found.nil? or found.empty?
                authors[user] = found if authors and user
                return found
        end
        nil
end

# Find out who the current user is, unless an authors file told us already
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # plain assignments, since strings coming from ENV may be frozen
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                # last resort: the GECOS field of the password database, of
                # which only the part before the first comma is taken
                pw = begin
                        Etc.getpwnam(user)
                rescue ArgumentError
                        # no entry for this user
                        nil
                end
                if pw and pw.respond_to?(:gecos)
                        name = pw.gecos.to_s.split(',').first.to_s.strip
                end
        end

        if name.empty?
                # couldn't find a name, try to steal data from other sources
                steal_username(parse_options[:authors], user)
        else
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                if email.empty?
                        # couldn't find an email, try to steal data too
                        steal_username(parse_options[:authors], user)
                else
                        # we got both a name and email, fill the info
                        parse_options[:authors][user] = "#{name} <#{email}>"
                end
        end
end
522
if file_list.empty?
        usage
        exit 1
end

# suffix of the RCS files
SFX = ',v'

# exit status of the script: non-zero if any RCS file could not be found
status = 0

# Each argument is either an RCS file (its name ends with SFX) or a working
# file, whose RCS file is looked for in the RCS subdirectory first and then
# next to the file itself. Arguments whose RCS file cannot be found are
# reported and skipped.
file_list.each do |arg|
        if arg[-SFX.length, SFX.length] == SFX
                filename = File.basename(arg, SFX)
                rcsfile = arg.dup
                unless File.exist? rcsfile
                        not_found "RCS file #{arg}"
                        status |= 1
                        next
                end
        else
                filename = File.basename(arg)
                path = File.dirname(arg)
                candidates = [
                        File.join(path, 'RCS', filename) + SFX,
                        File.join(path, filename) + SFX
                ]
                rcsfile = candidates.find { |candidate| File.exist? candidate }
                if rcsfile.nil?
                        not_found "RCS file for #{filename} in #{path}"
                        status |= 1
                        next
                end
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status