Fix diff index handling
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print the command-line help text on standard error.
#
# The text lists the invocation syntax, the command-line options and the
# git config keys that provide defaults for them.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev

EOM
end
22
# Report on standard error that +arg+ could not be located.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
26
# Load an authors file and return a hash that maps usernames to author
# names & emails.
#
# Every meaningful line has the form "username = Full Name <email>"; the
# line is split on the first '=' and both halves are stripped of
# surrounding whitespace. Blank lines are ignored, and lines without a
# '=' are reported on standard error and skipped (they used to abort the
# load with a misleading "Could not find" message). A username defined
# more than once keeps its last definition, with a warning.
#
# fn:: path of the authors file; it is passed through File.expand_path
#
# Returns the (possibly empty) hash. If the file cannot be opened or
# read, the problem is reported through not_found and whatever was
# collected so far is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{line.chomp.inspect} in #{fn}"
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError
                # only I/O failures mean that the file is unusable; anything
                # else is a programming error and must not be hidden
                not_found(fn)
        end
        return hash
end
45
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string:: a date of the form Y.mm.dd.hh.mm.ss (six numeric fields
        #          separated by dots)
        #
        # RCS files written for years 1900 through 1999 store only the last
        # two digits of the year, so a year below 100 is taken to be 19xx.
        # The fields are converted to integers explicitly, so that leading
        # zeros are always read as decimal digits.
        #
        # Raises ArgumentError if the string does not have exactly six
        # fields or if one of them is not a plain decimal number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                numbers = fields.map do |field|
                        raise ArgumentError, "non-numeric field in RCS date #{string}" unless field =~ /\A\d+\z/
                        field.to_i
                end
                numbers[0] += 1900 if numbers[0] < 100
                Time.utc(*numbers)
        end
end
53
# Parser for RCS ",v" files that writes their history to standard output
# as blob and commit commands for git fast-import. Progress and
# diagnostics go to standard error, so that they never mix with the
# exported stream.
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # arg:: either a String, or an Array of lines whose first element
        #       begins with @ and whose last element ends with @
        #
        # Returns a new String / Array; the argument and the strings it
        # holds are left untouched. Raises RuntimeError for a malformed
        # array or for any other kind of argument.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' if arg.empty? or arg.first[0,1] != '@'
                        raise 'malformed last line' unless arg.last[-1,1] == '@'
                        # work on copies, so that the caller's strings are not edited
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/^@/,'')
                        ret.last.sub!(/@$/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/^@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # fast-import mark used for the blob of revision +arg+: the
        # revision number with dots turned into zeros, followed by '90'
        # five times
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # fast-import mark used for the commit of revision +arg+: the
        # revision number with dots turned into zeros, followed by '09'
        # five times
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # Apply an RCS edit script to the lines of a revision.
        #
        # base_text:: Array of lines of the revision the script is based on
        # difflines:: Array of script lines: "dN C" deletes C lines starting
        #             at line N (1-based), "aN C" inserts the following C
        #             script lines after line N
        #
        # Returns a new Array of lines; neither argument is modified.
        # Raises RuntimeError on a malformed, truncated or out-of-range
        # script.
        def RCS.apply_diff(base_text, difflines)
                # one slot per base line: deleting empties a slot and inserting
                # prepends to one, so the line numbers used by later commands
                # keep referring to the base revision
                buffer = base_text.map { |l| [l.dup] }
                script = difflines.dup

                while cmd = script.shift
                        raise 'malformed diff' unless cmd.chomp =~ /^([ad])(\d+) (\d+)$/
                        diff_cmd = $1.intern
                        index = $2.to_i
                        count = $3.to_i
                        case diff_cmd
                        when :d
                                # for deletion, index 1 is the first index, so the Ruby
                                # index is one less than the diff one
                                index -= 1
                                raise 'deletion out of range' if index < 0 or index + count > buffer.length
                                count.times { |i| buffer[index + i].clear }
                        when :a
                                # inserting after line N is prepending to slot N, so
                                # Ruby and diff indices are the same; N may be one past
                                # the last slot, when lines are appended at the end
                                raise 'insertion out of range' if index > buffer.length
                                added = script.shift(count)
                                raise 'truncated diff' if added.length < count
                                buffer[index] ||= []
                                buffer[index].unshift(*added)
                        end
                end

                # turn the buffer into an array of lines, deleting the empty slots
                buffer.delete_if { |l| l.empty? }
                buffer.flatten!
                buffer
        end

        # The contents of one RCS file: its head revision, comment leader,
        # description and the revisions found in it, indexed by revision
        # number.
        class File
                attr_accessor :head, :comment, :desc, :revision
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        # looking up an unknown revision number creates it
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # a revision counts as known once its author has been read
                def has_revision?(rev)
                        @revision.has_key?(rev) and not @revision[rev].author.nil?
                end

                # Write one fast-import commit per revision to standard
                # output, parents before children, and remove the exported
                # revisions from the revision table.
                #
                # opts:: hash of options; :authors maps RCS usernames to
                #        "Full Name <email>" strings (defaults to no mapping),
                #        :tag_each_rev requests a tag for every revision
                #
                # Raises RuntimeError if some revisions can never be
                # exported because their parent is not available.
                def export_commits(opts={})
                        authors = opts[:authors] || {}
                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                before = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from and not exported.include? from

                                        branch = rev.branch || 'master'
                                        author = lookup_author(authors, rev.author)
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is an array of lines
                                        log = rev.log.join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        # the data count is a number of bytes, not of characters
                                        puts "data #{log.bytesize}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        # TODO FIXME this *should* be safe, in
                                        # that it should not unduly move
                                        # branches back in time, but I'm not
                                        # 100% sure ...
                                        rev.branches.each do |sym|
                                                puts "reset refs/heads/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing would repeat forever
                                if exported.length == before
                                        raise "cannot export #{keys.join(', ')}: their parent revisions are not available"
                                end
                                exported.each { |k| @revision.delete(k) }
                        end
                end

                private

                # Committer identity for the RCS username +uname+: the mapped
                # author if there is one, "<uname> <empty>" otherwise. The
                # RCS file is read as binary data, so a username with
                # non-ASCII characters is looked up a second time tagged
                # with the default external encoding.
                def lookup_author(authors, uname)
                        found = authors[uname]
                        if found.nil? and uname.respond_to?(:force_encoding)
                                found = authors[uname.dup.force_encoding(Encoding.default_external)]
                        end
                        found || "#{uname} <empty>"
                end
        end

        # A single RCS revision: its metadata, its links to other
        # revisions, its log message and its full text (both kept as
        # arrays of lines).
        class Revision
                attr_accessor :rev, :author, :date, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # the date is given as an RCS date string and stored as a Time
                def date=(str)
                        @date = Time.rcs(str)
                end

                # fast-import blob command holding the text of this revision;
                # the data count is a number of bytes, not of characters
                def blob
                        str = @text.join('')
                        "blob\nmark :#{RCS.blob @rev}\ndata #{str.bytesize}\n#{str}\n"
                end
        end

        # Parse an RCS file and export its history.
        #
        # fname::   name under which the file is recorded in the commits
        # rcsfile:: path of the RCS file to read
        # opts::    options handed over to RCS::File#export_commits
        #
        # The blob of each revision is written to standard output as soon
        # as its text is known; the commits follow once the whole file has
        # been read. The file is read in binary mode, so that content in
        # any encoding can be matched and is exported byte for byte.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'rb') do |file|
                        # stack of parser states; the top one decides how the
                        # current line is read
                        status = [:basic]
                        rev = nil
                        lines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        command, args = line.split(' ', 2)
                                        next if command.nil? or command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                rev = command.dup
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        sym, rev = line.strip.split(':',2)
                                        raise "malformed symbol line #{line.chomp.inspect}" if rev.nil?
                                        # the last symbol is followed by a ;
                                        status.pop if rev.chomp!(';')
                                        rcs.revision[rev].symbols << sym
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                ats = line.match(/^@+/)
                                                raise 'malformed line' unless ats
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        if nomore = (ats && Regexp.last_match(0).length.odd?)
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                # hand the same line over to the state that
                                                # asked for the text
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
                                                rcs.revision[rev].date = $1
                                                rcs.revision[rev].author = $2
                                                rcs.revision[rev].state = $3
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        # a ; ends the list of branches
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # the head revision is stored in full, every
                                                # other one as a diff
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        script = lines.dup
                                        # what follows the last newline of the script is not
                                        # a command
                                        script.pop if script.last and script.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        if base.nil? or rcs.revision[base].text.nil?
                                                # diagnostics must not end up in the exported stream
                                                STDERR.puts "revision #{rev} has no usable diff base (#{base.inspect})"
                                                raise 'no diff base!'
                                        end

                                        rcs.revision[rev].text = RCS.apply_diff(rcs.revision[base].text, script)
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end

                # clean up the symbols/branches: look for revisions that have
                # one or more symbols but no dates, and make them into
                # branches, pointing to the highest commit with that key
                branches = []
                keys = rcs.revision.keys
                rcs.revision.each do |key, rev|
                        if rev.date.nil? and not rev.symbols.empty?
                                # the dots of the branch number must match literally, and
                                # revision components must be compared as numbers
                                prefix = /^#{Regexp.escape(key)}\./
                                top = keys.select { |k| k =~ prefix }.max_by { |k| k.split('.').map { |n| n.to_i } }
                                raise "unhandled complex branch structure met: #{rev.inspect} has no revisions" if top.nil?
                                tr = rcs.revision[top]
                                raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
                                tr.branches |= rev.symbols
                                branches << key
                        end
                end
                branches.each { |k| rcs.revision.delete k }

                # export the commits
                rcs.export_commits(opts)
        end
end
410
411 require 'getoptlong'
412
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# Options are read in the order given, but they apply to all the files
# passed on the command line.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = { :authors => {} }

# Defaults come from the git configuration: every rcs.authorsfile entry
# is loaded, later entries overriding earlier ones
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].update(load_authors_file(fn.chomp))
end

# anything but a 'true' answer from git leaves per-revision tags off
parse_options[:tag_each_rev] = (`git config --bool rcs.tageachrev`.chomp == 'true')

# The command line overrides the configuration
opts.each do |opt, arg|
        case opt
        when ''
                # with RETURN_IN_ORDER, non-option arguments are reported with
                # an empty option name
                file_list << arg
        when '--help'
                usage
                exit
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                parse_options[:tag_each_rev] = false
        when '--rcs-suffixes'
                # TODO
        when '--authors-file'
                loaded = load_authors_file(arg)
                clashing = parse_options[:authors].keys & loaded.keys
                unless clashing.empty?
                        STDERR.puts "Authors file #{arg} redefines #{clashing.join(', ')}"
                end
                parse_options[:authors].update(loaded)
        end
end
466
467 require 'etc'
468
user = Etc.getlogin || ENV['USER']

# Look for username/email data in other init files that may contain the
# information: the username field of ~/.hgrc and the changelog_username
# setting of ~/.vimrc and ~/.gvimrc, in this order.
#
# Returns the stripped value of the first setting found (surrounding
# quotes, if any, are dropped), or nil if no readable file has one. The
# method is self-contained: it is up to the caller to record the value.
def steal_username
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2]
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                # the quote group always takes part in the match (possibly
                # matching nothing), so that unquoted values are found too
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# Unless the authors files already know the current login, work out an
# identity for it from the environment, the git configuration and the
# password database, falling back to the init files of other programs
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # values are copied or reassigned rather than edited in place, since
        # strings coming from ENV cannot be modified
        name = (ENV['GIT_AUTHOR_NAME'] || '').dup
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        name = Etc.getpwnam(user).gecos.to_s
                rescue ArgumentError
                        # no password database entry for this login
                end
        end

        # an email is only looked for if we found a name
        email = ''
        unless name.empty?
                email = (ENV['GIT_AUTHOR_EMAIL'] || '').dup
                email = `git config user.email`.chomp if email.empty?
        end

        if name.empty? or email.empty?
                # couldn't find both a name and an email, try to steal the
                # data from other sources
                stolen = steal_username
                parse_options[:authors][user] = stolen if stolen
        else
                # we got both a name and email, fill the info
                parse_options[:authors][user] = "#{name} <#{email}>"
        end
end
512
# Without any file to export there is nothing to do
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status: 1 as soon as one RCS file could not be found
status = 0

# Export every file given on the command line. An argument ending in ,v
# is the RCS file itself; any other argument names the working file,
# whose RCS file is looked for in the RCS subdirectory first and next to
# the working file otherwise.
file_list.each do |arg|
        rcsfile = nil
        if arg[-2,2] == SFX
                filename = File.basename(arg, SFX)
                if File.exist? arg
                        rcsfile = arg.dup
                else
                        not_found "RCS file #{arg}"
                end
        else
                filename = File.basename(arg)
                path = File.dirname(arg)
                candidates = [
                        File.join(path, 'RCS', filename) + SFX,
                        File.join(path, filename) + SFX
                ]
                rcsfile = candidates.find { |candidate| File.exist? candidate }
                not_found "RCS file for #{filename} in #{path}" unless rcsfile
        end

        # a missing RCS file is reported and skipped, it is never parsed
        if rcsfile.nil?
                status |= 1
                next
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status