Build buffer as an array of arrays
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print the command-line help text on standard error.
#
# The text shows the invocation syntax, the supported command-line options
# and the git config keys that supply their defaults.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev

EOM
end
22
# Report on standard error that +arg+ could not be found.
#
# arg:: description of the missing item; it is interpolated into the message
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
26
# Load a username -> "Full Name <email>" mapping from an authors file.
#
# Every useful line has the form `username = Full Name <email>`: it is split
# on the first '=' and both halves are stripped. Blank lines are ignored and
# lines without a '=' are reported on standard error and skipped, so that one
# bad line does not discard the rest of the file. When a username is defined
# more than once the last definition wins and a notice is printed on
# standard error.
#
# fn:: path of the authors file (expanded with File.expand_path)
#
# Returns the hash of mappings. Failures are reported on standard error and
# whatever was read up to that point (possibly nothing) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?

                                uname, author = line.split('=', 2)
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{line.chomp.inspect} in #{fn}"
                                        next
                                end

                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError
                # the file is missing or cannot be read
                not_found(fn)
        rescue StandardError => e
                # stay best-effort, but do not disguise other problems as a
                # missing file
                STDERR.puts "Could not parse #{fn}: #{e.message}"
        end
        hash
end
45
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string:: six dot-separated numeric fields,
        #          year.month.day.hour.minute.second
        #
        # Per rcsfile(5), RCS writes the years 1900-1999 with two digits only,
        # so a year below 100 is taken to mean 19xx; passing it to Time.utc
        # unchanged would produce a date in the first century.
        #
        # Raises ArgumentError when the string does not hold exactly six
        # numeric fields.
        def self.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                unless fields.all? { |f| f =~ /\A\d+\z/ }
                        raise ArgumentError, "non-numeric field in RCS date #{string}"
                end

                year, *rest = fields.map { |f| f.to_i }
                year += 1900 if year < 100
                Time.utc(year, *rest)
        end
end
53
54 module RCS
55         # strip an optional final ;
56         def RCS.clean(arg)
57                 arg.chomp(';')
58         end
59
60         # strip the first and last @, and de-double @@s
61         def RCS.sanitize(arg)
62                 case arg
63                 when Array
64                         ret = arg.dup
65                         raise 'malformed first line' unless ret.first[0,1] == '@'
66                         raise 'malformed last line' unless ret.last[-1,1] == '@'
67                         ret.first.sub!(/^@/,'')
68                         ret.last.sub!(/@$/,'')
69                         ret.map { |l| l.gsub('@@','@') }
70                 when String
71                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
72                 else
73                         raise
74                 end
75         end
76
77         # clean and sanitize
78         def RCS.at_clean(arg)
79                 RCS.sanitize RCS.clean(arg)
80         end
81
82         def RCS.blob(arg)
83                 arg.gsub('.', '0') + ('90'*5)
84         end
85
86         def RCS.commit(arg)
87                 arg.gsub('.', '0') + ('09'*5)
88         end
89
90         class File
91                 attr_accessor :head, :comment, :desc, :revision
92                 def initialize(fname)
93                         @fname = fname.dup
94                         @head = nil
95                         @comment = nil
96                         @desc = []
97                         @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
98                 end
99
100                 def has_revision?(rev)
101                         @revision.has_key?(rev) and not @revision[rev].author.nil?
102                 end
103
104                 def export_commits(opts={})
105                         counter = 0
106                         exported = []
107                         until @revision.empty?
108                                 counter += 1
109
110                                 # a string sort is a very good candidate for
111                                 # export order, getting a miss only for
112                                 # multi-digit revision components
113                                 keys = @revision.keys.sort
114
115                                 STDERR.puts "commit export loop ##{counter}"
116                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
117                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
118
119                                 keys.each do |key|
120                                         rev = @revision[key]
121                                         # the parent commit is rev.next if we're on the
122                                         # master branch (rev.branch is nil) or
123                                         # rev.diff_base otherwise
124                                         from = rev.branch.nil? ? rev.next : rev.diff_base
125                                         # A commit can only be exported if it has no
126                                         # parent, or if the parent has been exported
127                                         # already. Skip this commit otherwise
128                                         if from and not exported.include? from
129                                                 next
130                                         end
131
132                                         branch = rev.branch || 'master'
133                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
134                                         date = "#{rev.date.tv_sec} +0000"
135                                         log = rev.log.to_s
136
137                                         puts "commit refs/heads/#{branch}"
138                                         puts "mark :#{RCS.commit key}"
139                                         puts "committer #{author} #{date}"
140                                         puts "data #{log.length}"
141                                         puts log unless log.empty?
142                                         puts "from :#{RCS.commit from}" if rev.branch_point
143                                         puts "M 644 :#{RCS.blob key} #{@fname}"
144
145                                         # TODO FIXME this *should* be safe, in
146                                         # that it should not unduly move
147                                         # branches back in time, but I'm not
148                                         # 100% sure ...
149                                         rev.branches.each do |sym|
150                                                 puts "reset refs/heads/#{sym}"
151                                                 puts "from :#{RCS.commit key}"
152                                         end
153                                         rev.symbols.each do |sym|
154                                                 puts "reset refs/tags/#{sym}"
155                                                 puts "from :#{RCS.commit key}"
156                                         end
157                                         if opts[:tag_each_rev]
158                                                 puts "reset refs/tags/#{key}"
159                                                 puts "from :#{RCS.commit key}"
160                                         end
161
162                                         exported.push key
163                                 end
164                                 exported.each { |k| @revision.delete(k) }
165                         end
166                 end
167         end
168
169         class Revision
170                 attr_accessor :rev, :author, :date, :state, :next
171                 attr_accessor :branches, :log, :text, :symbols
172                 attr_accessor :branch, :diff_base, :branch_point
173                 def initialize(rev)
174                         @rev = rev
175                         @author = nil
176                         @date = nil
177                         @state = nil
178                         @next = nil
179                         @branches = []
180                         @branch = nil
181                         @branch_point = nil
182                         @diff_base = nil
183                         @log = []
184                         @text = []
185                         @symbols = []
186                 end
187
188                 def date=(str)
189                         @date = Time.rcs(str)
190                 end
191
192                 def blob
193                         str = @text.join('')
194                         ret = "blob\nmark :#{RCS.blob @rev}\ndata #{str.length}\n#{str}\n"
195                         ret
196                 end
197         end
198
199         def RCS.parse(fname, rcsfile, opts={})
200                 rcs = RCS::File.new(fname)
201
202                 ::File.open(rcsfile, 'r') do |file|
203                         status = [:basic]
204                         rev = nil
205                         lines = []
206                         difflines = []
207                         file.each_line do |line|
208                                 case status.last
209                                 when :basic
210                                         command, args = line.split($;,2)
211                                         next if command.empty?
212
213                                         case command
214                                         when 'head'
215                                                 rcs.head = RCS.clean(args.chomp)
216                                         when 'symbols'
217                                                 status.push :symbols
218                                         when 'comment'
219                                                 rcs.comment = RCS.at_clean(args.chomp)
220                                         when /^[0-9.]+$/
221                                                 rev = command.dup
222                                                 if rcs.has_revision?(rev)
223                                                         status.push :revision_data
224                                                 else
225                                                         status.push :new_revision
226                                                 end
227                                         when 'desc'
228                                                 status.push :desc
229                                                 lines.clear
230                                                 status.push :read_lines
231                                         else
232                                                 STDERR.puts "Skipping unhandled command #{command.inspect}"
233                                         end
234                                 when :symbols
235                                         sym, rev = line.strip.split(':',2);
236                                         status.pop if rev.chomp!(';')
237                                         rcs.revision[rev].symbols << sym
238                                 when :desc
239                                         rcs.desc.replace lines.dup
240                                         status.pop
241                                 when :read_lines
242                                         # we sanitize lines as we read them
243
244                                         actual_line = line.dup
245
246                                         # the first line must begin with a @, which we strip
247                                         if lines.empty?
248                                                 ats = line.match(/^@+/)
249                                                 raise 'malformed line' unless ats
250                                                 actual_line.replace line.sub(/^@/,'')
251                                         end
252
253                                         # if the line ends with an ODD number of @, it's the
254                                         # last line -- we work on actual_line so that content
255                                         # such as @\n or @ work correctly (they would be
256                                         # encoded respectively as ['@@@\n','@\n'] and
257                                         # ['@@@@\n']
258                                         ats = actual_line.chomp.match(/@+$/)
259                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
260                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
261                                         end
262                                         lines << actual_line.gsub('@@','@')
263                                         if nomore
264                                                 status.pop
265                                                 redo
266                                         end
267                                 when :new_revision
268                                         case line.chomp
269                                         when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
270                                                 rcs.revision[rev].date = $1
271                                                 rcs.revision[rev].author = $2
272                                                 rcs.revision[rev].state = $3
273                                         when 'branches'
274                                                 status.push :branches
275                                         when 'branches;'
276                                                 next
277                                         when /^next\s+(\S+)?;$/
278                                                 nxt = rcs.revision[rev].next = $1
279                                                 next unless nxt
280                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
281                                                 rcs.revision[nxt].diff_base = rev
282                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
283                                         else
284                                                 status.pop
285                                         end
286                                 when :branches
287                                         candidate = line.split(';',2)
288                                         branch = candidate.first.strip
289                                         rcs.revision[rev].branches.push branch
290                                         raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
291                                         rcs.revision[branch].diff_base = rev
292                                         # we drop the last number from the branch name
293                                         rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
294                                         rcs.revision[branch].branch_point = rev
295                                         status.pop if candidate.length > 1
296                                 when :revision_data
297                                         case line.chomp
298                                         when 'log'
299                                                 status.push :log
300                                                 lines.clear
301                                                 status.push :read_lines
302                                         when 'text'
303                                                 if rev == rcs.head
304                                                         status.push :head
305                                                 else
306                                                         status.push :diff
307                                                 end
308                                                 lines.clear
309                                                 status.push :read_lines
310                                         else
311                                                 status.pop
312                                         end
313                                 when :log
314                                         rcs.revision[rev].log.replace lines.dup
315                                         status.pop
316                                 when :head
317                                         rcs.revision[rev].text.replace lines.dup
318                                         puts rcs.revision[rev].blob
319                                         status.pop
320                                 when :diff
321                                         difflines.replace lines.dup
322                                         difflines.pop if difflines.last.empty?
323                                         base = rcs.revision[rev].diff_base
324                                         unless rcs.revision[base].text
325                                                 pp rcs
326                                                 puts rev, base
327                                                 raise 'no diff base!'
328                                         end
329                                         # deep copy
330                                         buffer = []
331                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
332
333                                         adding = false
334                                         index = -1
335                                         count = -1
336
337                                         while l = difflines.shift
338                                                 if adding
339                                                         buffer[index] << l
340                                                         count -= 1
341                                                         adding = false unless count > 0
342                                                         next
343                                                 end
344
345                                                 l.chomp!
346                                                 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
347                                                 diff_cmd = $1.intern
348                                                 index = $2.to_i-1
349                                                 count = $3.to_i
350                                                 case diff_cmd
351                                                 when :d
352                                                         # we replace them with empty string so that 'a' commands
353                                                         # referring to the same line work properly
354                                                         while count > 0
355                                                                 buffer[index].clear
356                                                                 index += 1
357                                                                 count -= 1
358                                                         end
359                                                 when :a
360                                                         adding = true
361                                                 end
362                                         end
363
364                                         # turn the buffer into an array of lines, deleting the empty ones
365                                         buffer.delete_if { |l| l.empty? }
366                                         buffer.flatten!
367
368                                         rcs.revision[rev].text = buffer
369                                         puts rcs.revision[rev].blob
370                                         status.pop
371                                 else
372                                         STDERR.puts "Unknown status #{status.last}"
373                                         exit 1
374                                 end
375                         end
376                 end
377
378                 # clean up the symbols/branches: look for revisions that have
379                 # one or more symbols but no dates, and make them into
380                 # branches, pointing to the highest commit with that key
381                 branches = []
382                 keys = rcs.revision.keys
383                 rcs.revision.each do |key, rev|
384                         if rev.date.nil? and not rev.symbols.empty?
385                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
386                                 tr = rcs.revision[top]
387                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
388                                 tr.branches |= rev.symbols
389                                 branches << key
390                         end
391                 end
392                 branches.each { |k| rcs.revision.delete k }
393
394                 # export the commits
395                 rcs.export_commits(opts)
396         end
397 end
398
399 require 'getoptlong'
400
# Command-line switches understood by the script.
option_specs = [
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
]
opts = GetoptLong.new(*option_specs)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

# files named on the command line, in the order given
file_list = []
# options handed to RCS.parse
parse_options = { :authors => {} }

# Defaults come from the git configuration: every rcs.authorsfile entry is
# loaded, later files overriding earlier ones
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].update(load_authors_file(fn.chomp))
end

tag_setting = `git config --bool rcs.tageachrev`.chomp
parse_options[:tag_each_rev] = (tag_setting == 'true')

# Command-line options override the configuration; with RETURN_IN_ORDER the
# non-option arguments (the files) are reported with an empty option name
opts.each do |opt, arg|
        case opt
        when ''
                file_list << arg
        when '--help'
                usage
                exit
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                # explicitly turn tagging off, whatever the configuration says
                parse_options[:tag_each_rev] = false
        when '--rcs-suffixes'
                # TODO
        when '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].update(authors)
        end
end
454
455 require 'etc'
456
# login name of the user running the script
user = Etc.getlogin || ENV['USER']

# Look for an identity in other init files that may contain the information.
#
# Returns the stripped value of the first setting found, or nil when none of
# the files is readable and contains one. The value is returned rather than
# stored because the top-level variables of the script cannot be seen from
# inside a method.
def steal_username
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["'])?(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2]
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# Make sure the current user has an entry in the authors map. The name is
# taken from GIT_AUTHOR_NAME, git config user.name or the passwd entry, the
# email from GIT_AUTHOR_EMAIL or git config user.email; when either is
# missing we fall back on steal_username.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # plain assignments: strings coming from ENV must not be modified in
        # place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw
                rescue ArgumentError
                        # no passwd entry for this login name
                end
        end

        identity = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
                # we got both a name and email, fill the info
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other
        # sources
        identity ||= steal_username
        parse_options[:authors][user] = identity if identity
end
500
# Without files there is nothing to do: show the help and fail
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status of the script; becomes non-zero when a file cannot be found
status = 0

# Export every file given on the command line. An argument ending in ',v' is
# the RCS file itself; any other argument is a working file whose RCS file is
# looked for in the RCS subdirectory first, then next to the file.
file_list.each do |arg|
        if arg[-2,2] == SFX
                rcsfile = arg.dup
                filename = File.basename(arg, SFX)
                unless File.exist? rcsfile
                        not_found "RCS file #{arg}"
                        status |= 1
                        # there is nothing to parse, go on with the next file
                        next
                end
        else
                filename = File.basename(arg)
                path = File.dirname(arg)
                candidates = [
                        File.join(path, 'RCS', filename) + SFX,
                        File.join(path, filename) + SFX
                ]
                rcsfile = candidates.find { |candidate| File.exist? candidate }
                unless rcsfile
                        not_found "RCS file for #{filename} in #{path}"
                        status |= 1
                        next
                end
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status