Suppress two more warnings
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Ruby gained Integer#odd? in 1.8.7; supply an equivalent definition
# when the running interpreter does not provide one
if !2.respond_to?(:odd?)
        Integer.class_eval do
                # true when dividing the receiver by two leaves a remainder of one
                def odd?
                        (self % 2) == 1
                end
        end
end
14
# Write the command synopsis, the supported command-line options and the
# corresponding git config keys to standard error.
def usage
        help_text = <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more file.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev

EOM
        STDERR.puts help_text
end
32
# Tell the user, on standard error, that +arg+ could not be located.
def not_found(arg)
        message = 'Could not find ' + arg.to_s
        STDERR.puts(message)
end
36
# Load an authors file and return a hash that maps usernames to author
# names & emails.
#
# Every mapping line has the form `username = Full Name <email>`: it is
# split on the first `=` and both halves are stripped of surrounding
# whitespace. A username that appears more than once keeps its last
# definition, and a notice is printed on standard error.
#
# Lines without a `=` carry no mapping. Blank ones are skipped silently and
# any other is reported on standard error; neither aborts the load, so the
# mappings that follow are still read.
#
# fn:: path of the authors file (passed through File.expand_path, so a
#      leading ~ is honoured)
#
# Returns the (possibly empty) hash. If the file cannot be opened or read,
# the failure is reported through not_found and whatever was collected so
# far is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                uname, author = line.split('=', 2)
                                if author.nil?
                                        # no '=' on this line: nothing to map
                                        unless line.strip.empty?
                                                STDERR.puts "Skipping malformed line #{line.chomp.inspect} in authors file #{fn}"
                                        end
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError
                # only genuine file access problems mean "not found";
                # programming errors are no longer masked
                not_found(fn)
        end
        return hash
end
55
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string:: six dot-separated numeric fields: year, month, day,
        #          hour, minute, second (e.g. "2009.01.02.03.04.05")
        #
        # RCS writes years from 1900 to 1999 with two digits only. Ruby
        # 1.9 and later take a year of '99' literally (year 99), so a
        # year with fewer than three digits is prefixed with '19'.
        #
        # Raises ArgumentError unless exactly six fields are present.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                fields[0] = "19#{fields[0]}" if fields[0].length < 3
                Time.utc(*fields)
        end
end
63
# Parser for RCS ,v files and exporter to the git fast-import stream
# format. Blobs and commits are written to standard output, progress and
# diagnostics to standard error.
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # arg is either a String (a single @-quoted value) or an Array of
        # lines whose first line starts with @ and whose last line ends
        # with @. A new object is returned; neither the argument nor the
        # strings it holds are modified.
        #
        # Raises RuntimeError for malformed arrays and for any other
        # argument type.
        def RCS.sanitize(arg)
                case arg
                when Array
                        # copy the lines too, so that the in-place edits
                        # below do not leak into the caller's strings
                        ret = arg.map { |l| l.dup }
                        raise 'malformed first line' unless ret.first and ret.first[0,1] == '@'
                        raise 'malformed last line' unless ret.last[-1,1] == '@'
                        ret.first.sub!(/^@/,'')
                        ret.last.sub!(/@$/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/^@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # fast-import mark used for the blob of revision arg: dots become
        # zeros and a fixed suffix tells blob marks from commit marks
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # fast-import mark used for the commit of revision arg
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # number of bytes in str, which is what the fast-import `data`
        # command expects; String#length counts characters on Ruby 1.9+.
        # Rubies without String#bytesize have byte-based strings, so
        # length is the right fallback there.
        def RCS.data_size(str)
                str.respond_to?(:bytesize) ? str.bytesize : str.length
        end

        # The history found in one RCS file: global properties plus a map
        # from revision number to Revision (created on first access).
        class File
                attr_accessor :head, :comment, :desc, :revision
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # a revision counts as known once its author has been read
                def has_revision?(rev)
                        @revision.has_key?(rev) and not @revision[rev].author.nil?
                end

                # Write a fast-import commit for every revision, parents
                # first, then drop the exported revisions from the map.
                #
                # opts[:authors]::      hash from RCS username to git
                #                       identity; unknown users become
                #                       "username <empty>"
                # opts[:tag_each_rev]:: when true, a lightweight tag named
                #                       after the revision is also written
                #
                # Raises RuntimeError when the remaining revisions can
                # never be exported because a parent is missing.
                def export_commits(opts={})
                        authors = opts[:authors] || {}
                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                already_done = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        if from and not exported.include? from
                                                next
                                        end

                                        branch = rev.branch || 'master'
                                        author = authors[rev.author] || "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is an array of lines; join it
                                        # explicitly, as Array#to_s is inspect
                                        # on Ruby 1.9 and later
                                        log = rev.log.join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        puts "data #{RCS.data_size(log)}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        # TODO FIXME this *should* be safe, in
                                        # that it should not unduly move
                                        # branches back in time, but I'm not
                                        # 100% sure ...
                                        rev.branches.each do |sym|
                                                puts "reset refs/heads/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing will never make
                                # progress: bail out instead of looping forever
                                if exported.length == already_done
                                        raise "cannot export #{keys.join(', ')}: parent revision missing"
                                end

                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # A single RCS revision: metadata, log, full text and its position
        # in the revision graph.
        class Revision
                attr_accessor :rev, :author, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                attr_reader   :date
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # the date is given as an RCS date string
                def date=(str)
                        @date = Time.rcs(str)
                end

                # fast-import blob command carrying the text of this
                # revision; the data length is counted in bytes
                def blob
                        str = @text.join('')
                        "blob\nmark :#{RCS.blob @rev}\ndata #{RCS.data_size(str)}\n#{str}\n"
                end
        end

        # Parse the RCS file rcsfile, writing a blob for each revision as
        # soon as its text is known and then the commits for fname.
        #
        # fname::   name under which the file appears in the commits
        # rcsfile:: path of the ,v file to read
        # opts::    passed on to RCS::File#export_commits
        #
        # The parser is a line-driven state machine; status is a stack
        # whose top selects how the current line is interpreted.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'r') do |file|
                        status = [:basic]
                        rev = nil
                        lines = []
                        difflines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        # a blank line splits into a single empty
                                        # command, which is skipped
                                        command, args = line.split($;,2)
                                        next if command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                # a revision number opens the metadata
                                                # the first time it is met, and the
                                                # log/text section the second time
                                                rev = command.dup
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        # one name:revision pair per line, the last
                                        # one closed by a ;
                                        sym, rev = line.strip.split(':',2);
                                        status.pop if rev.chomp!(';')
                                        rcs.revision[rev].symbols << sym
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                ats = line.match(/^@+/)
                                                raise 'malformed line' unless ats
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        if nomore = (ats && Regexp.last_match(0).length.odd?)
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                # hand the collected lines to the state
                                                # that asked for them by running the
                                                # block again on the same line
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
                                                rcs.revision[rev].date = $1
                                                rcs.revision[rev].author = $2
                                                rcs.revision[rev].state = $3
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # the head holds the full text, every
                                                # other revision a diff against its base
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        difflines.replace lines.dup
                                        difflines.pop if difflines.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        unless rcs.revision[base].text
                                                pp rcs
                                                puts rev, base
                                                raise 'no diff base!'
                                        end
                                        # deep copy, one single-line array per base
                                        # line so that insertions and deletions do
                                        # not shift the indices of later commands
                                        buffer = []
                                        rcs.revision[base].text.each { |l| buffer << [l.dup] }

                                        adding = false
                                        index = nil
                                        count = nil

                                        while l = difflines.shift
                                                if adding
                                                        raise 'negative index during insertion' if index < 0
                                                        raise 'negative count during insertion' if count < 0
                                                        adding << l
                                                        count -= 1
                                                        # collected all the lines, put the before
                                                        unless count > 0
                                                                # appending after the last line
                                                                # targets a slot that does not
                                                                # exist yet when the base text has
                                                                # no trailing newline: create it
                                                                (buffer[index] ||= []).unshift(*adding)
                                                                adding = false
                                                        end
                                                        next
                                                end

                                                l.chomp!
                                                raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
                                                diff_cmd = $1.intern
                                                index = $2.to_i
                                                count = $3.to_i
                                                case diff_cmd
                                                when :d
                                                        # for deletion, index 1 is the first index, so the Ruby
                                                        # index is one less than the diff one
                                                        index -= 1
                                                        # we replace them with empty string so that 'a' commands
                                                        # referring to the same line work properly
                                                        while count > 0
                                                                buffer[index].clear
                                                                index += 1
                                                                count -= 1
                                                        end
                                                when :a
                                                        # addition will prepend the appropriate lines
                                                        # to the given index, and in this case Ruby
                                                        # and diff indices are the same
                                                        adding = []
                                                end
                                        end

                                        # turn the buffer into an array of lines, deleting the empty ones
                                        buffer.delete_if { |l| l.nil? or l.empty? }
                                        buffer.flatten!

                                        rcs.revision[rev].text = buffer
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end

                # clean up the symbols/branches: look for revisions that have
                # one or more symbols but no dates, and make them into
                # branches, pointing to the highest commit with that key
                branches = []
                keys = rcs.revision.keys
                rcs.revision.each do |key, rev|
                        if rev.date.nil? and not rev.symbols.empty?
                                # the key is matched literally: its dots must
                                # not act as regexp wildcards
                                top = keys.select { |k| k.match(/^#{Regexp.escape(key)}\./) }.sort.last
                                raise "unhandled complex branch structure met: #{rev.inspect} has no revisions" if top.nil?
                                tr = rcs.revision[top]
                                raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
                                tr.branches |= rev.symbols
                                branches << key
                        end
                end
                branches.each { |k| rcs.revision.delete k }

                # export the commits
                rcs.export_commits(opts)
        end
end
421
422 require 'getoptlong'
423
# Command-line options understood by the script
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport; may be given
        # more than once
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # whether to tag each revision
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# Options are read in the order given, but they apply to every file on the
# command line.
# TODO maybe they should only apply to the files that follow them, unless
# there is only one file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = { :authors => {} }

# Defaults come from the git configuration: every configured authors file
# is loaded, later ones overriding earlier ones
configured_authors = `git config --get-all rcs.authorsfile`
configured_authors.each_line do |path|
        parse_options[:authors].merge!(load_authors_file(path.chomp))
end

tag_setting = `git config --bool rcs.tageachrev`.chomp
parse_options[:tag_each_rev] = (tag_setting == 'true')

# Command-line options override the configuration; with RETURN_IN_ORDER
# plain arguments (the files to export) arrive with an empty option name
opts.each do |opt, arg|
        if opt == '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                unless redef.empty?
                        STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}"
                end
                parse_options[:authors].merge!(authors)
        elsif opt == '--rcs-suffixes'
                # TODO
        elsif opt == '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        elsif opt == '--no-tag-each-rev'
                # explicitly turn off tagging, whatever the configuration said
                parse_options[:tag_each_rev] = false
        elsif opt == ''
                file_list << arg
        elsif opt == '--help'
                usage
                exit
        end
end
477
require 'etc'

user = Etc.getlogin || ENV['USER']

# Look for username/email data in other init files that may contain the
# information.
#
# The files are tried in order and the search stops at the first one that
# is readable and matches. The value may be wrapped in single or double
# quotes; the quote is captured as a possibly empty group so that unquoted
# values match too.
#
# Returns the stripped value found, or nil when no file provides one. The
# caller decides where to store it: a method body cannot see the
# script-level local variables.
def steal_username
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2]
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# When the authors files do not cover the user running the script, work out
# an identity for them from the environment, the git configuration, the
# password database or, as a last resort, other tools' init files.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # ENV strings may be frozen, so they are never modified in place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw and pw.respond_to?(:gecos)
                rescue ArgumentError
                        # no password database entry for this user
                end
        end

        identity = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
                # we got both a name and email, fill the info
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other
        # sources
        identity ||= steal_username

        parse_options[:authors][user] = identity if identity
end
523
# Nothing to export: show the help text and fail
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status: becomes 1 as soon as one RCS file cannot be found
status = 0

# Each argument is either an RCS file (name ending in ,v) or a working
# file, whose RCS file is looked for first in the RCS subdirectory and then
# next to the working file itself.
file_list.each do |arg|
        rcsfile = nil
        if arg[-2,2] == SFX
                filename = File.basename(arg, SFX)
                if File.exist? arg
                        rcsfile = arg.dup
                else
                        not_found "RCS file #{arg}"
                end
        else
                filename = File.basename(arg)
                path = File.dirname(arg)
                candidates = [
                        File.join(path, 'RCS', filename) + SFX,
                        File.join(path, filename) + SFX
                ]
                rcsfile = candidates.find { |candidate| File.exist? candidate }
                not_found "RCS file for #{filename} in #{path}" unless rcsfile
        end

        # a missing RCS file was reported above: remember the failure and
        # go on with the other files instead of trying to parse it
        if rcsfile.nil?
                status |= 1
                next
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status