make that change a little less ugly by using a procedure.
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--help` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
29 class NoBranchSupport < NotImplementedError ; end
30
# Backport of Integer#odd? (introduced in Ruby 1.8.7) for older
# interpreters; the method is only defined when integers lack it.
unless 2.respond_to?(:odd?)
        class Integer
                # true when dividing the receiver by two leaves a remainder of one
                def odd?
                        (self % 2) == 1
                end
        end
end
40
41 # Set standard output to binary mode: git fast-import doesn't like Windows
42 # line-endings, and this ensures that the line termination will be a simple 0x0a
43 # on Windows too (it expands to 0x0D 0x0A otherwise).
44 STDOUT.binmode
45
46 =begin
47 RCS fast-export version: set to `git` in the repository, but can be overridden
48 by packagers, e.g. based on the latest tag, git description, custom packager
49 patches or whatever.
50
51 When the version is set to `git`, we make a little effort to find more information
52 about which commit we are at.
53 =end
54
RFE_VERSION="git"

# Print the program name and version to standard error.
#
# When RFE_VERSION is "git", the directory holding this script (following
# one level of symlink) is inspected: if it contains a `.git` entry, the
# abbreviated hash, branch name and date of the current commit are
# appended, with a `*` after the branch name when `git diff` reports
# local changes. If gathering that information fails, " (no info)" is
# printed instead.
def version
        if RFE_VERSION == "git"
                nolinkfile = File.readlink(__FILE__) rescue __FILE__
                srcdir = File.expand_path(File.dirname(nolinkfile))
                # declared here so that the assignments inside the chdir
                # block are visible afterwards
                info = nil

                # Block form of Dir.chdir: the previous working directory is
                # restored on exit, so relative paths used by the rest of the
                # program keep resolving against the caller's directory.
                Dir.chdir(srcdir) do
                        # File.exist? instead of File.exists?, which was
                        # removed in Ruby 3.2
                        if File.exist? '.git'
                                begin
                                        git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                        hash=git_out.first.split.first
                                        branch=git_out.first.split('(').last.chomp(')')
                                        date=git_out.last.split.first
                                        # only the exit status matters here
                                        `git diff --no-ext-diff --quiet --exit-code`
                                        branch << "*" unless $?.success?
                                        info=" [#{branch}] #{hash} (#{date})"
                                rescue
                                        info=" (no info)"
                                end
                        end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
79
# Print the help text to standard error, flushing standard output first
# so that already-written output is not interleaved with it.
def usage
        $stdout.flush
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
131
# Write +msg+ to standard error; standard output is flushed first so
# the message appears after whatever has already been printed.
def warning(msg)
        $stdout.flush
        STDERR.puts(msg)
end
136
# Emit a warning that +arg+ could not be found.
def not_found(arg)
        warning("Could not find #{arg}")
end
140
# Load the authors file +fn+ and return a hash that maps usernames to
# author names & emails.
#
# Each mapping line has the form `username = Full Name <email>`; both
# sides are stripped of surrounding whitespace. Blank lines are ignored
# and lines without an `=` are reported and skipped, so one bad line no
# longer discards the rest of the file. A username defined more than
# once triggers a warning and the last definition wins.
#
# If the file cannot be read, a "Could not find" warning is emitted and
# the mappings collected so far (possibly none) are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                # no '=' on the line: there is nothing to map
                                if author.nil?
                                        warning "Skipping malformed authors line #{line.chomp.inspect}"
                                        next
                                end
                                uname.strip!
                                author.strip!
                                warning "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        hash
end
159
# Report a (recoverable) error on standard error: one line with the
# problem (+msg+) followed by an indented line with the +action+ taken.
def alert(msg, action)
        STDERR.puts "ERROR:\t#{msg}", "\t#{action}"
end
165
class Time
        # Build a UTC Time from an RCS date string, i.e. six dot-separated
        # fields (year.month.day.hour.minute.second).
        #
        # Raises ArgumentError unless exactly six fields are present.
        def self.rcs(string)
                parts = string.split('.')
                unless parts.length == 6
                        raise ArgumentError, "wrong number of fields for RCS date #{string}"
                end
                # in Ruby 1.9, '99' is interpreted as year 99, not year 1999,
                # so short years get the century prepended
                year = parts.first
                year = "19#{year}" if year.length < 3
                Time.utc(year, *parts[1..-1])
        end
end
177
178 module RCS
179         # strip an optional final ;
180         def RCS.clean(arg)
181                 arg.chomp(';')
182         end
183
184         # strip the first and last @, and de-double @@s
185         def RCS.sanitize(arg)
186                 case arg
187                 when Array
188                         ret = arg.dup
189                         raise 'malformed first line' unless ret.first[0,1] == '@'
190                         raise 'malformed last line' unless ret.last[-1,1] == '@'
191                         ret.first.sub!(/^@/,'')
192                         ret.last.sub!(/@$/,'')
193                         ret.map { |l| l.gsub('@@','@') }
194                 when String
195                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
196                 else
197                         raise
198                 end
199         end
200
201         # clean and sanitize
202         def RCS.at_clean(arg)
203                 RCS.sanitize RCS.clean(arg)
204         end
205
206         def RCS.mark(key)
207                 @@marks ||= {}
208                 if @@marks.key? key
209                         @@marks[key]
210                 else
211                         @@marks[key] = @@marks.length + 1
212                 end
213         end
214
215         def RCS.blob(file, rev)
216                 RCS.mark([file, rev])
217         end
218
219         def RCS.commit(commit)
220                 RCS.mark(commit)
221         end
222
223         class File
224                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
225                 def initialize(fname, executable)
226                         @fname = fname.dup
227                         @head = nil
228                         @comment = nil
229                         @desc = []
230                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
231                         @mode = executable ? '755' : '644'
232                 end
233
234                 def has_revision?(rev)
235                         @revision.has_key?(rev) and not @revision[rev].author.nil?
236                 end
237
238                 def export_commits(opts={})
239                         counter = 0
240                         exported = []
241                         until @revision.empty?
242                                 counter += 1
243
244                                 # a string sort is a very good candidate for
245                                 # export order, getting a miss only for
246                                 # multi-digit revision components
247                                 keys = @revision.keys.sort
248
249                                 warning "commit export loop ##{counter}"
250                                 warning "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
251                                 warning "\t#{keys.size} to export: #{keys.join(', ')}"
252
253                                 keys.each do |key|
254                                         rev = @revision[key]
255                                         # the parent commit is rev.next if we're on the
256                                         # master branch (rev.branch is nil) or
257                                         # rev.diff_base otherwise
258                                         from = rev.branch.nil? ? rev.next : rev.diff_base
259                                         # A commit can only be exported if it has no
260                                         # parent, or if the parent has been exported
261                                         # already. Skip this commit otherwise
262                                         if from and not exported.include? from
263                                                 next
264                                         end
265
266                                         branch = rev.branch || 'master'
267                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
268                                         date = "#{rev.date.tv_sec} +0000"
269                                         log = String.new
270                                         if opts[:log_filename]
271                                                 log << @fname << ": "
272                                         end
273                                         log << rev.log.join
274
275                                         puts "commit refs/heads/#{branch}"
276                                         puts "mark :#{RCS.commit key}"
277                                         puts "committer #{author} #{date}"
278                                         puts "data #{log.length}"
279                                         puts log unless log.empty?
280                                         puts "from :#{RCS.commit from}" if from
281                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
282
283                                         # TODO FIXME this *should* be safe, in
284                                         # that it should not unduly move
285                                         # branches back in time, but I'm not
286                                         # 100% sure ...
287                                         rev.branches.each do |sym|
288                                                 puts "reset refs/heads/#{sym}"
289                                                 puts "from :#{RCS.commit key}"
290                                         end
291                                         rev.symbols.each do |sym|
292                                                 puts "reset refs/tags/#{sym}"
293                                                 puts "from :#{RCS.commit key}"
294                                         end
295                                         if opts[:tag_each_rev]
296                                                 puts "reset refs/tags/#{key}"
297                                                 puts "from :#{RCS.commit key}"
298                                         end
299
300                                         exported.push key
301                                 end
302                                 exported.each { |k| @revision.delete(k) }
303                         end
304                 end
305         end
306
307         class Revision
308                 attr_accessor :rev, :author, :state, :next
309                 attr_accessor :branches, :log, :text, :symbols
310                 attr_accessor :branch, :diff_base, :branch_point
311                 attr_reader   :date
312                 def initialize(file, rev)
313                         @file = file
314                         @rev = rev
315                         @author = nil
316                         @date = nil
317                         @state = nil
318                         @next = nil
319                         @branches = Set.new
320                         @branch = nil
321                         @branch_point = nil
322                         @diff_base = nil
323                         @log = []
324                         @text = []
325                         @symbols = Set.new
326                 end
327
328                 def date=(str)
329                         @date = Time.rcs(str)
330                 end
331
332                 def blob
333                         str = @text.join('')
334                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
335                         ret
336                 end
337         end
338
339         def RCS.parse(fname, rcsfile)
340                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
341
342                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
343                         status = [:basic]
344                         rev = nil
345                         lines = []
346                         difflines = []
347                         file.each_line do |line|
348                                 case status.last
349                                 when :basic
350                                         command, args = line.split($;,2)
351                                         next if command.empty?
352
353                                         if command.chomp!(';')
354                                                 warning "Skipping empty command #{command.inspect}" if $DEBUG
355                                                 next
356                                         end
357
358                                         case command
359                                         when 'head'
360                                                 rcs.head = RCS.clean(args.chomp)
361                                         when 'symbols'
362                                                 status.push :symbols
363                                                 next if args.empty?
364                                                 line = args; redo
365                                         when 'comment'
366                                                 rcs.comment = RCS.at_clean(args.chomp)
367                                         when /^[0-9.]+$/
368                                                 rev = command.dup
369                                                 if rcs.has_revision?(rev)
370                                                         status.push :revision_data
371                                                 else
372                                                         status.push :new_revision
373                                                 end
374                                         when 'desc'
375                                                 status.push :desc
376                                                 lines.clear
377                                                 status.push :read_lines
378                                         when 'branch', 'access', 'locks', 'expand'
379                                                 warning "Skipping unhandled command #{command.inspect}" if $DEBUG
380                                                 status.push :skipping_lines
381                                                 next if args.empty?
382                                                 line = args; redo
383                                         else
384                                                 raise "Unknown command #{command.inspect}"
385                                         end
386                                 when :skipping_lines
387                                         status.pop if line.strip.chomp!(';')
388                                 when :symbols
389                                         # we can have multiple symbols per line
390                                         pairs = line.strip.split($;)
391                                         pairs.each do |pair|
392                                                 sym, rev = pair.strip.split(':',2);
393                                                 if rev
394                                                         status.pop if rev.chomp!(';')
395                                                         rcs.revision[rev].symbols << sym
396                                                 else
397                                                         status.pop
398                                                 end
399                                         end
400                                 when :desc
401                                         rcs.desc.replace lines.dup
402                                         status.pop
403                                 when :read_lines
404                                         # we sanitize lines as we read them
405
406                                         actual_line = line.dup
407
408                                         # the first line must begin with a @, which we strip
409                                         if lines.empty?
410                                                 ats = line.match(/^@+/)
411                                                 raise 'malformed line' unless ats
412                                                 actual_line.replace line.sub(/^@/,'')
413                                         end
414
415                                         # if the line ends with an ODD number of @, it's the
416                                         # last line -- we work on actual_line so that content
417                                         # such as @\n or @ work correctly (they would be
418                                         # encoded respectively as ['@@@\n','@\n'] and
419                                         # ['@@@@\n']
420                                         ats = actual_line.chomp.match(/@+$/)
421                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
422                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
423                                         end
424                                         lines << actual_line.gsub('@@','@')
425                                         if nomore
426                                                 status.pop
427                                                 redo
428                                         end
429                                 when :new_revision
430                                         case line.chomp
431                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
432                                                 rcs.revision[rev].date = $1
433                                                 rcs.revision[rev].author = $2
434                                                 rcs.revision[rev].state = $3
435                                         when /^branches\s*;/
436                                                 next
437                                         when /^branches(?:\s+|$)/
438                                                 status.push :branches
439                                                 if line.index(';')
440                                                         line = line.sub(/^branches\s+/,'')
441                                                         redo
442                                                 end
443                                         when /^next\s+(\S+)?;$/
444                                                 nxt = rcs.revision[rev].next = $1
445                                                 next unless nxt
446                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
447                                                 rcs.revision[nxt].diff_base = rev
448                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
449                                         else
450                                                 status.pop
451                                         end
452                                 when :branches
453                                         candidate = line.split(';',2)
454                                         candidate.first.strip.split.each do |branch|
455                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
456                                                 rcs.revision[branch].diff_base = rev
457                                                 # we drop the last number from the branch name
458                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
459                                                 rcs.revision[branch].branch_point = rev
460                                         end
461                                         status.pop if candidate.length > 1
462                                 when :revision_data
463                                         case line.chomp
464                                         when 'log'
465                                                 status.push :log
466                                                 lines.clear
467                                                 status.push :read_lines
468                                         when 'text'
469                                                 if rev == rcs.head
470                                                         status.push :head
471                                                 else
472                                                         status.push :diff
473                                                 end
474                                                 lines.clear
475                                                 status.push :read_lines
476                                         else
477                                                 status.pop
478                                         end
479                                 when :log
480                                         rcs.revision[rev].log.replace lines.dup
481                                         status.pop
482                                 when :head
483                                         rcs.revision[rev].text.replace lines.dup
484                                         puts rcs.revision[rev].blob
485                                         status.pop
486                                 when :diff
487                                         difflines.replace lines.dup
488                                         difflines.pop if difflines.last.empty?
489                                         if difflines.first.chomp.empty?
490                                                 alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
491                                                 difflines.shift
492                                         end unless difflines.empty?
493                                         base = rcs.revision[rev].diff_base
494                                         unless rcs.revision[base].text
495                                                 pp rcs
496                                                 puts rev, base
497                                                 raise 'no diff base!'
498                                         end
499                                         # deep copy
500                                         buffer = []
501                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
502
503                                         adding = false
504                                         index = nil
505                                         count = nil
506
507                                         while l = difflines.shift
508                                                 if adding
509                                                         raise 'negative index during insertion' if index < 0
510                                                         raise 'negative count during insertion' if count < 0
511                                                         adding << l
512                                                         count -= 1
513                                                         # collected all the lines, put the before
514                                                         unless count > 0
515                                                                 unless buffer[index]
516                                                                         buffer[index] = []
517                                                                 end
518                                                                 buffer[index].unshift(*adding)
519                                                                 adding = false
520                                                         end
521                                                         next
522                                                 end
523
524                                                 l.chomp!
525                                                 raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
526                                                 diff_cmd = $1.intern
527                                                 index = $2.to_i
528                                                 count = $3.to_i
529                                                 case diff_cmd
530                                                 when :d
531                                                         # for deletion, index 1 is the first index, so the Ruby
532                                                         # index is one less than the diff one
533                                                         index -= 1
534                                                         # we replace them with empty string so that 'a' commands
535                                                         # referring to the same line work properly
536                                                         while count > 0
537                                                                 buffer[index].clear
538                                                                 index += 1
539                                                                 count -= 1
540                                                         end
541                                                 when :a
542                                                         # addition will prepend the appropriate lines
543                                                         # to the given index, and in this case Ruby
544                                                         # and diff indices are the same
545                                                         adding = []
546                                                 end
547                                         end
548
549                                         # turn the buffer into an array of lines, deleting the empty ones
550                                         buffer.delete_if { |l| l.empty? }
551                                         buffer.flatten!
552
553                                         rcs.revision[rev].text = buffer
554                                         puts rcs.revision[rev].blob
555                                         status.pop
556                                 else
557                                         raise "Unknown status #{status.last}"
558                                 end
559                         end
560                 end
561
562                 # clean up the symbols/branches: look for revisions that have
563                 # one or more symbols but no dates, and make them into
564                 # branches, pointing to the highest commit with that key
565                 branches = []
566                 keys = rcs.revision.keys
567                 rcs.revision.each do |key, rev|
568                         if rev.date.nil? and not rev.symbols.empty?
569                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
570                                 tr = rcs.revision[top]
571                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
572                                 tr.branches |= rev.symbols
573                                 branches << key
574                         end
575                 end
576                 branches.each { |k| rcs.revision.delete k }
577
578                 return rcs
579         end
580
581         class Tree
582                 def initialize(commit)
583                         @commit = commit
584                         @files = Hash.new
585                 end
586
587                 def merge!(tree)
588                         testfiles = @files.dup
589                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
590                         # the next line is only reached if all the adds were
591                         # successful, so the merge is atomic
592                         @files.replace testfiles
593                 end
594
595                 def add(rcs, rev, file_list=@files)
596                         if file_list.key? rcs
597                                 prev = file_list[rcs]
598                                 if prev.log == rev.log
599                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
600                                 else
601                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
602                                 end
603                                 if prev.text != rev.text
604                                         raise str
605                                 else
606                                         @commit.warn_about str
607                                 end
608                         end
609                         file_list[rcs] = rev
610                 end
611
612                 def each &block
613                         @files.each &block
614                 end
615
616                 def to_a
617                         files = []
618                         @files.map do |rcs, rev|
619                                 if rev.state.downcase == "dead"
620                                         files << "D #{rcs.fname}"
621                                 else
622                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
623                                 end
624                         end
625                         files
626                 end
627
628                 def filenames
629                         @files.map { |rcs, rev| rcs.fname }
630                 end
631
632                 def to_s
633                         self.to_a.join("\n")
634                 end
635         end
636
637         class Commit
638                 attr_accessor :date, :log, :symbols, :author, :branch
639                 attr_accessor :tree
640                 attr_accessor :min_date, :max_date
641                 def initialize(rcs, rev)
642                         raise NoBranchSupport if rev.branch
643                         self.date = rev.date.dup
644                         self.min_date = self.max_date = self.date
645                         self.log = rev.log.dup
646                         self.symbols = rev.symbols.dup
647                         self.author = rev.author
648                         self.branch = rev.branch
649
650                         self.tree = Tree.new self
651                         self.tree.add rcs, rev
652                 end
653
654                 def to_a
655                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
656                 end
657
658                 def warn_about(str)
659                         warn str + " for commit on #{self.date}"
660                 end
661
662                 # Sort by date and then by number of symbols
663                 def <=>(other)
664                         ds = self.date <=> other.date
665                         if ds != 0
666                                 return ds
667                         else
668                                 return self.symbols.length <=> other.symbols.length
669                         end
670                 end
671
672                 def merge!(commit)
673                         self.tree.merge! commit.tree
674                         if commit.max_date > self.max_date
675                                 self.max_date = commit.max_date
676                         end
677                         if commit.min_date < self.min_date
678                                 self.min_date = commit.min_date
679                         end
680                         self.symbols.merge commit.symbols
681                 end
682
683                 def export(opts={})
684                         xbranch = self.branch || 'master'
685                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
686                         xlog = self.log.join
687                         numdate = self.date.tv_sec
688                         xdate = "#{numdate} +0000"
689                         key = numdate.to_s
690
691                         puts "commit refs/heads/#{xbranch}"
692                         puts "mark :#{RCS.commit key}"
693                         puts "committer #{xauthor} #{xdate}"
694                         puts "data #{xlog.length}"
695                         puts xlog unless xlog.empty?
696                         # TODO branching support for multi-file export
697                         # puts "from :#{RCS.commit from}" if self.branch_point
698                         puts self.tree.to_s
699
700                         # TODO branching support for multi-file export
701                         # rev.branches.each do |sym|
702                         #       puts "reset refs/heads/#{sym}"
703                         #       puts "from :#{RCS.commit key}"
704                         # end
705
706                         self.symbols.each do |sym|
707                                 puts "reset refs/tags/#{sym}"
708                                 puts "from :#{RCS.commit key}"
709                         end
710
711                 end
712         end
713 end
714
require 'getoptlong'

# Command-line options. Every switch handled by the option loop below has
# to be declared here, or GetoptLong rejects it as an invalid option.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags (in seconds)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
# With RETURN_IN_ORDER, non-option arguments are handed back as the argument
# of an option named ''.
opts.ordering = GetoptLong::RETURN_IN_ORDER
746
file_list = []
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Seed the options from the git configuration; the command-line switches
# processed further down can override any of them.

# rcs.authorsfile can hold several values: load every file listed
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge!(load_authors_file(fn.chomp))
end

# booleans that are off unless git reports an explicit 'true'
[
        [:tag_each_rev, 'rcs.tageachrev'],
        [:log_filename, 'rcs.logfilename'],
].each do |optkey, cfgkey|
        parse_options[optkey] = (`git config --bool #{cfgkey}`.chomp == 'true')
end

# integers that keep their built-in default when git prints nothing
[
        [:commit_fuzz, 'rcs.commitFuzz'],
        [:tag_fuzz, 'rcs.tagFuzz'],
].each do |optkey, cfgkey|
        configured = `git config --int #{cfgkey}`.chomp
        parse_options[optkey] = configured.to_i unless configured.empty?
end

# symbol checking is on unless git reports an explicit 'false'
parse_options[:symbol_check] = (`git config --bool rcs.symbolcheck`.chomp != 'false')
776
# switches whose only effect is to set one boolean entry of parse_options
toggles = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Apply the command line on top of the configured defaults
opts.each do |opt, arg|
        if toggles.key?(opt)
                setting, value = toggles[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when ''
                # not an option: something to export
                file_list << arg
        when '--authors-file'
                loaded = load_authors_file(arg)
                clashes = parse_options[:authors].keys & loaded.keys
                unless clashes.empty?
                        warning "Authors file #{arg} redefines #{clashes.join(', ')}"
                end
                parse_options[:authors].merge!(loaded)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when '--version'
                version
                exit
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
if parse_options[:commit_fuzz] > parse_options[:tag_fuzz]
        parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
end
822
require 'etc'

user = Etc.getlogin || ENV['USER']

# steal username/email data from other init files that may contain the
# information
#
# authors:: Hash of author identities; the entry for +user+ is set in place
#           from the first file that yields a match
# user::    the login name to store the identity under
#
# Both are passed in explicitly: a method body cannot see the local
# variables of the top-level script.
def steal_username(authors, user)
        [
                # the user's .hgrc file for a username field
                # (the quote group is written as (["']?) so that it always
                # takes part in the match: a back-reference to a group that
                # did not participate never matches, which would reject
                # unquoted values)
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        authors[user] = Regexp.last_match(idx).strip
                        break
                end
        end
end

# If the authors map has no entry for the current user, try to build one
# from the environment, the git configuration, the password database or,
# failing that, other tools' configuration files.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # plain assignments rather than String#replace: strings coming from
        # ENV may be frozen
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        # getpwnam raises ArgumentError for an unknown login
                        # and may return nil where it is not implemented
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw
                rescue ArgumentError
                        # no password entry: name stays empty
                end
        end

        if name.empty?
                # couldn't find a name, try to steal data from other sources
                steal_username(parse_options[:authors], user)
        else
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                if email.empty?
                        # couldn't find an email, try to steal data too
                        steal_username(parse_options[:authors], user)
                else
                        # we got both a name and email, fill the info
                        parse_options[:authors][user] = "#{name} <#{email}>"
                end
        end
end
867
if file_list.empty?
        usage
        exit 1
end

SFX = ',v'

status = 0

# Resolve every command-line argument to one or more RCS files and parse
# them. Arguments can be RCS files (ending in SFX), working files (whose RCS
# file is looked up in an RCS subdirectory and then beside the file), or
# directories (searched recursively for RCS files).
rcs = []
file_list.each do |arg|
        # File.ftype raises on a missing path; treat a missing argument as a
        # plain file so that it goes through the lookups below and ends up
        # reported by not_found instead
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-SFX.length, SFX.length] == SFX
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                # nothing to parse for this argument
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile.replace File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        # nothing to parse for this argument
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern, File::FNM_DOTMATCH).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # say which file was being parsed, then let
                                # the original exception propagate
                                warning "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                warning "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
928
# Export stage. A single RCS file is exported by the RCS object itself. With
# any other number of files, one commit is built per file revision, the
# commits are sorted by date, and commits that look like parts of the same
# change are coalesced before everything is exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        warning "Preparing commits"

        commits = []

        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                # revisions on branches are fatal unless the
                                # user asked to skip them
                                if parse_options[:skip_branches]
                                        warning "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        warning "Sorting by date"

        commits.sort!

        if $DEBUG
                warning "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                warning "#{commits.length} single-file commits"
        end

        warning "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from last to first, merging into each one the
        # mergeable commits that follow it. thisindex tracks the position of
        # the current commit; merged commits are only ever deleted from
        # positions after it, so the index stays valid.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files of the current commit, and files touched by the
                # following commits we decided not to merge with
                cfiles = Set.new c.tree.filenames
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        warning "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        warning "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                # merge the candidates in order, giving up on the rest as soon
                # as one of them cannot be merged
                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k sorts after c, so c.date - k.date is never
                                # positive; report the size of the gap instead,
                                # which is the fuzz below which these two
                                # commits would no longer be paired
                                fuzz = (k.date - c.date).abs
                                warning "Fuzzy commit coalescing failed: #{err}"
                                warning "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                warning "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                warning "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status