flush stdout before stderr writes, avoiding confusion when debugging.
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--help` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
# Raised when a branched RCS history is met in a context that cannot
# export branches (multi-file export).
class NoBranchSupport < NotImplementedError
end
30
# Compatibility shim: Integer#odd? only exists from Ruby 1.8.7 onwards,
# so provide it when the running interpreter lacks it.
unless 2.respond_to?(:odd?)
        class Integer
                # true when the receiver is not a multiple of two
                def odd?
                        (self % 2) != 0
                end
        end
end
40
# Put standard output in binary mode. git fast-import does not accept
# Windows line endings; in binary mode a newline is always written as a
# single 0x0A, even on Windows (where it would otherwise be expanded to
# 0x0D 0x0A).
STDOUT.binmode
45
=begin
RCS fast-export version: set to `git` in the repository, but can be overridden
by packagers, e.g. based on the latest tag, git description, custom packager
patches or whatever.

When the version is set to `git`, we make a little effort to find more information
about which commit we are at.
=end

RFE_VERSION="git"

# Print the program name and version on standard error.
#
# For a `git` version the working directory is changed to the directory
# holding the script (following one level of symlink) and, if that is a git
# checkout, the branch, abbreviated hash and date of the current commit are
# appended; a `*` after the branch name marks uncommitted changes. Any failure
# while querying git is reported as "(no info)".
def version
        # keep stdout and stderr output in order, like the other reporters do
        $stdout.flush
        if RFE_VERSION == "git"
                nolinkfile = File.readlink(__FILE__) rescue __FILE__
                Dir.chdir File.expand_path File.dirname nolinkfile

                info = nil
                # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
                if File.exist? '.git'
                        begin
                                git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                hash = git_out.first.split.first
                                branch = git_out.first.split('(').last.chomp(')')
                                date = git_out.last.split.first
                                # only the exit status matters here: non-zero means a dirty tree
                                `git diff --no-ext-diff --quiet --exit-code`
                                branch << "*" unless $?.success?
                                info = " [#{branch}] #{hash} (#{date})"
                        rescue
                                info = " (no info)"
                        end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
79
# Print the command-line help on standard error, flushing standard output
# first so that the two streams do not interleave confusingly.
def usage
        help_text = <<HELP
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --rcs-symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

HELP
        $stdout.flush
        STDERR.puts(help_text)
end
131
# Report on standard error that +arg+ could not be found. Standard output
# is flushed first so the message appears after anything already printed.
def not_found(arg)
        $stdout.flush
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
136
# Load an authors file and return a Hash mapping usernames to author
# names & emails.
#
# Each meaningful line has the form `username = Full Name <email>`; the text
# before the first `=` is the key, the rest is the value, both stripped of
# surrounding whitespace. Blank lines are skipped silently and lines without
# an `=` are skipped with a warning, so one bad line no longer discards the
# rest of the file. A username defined more than once keeps its last
# definition (with a warning).
#
# fn:: path of the authors file (expanded with File.expand_path)
#
# Returns the (possibly empty) Hash. If the file cannot be read, the problem
# is reported through not_found and whatever was loaded so far is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?

                                uname, author = line.split('=', 2)
                                if author.nil?
                                        $stdout.flush
                                        STDERR.puts "Ignoring malformed authors line #{io.lineno} in #{fn}: #{line.strip.inspect}"
                                        next
                                end

                                uname = uname.strip
                                author = author.strip
                                if hash.has_key? uname
                                        # flush only when we are about to write to stderr
                                        $stdout.flush
                                        STDERR.puts "Username #{uname} redefined to #{author}"
                                end
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
156
# Display a message about a (recoverable) error on standard error.
#
# msg::    description of the problem, printed after an "ERROR:" tag
# action:: what is being done about it, printed on a second, indented line
#
# Standard output is flushed first, as the other stderr reporters in this
# file do, so the alert is not printed ahead of output produced before it.
def alert(msg, action)
        $stdout.flush
        STDERR.puts "ERROR:\t#{msg}"
        STDERR.puts "\t#{action}"
end
162
class Time
        # Build a UTC Time from an RCS date string, which consists of six
        # dot-separated fields (year, month, day, hour, minute, second).
        # Years written with fewer than three digits are taken to be in
        # the 1900s.
        #
        # Raises ArgumentError unless the string has exactly six fields.
        def self.rcs(string)
                parts = string.split('.')
                unless parts.length == 6
                        raise ArgumentError, "wrong number of fields for RCS date #{string}"
                end
                # in Ruby 1.9, '99' is interpreted as year 99, not year 1999
                year = parts.first
                parts[0] = '19' + year if year.length < 3
                Time.utc(*parts)
        end
end
174
175 module RCS
# Return a copy of +arg+ without its trailing ';', if it has one.
def RCS.clean(arg)
        terminator = ';'
        arg.chomp(terminator)
end
180
# Un-quote an RCS @-delimited value: strip the first and last @, and
# de-double @@s.
#
# arg:: either a String holding the whole value, or an Array of lines
#       whose first line starts with @ and whose last line ends with @
#
# Returns a new String (for a String) or a new Array of new Strings (for an
# Array); the argument and its elements are left untouched.
#
# Raises RuntimeError if an Array does not start/end with the @ delimiter
# or if +arg+ is neither a String nor an Array.
def RCS.sanitize(arg)
        case arg
        when Array
                raise 'malformed first line' if arg.empty? or arg.first[0,1] != '@'
                raise 'malformed last line' unless arg.last[-1,1] == '@'
                # copy every line so the in-place edits below cannot leak
                # into the caller's strings
                ret = arg.map { |l| l.dup }
                # \A and \z anchor to the string itself rather than to
                # line boundaries
                ret.first.sub!(/\A@/,'')
                ret.last.sub!(/@\z/,'')
                ret.map { |l| l.gsub('@@','@') }
        when String
                arg.chomp('@').sub(/\A@/,'').gsub('@@','@')
        else
                raise "cannot sanitize #{arg.class}"
        end
end
197
# Drop an optional trailing ';' from +arg+, then un-quote the
# @-delimited value that remains.
def RCS.at_clean(arg)
        cleaned = RCS.clean(arg)
        RCS.sanitize(cleaned)
end
202
# Return the numeric mark associated with +key+. The first time a key is
# seen it is given the next number in sequence (starting from 1); later
# calls with an equal key return the same number.
def RCS.mark(key)
        @@marks ||= {}
        @@marks.fetch(key) do
                @@marks[key] = @@marks.length + 1
        end
end
211
# Mark number of the blob holding revision +rev+ of +file+; the
# [file, rev] pair is the key handed to RCS.mark.
def RCS.blob(file, rev)
        blob_key = [file, rev]
        RCS.mark(blob_key)
end
215
# Mark number of a commit; the commit object (or key) itself is what
# gets handed to RCS.mark.
def RCS.commit(commit)
        commit_key = commit
        RCS.mark(commit_key)
end
219
# In-memory model of one RCS-tracked file: its administrative data (head,
# comment, description), the file mode to export it with, and its
# revisions indexed by revision number.
class File
        attr_accessor :head, :comment, :desc, :revision, :fname, :mode

        # fname::      name under which the file is exported
        # executable:: truthy to export the file with mode 755 instead of 644
        def initialize(fname, executable)
                @fname = fname.dup
                @head = nil
                @comment = nil
                @desc = []
                # revisions spring into existence the first time they are
                # looked up
                @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
                @mode = executable ? '755' : '644'
        end

        # True if +rev+ is known and its author has already been set.
        def has_revision?(rev)
                @revision.has_key?(rev) and not @revision[rev].author.nil?
        end

        # Write the fast-import commit stream for this file on standard
        # output (progress goes to standard error), one commit per revision.
        # Every exported revision is removed from #revision.
        #
        # opts:: option hash; :authors (username => identity, required),
        #        :log_filename (prepend the file name to each log) and
        #        :tag_each_rev (tag every revision with its number) are read
        #
        # Raises RuntimeError if a full pass over the pending revisions
        # exports none of them, which would otherwise loop forever.
        def export_commits(opts={})
                counter = 0
                exported = []
                until @revision.empty?
                        counter += 1

                        # a string sort is a very good candidate for
                        # export order, getting a miss only for
                        # multi-digit revision components
                        keys = @revision.keys.sort

                        $stdout.flush
                        STDERR.puts "commit export loop ##{counter}"
                        STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                        STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                        exported_before = exported.length

                        keys.each do |key|
                                rev = @revision[key]
                                # the parent commit is rev.next if we're on the
                                # master branch (rev.branch is nil) or
                                # rev.diff_base otherwise
                                from = rev.branch.nil? ? rev.next : rev.diff_base
                                # A commit can only be exported if it has no
                                # parent, or if the parent has been exported
                                # already. Skip this commit otherwise
                                if from and not exported.include? from
                                        next
                                end

                                branch = rev.branch || 'master'
                                author = opts[:authors][rev.author] || "#{rev.author} <empty>"
                                date = "#{rev.date.tv_sec} +0000"
                                # build the log as raw bytes: the size sent to
                                # fast-import is a byte count, and mixing a
                                # non-ASCII file name with binary log text
                                # would otherwise raise an encoding error
                                log = String.new
                                if opts[:log_filename]
                                        log << @fname.dup.force_encoding(Encoding::BINARY) << ": "
                                end
                                log << rev.log.join.force_encoding(Encoding::BINARY)

                                puts "commit refs/heads/#{branch}"
                                puts "mark :#{RCS.commit key}"
                                puts "committer #{author} #{date}"
                                puts "data #{log.bytesize}"
                                puts log unless log.empty?
                                puts "from :#{RCS.commit from}" if from
                                puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"

                                # TODO FIXME this *should* be safe, in
                                # that it should not unduly move
                                # branches back in time, but I'm not
                                # 100% sure ...
                                rev.branches.each do |sym|
                                        puts "reset refs/heads/#{sym}"
                                        puts "from :#{RCS.commit key}"
                                end
                                rev.symbols.each do |sym|
                                        puts "reset refs/tags/#{sym}"
                                        puts "from :#{RCS.commit key}"
                                end
                                if opts[:tag_each_rev]
                                        puts "reset refs/tags/#{key}"
                                        puts "from :#{RCS.commit key}"
                                end

                                exported.push key
                        end

                        # a pass that exports nothing would repeat identically
                        # forever: the remaining revisions wait on parents
                        # that are never going to be exported
                        if exported.length == exported_before
                                raise "cannot export #{@fname} revisions #{keys.join(', ')}: their parent revisions were never exported"
                        end

                        exported.each { |k| @revision.delete(k) }
                end
        end
end
304
# One revision of an RCS file: its metadata (author, date, state), its
# links to other revisions (next, diff_base, branch, branch_point), the
# symbols and branch names attached to it, and its log and text as arrays
# of lines.
class Revision
        attr_accessor :rev, :author, :state, :next
        attr_accessor :branches, :log, :text, :symbols
        attr_accessor :branch, :diff_base, :branch_point
        attr_reader   :date

        # file:: the owning file object (its fname is used to key the blob mark)
        # rev::  the revision number
        def initialize(file, rev)
                @file = file
                @rev = rev
                @author = nil
                @date = nil
                @state = nil
                @next = nil
                @branches = Set.new
                @branch = nil
                @branch_point = nil
                @diff_base = nil
                @log = []
                @text = []
                @symbols = Set.new
        end

        # Set the date from an RCS date string (parsed by Time.rcs).
        def date=(str)
                @date = Time.rcs(str)
        end

        # Return the fast-import `blob` command carrying this revision's text.
        # The size after `data` is a byte count, so it stays correct even
        # if the text is not held in a single-byte encoding.
        def blob
                str = @text.join('')
                "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.bytesize}\n#{str}\n"
        end
end
336
337         def RCS.parse(fname, rcsfile)
                    # Parse the RCS file at +rcsfile+ and return it as an RCS::File
                    # named +fname+ (marked executable if +rcsfile+ is).
                    #
                    # The file is read line by line through a stack of parser states
                    # (+status+, whose last element is the current state). As soon as
                    # the text of a revision has been reconstructed, its fast-import
                    # blob is printed on standard output.
                    #
                    # Raises RuntimeError on unknown commands, malformed @-strings or
                    # edit scripts, and branch structures it cannot handle.
338                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
339
                    # the file is opened with a binary (ASCII-8BIT) external encoding
340                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
                            # stack of parser states, current state last
341                         status = [:basic]
                            # number of the revision currently being parsed
342                         rev = nil
                            # lines of the @-quoted string collected by :read_lines
343                         lines = []
                            # working copy of the edit script of a delta
344                         difflines = []
345                         file.each_line do |line|
346                                 case status.last
347                                 when :basic
                                            # the first word of the line selects what to do
348                                         command, args = line.split($;,2)
349                                         next if command.empty?
350
351                                         if command.chomp!(';')
352                                                 $stdout.flush
353                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
354                                                 next
355                                         end
356
357                                         case command
358                                         when 'head'
359                                                 rcs.head = RCS.clean(args.chomp)
360                                         when 'symbols'
361                                                 status.push :symbols
362                                                 next if args.empty?
                                                    # reprocess the rest of this line in the
                                                    # state just pushed (redo does not fetch
                                                    # a new line)
363                                                 line = args; redo
364                                         when 'comment'
365                                                 rcs.comment = RCS.at_clean(args.chomp)
366                                         when /^[0-9.]+$/
                                                    # a bare revision number: if its author is
                                                    # already known we are at its log/text
                                                    # section, otherwise at its header
367                                                 rev = command.dup
368                                                 if rcs.has_revision?(rev)
369                                                         status.push :revision_data
370                                                 else
371                                                         status.push :new_revision
372                                                 end
373                                         when 'desc'
                                                    # :read_lines collects the string first;
                                                    # when it pops, :desc stores the result
374                                                 status.push :desc
375                                                 lines.clear
376                                                 status.push :read_lines
377                                         when 'branch', 'access', 'locks', 'expand'
378                                                 $stdout.flush
379                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
380                                                 status.push :skipping_lines
381                                                 next if args.empty?
382                                                 line = args; redo
383                                         else
384                                                 raise "Unknown command #{command.inspect}"
385                                         end
386                                 when :skipping_lines
                                            # keep skipping until a line ends with ';'
387                                         status.pop if line.strip.chomp!(';')
388                                 when :symbols
389                                         # we can have multiple symbols per line
390                                         pairs = line.strip.split($;)
391                                         pairs.each do |pair|
                                                    # NOTE(review): rev assigned here is the
                                                    # same local as the parser's current
                                                    # revision, not a block-local variable
392                                                 sym, rev = pair.strip.split(':',2);
393                                                 if rev
                                                            # a trailing ';' ends the symbols list
394                                                         status.pop if rev.chomp!(';')
395                                                         rcs.revision[rev].symbols << sym
396                                                 else
397                                                         status.pop
398                                                 end
399                                         end
400                                 when :desc
                                            # entered through the redo at the end of
                                            # :read_lines; the current line is not looked at
401                                         rcs.desc.replace lines.dup
402                                         status.pop
403                                 when :read_lines
404                                         # we sanitize lines as we read them
405
406                                         actual_line = line.dup
407
408                                         # the first line must begin with a @, which we strip
409                                         if lines.empty?
410                                                 ats = line.match(/^@+/)
411                                                 raise 'malformed line' unless ats
412                                                 actual_line.replace line.sub(/^@/,'')
413                                         end
414
415                                         # if the line ends with an ODD number of @, it's the
416                                         # last line -- we work on actual_line so that content
417                                         # such as @\n or @ work correctly (they would be
418                                         # encoded respectively as ['@@@\n','@\n'] and
419                                         # ['@@@@\n']
420                                         ats = actual_line.chomp.match(/@+$/)
421                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
422                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
423                                         end
424                                         lines << actual_line.gsub('@@','@')
425                                         if nomore
                                                    # string complete: hand the same line over to
                                                    # the state that asked for the string
426                                                 status.pop
427                                                 redo
428                                         end
429                                 when :new_revision
                                            # header of a revision: date/author/state,
                                            # branches and next
430                                         case line.chomp
431                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
432                                                 rcs.revision[rev].date = $1
433                                                 rcs.revision[rev].author = $2
434                                                 rcs.revision[rev].state = $3
435                                         when /^branches\s*;/
                                                    # empty branch list
436                                                 next
437                                         when /^branches(?:\s+|$)/
438                                                 status.push :branches
439                                                 if line.index(';')
                                                            # the list ends on this very line:
                                                            # process its remainder right away
440                                                         line = line.sub(/^branches\s+/,'')
441                                                         redo
442                                                 end
443                                         when /^next\s+(\S+)?;$/
444                                                 nxt = rcs.revision[rev].next = $1
445                                                 next unless nxt
                                                    # the revision named by next gets this one
                                                    # as its diff base and inherits its branch
446                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
447                                                 rcs.revision[nxt].diff_base = rev
448                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
449                                         else
                                                    # anything else ends the header
450                                                 status.pop
451                                         end
452                                 when :branches
                                            # everything before the first ';' is a list of
                                            # revisions branching off the current one
453                                         candidate = line.split(';',2)
454                                         candidate.first.strip.split.each do |branch|
455                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
456                                                 rcs.revision[branch].diff_base = rev
457                                                 # we drop the last number from the branch name
458                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
459                                                 rcs.revision[branch].branch_point = rev
460                                         end
                                            # a ';' was present, so the list is finished
461                                         status.pop if candidate.length > 1
462                                 when :revision_data
                                            # log and text of a revision, each followed by
                                            # an @-quoted string
463                                         case line.chomp
464                                         when 'log'
465                                                 status.push :log
466                                                 lines.clear
467                                                 status.push :read_lines
468                                         when 'text'
                                                    # the head revision is treated as full text,
                                                    # every other revision as an edit script
469                                                 if rev == rcs.head
470                                                         status.push :head
471                                                 else
472                                                         status.push :diff
473                                                 end
474                                                 lines.clear
475                                                 status.push :read_lines
476                                         else
477                                                 status.pop
478                                         end
479                                 when :log
480                                         rcs.revision[rev].log.replace lines.dup
481                                         status.pop
482                                 when :head
                                            # store the text as read and emit its blob
483                                         rcs.revision[rev].text.replace lines.dup
484                                         puts rcs.revision[rev].blob
485                                         status.pop
486                                 when :diff
                                            # rebuild this revision's text by applying the
                                            # edit script just read to the text of its
                                            # diff base
487                                         difflines.replace lines.dup
488                                         difflines.pop if difflines.last.empty?
489                                         if difflines.first.chomp.empty?
490                                                 alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
491                                                 difflines.shift
492                                         end unless difflines.empty?
493                                         base = rcs.revision[rev].diff_base
494                                         unless rcs.revision[base].text
495                                                 pp rcs
496                                                 puts rev, base
497                                                 raise 'no diff base!'
498                                         end
499                                         # deep copy
                                            # each base line is wrapped in its own array, so
                                            # that added lines can be attached to a position
500                                         buffer = []
501                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
502
                                            # adding is false, or the array collecting the
                                            # lines of the 'a' command being processed
503                                         adding = false
504                                         index = nil
505                                         count = nil
506
507                                         while l = difflines.shift
508                                                 if adding
509                                                         raise 'negative index during insertion' if index < 0
510                                                         raise 'negative count during insertion' if count < 0
511                                                         adding << l
512                                                         count -= 1
513                                                         # collected all the lines, put the before
514                                                         unless count > 0
515                                                                 unless buffer[index]
516                                                                         buffer[index] = []
517                                                                 end
518                                                                 buffer[index].unshift(*adding)
519                                                                 adding = false
520                                                         end
521                                                         next
522                                                 end
523
                                                    # not collecting added lines: this must be a
                                                    # command line, a or d followed by an index
                                                    # and a count
524                                                 l.chomp!
525                                                 raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
526                                                 diff_cmd = $1.intern
527                                                 index = $2.to_i
528                                                 count = $3.to_i
529                                                 case diff_cmd
530                                                 when :d
531                                                         # for deletion, index 1 is the first index, so the Ruby
532                                                         # index is one less than the diff one
533                                                         index -= 1
534                                                         # we replace them with empty string so that 'a' commands
535                                                         # referring to the same line work properly
536                                                         while count > 0
537                                                                 buffer[index].clear
538                                                                 index += 1
539                                                                 count -= 1
540                                                         end
541                                                 when :a
542                                                         # addition will prepend the appropriate lines
543                                                         # to the given index, and in this case Ruby
544                                                         # and diff indices are the same
545                                                         adding = []
546                                                 end
547                                         end
548
549                                         # turn the buffer into an array of lines, deleting the empty ones
550                                         buffer.delete_if { |l| l.empty? }
551                                         buffer.flatten!
552
553                                         rcs.revision[rev].text = buffer
554                                         puts rcs.revision[rev].blob
555                                         status.pop
556                                 else
557                                         raise "Unknown status #{status.last}"
558                                 end
559                         end
560                 end
561
562                 # clean up the symbols/branches: look for revisions that have
563                 # one or more symbols but no dates, and make them into
564                 # branches, pointing to the highest commit with that key
565                 branches = []
566                 keys = rcs.revision.keys
567                 rcs.revision.each do |key, rev|
568                         if rev.date.nil? and not rev.symbols.empty?
                                    # last revision, in string sort order, whose number
                                    # starts with this key followed by a dot
569                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
570                                 tr = rcs.revision[top]
571                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
572                                 tr.branches |= rev.symbols
573                                 branches << key
574                         end
575                 end
                    # the dateless placeholder revisions are dropped
576                 branches.each { |k| rcs.revision.delete k }
577
578                 return rcs
579         end
580
# The set of files touched by a commit: maps each RCS file object to the
# revision of it that the commit carries.
class Tree
        # commit:: owner of the tree; it is sent warn_about when a file is
        #          re-added with unchanged text
        def initialize(commit)
                @commit = commit
                @files = {}
        end

        # Add every entry of +tree+ to this tree. The additions are staged
        # on a copy, so if any of them raises this tree is left as it was.
        def merge!(tree)
                staged = @files.dup
                tree.each do |rcs, rev|
                        add(rcs, rev, staged)
                end
                # only reached if all the adds were successful
                @files.replace(staged)
        end

        # Record revision +rev+ for file +rcs+ in +file_list+ (this tree's
        # own file map by default). Re-adding a file whose text differs
        # raises; re-adding one with the same text only warns.
        def add(rcs, rev, file_list=@files)
                if file_list.key?(rcs)
                        prev = file_list[rcs]
                        detail = if prev.log == rev.log
                                "(old: #{prev.rev}, new: #{rev.rev})"
                        else
                                "(old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
                        end
                        str = "re-adding existing file #{rcs.fname} #{detail}"
                        raise str if prev.text != rev.text
                        @commit.warn_about(str)
                end
                file_list[rcs] = rev
        end

        # Iterate over the (file, revision) pairs.
        def each(&block)
                @files.each(&block)
        end

        # One fast-import file command per entry: a deletion for revisions
        # in the "dead" state (case-insensitive), a modification otherwise.
        def to_a
                @files.map do |rcs, rev|
                        if rev.state.downcase == "dead"
                                "D #{rcs.fname}"
                        else
                                "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
                        end
                end
        end

        # Names of the files in the tree.
        def filenames
                @files.keys.map { |rcs| rcs.fname }
        end

        # The file commands of #to_a, one per line.
        def to_s
                to_a.join("\n")
        end
end
636
637         class Commit
638                 attr_accessor :date, :log, :symbols, :author, :branch
639                 attr_accessor :tree
640                 attr_accessor :min_date, :max_date
641                 def initialize(rcs, rev)
642                         raise NoBranchSupport if rev.branch
643                         self.date = rev.date.dup
644                         self.min_date = self.max_date = self.date
645                         self.log = rev.log.dup
646                         self.symbols = rev.symbols.dup
647                         self.author = rev.author
648                         self.branch = rev.branch
649
650                         self.tree = Tree.new self
651                         self.tree.add rcs, rev
652                 end
653
654                 def to_a
655                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
656                 end
657
658                 def warn_about(str)
659                         warn str + " for commit on #{self.date}"
660                 end
661
662                 # Sort by date and then by number of symbols
663                 def <=>(other)
664                         ds = self.date <=> other.date
665                         if ds != 0
666                                 return ds
667                         else
668                                 return self.symbols.length <=> other.symbols.length
669                         end
670                 end
671
672                 def merge!(commit)
673                         self.tree.merge! commit.tree
674                         if commit.max_date > self.max_date
675                                 self.max_date = commit.max_date
676                         end
677                         if commit.min_date < self.min_date
678                                 self.min_date = commit.min_date
679                         end
680                         self.symbols.merge commit.symbols
681                 end
682
683                 def export(opts={})
684                         xbranch = self.branch || 'master'
685                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
686                         xlog = self.log.join
687                         numdate = self.date.tv_sec
688                         xdate = "#{numdate} +0000"
689                         key = numdate.to_s
690
691                         puts "commit refs/heads/#{xbranch}"
692                         puts "mark :#{RCS.commit key}"
693                         puts "committer #{xauthor} #{xdate}"
694                         puts "data #{xlog.length}"
695                         puts xlog unless xlog.empty?
696                         # TODO branching support for multi-file export
697                         # puts "from :#{RCS.commit from}" if self.branch_point
698                         puts self.tree.to_s
699
700                         # TODO branching support for multi-file export
701                         # rev.branches.each do |sym|
702                         #       puts "reset refs/heads/#{sym}"
703                         #       puts "from :#{RCS.commit key}"
704                         # end
705
706                         self.symbols.each do |sym|
707                                 puts "reset refs/tags/#{sym}"
708                                 puts "from :#{RCS.commit key}"
709                         end
710
711                 end
712         end
713 end
714
require 'getoptlong'

# Command-line options understood by the script. Every option declared
# here has a matching branch in the option-processing loop.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness stored as the :tag_fuzz setting. The option loop
        # already handles this option, but it has to be declared here too,
        # or GetoptLong rejects it as unknown before the loop sees it.
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# With this ordering, non-option arguments are handed to the option loop
# too, under the empty option name.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
746
# Non-option command-line arguments, filled in by the option loop
file_list = []

# Built-in defaults, overridden first by the git configuration read here
# and then by the command line
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Ask git for one configuration value of the given type; the result is an
# empty string when git prints nothing
git_setting = lambda { |type, key| `git config #{type} #{key}`.chomp }

# Read config options

# rcs.authorsfile can be set more than once: load every file listed
`git config --get-all rcs.authorsfile`.each_line do |line|
        parse_options[:authors].merge!(load_authors_file(line.chomp))
end

# booleans that are off unless explicitly set to true
[
        [:tag_each_rev, 'rcs.tageachrev'],
        [:log_filename, 'rcs.logfilename'],
].each do |option, key|
        parse_options[option] = (git_setting.call('--bool', key) == 'true')
end

# integers that keep their built-in default when not configured
[
        [:commit_fuzz, 'rcs.commitFuzz'],
        [:tag_fuzz, 'rcs.tagFuzz'],
].each do |option, key|
        configured = git_setting.call('--int', key)
        parse_options[option] = configured.to_i unless configured.empty?
end

# the symbol check is on unless explicitly set to false
parse_options[:symbol_check] = (git_setting.call('--bool', 'rcs.symbolcheck') != 'false')
776
# Options that simply store a fixed value under a parse_options key
flag_settings = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Options whose argument is stored as an integer
integer_settings = {
        '--rcs-commit-fuzz' => :commit_fuzz,
        '--rcs-tag-fuzz'    => :tag_fuzz,
}

# Apply the command line on top of the defaults and the git configuration
opts.each do |opt, arg|
        if flag_settings.key?(opt)
                setting, value = flag_settings[opt]
                parse_options[setting] = value
        elsif integer_settings.key?(opt)
                parse_options[integer_settings[opt]] = arg.to_i
        elsif opt == '--authors-file'
                # later files win over earlier ones, but say so
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                $stdout.flush
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        elsif opt == ''
                # a non-option argument: a file or directory to export
                file_list << arg
        elsif opt == '--version'
                version
                exit
        elsif opt == '--help'
                usage
                exit
        end
        # '--rcs-suffixes' is accepted but does nothing yet (TODO)
end

# the tag fuzz is never smaller than the commit fuzz
parse_options[:tag_fuzz] = parse_options[:commit_fuzz] if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
823
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information: the username field of ~/.hgrc first, then the
# changelog_username setting of ~/.vimrc and ~/.gvimrc.
#
# Returns the first non-empty value found, stripped of surrounding
# whitespace and of one pair of matching quotes, or nil when nothing
# usable is found. The value is returned rather than stored because a
# method body cannot see the top-level local variables of the script
# (parse_options, user): storing it is the caller's job.
def steal_username
        [
                # The quote group is written (["']?) rather than (["'])? so
                # that it always takes part in the match: a back-reference
                # to a group that did not match fails, which made unquoted
                # values impossible to match.
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                begin
                        file = File.expand_path fn
                rescue ArgumentError
                        # no home directory to expand "~" against
                        next
                end
                next unless File.readable?(file)
                found = rx.match(File.read(file))
                next unless found
                value = found[idx].strip
                return value unless value.empty?
        end
        nil
end

# Make sure the user running the export has an identity, unless an
# authors file already provides one
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # Build new strings instead of modifying in place: values obtained
        # from ENV may be frozen
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                # getpwnam gives nil on systems without a passwd database
                # and raises ArgumentError for an unknown login
                pw = begin
                        Etc.getpwnam(user)
                rescue ArgumentError
                        nil
                end
                name = pw.gecos.to_s if pw
        end

        # an email is only looked for when a name was found
        email = ''
        unless name.empty?
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
        end

        identity = if name.empty? or email.empty?
                # couldn't find a name or an email, try to steal data from
                # other sources
                steal_username
        else
                # we got both a name and email, fill the info
                "#{name} <#{email}>"
        end
        parse_options[:authors][user] = identity if identity
end
868
if file_list.empty?
        usage
        exit 1
end

# Suffix of RCS files
SFX = ',v'

# Exit status of the script: set to 1 as soon as an argument cannot be
# handled
status = 0

# Parsed RCS files, one entry per file found through the arguments.
# An argument can be an RCS file (ending in ,v), a working file (its RCS
# file is then looked for in the RCS subdirectory first, then next to
# it), or a directory, searched recursively for RCS files.
rcs = []
file_list.each do |arg|
        # File.ftype raises for a path that does not exist, but naming a
        # working file that is not checked out is legitimate: treat a
        # missing path as a file and let the lookups below report it.
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                # nothing to parse for this argument
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        candidates = [
                                File.join(path, 'RCS', filename) + SFX,
                                File.join(path, filename) + SFX,
                        ]
                        rcsfile = candidates.find { |candidate| File.exist? candidate }
                        unless rcsfile
                                not_found "RCS file for #{filename} in #{path}"
                                status |= 1
                                # nothing to parse for this argument
                                next
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern, File::FNM_DOTMATCH).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # nothing is swallowed here: say which file
                                # was being parsed, then re-raise
                                $stdout.flush
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                $stdout.flush
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
931
if rcs.length == 1
        # a single file is exported through its own export_commits
        rcs.first.export_commits(parse_options)
else
        # Multi-file export: turn every revision of every file into a
        # single-file commit, sort the commits by date, coalesce the ones
        # that belong together and export the result.
        $stdout.flush
        STDERR.puts "Preparing commits"

        commits = []

        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                if parse_options[:skip_branches]
                                        $stdout.flush
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        $stdout.flush
        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                $stdout.flush
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                $stdout.flush
                STDERR.puts "#{commits.length} single-file commits"
        end

        $stdout.flush
        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first, merging into each
        # commit the suitable ones that follow it. Merged commits are
        # removed from the list; they always sit after the current one, so
        # thisindex keeps tracking the position of the current commit.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                cfiles = Set.new c.tree.filenames
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        kfiles = Set.new k.tree.filenames

                        skipthis = (c.log != k.log or c.author != k.author or c.branch != k.branch)

                        # The symbols are only looked at for candidates that
                        # would be merged otherwise: for the others, a
                        # message suggesting --no-symbol-check would be
                        # misleading, since they are not merged either way.
                        unless skipthis or c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        $stdout.flush
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        $stdout.flush
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        $stdout.flush
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in the date-sorted list, so
                                # c.date - k.date is never positive: report
                                # the distance between the two instead
                                gap = (k.date - c.date).abs
                                $stdout.flush
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                $stdout.flush
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                $stdout.flush
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status