Allow export of main branch for multi-file repos
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Add support for commitid for coalescing commits
8         * Further coalescing options? (e.g. small logfile differences)
9         * Proper branching support in multi-file export
10         * Optimize memory usage by discarding unneeded text
11 =end
12
13 require 'pp'
14 require 'set'
15
# Raised by RCS::Commit when it is handed a revision that lives on a
# branch, which the multi-file export path does not handle.
class NoBranchSupport < NotImplementedError
end
17
# Integer#odd? only exists from Ruby 1.8.7 onwards; provide an
# equivalent on interpreters that do not have it yet
unless 2.respond_to?(:odd?)
        Integer.class_eval do
                def odd?
                        (self % 2) == 1
                end
        end
end
27
# Print the command-line help text to standard error.
#
# The "Config options" table maps each git configuration key to the
# command-line option it stands in for, so the option names there must
# match the ones listed under "Options".
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
78
# Report on standard error that +arg+ could not be located.
def not_found(arg)
        STDERR.puts(format('Could not find %s', arg))
end
82
# Load a "username = Full Name <email>" mapping file.
#
# fn:: path of the authors file; it is expanded with File.expand_path
#
# Returns a hash that maps usernames to author names & emails. Blank
# lines are ignored and lines without a '=' are reported on standard
# error and skipped, so one stray line no longer aborts the whole load.
# A redefined username is reported and the later definition wins.
# If the file cannot be read, not_found is called and whatever was
# collected so far (possibly nothing) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                # no '=' on the line: there is nothing to map
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{line.chomp.inspect} in #{fn}"
                                        next
                                end
                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
101
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string:: six dot-separated numeric fields, in the order
        #          year.month.day.hour.minute.second
        #
        # A year written with one or two digits is taken as 19xx, which is
        # how RCS stores dates before 2000; handing such a year to Time.utc
        # unchanged would produce a date in the first century.
        # Fields are parsed as base-10 integers so that zero-padded values
        # such as "08" are never misread.
        #
        # Raises ArgumentError when the field count is not six or a field
        # is not a number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                year, *rest = fields.map { |f| Integer(f, 10) }
                year += 1900 if fields.first.length <= 2
                Time.utc(year, *rest)
        end
end
109
110 module RCS
111         # strip an optional final ;
112         def RCS.clean(arg)
113                 arg.chomp(';')
114         end
115
116         # strip the first and last @, and de-double @@s
117         def RCS.sanitize(arg)
118                 case arg
119                 when Array
120                         ret = arg.dup
121                         raise 'malformed first line' unless ret.first[0,1] == '@'
122                         raise 'malformed last line' unless ret.last[-1,1] == '@'
123                         ret.first.sub!(/^@/,'')
124                         ret.last.sub!(/@$/,'')
125                         ret.map { |l| l.gsub('@@','@') }
126                 when String
127                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
128                 else
129                         raise
130                 end
131         end
132
133         # clean and sanitize
134         def RCS.at_clean(arg)
135                 RCS.sanitize RCS.clean(arg)
136         end
137
138         def RCS.mark(key)
139                 @@marks ||= {}
140                 if @@marks.key? key
141                         @@marks[key]
142                 else
143                         @@marks[key] = @@marks.length + 1
144                 end
145         end
146
147         def RCS.blob(file, rev)
148                 RCS.mark([file, rev])
149         end
150
151         def RCS.commit(commit)
152                 RCS.mark(commit)
153         end
154
155         class File
156                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
157                 def initialize(fname, executable)
158                         @fname = fname.dup
159                         @head = nil
160                         @comment = nil
161                         @desc = []
162                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
163                         @mode = executable ? '755' : '644'
164                 end
165
166                 def has_revision?(rev)
167                         @revision.has_key?(rev) and not @revision[rev].author.nil?
168                 end
169
170                 def export_commits(opts={})
171                         counter = 0
172                         exported = []
173                         until @revision.empty?
174                                 counter += 1
175
176                                 # a string sort is a very good candidate for
177                                 # export order, getting a miss only for
178                                 # multi-digit revision components
179                                 keys = @revision.keys.sort
180
181                                 STDERR.puts "commit export loop ##{counter}"
182                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
183                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
184
185                                 keys.each do |key|
186                                         rev = @revision[key]
187                                         # the parent commit is rev.next if we're on the
188                                         # master branch (rev.branch is nil) or
189                                         # rev.diff_base otherwise
190                                         from = rev.branch.nil? ? rev.next : rev.diff_base
191                                         # A commit can only be exported if it has no
192                                         # parent, or if the parent has been exported
193                                         # already. Skip this commit otherwise
194                                         if from and not exported.include? from
195                                                 next
196                                         end
197
198                                         branch = rev.branch || 'master'
199                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
200                                         date = "#{rev.date.tv_sec} +0000"
201                                         log = String.new
202                                         if opts[:log_filename]
203                                                 log << @fname << ": "
204                                         end
205                                         log << rev.log.join
206
207                                         puts "commit refs/heads/#{branch}"
208                                         puts "mark :#{RCS.commit key}"
209                                         puts "committer #{author} #{date}"
210                                         puts "data #{log.length}"
211                                         puts log unless log.empty?
212                                         puts "from :#{RCS.commit from}" if rev.branch_point
213                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
214
215                                         # TODO FIXME this *should* be safe, in
216                                         # that it should not unduly move
217                                         # branches back in time, but I'm not
218                                         # 100% sure ...
219                                         rev.branches.each do |sym|
220                                                 puts "reset refs/heads/#{sym}"
221                                                 puts "from :#{RCS.commit key}"
222                                         end
223                                         rev.symbols.each do |sym|
224                                                 puts "reset refs/tags/#{sym}"
225                                                 puts "from :#{RCS.commit key}"
226                                         end
227                                         if opts[:tag_each_rev]
228                                                 puts "reset refs/tags/#{key}"
229                                                 puts "from :#{RCS.commit key}"
230                                         end
231
232                                         exported.push key
233                                 end
234                                 exported.each { |k| @revision.delete(k) }
235                         end
236                 end
237         end
238
239         class Revision
240                 attr_accessor :rev, :author, :state, :next
241                 attr_accessor :branches, :log, :text, :symbols
242                 attr_accessor :branch, :diff_base, :branch_point
243                 attr_reader   :date
244                 def initialize(file, rev)
245                         @file = file
246                         @rev = rev
247                         @author = nil
248                         @date = nil
249                         @state = nil
250                         @next = nil
251                         @branches = Set.new
252                         @branch = nil
253                         @branch_point = nil
254                         @diff_base = nil
255                         @log = []
256                         @text = []
257                         @symbols = Set.new
258                 end
259
260                 def date=(str)
261                         @date = Time.rcs(str)
262                 end
263
264                 def blob
265                         str = @text.join('')
266                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
267                         ret
268                 end
269         end
270
271         def RCS.parse(fname, rcsfile)
272                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
273
274                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
275                         status = [:basic]
276                         rev = nil
277                         lines = []
278                         difflines = []
279                         file.each_line do |line|
280                                 case status.last
281                                 when :basic
282                                         command, args = line.split($;,2)
283                                         next if command.empty?
284
285                                         if command.chomp!(';')
286                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
287                                                 next
288                                         end
289
290                                         case command
291                                         when 'head'
292                                                 rcs.head = RCS.clean(args.chomp)
293                                         when 'symbols'
294                                                 status.push :symbols
295                                                 next if args.empty?
296                                                 line = args; redo
297                                         when 'comment'
298                                                 rcs.comment = RCS.at_clean(args.chomp)
299                                         when /^[0-9.]+$/
300                                                 rev = command.dup
301                                                 if rcs.has_revision?(rev)
302                                                         status.push :revision_data
303                                                 else
304                                                         status.push :new_revision
305                                                 end
306                                         when 'desc'
307                                                 status.push :desc
308                                                 lines.clear
309                                                 status.push :read_lines
310                                         when 'branch', 'access', 'locks', 'expand'
311                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
312                                                 status.push :skipping_lines
313                                                 next if args.empty?
314                                                 line = args; redo
315                                         else
316                                                 raise "Unknown command #{command.inspect}"
317                                         end
318                                 when :skipping_lines
319                                         status.pop if line.strip.chomp!(';')
320                                 when :symbols
321                                         # we can have multiple symbols per line
322                                         pairs = line.strip.split($;)
323                                         pairs.each do |pair|
324                                                 sym, rev = pair.strip.split(':',2);
325                                                 if rev
326                                                         status.pop if rev.chomp!(';')
327                                                         rcs.revision[rev].symbols << sym
328                                                 else
329                                                         status.pop
330                                                 end
331                                         end
332                                 when :desc
333                                         rcs.desc.replace lines.dup
334                                         status.pop
335                                 when :read_lines
336                                         # we sanitize lines as we read them
337
338                                         actual_line = line.dup
339
340                                         # the first line must begin with a @, which we strip
341                                         if lines.empty?
342                                                 ats = line.match(/^@+/)
343                                                 raise 'malformed line' unless ats
344                                                 actual_line.replace line.sub(/^@/,'')
345                                         end
346
347                                         # if the line ends with an ODD number of @, it's the
348                                         # last line -- we work on actual_line so that content
349                                         # such as @\n or @ work correctly (they would be
350                                         # encoded respectively as ['@@@\n','@\n'] and
351                                         # ['@@@@\n']
352                                         ats = actual_line.chomp.match(/@+$/)
353                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
354                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
355                                         end
356                                         lines << actual_line.gsub('@@','@')
357                                         if nomore
358                                                 status.pop
359                                                 redo
360                                         end
361                                 when :new_revision
362                                         case line.chomp
363                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
364                                                 rcs.revision[rev].date = $1
365                                                 rcs.revision[rev].author = $2
366                                                 rcs.revision[rev].state = $3
367                                         when 'branches'
368                                                 status.push :branches
369                                         when /branches\s*;/
370                                                 next
371                                         when /^next\s+(\S+)?;$/
372                                                 nxt = rcs.revision[rev].next = $1
373                                                 next unless nxt
374                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
375                                                 rcs.revision[nxt].diff_base = rev
376                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
377                                         else
378                                                 status.pop
379                                         end
380                                 when :branches
381                                         candidate = line.split(';',2)
382                                         branch = candidate.first.strip
383                                         rcs.revision[rev].branches << branch
384                                         raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
385                                         rcs.revision[branch].diff_base = rev
386                                         # we drop the last number from the branch name
387                                         rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
388                                         rcs.revision[branch].branch_point = rev
389                                         status.pop if candidate.length > 1
390                                 when :revision_data
391                                         case line.chomp
392                                         when 'log'
393                                                 status.push :log
394                                                 lines.clear
395                                                 status.push :read_lines
396                                         when 'text'
397                                                 if rev == rcs.head
398                                                         status.push :head
399                                                 else
400                                                         status.push :diff
401                                                 end
402                                                 lines.clear
403                                                 status.push :read_lines
404                                         else
405                                                 status.pop
406                                         end
407                                 when :log
408                                         rcs.revision[rev].log.replace lines.dup
409                                         status.pop
410                                 when :head
411                                         rcs.revision[rev].text.replace lines.dup
412                                         puts rcs.revision[rev].blob
413                                         status.pop
414                                 when :diff
415                                         difflines.replace lines.dup
416                                         difflines.pop if difflines.last.empty?
417                                         base = rcs.revision[rev].diff_base
418                                         unless rcs.revision[base].text
419                                                 pp rcs
420                                                 puts rev, base
421                                                 raise 'no diff base!'
422                                         end
423                                         # deep copy
424                                         buffer = []
425                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
426
427                                         adding = false
428                                         index = nil
429                                         count = nil
430
431                                         while l = difflines.shift
432                                                 if adding
433                                                         raise 'negative index during insertion' if index < 0
434                                                         raise 'negative count during insertion' if count < 0
435                                                         adding << l
436                                                         count -= 1
437                                                         # collected all the lines, put the before
438                                                         unless count > 0
439                                                                 unless buffer[index]
440                                                                         buffer[index] = []
441                                                                 end
442                                                                 buffer[index].unshift(*adding)
443                                                                 adding = false
444                                                         end
445                                                         next
446                                                 end
447
448                                                 l.chomp!
449                                                 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
450                                                 diff_cmd = $1.intern
451                                                 index = $2.to_i
452                                                 count = $3.to_i
453                                                 case diff_cmd
454                                                 when :d
455                                                         # for deletion, index 1 is the first index, so the Ruby
456                                                         # index is one less than the diff one
457                                                         index -= 1
458                                                         # we replace them with empty string so that 'a' commands
459                                                         # referring to the same line work properly
460                                                         while count > 0
461                                                                 buffer[index].clear
462                                                                 index += 1
463                                                                 count -= 1
464                                                         end
465                                                 when :a
466                                                         # addition will prepend the appropriate lines
467                                                         # to the given index, and in this case Ruby
468                                                         # and diff indices are the same
469                                                         adding = []
470                                                 end
471                                         end
472
473                                         # turn the buffer into an array of lines, deleting the empty ones
474                                         buffer.delete_if { |l| l.empty? }
475                                         buffer.flatten!
476
477                                         rcs.revision[rev].text = buffer
478                                         puts rcs.revision[rev].blob
479                                         status.pop
480                                 else
481                                         raise "Unknown status #{status.last}"
482                                 end
483                         end
484                 end
485
486                 # clean up the symbols/branches: look for revisions that have
487                 # one or more symbols but no dates, and make them into
488                 # branches, pointing to the highest commit with that key
489                 branches = []
490                 keys = rcs.revision.keys
491                 rcs.revision.each do |key, rev|
492                         if rev.date.nil? and not rev.symbols.empty?
493                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
494                                 tr = rcs.revision[top]
495                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
496                                 tr.branches |= rev.symbols
497                                 branches << key
498                         end
499                 end
500                 branches.each { |k| rcs.revision.delete k }
501
502                 return rcs
503         end
504
505         class Tree
506                 def initialize(commit)
507                         @commit = commit
508                         @files = Hash.new
509                 end
510
511                 def merge!(tree)
512                         testfiles = @files.dup
513                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
514                         # the next line is only reached if all the adds were
515                         # successful, so the merge is atomic
516                         @files.replace testfiles
517                 end
518
519                 def add(rcs, rev, file_list=@files)
520                         if file_list.key? rcs
521                                 prev = file_list[rcs]
522                                 if prev.log == rev.log
523                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
524                                 else
525                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
526                                 end
527                                 if prev.text != rev.text
528                                         raise str
529                                 else
530                                         @commit.warn_about str
531                                 end
532                         end
533                         file_list[rcs] = rev
534                 end
535
536                 def each &block
537                         @files.each &block
538                 end
539
540                 def to_a
541                         files = []
542                         @files.map do |rcs, rev|
543                                 files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
544                         end
545                         files
546                 end
547
548                 def filenames
549                         @files.map { |rcs, rev| rcs.fname }
550                 end
551
552                 def to_s
553                         self.to_a.join("\n")
554                 end
555         end
556
557         class Commit
558                 attr_accessor :date, :log, :symbols, :author, :branch
559                 attr_accessor :tree
560                 def initialize(rcs, rev)
561                         raise NoBranchSupport if rev.branch
562                         self.date = rev.date.dup
563                         self.log = rev.log.dup
564                         self.symbols = rev.symbols.dup
565                         self.author = rev.author
566                         self.branch = rev.branch
567
568                         self.tree = Tree.new self
569                         self.tree.add rcs, rev
570                 end
571
572                 def to_a
573                         [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
574                 end
575
576                 def warn_about(str)
577                         warn str + " for commit on #{self.date}"
578                 end
579
580                 # Sort by date and then by number of symbols
581                 def <=>(other)
582                         ds = self.date <=> other.date
583                         if ds != 0
584                                 return ds
585                         else
586                                 return self.symbols.length <=> other.symbols.length
587                         end
588                 end
589
590                 def merge!(commit)
591                         self.tree.merge! commit.tree
592                         if commit.date > self.date
593                                 warn_about "updating date to #{commit.date}"
594                                 self.date = commit.date
595                         end
596                         self.symbols.merge commit.symbols
597                 end
598
# Write this commit to standard output as a git fast-import stream.
#
# opts:: option hash; opts[:authors] maps RCS usernames to the identity
#        to use as committer. Authors missing from the map (or a missing
#        map) are exported as "<username> <empty>".
#
# The commit goes to refs/heads/<branch> ('master' when no branch is
# set), is marked through RCS.commit keyed on the commit date in
# seconds, and is followed by the tree and by one refs/tags/<symbol>
# reset per symbol.
def export(opts={})
        xbranch = self.branch || 'master'
        authors = opts[:authors] || {}
        xauthor = authors[self.author] || "#{self.author} <empty>"
        xlog = self.log.to_s
        numdate = self.date.tv_sec
        xdate = "#{numdate} +0000"
        key = numdate.to_s

        # fast-import's `data <count>` takes an exact byte count, so a log
        # with multi-byte characters must not be measured in characters.
        # Rubies without String#bytesize already count bytes in #length.
        xlog_size = xlog.respond_to?(:bytesize) ? xlog.bytesize : xlog.length

        puts "commit refs/heads/#{xbranch}"
        puts "mark :#{RCS.commit key}"
        puts "committer #{xauthor} #{xdate}"
        puts "data #{xlog_size}"
        puts xlog unless xlog.empty?
        # TODO branching support for multi-file export
        # puts "from :#{RCS.commit from}" if self.branch_point
        puts self.tree.to_s

        # TODO branching support for multi-file export
        # rev.branches.each do |sym|
        #       puts "reset refs/heads/#{sym}"
        #       puts "from :#{RCS.commit key}"
        # end

        # Every symbol becomes a tag pointing at this commit
        self.symbols.each do |sym|
                puts "reset refs/tags/#{sym}"
                puts "from :#{RCS.commit key}"
        end

end
628         end
629 end
630
require 'getoptlong'

# Command-line options understood by the script. Every option handled in
# the option-processing loop must be declared here, otherwise GetoptLong
# rejects it as invalid before the loop ever sees it.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags; processed by the option loop but
        # previously missing from this table, so it could never be passed
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)
654
# Options and file names are returned in command-line order, but every
# option applies to all the files given.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []

# Built-in defaults, overridden first by the git configuration below and
# then by the command line
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Authors files listed in the git configuration (one per line of output)
`git config --get-all rcs.authorsfile`.each_line { |line|
        parse_options[:authors].update(load_authors_file(line.chomp))
}

# Off unless git reports exactly 'true'
tag_each_rev = `git config --bool rcs.tageachrev`.chomp
parse_options[:tag_each_rev] = (tag_each_rev == 'true')

log_filename = `git config --bool rcs.logfilename`.chomp
parse_options[:log_filename] = (log_filename == 'true')

# Fuzz values only replace the defaults when git has something to say
fuzz = `git config --int rcs.commitFuzz`.chomp
if not fuzz.empty?
        parse_options[:commit_fuzz] = fuzz.to_i
end

fuzz = `git config --int rcs.tagFuzz`.chomp
if not fuzz.empty?
        parse_options[:tag_fuzz] = fuzz.to_i
end

# On unless git reports exactly 'false'
symbol_check = `git config --bool rcs.symbolcheck`.chomp
parse_options[:symbol_check] = (symbol_check != 'false')
689
# Plain on/off switches: option name => [parse_options key, value]
switches = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Apply the command line on top of the values read from the git
# configuration; non-option arguments (reported with an empty option
# name) are collected as files to export.
opts.each do |opt, arg|
        if switches.has_key?(opt)
                setting, value = switches[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when '--authors-file'
                loaded = load_authors_file(arg)
                # Report usernames that were already known, then let the
                # new file win
                clashes = parse_options[:authors].keys & loaded.keys
                unless clashes.empty?
                        STDERR.puts "Authors file #{arg} redefines #{clashes.join(', ')}"
                end
                parse_options[:authors].merge!(loaded)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when ''
                file_list << arg
        when '--help'
                usage
                exit
        end
end

# The tag fuzz is never allowed to be smaller than the commit fuzz
parse_options[:tag_fuzz] = parse_options[:commit_fuzz] if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
732
require 'etc'

user = Etc.getlogin || ENV['USER']

# Look for a user identity in the init files of other tools.
#
# The user's ~/.hgrc (username field) and ~/.vimrc / ~/.gvimrc
# (changelog_username setting) are scanned in that order.
#
# Returns the first non-empty value found (stripped), or nil if no file
# yields one. The value is returned instead of being stored, because a
# `def` body cannot see the script's top-level local variables (the
# authors map and the current user), so the caller does the storing.
def steal_username
        # The quote group always participates (possibly matching nothing),
        # so the \1 backreference is well defined for unquoted values too.
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                begin
                        file = File.expand_path fn
                rescue ArgumentError
                        # no home directory to expand ~ against
                        next
                end
                next unless File.readable?(file) and File.read(file) =~ rx
                found = Regexp.last_match(idx).strip
                return found unless found.empty?
        end
        nil
end

# If the current user has no entry in the authors map yet, try to build
# one: name from GIT_AUTHOR_NAME, git config or the passwd gecos field,
# email from GIT_AUTHOR_EMAIL or git config. When either is missing,
# fall back to whatever steal_username can find.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # ENV values are frozen, so fallbacks are assigned rather than
        # written into the string in place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw and pw.respond_to?(:gecos)
                rescue ArgumentError
                        # the user has no passwd entry; leave the name empty
                end
        end

        identity = nil
        unless name.empty?
                # we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # missing name or email: try to steal the data from other sources
        identity ||= steal_username

        parse_options[:authors][user] = identity if identity
end
778
if file_list.empty?
        usage
        exit 1
end

SFX = ',v'

status = 0

# Resolve every command-line argument to the RCS file(s) it stands for
# and parse them. Arguments that cannot be resolved are reported, flagged
# in the exit status and skipped.
rcs = []
file_list.each do |arg|
        # File.ftype raises on a missing path. A missing argument is treated
        # as a plain file, so that a working file which is not checked out
        # can still be found through its RCS file, and so that the
        # not-found reports below can actually be reached.
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # the argument is the RCS file itself
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # working file: look for RCS/<name>,v first, then
                        # for <name>,v next to it
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                pattern = File.join(arg, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        path.sub!(/\/?RCS$/, '') # strip final /RCS if present
                        path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # Only adds context (file and last line read);
                                # the error itself is re-raised untouched
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
832
if rcs.length == 1
        # A single RCS file is exported by the RCS object itself
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        # One commit per revision of every file to begin with
        commits = []

        rcs.each do |r|
                r.revision.each do |_key, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                # branch revisions are fatal unless the user
                                # asked to skip them
                                raise unless parse_options[:skip_branches]
                                STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each { |c| PP.pp(c.to_a, $stderr) }
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the most recent one, folding into each
        # commit c the commits k that are not newer than c, fall within the
        # fuzz window and share log, author and branch. Merged commits are
        # removed from the list while it is being walked, so the order of
        # the statements below matters.
        commits.reverse_each do |c|
                commits.reverse_each do |k|
                        # k is older than the fuzz window allows: stop scanning
                        break if k.date < c.date - parse_options[:commit_fuzz]
                        next if k == c
                        next unless c.log == k.log and c.author == k.author and c.branch == k.branch
                        next if k.date > c.date

                        # One symbol list must be contained in the other
                        symbols_agree = c.symbols.subset?(k.symbols) || k.symbols.subset?(c.symbols)
                        unless symbols_agree
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{c.tree.filenames.join(', ')})\n\tand (#{k.tree.filenames.join(', ')})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        next
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{c.tree.filenames.join(', ')})\n\tand (#{k.tree.filenames.join(', ')})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # The merge was refused: report it and give up
                                # on folding anything else into c
                                gap = c.date - k.date
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        else
                                # k now lives inside c
                                commits.delete k
                        end
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each { |c| PP.pp(c.to_a, $stderr) }
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status