Fix branch handling
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Add support for commitid for coalescing commits
8         * Further coalescing options? (e.g. small logfile differences)
9         * Proper branching support in multi-file export
10         * Optimize memory usage by discarding unneeded text
11 =end
12
13 require 'pp'
14 require 'set'
15
16 class NoBranchSupport < NotImplementedError ; end
17
# Backport of Integer#odd? (introduced in Ruby 1.8.7) for older
# interpreters; when the method is already available nothing is redefined
unless 2.respond_to?(:odd?)
        class Integer
                # true when the receiver is not a multiple of two
                def odd?
                        (self % 2) != 0
                end
        end
end
27
# Print the command-line help text on STDERR.
#
# The text starts with the program name ($0) and lists the command-line
# options together with the matching git configuration keys.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics are used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commits must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
78
# Report on STDERR that +arg+ could not be found
def not_found(arg)
        message = format('Could not find %s', arg)
        STDERR.puts(message)
end
82
# Load an authors file and return a hash that maps usernames to author
# names & emails.
#
# Each meaningful line has the form
#     username = Full Name <email>
# Blank lines are ignored; other lines without a '=' are reported on
# STDERR and skipped, so that a stray line does not abort the whole load.
# A username defined more than once keeps its last definition (a notice is
# printed on STDERR).
#
# If the file cannot be opened or read, this is reported through not_found
# and the mappings collected so far (none, if the open failed) are returned.
#
# fn:: path of the authors file; it is expanded with File.expand_path
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                # no '=' on the line: there is nothing to map
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{line.chomp.inspect}"
                                        next
                                end
                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError
                # only I/O failures mean that the file is not usable
                not_found(fn)
        end
        return hash
end
101
class Time
        # Build a UTC Time from an RCS date string.
        #
        # The string must have six dot-separated fields: year, month, day,
        # hour, minute and second. RCS stores years 1900 through 1999 with
        # their last two digits only (see rcsfile(5)), so a year field
        # shorter than three digits is read as 19YY; otherwise modern Ruby
        # would take '99' to mean the year 99.
        #
        # Raises ArgumentError if the number of fields is not six.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                year, *rest = fields
                year = "19#{year}" if year.length < 3
                Time.utc(year, *rest)
        end
end
109
110 module RCS
111         # strip an optional final ;
112         def RCS.clean(arg)
113                 arg.chomp(';')
114         end
115
116         # strip the first and last @, and de-double @@s
117         def RCS.sanitize(arg)
118                 case arg
119                 when Array
120                         ret = arg.dup
121                         raise 'malformed first line' unless ret.first[0,1] == '@'
122                         raise 'malformed last line' unless ret.last[-1,1] == '@'
123                         ret.first.sub!(/^@/,'')
124                         ret.last.sub!(/@$/,'')
125                         ret.map { |l| l.gsub('@@','@') }
126                 when String
127                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
128                 else
129                         raise
130                 end
131         end
132
133         # clean and sanitize
134         def RCS.at_clean(arg)
135                 RCS.sanitize RCS.clean(arg)
136         end
137
138         def RCS.mark(key)
139                 @@marks ||= {}
140                 if @@marks.key? key
141                         @@marks[key]
142                 else
143                         @@marks[key] = @@marks.length + 1
144                 end
145         end
146
147         def RCS.blob(file, rev)
148                 RCS.mark([file, rev])
149         end
150
151         def RCS.commit(commit)
152                 RCS.mark(commit)
153         end
154
155         class File
156                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
157                 def initialize(fname, executable)
158                         @fname = fname.dup
159                         @head = nil
160                         @comment = nil
161                         @desc = []
162                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
163                         @mode = executable ? '755' : '644'
164                 end
165
166                 def has_revision?(rev)
167                         @revision.has_key?(rev) and not @revision[rev].author.nil?
168                 end
169
170                 def export_commits(opts={})
171                         counter = 0
172                         exported = []
173                         until @revision.empty?
174                                 counter += 1
175
176                                 # a string sort is a very good candidate for
177                                 # export order, getting a miss only for
178                                 # multi-digit revision components
179                                 keys = @revision.keys.sort
180
181                                 STDERR.puts "commit export loop ##{counter}"
182                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
183                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
184
185                                 keys.each do |key|
186                                         rev = @revision[key]
187                                         # the parent commit is rev.next if we're on the
188                                         # master branch (rev.branch is nil) or
189                                         # rev.diff_base otherwise
190                                         from = rev.branch.nil? ? rev.next : rev.diff_base
191                                         # A commit can only be exported if it has no
192                                         # parent, or if the parent has been exported
193                                         # already. Skip this commit otherwise
194                                         if from and not exported.include? from
195                                                 next
196                                         end
197
198                                         branch = rev.branch || 'master'
199                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
200                                         date = "#{rev.date.tv_sec} +0000"
201                                         log = String.new
202                                         if opts[:log_filename]
203                                                 log << @fname << ": "
204                                         end
205                                         log << rev.log.join
206
207                                         puts "commit refs/heads/#{branch}"
208                                         puts "mark :#{RCS.commit key}"
209                                         puts "committer #{author} #{date}"
210                                         puts "data #{log.length}"
211                                         puts log unless log.empty?
212                                         puts "from :#{RCS.commit from}" if rev.branch_point
213                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
214
215                                         # TODO FIXME this *should* be safe, in
216                                         # that it should not unduly move
217                                         # branches back in time, but I'm not
218                                         # 100% sure ...
219                                         rev.branches.each do |sym|
220                                                 puts "reset refs/heads/#{sym}"
221                                                 puts "from :#{RCS.commit key}"
222                                         end
223                                         rev.symbols.each do |sym|
224                                                 puts "reset refs/tags/#{sym}"
225                                                 puts "from :#{RCS.commit key}"
226                                         end
227                                         if opts[:tag_each_rev]
228                                                 puts "reset refs/tags/#{key}"
229                                                 puts "from :#{RCS.commit key}"
230                                         end
231
232                                         exported.push key
233                                 end
234                                 exported.each { |k| @revision.delete(k) }
235                         end
236                 end
237         end
238
239         class Revision
240                 attr_accessor :rev, :author, :state, :next
241                 attr_accessor :branches, :log, :text, :symbols
242                 attr_accessor :branch, :diff_base, :branch_point
243                 attr_reader   :date
244                 def initialize(file, rev)
245                         @file = file
246                         @rev = rev
247                         @author = nil
248                         @date = nil
249                         @state = nil
250                         @next = nil
251                         @branches = Set.new
252                         @branch = nil
253                         @branch_point = nil
254                         @diff_base = nil
255                         @log = []
256                         @text = []
257                         @symbols = Set.new
258                 end
259
260                 def date=(str)
261                         @date = Time.rcs(str)
262                 end
263
264                 def blob
265                         str = @text.join('')
266                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
267                         ret
268                 end
269         end
270
271         def RCS.parse(fname, rcsfile)
272                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
273
274                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
275                         status = [:basic]
276                         rev = nil
277                         lines = []
278                         difflines = []
279                         file.each_line do |line|
280                                 case status.last
281                                 when :basic
282                                         command, args = line.split($;,2)
283                                         next if command.empty?
284
285                                         if command.chomp!(';')
286                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
287                                                 next
288                                         end
289
290                                         case command
291                                         when 'head'
292                                                 rcs.head = RCS.clean(args.chomp)
293                                         when 'symbols'
294                                                 status.push :symbols
295                                                 next if args.empty?
296                                                 line = args; redo
297                                         when 'comment'
298                                                 rcs.comment = RCS.at_clean(args.chomp)
299                                         when /^[0-9.]+$/
300                                                 rev = command.dup
301                                                 if rcs.has_revision?(rev)
302                                                         status.push :revision_data
303                                                 else
304                                                         status.push :new_revision
305                                                 end
306                                         when 'desc'
307                                                 status.push :desc
308                                                 lines.clear
309                                                 status.push :read_lines
310                                         when 'branch', 'access', 'locks', 'expand'
311                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
312                                                 status.push :skipping_lines
313                                                 next if args.empty?
314                                                 line = args; redo
315                                         else
316                                                 raise "Unknown command #{command.inspect}"
317                                         end
318                                 when :skipping_lines
319                                         status.pop if line.strip.chomp!(';')
320                                 when :symbols
321                                         # we can have multiple symbols per line
322                                         pairs = line.strip.split($;)
323                                         pairs.each do |pair|
324                                                 sym, rev = pair.strip.split(':',2);
325                                                 if rev
326                                                         status.pop if rev.chomp!(';')
327                                                         rcs.revision[rev].symbols << sym
328                                                 else
329                                                         status.pop
330                                                 end
331                                         end
332                                 when :desc
333                                         rcs.desc.replace lines.dup
334                                         status.pop
335                                 when :read_lines
336                                         # we sanitize lines as we read them
337
338                                         actual_line = line.dup
339
340                                         # the first line must begin with a @, which we strip
341                                         if lines.empty?
342                                                 ats = line.match(/^@+/)
343                                                 raise 'malformed line' unless ats
344                                                 actual_line.replace line.sub(/^@/,'')
345                                         end
346
347                                         # if the line ends with an ODD number of @, it's the
348                                         # last line -- we work on actual_line so that content
349                                         # such as @\n or @ work correctly (they would be
350                                         # encoded respectively as ['@@@\n','@\n'] and
351                                         # ['@@@@\n']
352                                         ats = actual_line.chomp.match(/@+$/)
353                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
354                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
355                                         end
356                                         lines << actual_line.gsub('@@','@')
357                                         if nomore
358                                                 status.pop
359                                                 redo
360                                         end
361                                 when :new_revision
362                                         case line.chomp
363                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
364                                                 rcs.revision[rev].date = $1
365                                                 rcs.revision[rev].author = $2
366                                                 rcs.revision[rev].state = $3
367                                         when /^branches\s*;/
368                                                 next
369                                         when /^branches\s+/
370                                                 status.push :branches
371                                                 if line.index(';')
372                                                         line = line.sub(/^branches\s+/,'')
373                                                         redo
374                                                 end
375                                         when /^next\s+(\S+)?;$/
376                                                 nxt = rcs.revision[rev].next = $1
377                                                 next unless nxt
378                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
379                                                 rcs.revision[nxt].diff_base = rev
380                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
381                                         else
382                                                 status.pop
383                                         end
384                                 when :branches
385                                         candidate = line.split(';',2)
386                                         branch = candidate.first.strip
387                                         rcs.revision[rev].branches << branch
388                                         raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
389                                         rcs.revision[branch].diff_base = rev
390                                         # we drop the last number from the branch name
391                                         rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
392                                         rcs.revision[branch].branch_point = rev
393                                         status.pop if candidate.length > 1
394                                 when :revision_data
395                                         case line.chomp
396                                         when 'log'
397                                                 status.push :log
398                                                 lines.clear
399                                                 status.push :read_lines
400                                         when 'text'
401                                                 if rev == rcs.head
402                                                         status.push :head
403                                                 else
404                                                         status.push :diff
405                                                 end
406                                                 lines.clear
407                                                 status.push :read_lines
408                                         else
409                                                 status.pop
410                                         end
411                                 when :log
412                                         rcs.revision[rev].log.replace lines.dup
413                                         status.pop
414                                 when :head
415                                         rcs.revision[rev].text.replace lines.dup
416                                         puts rcs.revision[rev].blob
417                                         status.pop
418                                 when :diff
419                                         difflines.replace lines.dup
420                                         difflines.pop if difflines.last.empty?
421                                         base = rcs.revision[rev].diff_base
422                                         unless rcs.revision[base].text
423                                                 pp rcs
424                                                 puts rev, base
425                                                 raise 'no diff base!'
426                                         end
427                                         # deep copy
428                                         buffer = []
429                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
430
431                                         adding = false
432                                         index = nil
433                                         count = nil
434
435                                         while l = difflines.shift
436                                                 if adding
437                                                         raise 'negative index during insertion' if index < 0
438                                                         raise 'negative count during insertion' if count < 0
439                                                         adding << l
440                                                         count -= 1
441                                                         # collected all the lines, put the before
442                                                         unless count > 0
443                                                                 unless buffer[index]
444                                                                         buffer[index] = []
445                                                                 end
446                                                                 buffer[index].unshift(*adding)
447                                                                 adding = false
448                                                         end
449                                                         next
450                                                 end
451
452                                                 l.chomp!
453                                                 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
454                                                 diff_cmd = $1.intern
455                                                 index = $2.to_i
456                                                 count = $3.to_i
457                                                 case diff_cmd
458                                                 when :d
459                                                         # for deletion, index 1 is the first index, so the Ruby
460                                                         # index is one less than the diff one
461                                                         index -= 1
462                                                         # we replace them with empty string so that 'a' commands
463                                                         # referring to the same line work properly
464                                                         while count > 0
465                                                                 buffer[index].clear
466                                                                 index += 1
467                                                                 count -= 1
468                                                         end
469                                                 when :a
470                                                         # addition will prepend the appropriate lines
471                                                         # to the given index, and in this case Ruby
472                                                         # and diff indices are the same
473                                                         adding = []
474                                                 end
475                                         end
476
477                                         # turn the buffer into an array of lines, deleting the empty ones
478                                         buffer.delete_if { |l| l.empty? }
479                                         buffer.flatten!
480
481                                         rcs.revision[rev].text = buffer
482                                         puts rcs.revision[rev].blob
483                                         status.pop
484                                 else
485                                         raise "Unknown status #{status.last}"
486                                 end
487                         end
488                 end
489
490                 # clean up the symbols/branches: look for revisions that have
491                 # one or more symbols but no dates, and make them into
492                 # branches, pointing to the highest commit with that key
493                 branches = []
494                 keys = rcs.revision.keys
495                 rcs.revision.each do |key, rev|
496                         if rev.date.nil? and not rev.symbols.empty?
497                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
498                                 tr = rcs.revision[top]
499                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
500                                 tr.branches |= rev.symbols
501                                 branches << key
502                         end
503                 end
504                 branches.each { |k| rcs.revision.delete k }
505
506                 return rcs
507         end
508
509         class Tree
510                 def initialize(commit)
511                         @commit = commit
512                         @files = Hash.new
513                 end
514
515                 def merge!(tree)
516                         testfiles = @files.dup
517                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
518                         # the next line is only reached if all the adds were
519                         # successful, so the merge is atomic
520                         @files.replace testfiles
521                 end
522
523                 def add(rcs, rev, file_list=@files)
524                         if file_list.key? rcs
525                                 prev = file_list[rcs]
526                                 if prev.log == rev.log
527                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
528                                 else
529                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
530                                 end
531                                 if prev.text != rev.text
532                                         raise str
533                                 else
534                                         @commit.warn_about str
535                                 end
536                         end
537                         file_list[rcs] = rev
538                 end
539
540                 def each &block
541                         @files.each &block
542                 end
543
544                 def to_a
545                         files = []
546                         @files.map do |rcs, rev|
547                                 if rev.state.downcase == "dead"
548                                         files << "D #{rcs.fname}"
549                                 else
550                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
551                                 end
552                         end
553                         files
554                 end
555
556                 def filenames
557                         @files.map { |rcs, rev| rcs.fname }
558                 end
559
560                 def to_s
561                         self.to_a.join("\n")
562                 end
563         end
564
565         class Commit
566                 attr_accessor :date, :log, :symbols, :author, :branch
567                 attr_accessor :tree
568                 attr_accessor :min_date, :max_date
569                 def initialize(rcs, rev)
570                         raise NoBranchSupport if rev.branch
571                         self.date = rev.date.dup
572                         self.min_date = self.max_date = self.date
573                         self.log = rev.log.dup
574                         self.symbols = rev.symbols.dup
575                         self.author = rev.author
576                         self.branch = rev.branch
577
578                         self.tree = Tree.new self
579                         self.tree.add rcs, rev
580                 end
581
582                 def to_a
583                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
584                 end
585
586                 def warn_about(str)
587                         warn str + " for commit on #{self.date}"
588                 end
589
590                 # Sort by date and then by number of symbols
591                 def <=>(other)
592                         ds = self.date <=> other.date
593                         if ds != 0
594                                 return ds
595                         else
596                                 return self.symbols.length <=> other.symbols.length
597                         end
598                 end
599
600                 def merge!(commit)
601                         self.tree.merge! commit.tree
602                         if commit.max_date > self.max_date
603                                 self.max_date = commit.max_date
604                         end
605                         if commit.min_date < self.min_date
606                                 self.min_date = commit.min_date
607                         end
608                         self.symbols.merge commit.symbols
609                 end
610
611                 def export(opts={})
612                         xbranch = self.branch || 'master'
613                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
614                         xlog = self.log.join
615                         numdate = self.date.tv_sec
616                         xdate = "#{numdate} +0000"
617                         key = numdate.to_s
618
619                         puts "commit refs/heads/#{xbranch}"
620                         puts "mark :#{RCS.commit key}"
621                         puts "committer #{xauthor} #{xdate}"
622                         puts "data #{xlog.length}"
623                         puts xlog unless xlog.empty?
624                         # TODO branching support for multi-file export
625                         # puts "from :#{RCS.commit from}" if self.branch_point
626                         puts self.tree.to_s
627
628                         # TODO branching support for multi-file export
629                         # rev.branches.each do |sym|
630                         #       puts "reset refs/heads/#{sym}"
631                         #       puts "from :#{RCS.commit key}"
632                         # end
633
634                         self.symbols.each do |sym|
635                                 puts "reset refs/tags/#{sym}"
636                                 puts "from :#{RCS.commit key}"
637                         end
638
639                 end
640         end
641 end
642
require 'getoptlong'

# Command-line options understood by the script. Every option handled by the
# option-processing loop must be declared here, otherwise GetoptLong rejects
# it as invalid before the loop ever sees it.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags (in seconds); the option loop stores it in
        # parse_options[:tag_fuzz]
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# With RETURN_IN_ORDER, non-option arguments (the files to export) are
# reported as the argument of an option whose name is the empty string.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
671
# Files and directories named on the command line
file_list = []

# Export settings, seeded with the built-in defaults
parse_options = {}
parse_options[:authors] = Hash.new
parse_options[:commit_fuzz] = 300
parse_options[:tag_fuzz] = -1

# Read config options

# every configured authors file contributes to the author map
configured_authors = `git config --get-all rcs.authorsfile`
configured_authors.each_line do |line|
        parse_options[:authors].merge!(load_authors_file(line.chomp))
end

# boolean settings that are off unless git reports exactly 'true'
[
        [:tag_each_rev, 'git config --bool rcs.tageachrev'],
        [:log_filename, 'git config --bool rcs.logfilename'],
].each do |key, command|
        parse_options[key] = (`#{command}`.chomp == 'true')
end

# integer settings that keep their default when git reports nothing
[
        [:commit_fuzz, 'git config --int rcs.commitFuzz'],
        [:tag_fuzz, 'git config --int rcs.tagFuzz'],
].each do |key, command|
        reported = `#{command}`.chomp
        parse_options[key] = reported.to_i unless reported.empty?
end

# symbol checking is on unless git reports exactly 'false'
parse_options[:symbol_check] = (`git config --bool rcs.symbolcheck`.chomp != 'false')
701
# Switches that take no argument and simply store a fixed value:
# option name => [parse_options key, value to store]
switches = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

opts.each do |opt, arg|
        if switches.has_key?(opt)
                key, value = switches[opt]
                parse_options[key] = value
                next
        end

        case opt
        when '--help'
                usage
                exit
        when ''
                # non-option argument: a file or directory to export
                file_list << arg
        when '--authors-file'
                loaded = load_authors_file(arg)
                # report the users whose mapping is about to be overridden
                clashes = parse_options[:authors].keys & loaded.keys
                unless clashes.empty?
                        STDERR.puts "Authors file #{arg} redefines #{clashes.join(', ')}"
                end
                parse_options[:authors].merge!(loaded)
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when '--rcs-suffixes'
                # TODO
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
unless parse_options[:tag_fuzz] >= parse_options[:commit_fuzz]
        parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
end
744
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information.
#
# Returns the first value found (stripped of surrounding whitespace), or nil
# when none of the files is readable or contains a matching setting.
#
# The value is returned instead of being stored by the method itself because
# a method body cannot see the script's top-level local variables (such as
# parse_options and user).
def steal_username
        # The optional quote is captured as a possibly EMPTY group: a
        # backreference to a group that did not take part in the match always
        # fails in Ruby, which would make unquoted values impossible to match.
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                next unless File.readable?(file)
                return Regexp.last_match(idx).strip if File.read(file) =~ rx
        end
        nil
end

# Provide a default identity for the current user, unless an authors file
# already defines one.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # Plain assignments rather than String#replace: values coming from ENV
        # may be frozen and must not be modified in place.
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                # last resort: the full-name field of the passwd database
                begin
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw and pw.respond_to?(:gecos)
                rescue ArgumentError
                        # no passwd entry for this login: leave the name empty
                end
        end

        # if we found a name, try to find an email too
        email = ''
        unless name.empty?
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
        end

        if name.empty? or email.empty?
                # couldn't find a name or an email, try to steal data from
                # other sources
                stolen = steal_username
                parse_options[:authors][user] = stolen if stolen
        else
                # we got both a name and email, fill the info
                parse_options[:authors][user] = "#{name} <#{email}>"
        end
end
789
if file_list.empty?
        usage
        exit 1
end

# Suffix of RCS history files
SFX = ',v'

# Exit status of the script; bit 0 is set when an argument could not be handled
status = 0

# Parsed RCS files, one entry per history file found
rcs = []
file_list.each do |arg|
        case ftype = File.ftype(arg)
        when 'file'
                if arg[-2,2] == SFX
                        # the RCS file itself was specified
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                # nothing to parse for this argument
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # a working file was specified: look for its history
                        # in the RCS subdirectory first, then next to the file
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        # nothing to parse for this argument
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # only used to report which file was being
                                # parsed; the error itself is always re-raised
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
850
if rcs.length == 1
        # a single file is exported revision by revision
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        # start with one commit per revision of each file
        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                # branch revisions are either dropped (when
                                # --skip-branches was given) or fatal
                                if parse_options[:skip_branches]
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else
                                        raise
                                end
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first. For each commit c, the
        # commits that follow it in the array are candidates for being merged
        # into it. Merged commits are removed from the array; they always lie
        # after the current position, so the part still to be visited is
        # unaffected. thisindex tracks the position of c in the array.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files touched by c
                cfiles = Set.new c.tree.filenames
                # files touched by the candidates that were skipped so far
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        # the symbols of one commit must be a subset of the
                        # symbols of the other, unless symbol checking is off
                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in date order, so the distance is
                                # k.date - c.date; the absolute value keeps the
                                # suggested fuzz non-negative in any case
                                fuzz = (k.date - c.date).abs
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
                                break
                        end
                        # k is now part of c, drop it from the list
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status