Don't misinterpret `branches`
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Add support for commitid for coalescing commits
8         * Further coalescing options? (e.g. small logfile differences)
9         * Proper branching support in multi-file export
10         * Optimize memory usage by discarding unneeded text
11 =end
12
13 require 'pp'
14 require 'set'
15
# Error raised when a revision lives on an RCS branch and the code path
# handling it has no branch support (see RCS::Commit#initialize)
class NoBranchSupport < NotImplementedError
end
17
# Integer#odd? only exists from Ruby 1.8.7 onwards: provide our own
# definition when the running interpreter lacks it
unless 2.respond_to?(:odd?)
        class Integer
                # true when dividing the receiver by two leaves a remainder
                def odd?
                        !(self % 2).zero?
                end
        end
end
27
# Prints the command-line help text to standard error.
#
# The text is a verbatim heredoc; the only interpolated value is the
# program name ($0). The config section refers to the symbol-check
# option by the same name used in the Options section (--symbol-check).
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
78
# Reports on standard error that +arg+ could not be located
def not_found(arg)
        message = 'Could not find ' + arg.to_s
        STDERR.puts(message)
end
82
# Loads an authors file and returns a hash that maps usernames to author
# names & emails.
#
# Every meaningful line has the form `username = Full Name <email>`; both
# sides are stripped of surrounding whitespace. Blank lines are skipped
# silently and lines without a `=` are skipped with a warning, so a single
# odd line no longer aborts the whole load. When a username is defined more
# than once a notice is printed and the last definition wins.
#
# If the file cannot be opened or read, the problem is reported through
# not_found; any other error is reported with its own message. In both
# cases the mappings collected so far (possibly none) are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                # no '=' on the line: there is no author part to strip
                                if author.nil?
                                        STDERR.puts "Ignoring malformed authors line #{line.chomp.inspect} in #{fn}"
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError
                # missing/unreadable file
                not_found(fn)
        rescue StandardError => e
                # keep the load best-effort, but do not mislabel the failure
                STDERR.puts "Could not load #{fn}: #{e.message}"
        end
        return hash
end
101
class Time
        # Builds a UTC Time from an RCS date string, i.e. six dot-separated
        # fields (year, month, day, hour, minute, second).
        #
        # RCS stores years from 1900 through 1999 with their last two digits
        # only; such a year is expanded to 19YY before being handed to
        # Time.utc, which would otherwise read '99' as the year 99.
        #
        # Raises ArgumentError if the string does not have exactly six fields.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                year = fields.first
                fields[0] = "19#{year}" if year.length < 3
                Time.utc(*fields)
        end
end
109
110 module RCS
111         # strip an optional final ;
112         def RCS.clean(arg)
113                 arg.chomp(';')
114         end
115
116         # strip the first and last @, and de-double @@s
117         def RCS.sanitize(arg)
118                 case arg
119                 when Array
120                         ret = arg.dup
121                         raise 'malformed first line' unless ret.first[0,1] == '@'
122                         raise 'malformed last line' unless ret.last[-1,1] == '@'
123                         ret.first.sub!(/^@/,'')
124                         ret.last.sub!(/@$/,'')
125                         ret.map { |l| l.gsub('@@','@') }
126                 when String
127                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
128                 else
129                         raise
130                 end
131         end
132
133         # clean and sanitize
134         def RCS.at_clean(arg)
135                 RCS.sanitize RCS.clean(arg)
136         end
137
138         def RCS.mark(key)
139                 @@marks ||= {}
140                 if @@marks.key? key
141                         @@marks[key]
142                 else
143                         @@marks[key] = @@marks.length + 1
144                 end
145         end
146
147         def RCS.blob(file, rev)
148                 RCS.mark([file, rev])
149         end
150
151         def RCS.commit(commit)
152                 RCS.mark(commit)
153         end
154
155         class File
156                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
157                 def initialize(fname, executable)
158                         @fname = fname.dup
159                         @head = nil
160                         @comment = nil
161                         @desc = []
162                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
163                         @mode = executable ? '755' : '644'
164                 end
165
166                 def has_revision?(rev)
167                         @revision.has_key?(rev) and not @revision[rev].author.nil?
168                 end
169
170                 def export_commits(opts={})
171                         counter = 0
172                         exported = []
173                         until @revision.empty?
174                                 counter += 1
175
176                                 # a string sort is a very good candidate for
177                                 # export order, getting a miss only for
178                                 # multi-digit revision components
179                                 keys = @revision.keys.sort
180
181                                 STDERR.puts "commit export loop ##{counter}"
182                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
183                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
184
185                                 keys.each do |key|
186                                         rev = @revision[key]
187                                         # the parent commit is rev.next if we're on the
188                                         # master branch (rev.branch is nil) or
189                                         # rev.diff_base otherwise
190                                         from = rev.branch.nil? ? rev.next : rev.diff_base
191                                         # A commit can only be exported if it has no
192                                         # parent, or if the parent has been exported
193                                         # already. Skip this commit otherwise
194                                         if from and not exported.include? from
195                                                 next
196                                         end
197
198                                         branch = rev.branch || 'master'
199                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
200                                         date = "#{rev.date.tv_sec} +0000"
201                                         log = String.new
202                                         if opts[:log_filename]
203                                                 log << @fname << ": "
204                                         end
205                                         log << rev.log.join
206
207                                         puts "commit refs/heads/#{branch}"
208                                         puts "mark :#{RCS.commit key}"
209                                         puts "committer #{author} #{date}"
210                                         puts "data #{log.length}"
211                                         puts log unless log.empty?
212                                         puts "from :#{RCS.commit from}" if from
213                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
214
215                                         # TODO FIXME this *should* be safe, in
216                                         # that it should not unduly move
217                                         # branches back in time, but I'm not
218                                         # 100% sure ...
219                                         rev.branches.each do |sym|
220                                                 puts "reset refs/heads/#{sym}"
221                                                 puts "from :#{RCS.commit key}"
222                                         end
223                                         rev.symbols.each do |sym|
224                                                 puts "reset refs/tags/#{sym}"
225                                                 puts "from :#{RCS.commit key}"
226                                         end
227                                         if opts[:tag_each_rev]
228                                                 puts "reset refs/tags/#{key}"
229                                                 puts "from :#{RCS.commit key}"
230                                         end
231
232                                         exported.push key
233                                 end
234                                 exported.each { |k| @revision.delete(k) }
235                         end
236                 end
237         end
238
239         class Revision
240                 attr_accessor :rev, :author, :state, :next
241                 attr_accessor :branches, :log, :text, :symbols
242                 attr_accessor :branch, :diff_base, :branch_point
243                 attr_reader   :date
244                 def initialize(file, rev)
245                         @file = file
246                         @rev = rev
247                         @author = nil
248                         @date = nil
249                         @state = nil
250                         @next = nil
251                         @branches = Set.new
252                         @branch = nil
253                         @branch_point = nil
254                         @diff_base = nil
255                         @log = []
256                         @text = []
257                         @symbols = Set.new
258                 end
259
260                 def date=(str)
261                         @date = Time.rcs(str)
262                 end
263
264                 def blob
265                         str = @text.join('')
266                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
267                         ret
268                 end
269         end
270
271         def RCS.parse(fname, rcsfile)
272                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
273
274                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
275                         status = [:basic]
276                         rev = nil
277                         lines = []
278                         difflines = []
279                         file.each_line do |line|
280                                 case status.last
281                                 when :basic
282                                         command, args = line.split($;,2)
283                                         next if command.empty?
284
285                                         if command.chomp!(';')
286                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
287                                                 next
288                                         end
289
290                                         case command
291                                         when 'head'
292                                                 rcs.head = RCS.clean(args.chomp)
293                                         when 'symbols'
294                                                 status.push :symbols
295                                                 next if args.empty?
296                                                 line = args; redo
297                                         when 'comment'
298                                                 rcs.comment = RCS.at_clean(args.chomp)
299                                         when /^[0-9.]+$/
300                                                 rev = command.dup
301                                                 if rcs.has_revision?(rev)
302                                                         status.push :revision_data
303                                                 else
304                                                         status.push :new_revision
305                                                 end
306                                         when 'desc'
307                                                 status.push :desc
308                                                 lines.clear
309                                                 status.push :read_lines
310                                         when 'branch', 'access', 'locks', 'expand'
311                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
312                                                 status.push :skipping_lines
313                                                 next if args.empty?
314                                                 line = args; redo
315                                         else
316                                                 raise "Unknown command #{command.inspect}"
317                                         end
318                                 when :skipping_lines
319                                         status.pop if line.strip.chomp!(';')
320                                 when :symbols
321                                         # we can have multiple symbols per line
322                                         pairs = line.strip.split($;)
323                                         pairs.each do |pair|
324                                                 sym, rev = pair.strip.split(':',2);
325                                                 if rev
326                                                         status.pop if rev.chomp!(';')
327                                                         rcs.revision[rev].symbols << sym
328                                                 else
329                                                         status.pop
330                                                 end
331                                         end
332                                 when :desc
333                                         rcs.desc.replace lines.dup
334                                         status.pop
335                                 when :read_lines
336                                         # we sanitize lines as we read them
337
338                                         actual_line = line.dup
339
340                                         # the first line must begin with a @, which we strip
341                                         if lines.empty?
342                                                 ats = line.match(/^@+/)
343                                                 raise 'malformed line' unless ats
344                                                 actual_line.replace line.sub(/^@/,'')
345                                         end
346
347                                         # if the line ends with an ODD number of @, it's the
348                                         # last line -- we work on actual_line so that content
349                                         # such as @\n or @ work correctly (they would be
350                                         # encoded respectively as ['@@@\n','@\n'] and
351                                         # ['@@@@\n']
352                                         ats = actual_line.chomp.match(/@+$/)
353                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
354                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
355                                         end
356                                         lines << actual_line.gsub('@@','@')
357                                         if nomore
358                                                 status.pop
359                                                 redo
360                                         end
361                                 when :new_revision
362                                         case line.chomp
363                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
364                                                 rcs.revision[rev].date = $1
365                                                 rcs.revision[rev].author = $2
366                                                 rcs.revision[rev].state = $3
367                                         when /^branches\s*;/
368                                                 next
369                                         when /^branches(?:\s+|$)/
370                                                 status.push :branches
371                                                 if line.index(';')
372                                                         line = line.sub(/^branches\s+/,'')
373                                                         redo
374                                                 end
375                                         when /^next\s+(\S+)?;$/
376                                                 nxt = rcs.revision[rev].next = $1
377                                                 next unless nxt
378                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
379                                                 rcs.revision[nxt].diff_base = rev
380                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
381                                         else
382                                                 status.pop
383                                         end
384                                 when :branches
385                                         candidate = line.split(';',2)
386                                         branch = candidate.first.strip
387                                         raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
388                                         rcs.revision[branch].diff_base = rev
389                                         # we drop the last number from the branch name
390                                         rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
391                                         rcs.revision[branch].branch_point = rev
392                                         status.pop if candidate.length > 1
393                                 when :revision_data
394                                         case line.chomp
395                                         when 'log'
396                                                 status.push :log
397                                                 lines.clear
398                                                 status.push :read_lines
399                                         when 'text'
400                                                 if rev == rcs.head
401                                                         status.push :head
402                                                 else
403                                                         status.push :diff
404                                                 end
405                                                 lines.clear
406                                                 status.push :read_lines
407                                         else
408                                                 status.pop
409                                         end
410                                 when :log
411                                         rcs.revision[rev].log.replace lines.dup
412                                         status.pop
413                                 when :head
414                                         rcs.revision[rev].text.replace lines.dup
415                                         puts rcs.revision[rev].blob
416                                         status.pop
417                                 when :diff
418                                         difflines.replace lines.dup
419                                         difflines.pop if difflines.last.empty?
420                                         base = rcs.revision[rev].diff_base
421                                         unless rcs.revision[base].text
422                                                 pp rcs
423                                                 puts rev, base
424                                                 raise 'no diff base!'
425                                         end
426                                         # deep copy
427                                         buffer = []
428                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
429
430                                         adding = false
431                                         index = nil
432                                         count = nil
433
434                                         while l = difflines.shift
435                                                 if adding
436                                                         raise 'negative index during insertion' if index < 0
437                                                         raise 'negative count during insertion' if count < 0
438                                                         adding << l
439                                                         count -= 1
440                                                         # collected all the lines, put the before
441                                                         unless count > 0
442                                                                 unless buffer[index]
443                                                                         buffer[index] = []
444                                                                 end
445                                                                 buffer[index].unshift(*adding)
446                                                                 adding = false
447                                                         end
448                                                         next
449                                                 end
450
451                                                 l.chomp!
452                                                 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
453                                                 diff_cmd = $1.intern
454                                                 index = $2.to_i
455                                                 count = $3.to_i
456                                                 case diff_cmd
457                                                 when :d
458                                                         # for deletion, index 1 is the first index, so the Ruby
459                                                         # index is one less than the diff one
460                                                         index -= 1
461                                                         # we replace them with empty string so that 'a' commands
462                                                         # referring to the same line work properly
463                                                         while count > 0
464                                                                 buffer[index].clear
465                                                                 index += 1
466                                                                 count -= 1
467                                                         end
468                                                 when :a
469                                                         # addition will prepend the appropriate lines
470                                                         # to the given index, and in this case Ruby
471                                                         # and diff indices are the same
472                                                         adding = []
473                                                 end
474                                         end
475
476                                         # turn the buffer into an array of lines, deleting the empty ones
477                                         buffer.delete_if { |l| l.empty? }
478                                         buffer.flatten!
479
480                                         rcs.revision[rev].text = buffer
481                                         puts rcs.revision[rev].blob
482                                         status.pop
483                                 else
484                                         raise "Unknown status #{status.last}"
485                                 end
486                         end
487                 end
488
489                 # clean up the symbols/branches: look for revisions that have
490                 # one or more symbols but no dates, and make them into
491                 # branches, pointing to the highest commit with that key
492                 branches = []
493                 keys = rcs.revision.keys
494                 rcs.revision.each do |key, rev|
495                         if rev.date.nil? and not rev.symbols.empty?
496                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
497                                 tr = rcs.revision[top]
498                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
499                                 tr.branches |= rev.symbols
500                                 branches << key
501                         end
502                 end
503                 branches.each { |k| rcs.revision.delete k }
504
505                 return rcs
506         end
507
508         class Tree
509                 def initialize(commit)
510                         @commit = commit
511                         @files = Hash.new
512                 end
513
514                 def merge!(tree)
515                         testfiles = @files.dup
516                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
517                         # the next line is only reached if all the adds were
518                         # successful, so the merge is atomic
519                         @files.replace testfiles
520                 end
521
522                 def add(rcs, rev, file_list=@files)
523                         if file_list.key? rcs
524                                 prev = file_list[rcs]
525                                 if prev.log == rev.log
526                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
527                                 else
528                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
529                                 end
530                                 if prev.text != rev.text
531                                         raise str
532                                 else
533                                         @commit.warn_about str
534                                 end
535                         end
536                         file_list[rcs] = rev
537                 end
538
539                 def each &block
540                         @files.each &block
541                 end
542
543                 def to_a
544                         files = []
545                         @files.map do |rcs, rev|
546                                 if rev.state.downcase == "dead"
547                                         files << "D #{rcs.fname}"
548                                 else
549                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
550                                 end
551                         end
552                         files
553                 end
554
555                 def filenames
556                         @files.map { |rcs, rev| rcs.fname }
557                 end
558
559                 def to_s
560                         self.to_a.join("\n")
561                 end
562         end
563
564         class Commit
565                 attr_accessor :date, :log, :symbols, :author, :branch
566                 attr_accessor :tree
567                 attr_accessor :min_date, :max_date
568                 def initialize(rcs, rev)
569                         raise NoBranchSupport if rev.branch
570                         self.date = rev.date.dup
571                         self.min_date = self.max_date = self.date
572                         self.log = rev.log.dup
573                         self.symbols = rev.symbols.dup
574                         self.author = rev.author
575                         self.branch = rev.branch
576
577                         self.tree = Tree.new self
578                         self.tree.add rcs, rev
579                 end
580
581                 def to_a
582                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
583                 end
584
585                 def warn_about(str)
586                         warn str + " for commit on #{self.date}"
587                 end
588
589                 # Sort by date and then by number of symbols
590                 def <=>(other)
591                         ds = self.date <=> other.date
592                         if ds != 0
593                                 return ds
594                         else
595                                 return self.symbols.length <=> other.symbols.length
596                         end
597                 end
598
599                 def merge!(commit)
600                         self.tree.merge! commit.tree
601                         if commit.max_date > self.max_date
602                                 self.max_date = commit.max_date
603                         end
604                         if commit.min_date < self.min_date
605                                 self.min_date = commit.min_date
606                         end
607                         self.symbols.merge commit.symbols
608                 end
609
610                 def export(opts={})
611                         xbranch = self.branch || 'master'
612                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
613                         xlog = self.log.join
614                         numdate = self.date.tv_sec
615                         xdate = "#{numdate} +0000"
616                         key = numdate.to_s
617
618                         puts "commit refs/heads/#{xbranch}"
619                         puts "mark :#{RCS.commit key}"
620                         puts "committer #{xauthor} #{xdate}"
621                         puts "data #{xlog.length}"
622                         puts xlog unless xlog.empty?
623                         # TODO branching support for multi-file export
624                         # puts "from :#{RCS.commit from}" if self.branch_point
625                         puts self.tree.to_s
626
627                         # TODO branching support for multi-file export
628                         # rev.branches.each do |sym|
629                         #       puts "reset refs/heads/#{sym}"
630                         #       puts "from :#{RCS.commit key}"
631                         # end
632
633                         self.symbols.each do |sym|
634                                 puts "reset refs/tags/#{sym}"
635                                 puts "from :#{RCS.commit key}"
636                         end
637
638                 end
639         end
640 end
641
642 require 'getoptlong'
643
# Command-line options understood by the script. Every flag handled by the
# option-processing loop has to be declared here, otherwise GetoptLong
# rejects it as an invalid option.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Fuzziness for tags, stored in parse_options[:tag_fuzz] by the
        # option loop (presumably in seconds, like the commit fuzz)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)
665
# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

# non-option arguments collected from the command line
file_list = []

# built-in defaults; the git configuration below and then the command line
# can override them
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Read config options

# every configured authors file contributes to the authors map
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# these two are off unless git reports exactly 'true'
parse_options[:tag_each_rev] = `git config --bool rcs.tageachrev`.chomp == 'true'

parse_options[:log_filename] = `git config --bool rcs.logfilename`.chomp == 'true'

# numeric settings replace the default only when git printed a value
commit_fuzz = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = commit_fuzz.to_i unless commit_fuzz.empty?

tag_fuzz = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = tag_fuzz.to_i unless tag_fuzz.empty?

# the symbol check is on unless git reports exactly 'false'
parse_options[:symbol_check] = `git config --bool rcs.symbolcheck`.chomp != 'false'
700
# Flags that do nothing but force one parse_options entry to a fixed value.
# The --no-* variants store an explicit false, which reads the same as a
# missing key (nil) wherever the option is tested.
switches = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Apply the command line on top of the values computed so far
opts.each do |opt, arg|
        if switches.has_key?(opt)
                key, value = switches[opt]
                parse_options[key] = value
                next
        end

        case opt
        when '--authors-file'
                known = parse_options[:authors]
                loaded = load_authors_file(arg)
                # report the users whose mapping is about to be replaced
                redef = known.keys & loaded.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                known.merge!(loaded)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when ''
                # with RETURN_IN_ORDER, non-option arguments come through
                # with an empty option name
                file_list << arg
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
parse_options[:tag_fuzz] = [parse_options[:tag_fuzz], parse_options[:commit_fuzz]].max
743
744 require 'etc'
745
# login name of the user running the export, if it can be determined
user = Etc.getlogin || ENV['USER']

# steal username/email data from other init files that may contain the
# information
#
# Returns the first value found, stripped of surrounding whitespace, or nil
# when no readable file has a matching line. The method only returns the
# value: locals of the main script (parse_options, user) are not visible
# inside a def, so storing the result is left to the caller.
def steal_username
        # the quote group matches the empty string when the value is not
        # quoted, so that the \1 back-reference still succeeds; a
        # back-reference to an optional group that did not take part in the
        # match would make the whole pattern fail
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# If the current user has no entry in the authors map yet, try to build one
# from the environment, the git configuration and the passwd database,
# falling back to data stolen from other init files.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        name = ENV['GIT_AUTHOR_NAME'] || ''
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                # a login without a passwd entry makes getpwnam raise, and the
                # gecos field may be missing: treat both as "no name found"
                pw = begin
                        Etc.getpwnam(user)
                rescue ArgumentError
                        nil
                end
                name = pw.gecos.to_s if pw
        end

        identity = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'] || ''
                email = `git config user.email`.chomp if email.empty?

                # we got both a name and email, fill the info
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other sources
        identity ||= steal_username

        parse_options[:authors][user] = identity if identity
end
788
# At least one file or directory is needed
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status of the script, set to non-zero when an argument cannot be handled
status = 0

# Parse the RCS file behind every argument. A plain file is either an RCS
# file itself (name ending in SFX) or a working file whose RCS file is looked
# for in the RCS/ subdirectory and then next to it; a directory is scanned
# recursively for RCS files.
rcs = []
file_list.each do |arg|
        case ftype = File.ftype(arg)
        when 'file'
                if arg[-2,2] == SFX
                        # File.exist? rather than File.exists?: the latter was
                        # removed in Ruby 3.2
                        if File.exist? arg
                                rcsfile = arg.dup
                        else
                                not_found "RCS file #{arg}"
                                status |= 1
                        end
                        filename = File.basename(arg, SFX)
                else
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile.replace File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception => e
                                # say which file was being parsed, then let the
                                # error through
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise e
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
849
# Export what was parsed. A single RCS file is exported through its own
# export_commits. With any other number of files, one Commit is created for
# each revision of each file, the commits are sorted and those that look like
# parts of the same change (same log, author and branch, close enough dates,
# compatible symbols) are coalesced before being exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                # revisions on branches are either dropped with a
                                # notice or abort the export
                                if parse_options[:skip_branches]
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first, keeping track of the
        # index of the current one. Merged commits are deleted from the
        # array, but they always follow the current commit, so the indices
        # of the commits still to be visited are not affected.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files touched by c, and files touched by later commits
                # that we decided not to merge with
                cfiles = Set.new c.tree.filenames
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        # the symbols are only looked at for commits that would otherwise be
                        # coalesced: reporting a symbol disagreement (and suggesting
                        # --no-symbol-check) for commits that differ in log, author or branch
                        # would be misleading, since they are not going to be merged anyway
                        unless skipthis or c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in date order, so the distance is taken as an
                                # absolute value to suggest a sensible (non-negative) fuzz
                                fuzz = (k.date - c.date).abs
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status