Verboser error on malformed diffs
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--usage` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
29 class NoBranchSupport < NotImplementedError ; end
30
# Compatibility shim: Integer#odd? only exists from Ruby 1.8.7 onwards,
# so provide it ourselves when the running interpreter lacks it.
unless 2.respond_to?(:odd?)
        class Integer
                # true when dividing the receiver by two leaves a remainder
                def odd?
                        !(self % 2).zero?
                end
        end
end
40
41 =begin
42 RCS fast-export version: set to `git` in the repository, but can be overridden
43 by packagers, e.g. based on the latest tag, git description, custom packager
44 patches or whatever.
45
46 When the version is set to `git`, we make a little effort to find more information
47 about which commit we are at.
48 =end
49
RFE_VERSION="git"

# Print the program name and version on standard error.
#
# When RFE_VERSION is "git", the directory holding the script is inspected:
# if it contains a `.git` entry, the current commit hash, branch name and
# commit date are appended to the message (with a trailing `*` on the branch
# name when `git diff` reports local changes). If gathering that information
# fails for any reason, " (no info)" is printed instead.
#
# The working directory of the process is left untouched.
def version
        if RFE_VERSION == "git"
                # Find the real script location. The script may be run
                # through a symlink (resolved one level, relative to the
                # directory holding the link) or directly, in which case
                # File.readlink would raise.
                script = __FILE__
                if File.symlink? script
                        script = File.expand_path(File.readlink(script), File.dirname(script))
                end
                script_dir = File.dirname File.expand_path(script)

                info = nil
                # the block form of chdir restores the previous working
                # directory when the block exits
                Dir.chdir(script_dir) do
                        if File.exist? '.git' ; begin
                                git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                hash=git_out.first.split.first
                                branch=git_out.first.split('(').last.chomp(')')
                                date=git_out.last.split.first
                                # only the exit status matters here
                                `git diff --no-ext-diff --quiet --exit-code`
                                branch << "*" unless $?.success?
                                info=" [#{branch}] #{hash} (#{date})"
                        rescue
                                info=" (no info)"
                        end ; end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
73
# Print the command-line help text on standard error.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
124
# Tell the user, on standard error, that +arg+ could not be located.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts message
end
128
# Load an authors file and return a hash that maps usernames to author
# names & emails.
#
# Every meaningful line of +fn+ has the form `username = Full Name <email>`;
# whitespace around both sides is stripped. Blank lines are ignored, and a
# line without a `=` separator is reported on standard error and skipped, so
# that one bad line does not discard the rest of the file. When a username
# appears more than once the last definition wins and a notice is printed.
#
# If the file cannot be opened or read, the problem is reported through
# not_found and whatever was collected so far (possibly nothing) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?

                                uname, author = line.split('=', 2)
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{io.lineno}: #{line.chomp.inspect}"
                                        next
                                end

                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
147
class Time
        # Build a UTC Time from an RCS date string.
        #
        # +string+ must hold six dot-separated fields (year, month, day,
        # hour, minute, second); anything else raises ArgumentError.
        #
        # RCS stores years from 1900 through 1999 with their last two
        # digits only, so a year field of one or two digits is prefixed
        # with the century before being handed to Time.utc, which would
        # otherwise take it literally.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                year = fields.first
                year = "19#{year}" if year.length <= 2
                Time.utc(year, *fields[1..-1])
        end
end
155
156 module RCS
157         # strip an optional final ;
158         def RCS.clean(arg)
159                 arg.chomp(';')
160         end
161
162         # strip the first and last @, and de-double @@s
163         def RCS.sanitize(arg)
164                 case arg
165                 when Array
166                         ret = arg.dup
167                         raise 'malformed first line' unless ret.first[0,1] == '@'
168                         raise 'malformed last line' unless ret.last[-1,1] == '@'
169                         ret.first.sub!(/^@/,'')
170                         ret.last.sub!(/@$/,'')
171                         ret.map { |l| l.gsub('@@','@') }
172                 when String
173                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
174                 else
175                         raise
176                 end
177         end
178
179         # clean and sanitize
180         def RCS.at_clean(arg)
181                 RCS.sanitize RCS.clean(arg)
182         end
183
184         def RCS.mark(key)
185                 @@marks ||= {}
186                 if @@marks.key? key
187                         @@marks[key]
188                 else
189                         @@marks[key] = @@marks.length + 1
190                 end
191         end
192
193         def RCS.blob(file, rev)
194                 RCS.mark([file, rev])
195         end
196
197         def RCS.commit(commit)
198                 RCS.mark(commit)
199         end
200
201         class File
202                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
203                 def initialize(fname, executable)
204                         @fname = fname.dup
205                         @head = nil
206                         @comment = nil
207                         @desc = []
208                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
209                         @mode = executable ? '755' : '644'
210                 end
211
212                 def has_revision?(rev)
213                         @revision.has_key?(rev) and not @revision[rev].author.nil?
214                 end
215
216                 def export_commits(opts={})
217                         counter = 0
218                         exported = []
219                         until @revision.empty?
220                                 counter += 1
221
222                                 # a string sort is a very good candidate for
223                                 # export order, getting a miss only for
224                                 # multi-digit revision components
225                                 keys = @revision.keys.sort
226
227                                 STDERR.puts "commit export loop ##{counter}"
228                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
229                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
230
231                                 keys.each do |key|
232                                         rev = @revision[key]
233                                         # the parent commit is rev.next if we're on the
234                                         # master branch (rev.branch is nil) or
235                                         # rev.diff_base otherwise
236                                         from = rev.branch.nil? ? rev.next : rev.diff_base
237                                         # A commit can only be exported if it has no
238                                         # parent, or if the parent has been exported
239                                         # already. Skip this commit otherwise
240                                         if from and not exported.include? from
241                                                 next
242                                         end
243
244                                         branch = rev.branch || 'master'
245                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
246                                         date = "#{rev.date.tv_sec} +0000"
247                                         log = String.new
248                                         if opts[:log_filename]
249                                                 log << @fname << ": "
250                                         end
251                                         log << rev.log.join
252
253                                         puts "commit refs/heads/#{branch}"
254                                         puts "mark :#{RCS.commit key}"
255                                         puts "committer #{author} #{date}"
256                                         puts "data #{log.length}"
257                                         puts log unless log.empty?
258                                         puts "from :#{RCS.commit from}" if from
259                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
260
261                                         # TODO FIXME this *should* be safe, in
262                                         # that it should not unduly move
263                                         # branches back in time, but I'm not
264                                         # 100% sure ...
265                                         rev.branches.each do |sym|
266                                                 puts "reset refs/heads/#{sym}"
267                                                 puts "from :#{RCS.commit key}"
268                                         end
269                                         rev.symbols.each do |sym|
270                                                 puts "reset refs/tags/#{sym}"
271                                                 puts "from :#{RCS.commit key}"
272                                         end
273                                         if opts[:tag_each_rev]
274                                                 puts "reset refs/tags/#{key}"
275                                                 puts "from :#{RCS.commit key}"
276                                         end
277
278                                         exported.push key
279                                 end
280                                 exported.each { |k| @revision.delete(k) }
281                         end
282                 end
283         end
284
285         class Revision
286                 attr_accessor :rev, :author, :state, :next
287                 attr_accessor :branches, :log, :text, :symbols
288                 attr_accessor :branch, :diff_base, :branch_point
289                 attr_reader   :date
290                 def initialize(file, rev)
291                         @file = file
292                         @rev = rev
293                         @author = nil
294                         @date = nil
295                         @state = nil
296                         @next = nil
297                         @branches = Set.new
298                         @branch = nil
299                         @branch_point = nil
300                         @diff_base = nil
301                         @log = []
302                         @text = []
303                         @symbols = Set.new
304                 end
305
306                 def date=(str)
307                         @date = Time.rcs(str)
308                 end
309
310                 def blob
311                         str = @text.join('')
312                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
313                         ret
314                 end
315         end
316
317         def RCS.parse(fname, rcsfile)
318                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
319
320                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
321                         status = [:basic]
322                         rev = nil
323                         lines = []
324                         difflines = []
325                         file.each_line do |line|
326                                 case status.last
327                                 when :basic
328                                         command, args = line.split($;,2)
329                                         next if command.empty?
330
331                                         if command.chomp!(';')
332                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
333                                                 next
334                                         end
335
336                                         case command
337                                         when 'head'
338                                                 rcs.head = RCS.clean(args.chomp)
339                                         when 'symbols'
340                                                 status.push :symbols
341                                                 next if args.empty?
342                                                 line = args; redo
343                                         when 'comment'
344                                                 rcs.comment = RCS.at_clean(args.chomp)
345                                         when /^[0-9.]+$/
346                                                 rev = command.dup
347                                                 if rcs.has_revision?(rev)
348                                                         status.push :revision_data
349                                                 else
350                                                         status.push :new_revision
351                                                 end
352                                         when 'desc'
353                                                 status.push :desc
354                                                 lines.clear
355                                                 status.push :read_lines
356                                         when 'branch', 'access', 'locks', 'expand'
357                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
358                                                 status.push :skipping_lines
359                                                 next if args.empty?
360                                                 line = args; redo
361                                         else
362                                                 raise "Unknown command #{command.inspect}"
363                                         end
364                                 when :skipping_lines
365                                         status.pop if line.strip.chomp!(';')
366                                 when :symbols
367                                         # we can have multiple symbols per line
368                                         pairs = line.strip.split($;)
369                                         pairs.each do |pair|
370                                                 sym, rev = pair.strip.split(':',2);
371                                                 if rev
372                                                         status.pop if rev.chomp!(';')
373                                                         rcs.revision[rev].symbols << sym
374                                                 else
375                                                         status.pop
376                                                 end
377                                         end
378                                 when :desc
379                                         rcs.desc.replace lines.dup
380                                         status.pop
381                                 when :read_lines
382                                         # we sanitize lines as we read them
383
384                                         actual_line = line.dup
385
386                                         # the first line must begin with a @, which we strip
387                                         if lines.empty?
388                                                 ats = line.match(/^@+/)
389                                                 raise 'malformed line' unless ats
390                                                 actual_line.replace line.sub(/^@/,'')
391                                         end
392
393                                         # if the line ends with an ODD number of @, it's the
394                                         # last line -- we work on actual_line so that content
395                                         # such as @\n or @ work correctly (they would be
396                                         # encoded respectively as ['@@@\n','@\n'] and
397                                         # ['@@@@\n']
398                                         ats = actual_line.chomp.match(/@+$/)
399                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
400                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
401                                         end
402                                         lines << actual_line.gsub('@@','@')
403                                         if nomore
404                                                 status.pop
405                                                 redo
406                                         end
407                                 when :new_revision
408                                         case line.chomp
409                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
410                                                 rcs.revision[rev].date = $1
411                                                 rcs.revision[rev].author = $2
412                                                 rcs.revision[rev].state = $3
413                                         when /^branches\s*;/
414                                                 next
415                                         when /^branches(?:\s+|$)/
416                                                 status.push :branches
417                                                 if line.index(';')
418                                                         line = line.sub(/^branches\s+/,'')
419                                                         redo
420                                                 end
421                                         when /^next\s+(\S+)?;$/
422                                                 nxt = rcs.revision[rev].next = $1
423                                                 next unless nxt
424                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
425                                                 rcs.revision[nxt].diff_base = rev
426                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
427                                         else
428                                                 status.pop
429                                         end
430                                 when :branches
431                                         candidate = line.split(';',2)
432                                         candidate.first.strip.split.each do |branch|
433                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
434                                                 rcs.revision[branch].diff_base = rev
435                                                 # we drop the last number from the branch name
436                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
437                                                 rcs.revision[branch].branch_point = rev
438                                         end
439                                         status.pop if candidate.length > 1
440                                 when :revision_data
441                                         case line.chomp
442                                         when 'log'
443                                                 status.push :log
444                                                 lines.clear
445                                                 status.push :read_lines
446                                         when 'text'
447                                                 if rev == rcs.head
448                                                         status.push :head
449                                                 else
450                                                         status.push :diff
451                                                 end
452                                                 lines.clear
453                                                 status.push :read_lines
454                                         else
455                                                 status.pop
456                                         end
457                                 when :log
458                                         rcs.revision[rev].log.replace lines.dup
459                                         status.pop
460                                 when :head
461                                         rcs.revision[rev].text.replace lines.dup
462                                         puts rcs.revision[rev].blob
463                                         status.pop
464                                 when :diff
465                                         difflines.replace lines.dup
466                                         difflines.pop if difflines.last.empty?
467                                         base = rcs.revision[rev].diff_base
468                                         unless rcs.revision[base].text
469                                                 pp rcs
470                                                 puts rev, base
471                                                 raise 'no diff base!'
472                                         end
473                                         # deep copy
474                                         buffer = []
475                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
476
477                                         adding = false
478                                         index = nil
479                                         count = nil
480
481                                         while l = difflines.shift
482                                                 if adding
483                                                         raise 'negative index during insertion' if index < 0
484                                                         raise 'negative count during insertion' if count < 0
485                                                         adding << l
486                                                         count -= 1
487                                                         # collected all the lines, put the before
488                                                         unless count > 0
489                                                                 unless buffer[index]
490                                                                         buffer[index] = []
491                                                                 end
492                                                                 buffer[index].unshift(*adding)
493                                                                 adding = false
494                                                         end
495                                                         next
496                                                 end
497
498                                                 l.chomp!
499                                                 raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
500                                                 diff_cmd = $1.intern
501                                                 index = $2.to_i
502                                                 count = $3.to_i
503                                                 case diff_cmd
504                                                 when :d
505                                                         # for deletion, index 1 is the first index, so the Ruby
506                                                         # index is one less than the diff one
507                                                         index -= 1
508                                                         # we replace them with empty string so that 'a' commands
509                                                         # referring to the same line work properly
510                                                         while count > 0
511                                                                 buffer[index].clear
512                                                                 index += 1
513                                                                 count -= 1
514                                                         end
515                                                 when :a
516                                                         # addition will prepend the appropriate lines
517                                                         # to the given index, and in this case Ruby
518                                                         # and diff indices are the same
519                                                         adding = []
520                                                 end
521                                         end
522
523                                         # turn the buffer into an array of lines, deleting the empty ones
524                                         buffer.delete_if { |l| l.empty? }
525                                         buffer.flatten!
526
527                                         rcs.revision[rev].text = buffer
528                                         puts rcs.revision[rev].blob
529                                         status.pop
530                                 else
531                                         raise "Unknown status #{status.last}"
532                                 end
533                         end
534                 end
535
536                 # clean up the symbols/branches: look for revisions that have
537                 # one or more symbols but no dates, and make them into
538                 # branches, pointing to the highest commit with that key
539                 branches = []
540                 keys = rcs.revision.keys
541                 rcs.revision.each do |key, rev|
542                         if rev.date.nil? and not rev.symbols.empty?
543                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
544                                 tr = rcs.revision[top]
545                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
546                                 tr.branches |= rev.symbols
547                                 branches << key
548                         end
549                 end
550                 branches.each { |k| rcs.revision.delete k }
551
552                 return rcs
553         end
554
555         class Tree
556                 def initialize(commit)
557                         @commit = commit
558                         @files = Hash.new
559                 end
560
561                 def merge!(tree)
562                         testfiles = @files.dup
563                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
564                         # the next line is only reached if all the adds were
565                         # successful, so the merge is atomic
566                         @files.replace testfiles
567                 end
568
569                 def add(rcs, rev, file_list=@files)
570                         if file_list.key? rcs
571                                 prev = file_list[rcs]
572                                 if prev.log == rev.log
573                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
574                                 else
575                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
576                                 end
577                                 if prev.text != rev.text
578                                         raise str
579                                 else
580                                         @commit.warn_about str
581                                 end
582                         end
583                         file_list[rcs] = rev
584                 end
585
586                 def each &block
587                         @files.each &block
588                 end
589
590                 def to_a
591                         files = []
592                         @files.map do |rcs, rev|
593                                 if rev.state.downcase == "dead"
594                                         files << "D #{rcs.fname}"
595                                 else
596                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
597                                 end
598                         end
599                         files
600                 end
601
602                 def filenames
603                         @files.map { |rcs, rev| rcs.fname }
604                 end
605
606                 def to_s
607                         self.to_a.join("\n")
608                 end
609         end
610
611         class Commit
612                 attr_accessor :date, :log, :symbols, :author, :branch
613                 attr_accessor :tree
614                 attr_accessor :min_date, :max_date
615                 def initialize(rcs, rev)
616                         raise NoBranchSupport if rev.branch
617                         self.date = rev.date.dup
618                         self.min_date = self.max_date = self.date
619                         self.log = rev.log.dup
620                         self.symbols = rev.symbols.dup
621                         self.author = rev.author
622                         self.branch = rev.branch
623
624                         self.tree = Tree.new self
625                         self.tree.add rcs, rev
626                 end
627
628                 def to_a
629                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
630                 end
631
632                 def warn_about(str)
633                         warn str + " for commit on #{self.date}"
634                 end
635
636                 # Sort by date and then by number of symbols
637                 def <=>(other)
638                         ds = self.date <=> other.date
639                         if ds != 0
640                                 return ds
641                         else
642                                 return self.symbols.length <=> other.symbols.length
643                         end
644                 end
645
646                 def merge!(commit)
647                         self.tree.merge! commit.tree
648                         if commit.max_date > self.max_date
649                                 self.max_date = commit.max_date
650                         end
651                         if commit.min_date < self.min_date
652                                 self.min_date = commit.min_date
653                         end
654                         self.symbols.merge commit.symbols
655                 end
656
657                 def export(opts={})
658                         xbranch = self.branch || 'master'
659                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
660                         xlog = self.log.join
661                         numdate = self.date.tv_sec
662                         xdate = "#{numdate} +0000"
663                         key = numdate.to_s
664
665                         puts "commit refs/heads/#{xbranch}"
666                         puts "mark :#{RCS.commit key}"
667                         puts "committer #{xauthor} #{xdate}"
668                         puts "data #{xlog.length}"
669                         puts xlog unless xlog.empty?
670                         # TODO branching support for multi-file export
671                         # puts "from :#{RCS.commit from}" if self.branch_point
672                         puts self.tree.to_s
673
674                         # TODO branching support for multi-file export
675                         # rev.branches.each do |sym|
676                         #       puts "reset refs/heads/#{sym}"
677                         #       puts "from :#{RCS.commit key}"
678                         # end
679
680                         self.symbols.each do |sym|
681                                 puts "reset refs/tags/#{sym}"
682                                 puts "from :#{RCS.commit key}"
683                         end
684
685                 end
686         end
687 end
688
689 require 'getoptlong'
690
# Command-line options understood by the script. Every option handled by
# the option-processing loop must be declared here, otherwise GetoptLong
# rejects it as invalid.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags; the option loop stores it as :tag_fuzz
        # (presumably in seconds, like the commit fuzz -- TODO confirm)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
720
# Non-option command-line arguments, filled in while options are processed
file_list = []

# Settings shared by the whole export: the built-in defaults come first,
# then anything found in the git configuration overrides them
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Small helpers returning the chomped output of a typed `git config` query
# (an empty string when the key is not set)
git_bool = lambda { |key| `git config --bool #{key}`.chomp }
git_int = lambda { |key| `git config --int #{key}`.chomp }

# Read config options
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# these two are enabled only by an explicit 'true'
parse_options[:tag_each_rev] = (git_bool.call('rcs.tageachrev') == 'true')
parse_options[:log_filename] = (git_bool.call('rcs.logfilename') == 'true')

# the fuzz values keep their defaults unless the configuration sets them
[[:commit_fuzz, 'rcs.commitFuzz'], [:tag_fuzz, 'rcs.tagFuzz']].each do |key, setting|
        value = git_int.call(setting)
        parse_options[key] = value.to_i unless value.empty?
end

# symbol checking is disabled only by an explicit 'false'
parse_options[:symbol_check] = (git_bool.call('rcs.symbolcheck') != 'false')
750
# Options that simply switch a setting on or off. The --no-* forms of
# --tag-each-rev and --log-filename restate the default, which is fine
# since the missing key (default) returns nil which is false in Ruby
flag_settings = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Walk the command line: options update parse_options, anything else is
# collected as a file (or directory) to export
opts.each do |opt, arg|
        if flag_settings.key?(opt)
                setting, value = flag_settings[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                # tell the user about authors already known from earlier sources
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when ''
                # non-option argument (options are returned in order)
                file_list << arg
        when '--version'
                version
                exit
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
parse_options[:tag_fuzz] = parse_options[:commit_fuzz] if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
796
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information: the first file that is readable and matches its pattern
# provides the author string stored as authors[user].
#
# authors:: the authors table to update (a Hash keyed by user name)
# user::    the login name to fill in
#
# Both are passed in explicitly because a method body cannot see the
# top-level local variables of the script.
def steal_username(authors, user)
        # The quote group is written (["']?) rather than (["'])? so that it
        # always takes part in the match: a backreference to a group that
        # did not participate fails, which would reject unquoted values.
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        authors[user] = Regexp.last_match(idx).strip
                        break
                end
        end
end

# If the current user has no entry in the authors table yet, try to build
# one from the environment, the git configuration and the password database
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # plain reassignment is used instead of String#replace, because the
        # strings handed out by ENV may be frozen
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        name = Etc.getpwnam(user).gecos.to_s
                rescue ArgumentError
                        # no password database entry for this user: keep looking
                        name = ''
                end
        end

        # an email is only looked up once a name has been found
        email = ''
        unless name.empty?
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
        end

        if name.empty? or email.empty?
                # couldn't find a name or an email, try to steal data from
                # other sources
                steal_username(parse_options[:authors], user)
        else
                # we got both a name and email, fill the info
                parse_options[:authors][user] = "#{name} <#{email}>"
        end
end
841
# Nothing to export: show the usage and fail
if file_list.empty?
        usage
        exit 1
end

SFX = ',v'

status = 0

# Turn every command-line argument into parsed RCS files: a file argument
# contributes one RCS file, a directory argument every *,v file below it
rcs = []
file_list.each do |arg|
        # A path that does not exist is handled like a file name: File.ftype
        # would raise on it, while the lookups below can report it properly
        # (e.g. a working file that is gone but still has its RCS file)
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # the argument is the RCS file itself
                        unless File.exist? arg
                                # NOTE(review): assumes not_found only reports the
                                # problem and returns -- confirm against its definition
                                not_found "RCS file #{arg}"
                                status |= 1
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # the argument is the working file: look for its RCS file
                        # in the RCS subdirectory first, then next to the file
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        # there is nothing to parse for this argument
                                        status |= 1
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # Exception (not just StandardError) is caught on purpose:
                                # the error is always re-raised, this only adds the name of
                                # the offending file and the last input line number read
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
902
# A single RCS file is exported directly; several files are first turned
# into single-file commits, which are then sorted by date and coalesced
# into multi-file commits before being exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        # one commit per revision of each file; revisions for which
        # Commit.new raises NoBranchSupport are fatal unless skipping
        # branches was requested
        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                if parse_options[:skip_branches]
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first, merging into each one
        # the suitable commits that follow it. Merged commits are deleted from
        # the list; since they always come after the current one, thisindex
        # keeps pointing at the current commit.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                cfiles = Set.new c.tree.filenames
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in the date-sorted list, so the distance
                                # between the two is k.date - c.date (the reverse
                                # difference would never be positive)
                                fuzz = k.date - c.date
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status