ignore trailing slash on directory argument
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Add support for commitid for coalescing commits
8         * Further coalescing options? (e.g. small logfile differences)
9         * Proper branching support in multi-file export
10         * Optimize memory usage by discarding unneeded text
11 =end
12
13 require 'pp'
14 require 'set'
15
# Error raised when a revision that lives on a branch reaches code that can
# only deal with the main line of development. Being a NotImplementedError
# it is not caught by a bare +rescue+.
class NoBranchSupport < NotImplementedError
end
17
# Backport of Integer#odd?, which only exists since Ruby 1.8.7: it is
# defined here solely when the running interpreter does not provide it
Integer.class_eval do
        def odd?
                !(self % 2).zero?
        end
end unless 2.respond_to?(:odd?)
27
# Print the help text (synopsis, description of the commit coalescing rules,
# command-line options and the related git configuration keys) on STDERR.
def usage
        help_text = <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --rcs-symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
        STDERR.puts help_text
end
78
# Tell the user, on STDERR, that +arg+ could not be found
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts message
end
82
# Load the username => "Full Name <email>" mappings stored in the file +fn+.
#
# Each mapping line is split at its first '=' and both halves are stripped
# of surrounding whitespace. Blank lines are ignored; any other line that has
# no '=' is reported on STDERR and skipped, so that one malformed line
# neither aborts the load nor gets reported as a missing file. When a
# username is defined more than once a notice is printed on STDERR and the
# last definition wins.
#
# Returns the hash of the mappings collected. If opening or reading the file
# fails, the problem is reported through not_found and the mappings collected
# up to that point (possibly none) are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                entry = line.strip
                                next if entry.empty?
                                uname, author = entry.split('=', 2)
                                if author.nil?
                                        STDERR.puts "Skipping malformed authors line #{entry.inspect}"
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                # best effort: a missing or unreadable authors file is
                # reported, it does not stop the export
                not_found(fn)
        end
        return hash
end
101
class Time
        # Build a UTC Time from an RCS date, i.e. six dot-separated numeric
        # fields: year, month, day, hours, minutes and seconds.
        #
        # RCS writes the years from 1900 to 1999 with their last two digits
        # only, so a year below 100 is taken to belong to the 20th century.
        #
        # Raises ArgumentError when +string+ does not have exactly six
        # fields or when a field is not a number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                # strip the zero padding before converting, so that fields
                # such as "08" cannot be mistaken for (invalid) octal numbers
                numbers = fields.map { |f| Integer(f.sub(/\A0+(?=\d)/, '')) }
                numbers[0] += 1900 if numbers[0] < 100
                Time.utc(*numbers)
        end
end
109
110 module RCS
# return a copy of arg without its final ;, if it has one
def RCS.clean(arg)
        arg.end_with?(';') ? arg[0...-1] : arg.dup
end
115
# Undo the RCS quoting of an @-delimited string: strip the opening and the
# closing @, and turn each doubled @@ into a single @.
#
# arg can be a String, or an Array of Strings holding the consecutive lines
# of one quoted text. The result is a new object of the same kind; the
# argument and the strings it holds are left untouched.
#
# Raises RuntimeError if an Array does not start and end with an @, or if
# arg is neither an Array nor a String.
def RCS.sanitize(arg)
        case arg
        when Array
                raise 'malformed first line' unless arg.first and arg.first[0,1] == '@'
                raise 'malformed last line' unless arg.last[-1,1] == '@'
                # copy each line: the delimiters are stripped in place
                # below, and the caller's strings must not change
                ret = arg.map { |l| l.dup }
                ret.first.sub!(/\A@/,'')
                ret.last.sub!(/@\z/,'')
                ret.map { |l| l.gsub('@@','@') }
        when String
                arg.chomp('@').sub(/\A@/,'').gsub('@@','@')
        else
                raise "cannot sanitize #{arg.class} objects"
        end
end
132
# RCS.clean followed by RCS.sanitize: drop the final ; first, then undo
# the @ quoting of what is left
def RCS.at_clean(arg)
        cleaned = RCS.clean(arg)
        RCS.sanitize(cleaned)
end
137
# Return the mark number associated with key. The first key ever asked
# for gets 1, each new key gets the next integer, and asking again for a
# known key returns the number it was given the first time.
def RCS.mark(key)
        @@marks ||= {}
        @@marks[key] = @@marks.length + 1 unless @@marks.key? key
        @@marks[key]
end
146
# Mark number of the blob holding revision rev of file: the mark is
# keyed on the [file, rev] pair
def RCS.blob(file, rev)
        blob_key = [file, rev]
        RCS.mark(blob_key)
end
150
# Mark number of a commit: the given commit key is used as mark key as is
def RCS.commit(commit)
        commit_key = commit
        RCS.mark(commit_key)
end
154
155         class File
156                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
157                 def initialize(fname, executable)
158                         @fname = fname.dup
159                         @head = nil
160                         @comment = nil
161                         @desc = []
162                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
163                         @mode = executable ? '755' : '644'
164                 end
165
166                 def has_revision?(rev)
167                         @revision.has_key?(rev) and not @revision[rev].author.nil?
168                 end
169
170                 def export_commits(opts={})
171                         counter = 0
172                         exported = []
173                         until @revision.empty?
174                                 counter += 1
175
176                                 # a string sort is a very good candidate for
177                                 # export order, getting a miss only for
178                                 # multi-digit revision components
179                                 keys = @revision.keys.sort
180
181                                 STDERR.puts "commit export loop ##{counter}"
182                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
183                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
184
185                                 keys.each do |key|
186                                         rev = @revision[key]
187                                         # the parent commit is rev.next if we're on the
188                                         # master branch (rev.branch is nil) or
189                                         # rev.diff_base otherwise
190                                         from = rev.branch.nil? ? rev.next : rev.diff_base
191                                         # A commit can only be exported if it has no
192                                         # parent, or if the parent has been exported
193                                         # already. Skip this commit otherwise
194                                         if from and not exported.include? from
195                                                 next
196                                         end
197
198                                         branch = rev.branch || 'master'
199                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
200                                         date = "#{rev.date.tv_sec} +0000"
201                                         log = String.new
202                                         if opts[:log_filename]
203                                                 log << @fname << ": "
204                                         end
205                                         log << rev.log.join
206
207                                         puts "commit refs/heads/#{branch}"
208                                         puts "mark :#{RCS.commit key}"
209                                         puts "committer #{author} #{date}"
210                                         puts "data #{log.length}"
211                                         puts log unless log.empty?
212                                         puts "from :#{RCS.commit from}" if rev.branch_point
213                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
214
215                                         # TODO FIXME this *should* be safe, in
216                                         # that it should not unduly move
217                                         # branches back in time, but I'm not
218                                         # 100% sure ...
219                                         rev.branches.each do |sym|
220                                                 puts "reset refs/heads/#{sym}"
221                                                 puts "from :#{RCS.commit key}"
222                                         end
223                                         rev.symbols.each do |sym|
224                                                 puts "reset refs/tags/#{sym}"
225                                                 puts "from :#{RCS.commit key}"
226                                         end
227                                         if opts[:tag_each_rev]
228                                                 puts "reset refs/tags/#{key}"
229                                                 puts "from :#{RCS.commit key}"
230                                         end
231
232                                         exported.push key
233                                 end
234                                 exported.each { |k| @revision.delete(k) }
235                         end
236                 end
237         end
238
# One revision of an RCS file: its metadata (author, date, state, log,
# symbols), its full text, and its links to the neighbouring revisions
# (next, diff_base, branches, branch, branch_point).
class Revision
        attr_accessor :rev, :author, :state, :next
        attr_accessor :branches, :log, :text, :symbols
        attr_accessor :branch, :diff_base, :branch_point
        attr_reader   :date

        # file is the object this revision belongs to (its fname is used
        # to identify the blob), rev the revision number.
        def initialize(file, rev)
                @file = file
                @rev = rev
                @author = nil
                @date = nil
                @state = nil
                @next = nil
                @branches = Set.new
                @branch = nil
                @branch_point = nil
                @diff_base = nil
                @log = []
                @text = []
                @symbols = Set.new
        end

        # The date is assigned as an RCS date string and stored as a Time
        def date=(str)
                @date = Time.rcs(str)
        end

        # Return the fast-import blob command holding the text of this
        # revision, marked with the blob mark of this file and revision.
        def blob
                str = @text.join('')
                # the data size is a byte count, which differs from the
                # number of characters for multi-byte text
                size = str.respond_to?(:bytesize) ? str.bytesize : str.length
                "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{size}\n#{str}\n"
        end
end
270
# Parse the RCS file rcsfile and return the RCS::File describing it, which
# is named fname and is flagged executable if rcsfile is.
#
# The file is read line by line, in binary mode, by a state machine whose
# states are kept on a stack (status): a state is pushed when a construct
# begins and popped when it ends. Quoted texts are collected by the
# :read_lines state; the line that ends the text is then replayed (redo) for
# the state below it (:desc, :log, :head or :diff), which stores the lines
# collected.
#
# The text of every revision is rebuilt while parsing (the head revision is
# stored in full, the other ones as edit scripts against their diff base)
# and written on standard output as a fast-import blob right away.
#
# Once the file has been read, the symbols that name a branch, i.e. a
# revision number that has symbols but no date, are turned into branches of
# the highest revision whose number starts with that branch number.
#
# Raises RuntimeError on commands, texts or diffs that cannot be
# understood, and on branch symbols that cannot be resolved.
def RCS.parse(fname, rcsfile)
        rcs = RCS::File.new(fname, ::File.executable?(rcsfile))

        ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
                status = [:basic]
                rev = nil
                lines = []
                difflines = []
                file.each_line do |line|
                        case status.last
                        when :basic
                                command, args = line.split($;,2)
                                next if command.empty?

                                if command.chomp!(';')
                                        STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
                                        next
                                end

                                case command
                                when 'head'
                                        rcs.head = RCS.clean(args.chomp)
                                when 'symbols'
                                        status.push :symbols
                                        next if args.empty?
                                        # the symbols can begin on the same
                                        # line as the command
                                        line = args; redo
                                when 'comment'
                                        rcs.comment = RCS.at_clean(args.chomp)
                                when /^[0-9.]+$/
                                        # a revision number: the first time
                                        # it introduces the metadata of the
                                        # revision, the second time its log
                                        # and text
                                        rev = command.dup
                                        if rcs.has_revision?(rev)
                                                status.push :revision_data
                                        else
                                                status.push :new_revision
                                        end
                                when 'desc'
                                        status.push :desc
                                        lines.clear
                                        status.push :read_lines
                                when 'branch', 'access', 'locks', 'expand'
                                        STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
                                        status.push :skipping_lines
                                        next if args.empty?
                                        line = args; redo
                                else
                                        raise "Unknown command #{command.inspect}"
                                end
                        when :skipping_lines
                                status.pop if line.strip.chomp!(';')
                        when :symbols
                                # we can have multiple symbols per line
                                pairs = line.strip.split($;)
                                pairs.each do |pair|
                                        # the revision of the symbol has a
                                        # name of its own, so that the rev
                                        # of the enclosing scope is left alone
                                        sym, sym_rev = pair.strip.split(':',2);
                                        if sym_rev
                                                status.pop if sym_rev.chomp!(';')
                                                rcs.revision[sym_rev].symbols << sym
                                        else
                                                status.pop
                                        end
                                end
                        when :desc
                                rcs.desc.replace lines.dup
                                status.pop
                        when :read_lines
                                # we sanitize lines as we read them

                                actual_line = line.dup

                                # the first line must begin with a @, which we strip
                                if lines.empty?
                                        ats = line.match(/^@+/)
                                        raise 'malformed line' unless ats
                                        actual_line.replace line.sub(/^@/,'')
                                end

                                # if the line ends with an ODD number of @, it's the
                                # last line -- we work on actual_line so that content
                                # such as @\n or @ work correctly (they would be
                                # encoded respectively as ['@@@\n','@\n'] and
                                # ['@@@@\n']
                                ats = actual_line.chomp.match(/@+$/)
                                if nomore = (ats && Regexp.last_match(0).length.odd?)
                                        actual_line.replace actual_line.chomp.sub(/@$/,'')
                                end
                                lines << actual_line.gsub('@@','@')
                                if nomore
                                        status.pop
                                        redo
                                end
                        when :new_revision
                                case line.chomp
                                when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
                                        rcs.revision[rev].date = $1
                                        rcs.revision[rev].author = $2
                                        rcs.revision[rev].state = $3
                                when 'branches'
                                        status.push :branches
                                when /branches\s*;/
                                        next
                                when /^next\s+(\S+)?;$/
                                        nxt = rcs.revision[rev].next = $1
                                        next unless nxt
                                        raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                        rcs.revision[nxt].diff_base = rev
                                        rcs.revision[nxt].branch = rcs.revision[rev].branch
                                else
                                        status.pop
                                end
                        when :branches
                                candidate = line.split(';',2)
                                branch = candidate.first.strip
                                rcs.revision[rev].branches << branch
                                raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                rcs.revision[branch].diff_base = rev
                                # we drop the last number from the branch name
                                rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                rcs.revision[branch].branch_point = rev
                                status.pop if candidate.length > 1
                        when :revision_data
                                case line.chomp
                                when 'log'
                                        status.push :log
                                        lines.clear
                                        status.push :read_lines
                                when 'text'
                                        if rev == rcs.head
                                                status.push :head
                                        else
                                                status.push :diff
                                        end
                                        lines.clear
                                        status.push :read_lines
                                else
                                        status.pop
                                end
                        when :log
                                rcs.revision[rev].log.replace lines.dup
                                status.pop
                        when :head
                                rcs.revision[rev].text.replace lines.dup
                                puts rcs.revision[rev].blob
                                status.pop
                        when :diff
                                difflines.replace lines.dup
                                difflines.pop if difflines.last.empty?
                                base = rcs.revision[rev].diff_base
                                unless rcs.revision[base].text
                                        pp rcs
                                        puts rev, base
                                        raise 'no diff base!'
                                end
                                # deep copy; each line of the base text gets an
                                # array of its own, to which the lines added
                                # before it are prepended
                                buffer = []
                                rcs.revision[base].text.each { |l| buffer << [l.dup] }

                                adding = false
                                index = nil
                                count = nil

                                while l = difflines.shift
                                        if adding
                                                raise 'negative index during insertion' if index < 0
                                                raise 'negative count during insertion' if count < 0
                                                adding << l
                                                count -= 1
                                                # collected all the lines, put the before
                                                unless count > 0
                                                        unless buffer[index]
                                                                buffer[index] = []
                                                        end
                                                        buffer[index].unshift(*adding)
                                                        adding = false
                                                end
                                                next
                                        end

                                        l.chomp!
                                        raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
                                        diff_cmd = $1.intern
                                        index = $2.to_i
                                        count = $3.to_i
                                        case diff_cmd
                                        when :d
                                                # for deletion, index 1 is the first index, so the Ruby
                                                # index is one less than the diff one
                                                index -= 1
                                                # we replace them with empty string so that 'a' commands
                                                # referring to the same line work properly
                                                while count > 0
                                                        buffer[index].clear
                                                        index += 1
                                                        count -= 1
                                                end
                                        when :a
                                                # addition will prepend the appropriate lines
                                                # to the given index, and in this case Ruby
                                                # and diff indices are the same
                                                adding = []
                                        end
                                end

                                # turn the buffer into an array of lines, deleting the empty ones
                                buffer.delete_if { |l| l.empty? }
                                buffer.flatten!

                                rcs.revision[rev].text = buffer
                                puts rcs.revision[rev].blob
                                status.pop
                        else
                                raise "Unknown status #{status.last}"
                        end
                end
        end

        # clean up the symbols/branches: look for revisions that have
        # one or more symbols but no dates, and make them into
        # branches, pointing to the highest commit with that key
        branches = []
        keys = rcs.revision.keys
        rcs.revision.each do |key, rev|
                if rev.date.nil? and not rev.symbols.empty?
                        # the dots of the branch number must match
                        # literally
                        prefix = /^#{Regexp.escape(key)}\./
                        # compare the revision numbers component by
                        # component, as numbers: a string sort would
                        # place 1.1.2.9 after 1.1.2.10
                        top = keys.select { |k| k.match(prefix) }.sort_by { |k|
                                k.split('.').map { |n| n.to_i }
                        }.last
                        # without this check the lookup below would add a
                        # nil revision to the hash being iterated
                        raise "unhandled complex branch structure met: #{rev.inspect} refers to no revision" if top.nil?
                        tr = rcs.revision[top]
                        raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
                        tr.branches |= rev.symbols
                        branches << key
                end
        end
        branches.each { |k| rcs.revision.delete k }

        return rcs
end
504
# The set of files touched by a commit: maps each file to the revision of
# it that the commit holds.
class Tree
        # commit is the object that gets the warnings (warn_about) about
        # files added more than once
        def initialize(commit)
                @commit = commit
                @files = {}
        end

        # Add all the files of tree to this one. The additions are done on
        # a copy of the file list, which replaces the current one only
        # after every addition succeeded: if one of them raises, this tree
        # is left as it was.
        def merge!(tree)
                staged = @files.dup
                tree.each do |rcs, rev|
                        add(rcs, rev, staged)
                end
                @files.replace staged
        end

        # Record revision rev of the file rcs in file_list (the files of
        # this tree, by default). If the file is there already, the new
        # revision replaces the old one with a warning when both have the
        # same text, and a RuntimeError is raised otherwise.
        def add(rcs, rev, file_list=@files)
                if file_list.key? rcs
                        prev = file_list[rcs]
                        # the logs are spelled out in the message only
                        # when they differ
                        str = if prev.log == rev.log
                                "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
                        else
                                "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
                        end
                        raise str if prev.text != rev.text
                        @commit.warn_about str
                end
                file_list[rcs] = rev
        end

        # Iterate over the (file, revision) pairs
        def each(&block)
                @files.each(&block)
        end

        # The fast-import file commands of the tree, one per file: a
        # deletion for the revisions in the dead state, a modification
        # pointing to the blob of the revision for the other ones
        def to_a
                @files.map do |rcs, rev|
                        if rev.state.downcase == "dead"
                                "D #{rcs.fname}"
                        else
                                "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
                        end
                end
        end

        # Names of the files in the tree
        def filenames
                @files.keys.map { |rcs| rcs.fname }
        end

        # The file commands, one per line
        def to_s
                to_a.join("\n")
        end
end
560
# A commit of the multi-file export: it starts from one revision of one
# file, and can absorb other commits (merge!), which add their files and
# symbols and widen the range of dates covered.
class Commit
        attr_accessor :date, :log, :symbols, :author, :branch
        attr_accessor :tree
        attr_accessor :min_date, :max_date

        # Create the commit for revision rev of the file rcs.
        # Raises NoBranchSupport if the revision is on a branch.
        def initialize(rcs, rev)
                raise NoBranchSupport if rev.branch
                self.date = rev.date.dup
                self.min_date = self.max_date = self.date
                self.log = rev.log.dup
                self.symbols = rev.symbols.dup
                self.author = rev.author
                self.branch = rev.branch

                self.tree = Tree.new self
                self.tree.add rcs, rev
        end

        def to_a
                [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
        end

        # Print a warning that mentions the date of this commit
        def warn_about(str)
                warn str + " for commit on #{self.date}"
        end

        # Sort by date and then by number of symbols
        def <=>(other)
                ds = self.date <=> other.date
                if ds != 0
                        return ds
                else
                        return self.symbols.length <=> other.symbols.length
                end
        end

        # Absorb commit: its files are merged into the tree, its symbols
        # are added, and min_date/max_date are widened to cover its dates.
        # The tree is merged first, so that a failure there leaves the
        # dates and symbols untouched.
        def merge!(commit)
                self.tree.merge! commit.tree
                if commit.max_date > self.max_date
                        self.max_date = commit.max_date
                end
                if commit.min_date < self.min_date
                        self.min_date = commit.min_date
                end
                self.symbols.merge commit.symbols
        end

        # Write the commit on standard output in fast-import format,
        # followed by a tag reset for each of its symbols. The commit is
        # marked with a key derived from its date in seconds.
        #
        # The only option used is :authors, a hash mapping RCS usernames
        # to "Name <email>"; an author that is not in it (or any author,
        # if the option is missing) is exported as "username <empty>".
        def export(opts={})
                authors = opts[:authors] || {}
                xbranch = self.branch || 'master'
                xauthor = authors[self.author] || "#{self.author} <empty>"
                xlog = self.log.join
                # fast-import wants the size of the data in bytes, which
                # is not the number of characters for multi-byte logs
                xlog_size = xlog.respond_to?(:bytesize) ? xlog.bytesize : xlog.length
                numdate = self.date.tv_sec
                xdate = "#{numdate} +0000"
                key = numdate.to_s

                puts "commit refs/heads/#{xbranch}"
                puts "mark :#{RCS.commit key}"
                puts "committer #{xauthor} #{xdate}"
                puts "data #{xlog_size}"
                puts xlog unless xlog.empty?
                # TODO branching support for multi-file export
                # puts "from :#{RCS.commit from}" if self.branch_point
                puts self.tree.to_s

                # TODO branching support for multi-file export
                # rev.branches.each do |sym|
                #       puts "reset refs/heads/#{sym}"
                #       puts "from :#{RCS.commit key}"
                # end

                self.symbols.each do |sym|
                        puts "reset refs/tags/#{sym}"
                        puts "from :#{RCS.commit key}"
                end

        end
end
637 end
638
require 'getoptlong'

# Command-line options. Every option handled in the opts.each loop below
# has to be declared here as well: GetoptLong raises on undeclared options
# before the loop gets a chance to see them.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags, stored as :tag_fuzz (presumably in seconds
        # like the commit fuzz; it is never allowed below the commit fuzz)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = {
        :authors => Hash.new,
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Read config options: the git configuration provides the defaults, the
# command line (parsed further down) overrides them.
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# boolean settings that are off unless git config says 'true'
parse_options[:tag_each_rev] = (`git config --bool rcs.tageachrev`.chomp == 'true')
parse_options[:log_filename] = (`git config --bool rcs.logfilename`.chomp == 'true')

# integer settings keep the built-in default when git config has no value
fuzz = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?

fuzz = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?

# the symbol check is on unless git config explicitly says 'false'
parse_options[:symbol_check] = (`git config --bool rcs.symbolcheck`.chomp != 'false')

opts.each do |opt, arg|
        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when '--symbol-check'
                parse_options[:symbol_check] = true
        when '--no-symbol-check'
                parse_options[:symbol_check] = false
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                # explicitly turn off a value that may have been enabled
                # through rcs.tageachrev in the git configuration
                parse_options[:tag_each_rev] = false
        when '--log-filename'
                parse_options[:log_filename] = true
        when '--no-log-filename'
                # explicitly turn off a value that may have been enabled
                # through rcs.logfilename in the git configuration
                parse_options[:log_filename] = false
        when '--skip-branches'
                parse_options[:skip_branches] = true
        when ''
                # with RETURN_IN_ORDER, non-option arguments are reported
                # with an empty option name
                file_list << arg
        when '--help'
                usage
                exit
        end
end

# the tag fuzz can never be smaller than the commit fuzz; this also
# replaces the -1 placeholder when no tag fuzz was specified
if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
        parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
end

require 'etc'

# login name of the user running the export
user = Etc.getlogin || ENV['USER']
744
# steal username/email data from other init files that may contain the
# information
#
# The first file that is readable and matches its pattern wins: the matched
# value is stored as authors[user]. Nothing is stored if no file matches.
#
# The authors map and the user name are passed in explicitly because a
# method body cannot see the local variables of the top-level script.
#
# In each pattern the quote group always takes part in the match (it may
# match the empty string), so the closing backreference also works for
# values that are not quoted at all.
def steal_username(authors, user)
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                next unless File.readable?(file)
                next unless File.read(file) =~ rx
                authors[user] = Regexp.last_match(idx).strip
                break
        end
end

# If the authors map has no entry for the current user, try to build one
# from the environment, the git configuration and the passwd database,
# falling back to other tools' configuration files.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # plain reassignment instead of String#replace: strings coming from
        # ENV must not be modified in place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        name = Etc.getpwnam(user).gecos.to_s
                rescue ArgumentError
                        # no passwd entry for this user, keep the name empty
                end
        end

        if name.empty?
                # couldn't find a name, try to steal data from other sources
                steal_username(parse_options[:authors], user)
        else
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                if email.empty?
                        # couldn't find an email, try to steal data too
                        steal_username(parse_options[:authors], user)
                else
                        # we got both a name and email, fill the info
                        parse_options[:authors][user] = "#{name} <#{email}>"
                end
        end
end
785
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status of the script, set to non-zero when an argument is skipped
status = 0

# Turn every command-line argument into parsed RCS files, collected in rcs
rcs = []
file_list.each do |arg|
        # A path that does not exist is treated as a file name: the working
        # file may simply not be checked out while its RCS file exists.
        # (File.ftype raises Errno::ENOENT on missing paths.)
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # the argument is the RCS file itself
                        filename = File.basename(arg, SFX)
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                next # nothing to parse for this argument
                        end
                        rcsfile = arg.dup
                else
                        # the argument is the working file: look for its RCS
                        # file in the RCS subdirectory first, then next to it
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        next # nothing to parse for this argument
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip a final RCS path component if present; names
                        # that merely end in "RCS" are left alone
                        path.sub!(/(?:\A|\/)RCS\z/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # Exception on purpose: errors such as
                                # NotImplementedError are not StandardErrors.
                                # We only say which file failed and re-raise.
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
844
# Export stage. A single RCS file is exported revision by revision; with
# several files, one commit is created per revision and commits that look
# like the same change (same log, author and branch, close enough in time)
# are coalesced before being exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                raise unless parse_options[:skip_branches]
                                STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first. thisindex tracks the
        # position of the commit being examined; merged commits are only ever
        # deleted from positions after it, so the index stays valid.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                cfiles = Set.new c.tree.filenames
                # files touched by following commits we are not merging with
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        kfiles = Set.new k.tree.filenames

                        skipthis = (c.log != k.log or c.author != k.author or c.branch != k.branch)

                        # Only look at the symbols of candidates that are still in the
                        # running: the messages below state that both commits share the
                        # same log, which is not true for an already rejected candidate.
                        unless skipthis or c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in date order, so take the absolute distance
                                # between the two to suggest a usable (non-negative) fuzz
                                gap = (k.date - c.date).abs
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status