Set standard output in binary mode
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--usage` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
# Error raised when a revision that lives on an RCS branch is handed to code
# that can only deal with the main line of history.
class NoBranchSupport < NotImplementedError
end
30
# Ruby releases older than 1.8.7 lack Integer#odd?; define it there and
# leave newer interpreters untouched.
unless 1.respond_to?(:odd?)
  class Integer
    # True when the receiver is not divisible by two.
    def odd?
      (self % 2) != 0
    end
  end
end
40
# Put standard output in binary mode. git fast-import doesn't like Windows
# line endings; in binary mode every line is terminated by a single 0x0A on
# Windows too (it would otherwise be expanded to 0x0D 0x0A).
STDOUT.binmode
45
# RCS fast-export version string.
#
# It is `git` in the repository; packagers may override it, e.g. with the
# latest tag, a git description or a marker for custom packager patches.
#
# While the value is `git`, the `version` method makes a little effort to
# find out which commit the script is at.
RFE_VERSION = 'git'
56
# Print the program name and its version on standard error.
#
# When RFE_VERSION is "git", the current directory is changed to the one
# holding the script (following one level of symlink) and, if that directory
# has a `.git` entry, the branch, abbreviated hash and date of the checked-out
# commit are appended. A `*` is added to the branch name when `git diff`
# reports local modifications. Any failure while gathering this information
# degrades to " (no info)".
#
# NOTE: the Dir.chdir is not undone, so the working directory stays changed
# after this method returns.
def version
  if RFE_VERSION == "git"
    nolinkfile = File.readlink(__FILE__) rescue __FILE__
    Dir.chdir File.expand_path File.dirname nolinkfile

    info = ""
    # File.exist? rather than File.exists?: the latter was deprecated for
    # years and is gone from recent Ruby releases.
    if File.exist? '.git'
      begin
        git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
        hash = git_out.first.split.first
        branch = git_out.first.split('(').last.chomp(')')
        date = git_out.last.split.first
        # only the exit status matters here, it is read back through $?
        `git diff --no-ext-diff --quiet --exit-code`
        branch << "*" unless $?.success?
        info = " [#{branch}] #{hash} (#{date})"
      rescue
        info = " (no info)"
      end
    end

    STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
  else
    STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
  end
end
79
# Print the command-line help (synopsis, description, options and the
# matching git configuration keys) on standard error.
def usage
  STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
130
# Tell the user, on standard error, that +arg+ could not be found.
def not_found(arg)
  STDERR.puts format('Could not find %s', arg)
end
134
# Load an authors file and return a hash that maps usernames to author names
# & emails.
#
# Every mapping line has the form `username = Full Name <email>`; it is split
# on the first `=` and both halves are stripped. Blank lines are ignored and
# lines without `=` are reported and skipped, so one stray line no longer
# discards all the mappings that follow it. A username that appears more than
# once is reported and the last definition wins.
#
# fn:: path of the authors file (expanded with File.expand_path)
#
# Returns the hash built so far; it is empty when the file cannot be opened.
# Problems are reported on standard error and never raised to the caller.
def load_authors_file(fn)
  hash = {}
  begin
    File.open(File.expand_path(fn)) do |io|
      io.each_line do |line|
        next if line.strip.empty?
        uname, author = line.split('=', 2)
        if author.nil?
          STDERR.puts "Skipping malformed line #{io.lineno} in authors file #{fn}: #{line.strip}"
          next
        end
        uname.strip!
        author.strip!
        STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
        hash[uname] = author
      end
    end
  rescue SystemCallError
    # the file is missing or cannot be opened/read
    not_found(fn)
  rescue StandardError => e
    # anything else is a content problem, not a missing file
    STDERR.puts "Could not read #{fn}: #{e.message}"
  end
  return hash
end
153
# Report a (recoverable) error on standard error: the first line carries the
# message +msg+, the second, tab-indented, the +action+ being taken.
def alert(msg, action)
  STDERR.puts "ERROR:\t#{msg}", "\t#{action}"
end
159
class Time
  # Build a UTC Time from an RCS date string.
  #
  # string:: six dot-separated fields, `Y.mm.DD.hh.mm.ss`
  #
  # RCS writes only the last two digits of the year for 1900 through 1999
  # and all the digits afterwards. Time.utc on Ruby 1.9 and later takes a
  # one- or two-digit year literally (year 93, not 1993), so short years are
  # expanded here before the conversion.
  #
  # Raises ArgumentError unless the string has exactly six fields.
  def self.rcs(string)
    fields = string.split('.')
    raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
    fields[0] = "19#{fields[0]}" if fields[0].length <= 2
    Time.utc(*fields)
  end
end
167
168 module RCS
# Return a copy of +arg+ with one trailing `;`, if there is one, removed.
def self.clean(arg)
  arg.end_with?(';') ? arg[0...-1] : arg.dup
end
173
# Strip the first and last @, and de-double @@s.
#
# arg:: either a String or an Array of lines forming a single @-delimited
#       RCS string
#
# Returns a new String or a new Array of new Strings. The argument and the
# strings it holds are left untouched: the lines are copied before the
# delimiters are removed, whereas a shallow copy of the array would let the
# in-place edits reach the caller's first and last lines.
#
# Raises RuntimeError when an Array does not start and end with @, or when
# +arg+ is neither a String nor an Array.
def self.sanitize(arg)
  case arg
  when Array
    raise 'malformed first line' unless arg.first.to_s[0,1] == '@'
    raise 'malformed last line' unless arg.last.to_s[-1,1] == '@'
    ret = arg.map { |l| l.dup }
    # \A and \z anchor to the very start/end of the string, so a @ at the
    # start of an embedded line can never be mistaken for the delimiter
    ret.first.sub!(/\A@/,'')
    ret.last.sub!(/@\z/,'')
    ret.map { |l| l.gsub('@@','@') }
  when String
    arg.chomp('@').sub(/\A@/,'').gsub('@@','@')
  else
    raise "cannot sanitize #{arg.class}"
  end
end
190
# Drop the optional trailing `;` from +arg+ and then unwrap the @-delimited
# string that is left.
def self.at_clean(arg)
  without_semicolon = RCS.clean(arg)
  RCS.sanitize(without_semicolon)
end
195
# Return the mark number associated with +key+, handing out the next free
# number (counting from 1) the first time a key is seen.
def self.mark(key)
  @@marks ||= {}
  return @@marks[key] if @@marks.key?(key)
  @@marks[key] = @@marks.length + 1
end
204
# Mark number of the blob identified by the pair (+file+, +rev+).
def self.blob(file, rev)
  blob_key = [file, rev]
  RCS.mark(blob_key)
end
208
# Mark number of +commit+; the commit itself is the key.
def self.commit(commit)
  mark(commit)
end
212
# One RCS-tracked file: its head revision, comment, description, git file
# mode and the table of its revisions.
class File
  attr_accessor :head, :comment, :desc, :revision, :fname, :mode

  # fname::      name the file is exported under (copied)
  # executable:: truthy to export the file with mode 755 instead of 644
  def initialize(fname, executable)
    @fname = fname.dup
    @head = nil
    @comment = nil
    @desc = []
    # looking up an unknown revision creates an empty Revision for it
    @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
    @mode = executable ? '755' : '644'
  end

  # True when +rev+ is in the revision table and already has an author.
  def has_revision?(rev)
    @revision.has_key?(rev) and not @revision[rev].author.nil?
  end

  # Write one fast-import commit per revision on standard output and remove
  # the exported revisions from the revision table. Progress is reported on
  # standard error.
  #
  # opts[:authors]::      hash mapping RCS usernames to `Name <email>`;
  #                       missing usernames become `username <empty>`
  # opts[:log_filename]:: prepend `fname: ` to every commit log
  # opts[:tag_each_rev]:: also create a tag named after each revision
  #
  # Raises RuntimeError when a whole pass exports nothing, i.e. the remaining
  # revisions all wait for a parent that will never be exported; looping
  # again could not make any progress.
  def export_commits(opts={})
    authors = opts[:authors] || {}
    counter = 0
    exported = []
    # same content as exported, kept for fast membership tests
    done = Set.new
    until @revision.empty?
      counter += 1

      # a string sort is a very good candidate for
      # export order, getting a miss only for
      # multi-digit revision components
      keys = @revision.keys.sort

      STDERR.puts "commit export loop ##{counter}"
      STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
      STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

      fresh = []
      keys.each do |key|
        rev = @revision[key]
        # the parent commit is rev.next if we're on the
        # master branch (rev.branch is nil) or
        # rev.diff_base otherwise
        from = rev.branch.nil? ? rev.next : rev.diff_base
        # A commit can only be exported if it has no
        # parent, or if the parent has been exported
        # already. Skip this commit otherwise
        next if from and not done.include?(from)

        branch = rev.branch || 'master'
        author = authors[rev.author] || "#{rev.author} <empty>"
        date = "#{rev.date.tv_sec} +0000"
        # the log is assembled as raw bytes: the file name is re-tagged as
        # binary so it can be joined with the binary log text, and the size
        # announced below is a byte count
        log = String.new
        if opts[:log_filename]
          log << @fname.dup.force_encoding(Encoding::BINARY) << ": "
        end
        log << rev.log.join

        puts "commit refs/heads/#{branch}"
        puts "mark :#{RCS.commit key}"
        puts "committer #{author} #{date}"
        puts "data #{log.bytesize}"
        puts log unless log.empty?
        puts "from :#{RCS.commit from}" if from
        puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"

        # TODO FIXME this *should* be safe, in
        # that it should not unduly move
        # branches back in time, but I'm not
        # 100% sure ...
        rev.branches.each do |sym|
          puts "reset refs/heads/#{sym}"
          puts "from :#{RCS.commit key}"
        end
        rev.symbols.each do |sym|
          puts "reset refs/tags/#{sym}"
          puts "from :#{RCS.commit key}"
        end
        if opts[:tag_each_rev]
          puts "reset refs/tags/#{key}"
          puts "from :#{RCS.commit key}"
        end

        exported.push key
        done << key
        fresh << key
      end

      if fresh.empty?
        raise "cannot export revisions #{keys.join(', ')} of #{@fname}: their parent revisions are missing or circular"
      end
      fresh.each { |k| @revision.delete(k) }
    end
  end
end
296
# A single revision of an RCS::File: metadata, log, text and the symbols and
# branch names attached to it.
class Revision
  attr_accessor :rev, :author, :state, :next
  attr_accessor :branches, :log, :text, :symbols
  attr_accessor :branch, :diff_base, :branch_point
  attr_reader   :date

  # file:: the object this revision belongs to (its fname is used by #blob)
  # rev::  the revision number, e.g. "1.2"
  def initialize(file, rev)
    @file = file
    @rev = rev
    @author = nil
    @date = nil
    @state = nil
    @next = nil
    @branches = Set.new
    @branch = nil
    @branch_point = nil
    @diff_base = nil
    @log = []
    @text = []
    @symbols = Set.new
  end

  # Set the date from an RCS date string (converted with Time.rcs).
  def date=(str)
    @date = Time.rcs(str)
  end

  # Return the fast-import `blob` command holding the text of this revision.
  # The size after `data` is a byte count (String#bytesize), so the header
  # stays correct even if the text is not a single-byte string.
  def blob
    str = @text.join('')
    "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.bytesize}\n#{str}\n"
  end
end
328
# Parse the RCS file +rcsfile+ and return the resulting RCS::File, which is
# given the name +fname+ and is marked executable when +rcsfile+ is.
#
# The file is read line by line in binary mode by a small state machine;
# `status` is a stack of states and `redo` is used to process the current
# line again in the state that has just been uncovered. As soon as the text
# of a revision is known (the head verbatim, every other revision by applying
# its diff to the text of its diff base) the matching fast-import blob is
# written on standard output.
#
# After parsing, revisions that have symbols but no date are taken to be
# branch names: their symbols are moved to the `branches` of the highest
# revision on that branch and the placeholder is dropped from the table.
#
# Raises RuntimeError on unknown commands, malformed @-strings or diffs and
# on branch structures that cannot be resolved.
def self.parse(fname, rcsfile)
  rcs = RCS::File.new(fname, ::File.executable?(rcsfile))

  ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
    status = [:basic]
    rev = nil
    lines = []
    difflines = []
    file.each_line do |line|
      case status.last
      when :basic
        command, args = line.split($;,2)
        next if command.empty?

        if command.chomp!(';')
          STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
          next
        end

        case command
        when 'head'
          rcs.head = RCS.clean(args.chomp)
        when 'symbols'
          status.push :symbols
          next if args.empty?
          line = args; redo
        when 'comment'
          rcs.comment = RCS.at_clean(args.chomp)
        when /^[0-9.]+$/
          rev = command.dup
          # the first time a revision number is met it introduces the
          # revision metadata, the second time its log and text
          if rcs.has_revision?(rev)
            status.push :revision_data
          else
            status.push :new_revision
          end
        when 'desc'
          status.push :desc
          lines.clear
          status.push :read_lines
        when 'branch', 'access', 'locks', 'expand'
          STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
          status.push :skipping_lines
          next if args.empty?
          line = args; redo
        else
          raise "Unknown command #{command.inspect}"
        end
      when :skipping_lines
        status.pop if line.strip.chomp!(';')
      when :symbols
        # we can have multiple symbols per line
        pairs = line.strip.split($;)
        pairs.each do |pair|
          sym, rev = pair.strip.split(':',2);
          if rev
            status.pop if rev.chomp!(';')
            rcs.revision[rev].symbols << sym
          else
            status.pop
          end
        end
      when :desc
        rcs.desc.replace lines.dup
        status.pop
      when :read_lines
        # we sanitize lines as we read them

        actual_line = line.dup

        # the first line must begin with a @, which we strip
        if lines.empty?
          ats = line.match(/^@+/)
          raise 'malformed line' unless ats
          actual_line.replace line.sub(/^@/,'')
        end

        # if the line ends with an ODD number of @, it's the
        # last line -- we work on actual_line so that content
        # such as @\n or @ work correctly (they would be
        # encoded respectively as ['@@@\n','@\n'] and
        # ['@@@@\n']
        ats = actual_line.chomp.match(/@+$/)
        if nomore = (ats && Regexp.last_match(0).length.odd?)
          actual_line.replace actual_line.chomp.sub(/@$/,'')
        end
        lines << actual_line.gsub('@@','@')
        if nomore
          status.pop
          redo
        end
      when :new_revision
        case line.chomp
        when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
          rcs.revision[rev].date = $1
          rcs.revision[rev].author = $2
          rcs.revision[rev].state = $3
        when /^branches\s*;/
          next
        when /^branches(?:\s+|$)/
          status.push :branches
          if line.index(';')
            line = line.sub(/^branches\s+/,'')
            redo
          end
        when /^next\s+(\S+)?;$/
          nxt = rcs.revision[rev].next = $1
          next unless nxt
          raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
          rcs.revision[nxt].diff_base = rev
          rcs.revision[nxt].branch = rcs.revision[rev].branch
        else
          status.pop
        end
      when :branches
        candidate = line.split(';',2)
        candidate.first.strip.split.each do |branch|
          raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
          rcs.revision[branch].diff_base = rev
          # we drop the last number from the branch name
          rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
          rcs.revision[branch].branch_point = rev
        end
        status.pop if candidate.length > 1
      when :revision_data
        case line.chomp
        when 'log'
          status.push :log
          lines.clear
          status.push :read_lines
        when 'text'
          if rev == rcs.head
            status.push :head
          else
            status.push :diff
          end
          lines.clear
          status.push :read_lines
        else
          status.pop
        end
      when :log
        rcs.revision[rev].log.replace lines.dup
        status.pop
      when :head
        rcs.revision[rev].text.replace lines.dup
        puts rcs.revision[rev].blob
        status.pop
      when :diff
        difflines.replace lines.dup
        difflines.pop if difflines.last.empty?
        if difflines.first.chomp.empty?
          alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
          difflines.shift
        end
        base = rcs.revision[rev].diff_base
        unless rcs.revision[base].text
          pp rcs
          puts rev, base
          raise 'no diff base!'
        end
        # deep copy: every line of the base text becomes a one-element
        # array, so that lines can be inserted before it or removed
        buffer = []
        rcs.revision[base].text.each { |l| buffer << [l.dup] }

        adding = false
        index = nil
        count = nil

        while l = difflines.shift
          if adding
            raise 'negative index during insertion' if index < 0
            raise 'negative count during insertion' if count < 0
            adding << l
            count -= 1
            # collected all the lines, put them before
            unless count > 0
              unless buffer[index]
                buffer[index] = []
              end
              buffer[index].unshift(*adding)
              adding = false
            end
            next
          end

          l.chomp!
          raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
          diff_cmd = $1.intern
          index = $2.to_i
          count = $3.to_i
          case diff_cmd
          when :d
            # for deletion, index 1 is the first index, so the Ruby
            # index is one less than the diff one
            index -= 1
            # we replace them with empty string so that 'a' commands
            # referring to the same line work properly
            while count > 0
              buffer[index].clear
              index += 1
              count -= 1
            end
          when :a
            # addition will prepend the appropriate lines
            # to the given index, and in this case Ruby
            # and diff indices are the same
            adding = []
          end
        end

        # turn the buffer into an array of lines, deleting the empty ones
        buffer.delete_if { |l| l.empty? }
        buffer.flatten!

        rcs.revision[rev].text = buffer
        puts rcs.revision[rev].blob
        status.pop
      else
        raise "Unknown status #{status.last}"
      end
    end
  end

  # clean up the symbols/branches: look for revisions that have
  # one or more symbols but no dates, and make them into
  # branches, pointing to the highest commit with that key
  branches = []
  keys = rcs.revision.keys
  rcs.revision.each do |key, rev|
    if rev.date.nil? and not rev.symbols.empty?
      # a literal prefix test: the dots of the key must not act as regexp
      # wildcards, or branch 1.1.1 would also claim revision 1.111.1.1
      prefix = "#{key}."
      on_branch = keys.select { |k| k.start_with?(prefix) }
      # compare the revision numbers component by component, as integers,
      # so that 1.1.1.10 ranks above 1.1.1.9
      top = on_branch.sort_by { |k| k.split('.').map { |c| c.to_i } }.last
      raise "unhandled complex branch structure met: #{rev.inspect} has no revisions" if top.nil?
      tr = rcs.revision[top]
      raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
      tr.branches |= rev.symbols
      branches << key
    end
  end
  branches.each { |k| rcs.revision.delete k }

  return rcs
end
570
# The set of files touched by a commit, stored as a mapping from each file
# object to the revision of that file the commit brings in.
class Tree
  # commit:: the owner of this tree; it receives `warn_about` calls when a
  #          file is added again with unchanged text
  def initialize(commit)
    @commit = commit
    @files = {}
  end

  # Add every entry of +tree+ to this one. The additions are staged on a
  # copy of the file table, which only replaces the real one when none of
  # them raised, so a failed merge leaves this tree as it was.
  def merge!(tree)
    staged = @files.dup
    tree.each do |rcs, rev|
      add(rcs, rev, staged)
    end
    @files.replace(staged)
  end

  # Record revision +rev+ for file +rcs+ in +file_list+ (this tree's own
  # table by default). When the file is already listed, a different text
  # raises a RuntimeError, while an identical text only makes the commit
  # warn about the duplicate.
  def add(rcs, rev, file_list=@files)
    if file_list.key?(rcs)
      prev = file_list[rcs]
      str = if prev.log == rev.log
              "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
            else
              "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
            end
      raise str if prev.text != rev.text
      @commit.warn_about str
    end
    file_list[rcs] = rev
  end

  # Iterate over the (file, revision) pairs.
  def each(&block)
    @files.each(&block)
  end

  # One fast-import file command per entry: `D` for revisions whose state
  # is "dead" (in any letter case), `M` with mode and blob mark otherwise.
  def to_a
    @files.map do |rcs, rev|
      if rev.state.downcase == "dead"
        "D #{rcs.fname}"
      else
        "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
      end
    end
  end

  # Names of all the files in the tree.
  def filenames
    @files.keys.map { |rcs| rcs.fname }
  end

  # The file commands of #to_a, one per line.
  def to_s
    to_a.join("\n")
  end
end
626
627         class Commit
628                 attr_accessor :date, :log, :symbols, :author, :branch
629                 attr_accessor :tree
630                 attr_accessor :min_date, :max_date
631                 def initialize(rcs, rev)
632                         raise NoBranchSupport if rev.branch
633                         self.date = rev.date.dup
634                         self.min_date = self.max_date = self.date
635                         self.log = rev.log.dup
636                         self.symbols = rev.symbols.dup
637                         self.author = rev.author
638                         self.branch = rev.branch
639
640                         self.tree = Tree.new self
641                         self.tree.add rcs, rev
642                 end
643
644                 def to_a
645                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
646                 end
647
648                 def warn_about(str)
649                         warn str + " for commit on #{self.date}"
650                 end
651
652                 # Sort by date and then by number of symbols
653                 def <=>(other)
654                         ds = self.date <=> other.date
655                         if ds != 0
656                                 return ds
657                         else
658                                 return self.symbols.length <=> other.symbols.length
659                         end
660                 end
661
662                 def merge!(commit)
663                         self.tree.merge! commit.tree
664                         if commit.max_date > self.max_date
665                                 self.max_date = commit.max_date
666                         end
667                         if commit.min_date < self.min_date
668                                 self.min_date = commit.min_date
669                         end
670                         self.symbols.merge commit.symbols
671                 end
672
673                 def export(opts={})
674                         xbranch = self.branch || 'master'
675                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
676                         xlog = self.log.join
677                         numdate = self.date.tv_sec
678                         xdate = "#{numdate} +0000"
679                         key = numdate.to_s
680
681                         puts "commit refs/heads/#{xbranch}"
682                         puts "mark :#{RCS.commit key}"
683                         puts "committer #{xauthor} #{xdate}"
684                         puts "data #{xlog.length}"
685                         puts xlog unless xlog.empty?
686                         # TODO branching support for multi-file export
687                         # puts "from :#{RCS.commit from}" if self.branch_point
688                         puts self.tree.to_s
689
690                         # TODO branching support for multi-file export
691                         # rev.branches.each do |sym|
692                         #       puts "reset refs/heads/#{sym}"
693                         #       puts "from :#{RCS.commit key}"
694                         # end
695
696                         self.symbols.each do |sym|
697                                 puts "reset refs/tags/#{sym}"
698                                 puts "from :#{RCS.commit key}"
699                         end
700
701                 end
702         end
703 end
704
require 'getoptlong'

# Command-line switches accepted by the script. Every switch handled by the
# option loop further down has to be declared here, or GetoptLong rejects it
# as an invalid option.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Fuzziness for tags (stored as the :tag_fuzz parse option and never
        # allowed to be smaller than the commit fuzz)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)
731
# Options are returned in command-line order, yet each of them applies to
# every file being exported.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Seed the options from the rcs.* keys of the git configuration; the command
# line is parsed afterwards and takes precedence

# every configured authors file is loaded, later ones override earlier ones
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# these two are off unless the configuration says 'true'
parse_options[:tag_each_rev] = `git config --bool rcs.tageachrev`.chomp == 'true'
parse_options[:log_filename] = `git config --bool rcs.logfilename`.chomp == 'true'

# the built-in fuzz values are kept when the keys are not configured
fuzz = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?

fuzz = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?

# the symbol check is on unless the configuration says 'false'
parse_options[:symbol_check] = `git config --bool rcs.symbolcheck`.chomp != 'false'
766
# Switches that do nothing but force a boolean parse option, each with the
# option key it sets and the value it stores
boolean_switches = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

opts.each do |opt, arg|
        if boolean_switches.key? opt
                setting, value = boolean_switches[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when '--authors-file'
                # warn about authors that were already known, then let the
                # new file win
                loaded = load_authors_file(arg)
                clashing = parse_options[:authors].keys & loaded.keys
                STDERR.puts "Authors file #{arg} redefines #{clashing.join(', ')}" unless clashing.empty?
                parse_options[:authors].merge!(loaded)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when ''
                # with RETURN_IN_ORDER anything that is not an option comes
                # back with an empty option name: these are the files to export
                file_list << arg
        when '--version'
                version
                exit
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never smaller than the commit fuzz
parse_options[:tag_fuzz] = [parse_options[:tag_fuzz], parse_options[:commit_fuzz]].max
812
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information.
#
# Returns the first value found (stripped of surrounding whitespace), or nil
# when none of the files is readable or has a matching setting. The value is
# returned rather than stored because a method body cannot see the top-level
# local variables (parse_options, user): the caller takes care of storing it.
def steal_username
        [
                # the user's .hgrc file for a username field
                # (the quote group is written as (["']?) so that it always
                # takes part in the match: a backreference to an optional
                # group that did not match fails, which would make unquoted
                # values impossible to match)
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# If the authors map has no entry for the current user, try to build one
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # Strings coming from ENV are never modified in place: a variable that
        # is set but empty would otherwise be mutated directly
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                # last resort for the name: the passwd entry of the user, if any
                passwd = begin
                        Etc.getpwnam(user)
                rescue ArgumentError
                        # the login name is not in the passwd database
                        nil
                end
                name = passwd.gecos.to_s if passwd and passwd.respond_to?(:gecos)
        end

        identity = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                # we got both a name and email, fill the info
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other sources
        identity ||= steal_username

        parse_options[:authors][user] = identity if identity
end
857
if file_list.empty?
        usage
        exit 1
end

SFX = ',v'

# becomes non-zero as soon as one of the arguments cannot be handled
status = 0

# Turn the command-line arguments into parsed RCS files. An argument can be
# an RCS file (name ending in ,v), a working file whose RCS file is looked
# for in the RCS subdirectory and then next to it, or a directory which is
# searched recursively for RCS files.
rcs = []
file_list.each do |arg|
        # A path that does not exist is handled as a file name: the working
        # file does not need to be there as long as its RCS file is found,
        # and File.ftype would raise on a missing path
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                # nothing to parse for this argument
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        # nothing to parse for this argument
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # say which file (and input line) we were at,
                                # then let the error through untouched
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
918
if rcs.length == 1
        # a single file: let the RCS object export its own commits
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        # start with one commit for each revision of each file; revisions on
        # branches are either skipped (--skip-branches) or fatal
        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                if parse_options[:skip_branches]
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else
                                        raise
                                end
                        end
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first, merging into each one
        # the commits that follow it and are close enough. thisindex is the
        # position of the commit being examined; only commits after it are
        # ever removed from the list, so the position stays valid while the
        # list shrinks.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files of this commit, and files of the following commits we
                # decided not to merge with
                cfiles = Set.new c.tree.filenames
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        # the symbols agree when one set contains the other
                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                # merge the candidates in order, giving up on the remaining ones
                # at the first failure
                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in the date-sorted list, so the distance
                                # between the two is k.date - c.date (the other way round
                                # would suggest a negative fuzz)
                                gap = k.date - c.date
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

# non-zero when some of the arguments could not be handled
exit status