Typo in comment
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--help` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
# Error raised when a revision that lives on an RCS branch reaches a code
# path that cannot export branched history.
# Note: NotImplementedError is a ScriptError, so a bare `rescue` does not
# catch this exception.
class NoBranchSupport < NotImplementedError
end
30
# Integer#odd? only exists from Ruby 1.8.7 onwards: define it ourselves
# when the running interpreter does not provide it
unless 2.respond_to?(:odd?)
        class Integer
                # true when the remainder of the division by two is not zero
                def odd?
                        !(self % 2).zero?
                end
        end
end
40
# git fast-import does not accept Windows line endings. Putting standard
# output in binary mode keeps every line terminated by a plain 0x0A on
# Windows as well (text mode would expand it to 0x0D 0x0A there).
STDOUT.binmode
45
# RCS fast-export version.
#
# The repository keeps this set to `git`; packagers can override it, e.g.
# with the latest tag, a git description, or a marker for their own patches.
#
# While the value is `git`, the version display makes a little effort to
# find out which commit we are running from.
RFE_VERSION = 'git'
56
# Print the program name and version on standard error.
#
# When RFE_VERSION is the literal "git", the directory holding the script is
# inspected: if it contains a `.git` entry, the short hash, the branch name
# (with a trailing "*" when `git diff` reports changes) and the date of the
# last commit are appended. Any failure while gathering these details
# degrades to " (no info)".
#
# The working directory of the process is left untouched.
def version
        if RFE_VERSION == "git"
                # follow a symlinked script to the real file; a relative link
                # target is relative to the directory holding the link.
                # readlink raises a SystemCallError when the file is not a
                # link, NotImplementedError where symlinks are unsupported
                script = begin
                        File.expand_path(File.readlink(__FILE__), File.dirname(__FILE__))
                rescue SystemCallError, NotImplementedError
                        __FILE__
                end

                info = nil
                # block form of chdir: the previous directory is restored
                # when the block is left
                Dir.chdir(File.dirname(File.expand_path(script))) do
                        # File.exists? was removed in Ruby 3.2
                        if File.exist? '.git'
                                begin
                                        git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                        hash = git_out.first.split.first
                                        branch = git_out.first.split('(').last.chomp(')')
                                        date = git_out.last.split.first
                                        # only the exit status matters here
                                        `git diff --no-ext-diff --quiet --exit-code`
                                        branch << "*" unless $?.success?
                                        info = " [#{branch}] #{hash} (#{date})"
                                rescue
                                        info = " (no info)"
                                end
                        end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
79
# Print the command-line help (synopsis, description, options and the
# matching git configuration keys) on standard error.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics are used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commits must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --[no-]symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
130
# Tell the user, on standard error, that +arg+ could not be found.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts message
end
134
# Load the authors file +fn+ and return a hash that maps usernames to
# author names & emails.
#
# Each mapping line has the form `username = Full Name <email>`: the text
# before the first `=` is the username, the rest is the author, both with
# surrounding whitespace removed. Blank lines are ignored; a line without
# `=` is reported on standard error and skipped. When a username is defined
# more than once a notice is printed and the last definition wins.
#
# If the file cannot be opened or read, the failure is reported (through
# not_found for system-level errors) and the mappings loaded so far,
# possibly none, are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                # no `=` on the line: there is no author part
                                if author.nil?
                                        STDERR.puts "Skipping malformed line #{io.lineno} of #{fn}: #{line.chomp.inspect}"
                                        next
                                end
                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError
                # missing file, permission problems and the like
                not_found(fn)
        rescue => e
                # anything else (e.g. an unusable path): stay best-effort,
                # but say what actually went wrong
                STDERR.puts "Could not load #{fn}: #{e.message}"
        end
        return hash
end
153
# Report a (recoverable) error on standard error: first the problem (+msg+),
# then, on a line of its own, what is being done about it (+action+).
def alert(msg, action)
        STDERR.puts "ERROR:\t#{msg}", "\t#{action}"
end
159
class Time
        # Build a UTC Time from an RCS date, i.e. six dot-separated fields
        # that are handed to Time.utc in the order they appear
        # (year, month, day, hour, minute, second).
        #
        # Raises ArgumentError unless there are exactly six fields.
        def self.rcs(string)
                parts = string.split('.')
                unless parts.length == 6
                        raise ArgumentError, "wrong number of fields for RCS date #{string}"
                end
                year = parts.shift
                # in Ruby 1.9, '99' is interpreted as year 99, not year 1999:
                # short years are given their century explicitly
                year = '19' + year if year.length < 3
                Time.utc(year, *parts)
        end
end
171
172 module RCS
173         # strip an optional final ;
174         def RCS.clean(arg)
175                 arg.chomp(';')
176         end
177
178         # strip the first and last @, and de-double @@s
179         def RCS.sanitize(arg)
180                 case arg
181                 when Array
182                         ret = arg.dup
183                         raise 'malformed first line' unless ret.first[0,1] == '@'
184                         raise 'malformed last line' unless ret.last[-1,1] == '@'
185                         ret.first.sub!(/^@/,'')
186                         ret.last.sub!(/@$/,'')
187                         ret.map { |l| l.gsub('@@','@') }
188                 when String
189                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
190                 else
191                         raise
192                 end
193         end
194
195         # clean and sanitize
196         def RCS.at_clean(arg)
197                 RCS.sanitize RCS.clean(arg)
198         end
199
200         def RCS.mark(key)
201                 @@marks ||= {}
202                 if @@marks.key? key
203                         @@marks[key]
204                 else
205                         @@marks[key] = @@marks.length + 1
206                 end
207         end
208
209         def RCS.blob(file, rev)
210                 RCS.mark([file, rev])
211         end
212
213         def RCS.commit(commit)
214                 RCS.mark(commit)
215         end
216
217         class File
218                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
219                 def initialize(fname, executable)
220                         @fname = fname.dup
221                         @head = nil
222                         @comment = nil
223                         @desc = []
224                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
225                         @mode = executable ? '755' : '644'
226                 end
227
228                 def has_revision?(rev)
229                         @revision.has_key?(rev) and not @revision[rev].author.nil?
230                 end
231
232                 def export_commits(opts={})
233                         counter = 0
234                         exported = []
235                         until @revision.empty?
236                                 counter += 1
237
238                                 # a string sort is a very good candidate for
239                                 # export order, getting a miss only for
240                                 # multi-digit revision components
241                                 keys = @revision.keys.sort
242
243                                 STDERR.puts "commit export loop ##{counter}"
244                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
245                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
246
247                                 keys.each do |key|
248                                         rev = @revision[key]
249                                         # the parent commit is rev.next if we're on the
250                                         # master branch (rev.branch is nil) or
251                                         # rev.diff_base otherwise
252                                         from = rev.branch.nil? ? rev.next : rev.diff_base
253                                         # A commit can only be exported if it has no
254                                         # parent, or if the parent has been exported
255                                         # already. Skip this commit otherwise
256                                         if from and not exported.include? from
257                                                 next
258                                         end
259
260                                         branch = rev.branch || 'master'
261                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
262                                         date = "#{rev.date.tv_sec} +0000"
263                                         log = String.new
264                                         if opts[:log_filename]
265                                                 log << @fname << ": "
266                                         end
267                                         log << rev.log.join
268
269                                         puts "commit refs/heads/#{branch}"
270                                         puts "mark :#{RCS.commit key}"
271                                         puts "committer #{author} #{date}"
272                                         puts "data #{log.length}"
273                                         puts log unless log.empty?
274                                         puts "from :#{RCS.commit from}" if from
275                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
276
277                                         # TODO FIXME this *should* be safe, in
278                                         # that it should not unduly move
279                                         # branches back in time, but I'm not
280                                         # 100% sure ...
281                                         rev.branches.each do |sym|
282                                                 puts "reset refs/heads/#{sym}"
283                                                 puts "from :#{RCS.commit key}"
284                                         end
285                                         rev.symbols.each do |sym|
286                                                 puts "reset refs/tags/#{sym}"
287                                                 puts "from :#{RCS.commit key}"
288                                         end
289                                         if opts[:tag_each_rev]
290                                                 puts "reset refs/tags/#{key}"
291                                                 puts "from :#{RCS.commit key}"
292                                         end
293
294                                         exported.push key
295                                 end
296                                 exported.each { |k| @revision.delete(k) }
297                         end
298                 end
299         end
300
301         class Revision
302                 attr_accessor :rev, :author, :state, :next
303                 attr_accessor :branches, :log, :text, :symbols
304                 attr_accessor :branch, :diff_base, :branch_point
305                 attr_reader   :date
306                 def initialize(file, rev)
307                         @file = file
308                         @rev = rev
309                         @author = nil
310                         @date = nil
311                         @state = nil
312                         @next = nil
313                         @branches = Set.new
314                         @branch = nil
315                         @branch_point = nil
316                         @diff_base = nil
317                         @log = []
318                         @text = []
319                         @symbols = Set.new
320                 end
321
322                 def date=(str)
323                         @date = Time.rcs(str)
324                 end
325
326                 def blob
327                         str = @text.join('')
328                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
329                         ret
330                 end
331         end
332
333         def RCS.parse(fname, rcsfile)
334                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
335
336                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
337                         status = [:basic]
338                         rev = nil
339                         lines = []
340                         difflines = []
341                         file.each_line do |line|
342                                 case status.last
343                                 when :basic
344                                         command, args = line.split($;,2)
345                                         next if command.empty?
346
347                                         if command.chomp!(';')
348                                                 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
349                                                 next
350                                         end
351
352                                         case command
353                                         when 'head'
354                                                 rcs.head = RCS.clean(args.chomp)
355                                         when 'symbols'
356                                                 status.push :symbols
357                                                 next if args.empty?
358                                                 line = args; redo
359                                         when 'comment'
360                                                 rcs.comment = RCS.at_clean(args.chomp)
361                                         when /^[0-9.]+$/
362                                                 rev = command.dup
363                                                 if rcs.has_revision?(rev)
364                                                         status.push :revision_data
365                                                 else
366                                                         status.push :new_revision
367                                                 end
368                                         when 'desc'
369                                                 status.push :desc
370                                                 lines.clear
371                                                 status.push :read_lines
372                                         when 'branch', 'access', 'locks', 'expand'
373                                                 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
374                                                 status.push :skipping_lines
375                                                 next if args.empty?
376                                                 line = args; redo
377                                         else
378                                                 raise "Unknown command #{command.inspect}"
379                                         end
380                                 when :skipping_lines
381                                         status.pop if line.strip.chomp!(';')
382                                 when :symbols
383                                         # we can have multiple symbols per line
384                                         pairs = line.strip.split($;)
385                                         pairs.each do |pair|
386                                                 sym, rev = pair.strip.split(':',2);
387                                                 if rev
388                                                         status.pop if rev.chomp!(';')
389                                                         rcs.revision[rev].symbols << sym
390                                                 else
391                                                         status.pop
392                                                 end
393                                         end
394                                 when :desc
395                                         rcs.desc.replace lines.dup
396                                         status.pop
397                                 when :read_lines
398                                         # we sanitize lines as we read them
399
400                                         actual_line = line.dup
401
402                                         # the first line must begin with a @, which we strip
403                                         if lines.empty?
404                                                 ats = line.match(/^@+/)
405                                                 raise 'malformed line' unless ats
406                                                 actual_line.replace line.sub(/^@/,'')
407                                         end
408
409                                         # if the line ends with an ODD number of @, it's the
410                                         # last line -- we work on actual_line so that content
411                                         # such as @\n or @ work correctly (they would be
412                                         # encoded respectively as ['@@@\n','@\n'] and
413                                         # ['@@@@\n']
414                                         ats = actual_line.chomp.match(/@+$/)
415                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
416                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
417                                         end
418                                         lines << actual_line.gsub('@@','@')
419                                         if nomore
420                                                 status.pop
421                                                 redo
422                                         end
423                                 when :new_revision
424                                         case line.chomp
425                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
426                                                 rcs.revision[rev].date = $1
427                                                 rcs.revision[rev].author = $2
428                                                 rcs.revision[rev].state = $3
429                                         when /^branches\s*;/
430                                                 next
431                                         when /^branches(?:\s+|$)/
432                                                 status.push :branches
433                                                 if line.index(';')
434                                                         line = line.sub(/^branches\s+/,'')
435                                                         redo
436                                                 end
437                                         when /^next\s+(\S+)?;$/
438                                                 nxt = rcs.revision[rev].next = $1
439                                                 next unless nxt
440                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
441                                                 rcs.revision[nxt].diff_base = rev
442                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
443                                         else
444                                                 status.pop
445                                         end
446                                 when :branches
447                                         candidate = line.split(';',2)
448                                         candidate.first.strip.split.each do |branch|
449                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
450                                                 rcs.revision[branch].diff_base = rev
451                                                 # we drop the last number from the branch name
452                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
453                                                 rcs.revision[branch].branch_point = rev
454                                         end
455                                         status.pop if candidate.length > 1
456                                 when :revision_data
457                                         case line.chomp
458                                         when 'log'
459                                                 status.push :log
460                                                 lines.clear
461                                                 status.push :read_lines
462                                         when 'text'
463                                                 if rev == rcs.head
464                                                         status.push :head
465                                                 else
466                                                         status.push :diff
467                                                 end
468                                                 lines.clear
469                                                 status.push :read_lines
470                                         else
471                                                 status.pop
472                                         end
473                                 when :log
474                                         rcs.revision[rev].log.replace lines.dup
475                                         status.pop
476                                 when :head
477                                         rcs.revision[rev].text.replace lines.dup
478                                         puts rcs.revision[rev].blob
479                                         status.pop
480                                 when :diff
481                                         difflines.replace lines.dup
482                                         difflines.pop if difflines.last.empty?
483                                         if difflines.first.chomp.empty?
484                                                 alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
485                                                 difflines.shift
486                                         end unless difflines.empty?
487                                         base = rcs.revision[rev].diff_base
488                                         unless rcs.revision[base].text
489                                                 pp rcs
490                                                 puts rev, base
491                                                 raise 'no diff base!'
492                                         end
493                                         # deep copy
494                                         buffer = []
495                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
496
497                                         adding = false
498                                         index = nil
499                                         count = nil
500
501                                         while l = difflines.shift
502                                                 if adding
503                                                         raise 'negative index during insertion' if index < 0
504                                                         raise 'negative count during insertion' if count < 0
505                                                         adding << l
506                                                         count -= 1
507                                                         # collected all the lines, put the before
508                                                         unless count > 0
509                                                                 unless buffer[index]
510                                                                         buffer[index] = []
511                                                                 end
512                                                                 buffer[index].unshift(*adding)
513                                                                 adding = false
514                                                         end
515                                                         next
516                                                 end
517
518                                                 l.chomp!
519                                                 raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
520                                                 diff_cmd = $1.intern
521                                                 index = $2.to_i
522                                                 count = $3.to_i
523                                                 case diff_cmd
524                                                 when :d
525                                                         # for deletion, index 1 is the first index, so the Ruby
526                                                         # index is one less than the diff one
527                                                         index -= 1
528                                                         # we replace them with empty string so that 'a' commands
529                                                         # referring to the same line work properly
530                                                         while count > 0
531                                                                 buffer[index].clear
532                                                                 index += 1
533                                                                 count -= 1
534                                                         end
535                                                 when :a
536                                                         # addition will prepend the appropriate lines
537                                                         # to the given index, and in this case Ruby
538                                                         # and diff indices are the same
539                                                         adding = []
540                                                 end
541                                         end
542
543                                         # turn the buffer into an array of lines, deleting the empty ones
544                                         buffer.delete_if { |l| l.empty? }
545                                         buffer.flatten!
546
547                                         rcs.revision[rev].text = buffer
548                                         puts rcs.revision[rev].blob
549                                         status.pop
550                                 else
551                                         raise "Unknown status #{status.last}"
552                                 end
553                         end
554                 end
555
556                 # clean up the symbols/branches: look for revisions that have
557                 # one or more symbols but no dates, and make them into
558                 # branches, pointing to the highest commit with that key
559                 branches = []
560                 keys = rcs.revision.keys
561                 rcs.revision.each do |key, rev|
562                         if rev.date.nil? and not rev.symbols.empty?
563                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
564                                 tr = rcs.revision[top]
565                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
566                                 tr.branches |= rev.symbols
567                                 branches << key
568                         end
569                 end
570                 branches.each { |k| rcs.revision.delete k }
571
572                 return rcs
573         end
574
575         class Tree
576                 def initialize(commit)
577                         @commit = commit
578                         @files = Hash.new
579                 end
580
581                 def merge!(tree)
582                         testfiles = @files.dup
583                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
584                         # the next line is only reached if all the adds were
585                         # successful, so the merge is atomic
586                         @files.replace testfiles
587                 end
588
589                 def add(rcs, rev, file_list=@files)
590                         if file_list.key? rcs
591                                 prev = file_list[rcs]
592                                 if prev.log == rev.log
593                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
594                                 else
595                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
596                                 end
597                                 if prev.text != rev.text
598                                         raise str
599                                 else
600                                         @commit.warn_about str
601                                 end
602                         end
603                         file_list[rcs] = rev
604                 end
605
606                 def each &block
607                         @files.each &block
608                 end
609
610                 def to_a
611                         files = []
612                         @files.map do |rcs, rev|
613                                 if rev.state.downcase == "dead"
614                                         files << "D #{rcs.fname}"
615                                 else
616                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
617                                 end
618                         end
619                         files
620                 end
621
622                 def filenames
623                         @files.map { |rcs, rev| rcs.fname }
624                 end
625
626                 def to_s
627                         self.to_a.join("\n")
628                 end
629         end
630
631         class Commit
632                 attr_accessor :date, :log, :symbols, :author, :branch
633                 attr_accessor :tree
634                 attr_accessor :min_date, :max_date
635                 def initialize(rcs, rev)
636                         raise NoBranchSupport if rev.branch
637                         self.date = rev.date.dup
638                         self.min_date = self.max_date = self.date
639                         self.log = rev.log.dup
640                         self.symbols = rev.symbols.dup
641                         self.author = rev.author
642                         self.branch = rev.branch
643
644                         self.tree = Tree.new self
645                         self.tree.add rcs, rev
646                 end
647
648                 def to_a
649                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
650                 end
651
652                 def warn_about(str)
653                         warn str + " for commit on #{self.date}"
654                 end
655
656                 # Sort by date and then by number of symbols
657                 def <=>(other)
658                         ds = self.date <=> other.date
659                         if ds != 0
660                                 return ds
661                         else
662                                 return self.symbols.length <=> other.symbols.length
663                         end
664                 end
665
666                 def merge!(commit)
667                         self.tree.merge! commit.tree
668                         if commit.max_date > self.max_date
669                                 self.max_date = commit.max_date
670                         end
671                         if commit.min_date < self.min_date
672                                 self.min_date = commit.min_date
673                         end
674                         self.symbols.merge commit.symbols
675                 end
676
677                 def export(opts={})
678                         xbranch = self.branch || 'master'
679                         xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
680                         xlog = self.log.join
681                         numdate = self.date.tv_sec
682                         xdate = "#{numdate} +0000"
683                         key = numdate.to_s
684
685                         puts "commit refs/heads/#{xbranch}"
686                         puts "mark :#{RCS.commit key}"
687                         puts "committer #{xauthor} #{xdate}"
688                         puts "data #{xlog.length}"
689                         puts xlog unless xlog.empty?
690                         # TODO branching support for multi-file export
691                         # puts "from :#{RCS.commit from}" if self.branch_point
692                         puts self.tree.to_s
693
694                         # TODO branching support for multi-file export
695                         # rev.branches.each do |sym|
696                         #       puts "reset refs/heads/#{sym}"
697                         #       puts "from :#{RCS.commit key}"
698                         # end
699
700                         self.symbols.each do |sym|
701                                 puts "reset refs/tags/#{sym}"
702                                 puts "from :#{RCS.commit key}"
703                         end
704
705                 end
706         end
707 end
708
709 require 'getoptlong'
710
# Command-line options accepted by the script. Every option handled by the
# option-processing loop must be declared here, otherwise GetoptLong rejects
# it as invalid before the loop ever sees it.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags (in seconds); stored as :tag_fuzz by the
        # option-processing loop
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# Non-option arguments are returned in order too, as the argument of an
# option with an empty name.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
740
# Files and directories named on the command line
file_list = []

# Built-in defaults; git configuration (read right below) and then the
# command-line options can override them.
parse_options = {
        :authors => {},
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Read config options

# each configured authors file is loaded, later files overriding earlier ones
`git config --get-all rcs.authorsfile`.each_line do |authors_path|
        parse_options[:authors].merge! load_authors_file(authors_path.chomp)
end

# boolean settings that are off unless git config reports them as true
[
        [:tag_each_rev, 'rcs.tageachrev'],
        [:log_filename, 'rcs.logfilename'],
].each do |key, setting|
        parse_options[key] = (`git config --bool #{setting}`.chomp == 'true')
end

# integer settings that keep their default when git config prints nothing
[
        [:commit_fuzz, 'rcs.commitFuzz'],
        [:tag_fuzz,    'rcs.tagFuzz'],
].each do |key, setting|
        configured = `git config --int #{setting}`.chomp
        parse_options[key] = configured.to_i unless configured.empty?
end

# symbol checking is on unless git config explicitly reports it as false
parse_options[:symbol_check] = (`git config --bool rcs.symbolcheck`.chomp != 'false')
770
# Options that do nothing but switch a boolean setting on or off, with the
# setting they control and the value they give it
flag_options = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
        '--skip-branches'   => [:skip_branches, true],
}

# Apply the command-line options on top of the defaults and of the git
# configuration, collecting the non-option arguments into file_list.
opts.each do |opt, arg|
        if flag_options.key?(opt)
                setting, value = flag_options[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when '--authors-file'
                # authors from the file override the ones already known,
                # after reporting which ones get redefined
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz' then parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'    then parse_options[:tag_fuzz] = arg.to_i
        when ''
                # non-option argument: a file or directory to export
                file_list << arg
        when '--version'
                version
                exit
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
parse_options[:tag_fuzz] = [parse_options[:tag_fuzz], parse_options[:commit_fuzz]].max
816
require 'etc'

user = Etc.getlogin || ENV['USER']

# steal username/email data from other init files that may contain the
# information: the `username` field of ~/.hgrc, or a `changelog_username`
# setting in ~/.vimrc or ~/.gvimrc, checked in this order.
#
# Returns the stripped value of the first match, or nil if no file is readable
# and matching. The value is returned rather than stored, because the
# top-level local variables of the script (parse_options, user) are not
# visible from inside a method.
#
# The optional quote is captured as (["']?) so that the group always takes
# part in the match: a backreference to a group that did not participate
# fails, which would make unquoted values impossible to match.
def steal_username
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                next unless File.readable?(file)
                return Regexp.last_match(idx).strip if File.read(file) =~ rx
        end
        nil
end

# If the authors map has no entry for the current user, try to build one:
# from the git author environment variables or git config first, then from
# the passwd entry (name only), finally from other tools' init files.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # strings coming from ENV are frozen, so the fallbacks are assigned
        # rather than written into the string in place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        pw = Etc.getpwnam(user)
                        name = pw.gecos.to_s if pw and pw.respond_to?(:gecos)
                rescue ArgumentError
                        # no passwd entry for this user: leave the name empty
                end
        end

        identity =
                if name.empty?
                        # couldn't find a name, try to steal data from other sources
                        steal_username
                else
                        # if we found a name, try to find an email too
                        email = ENV['GIT_AUTHOR_EMAIL'].to_s
                        email = `git config user.email`.chomp if email.empty?

                        if email.empty?
                                # couldn't find an email, try to steal data too
                                steal_username
                        else
                                # we got both a name and email, fill the info
                                "#{name} <#{email}>"
                        end
                end

        parse_options[:authors][user] = identity if identity
end
861
# Nothing to export: show the usage and fail
if file_list.empty?
        usage
        exit 1
end

# Suffix of RCS files
SFX = ',v'

# Exit status of the script: set to 1 as soon as an argument cannot be handled
status = 0

# Parse the RCS file behind each command-line argument. An argument can be
#  * an RCS file (name ending in SFX),
#  * a working file, whose RCS file is looked for in the RCS subdirectory
#    first and next to the working file second,
#  * a directory, which is searched recursively for RCS files.
# Files that cannot be found are reported and skipped, and make the script
# exit with a non-zero status.
rcs = []
file_list.each do |arg|
        case ftype = File.ftype(arg)
        when 'file'
                if arg[-SFX.length, SFX.length] == SFX
                        # File.exist? rather than File.exists?: the latter was
                        # deprecated for a long time and is gone in recent Ruby
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile.replace File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        # there is nothing to parse for this argument
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern, File::FNM_DOTMATCH).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # Exception is rescued on purpose: the error is
                                # always re-raised, this only adds the file (and
                                # last read line number) to the output, and that
                                # should also happen for errors which are not
                                # StandardErrors, such as NotImplementedError
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
922
# Export stage. A single RCS file is exported on its own through
# export_commits. With any other number of files, every revision of every
# file becomes a single-file commit; commits that are close enough in time
# and agree on log, author, branch and (optionally) symbols are then
# coalesced into multi-file commits, which are finally exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                # revisions on branches cannot be exported in
                                # multi-file mode: skip them if asked to, give up
                                # otherwise
                                if parse_options[:skip_branches]
                                        STDERR.puts "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        STDERR.puts "Sorting by date"

        # Commit#<=> sorts by date first, by number of symbols second
        commits.sort!

        if $DEBUG
                STDERR.puts "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} single-file commits"
        end

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first one, merging into each
        # commit the mergeable ones that follow it. Merged commits are deleted
        # from the list while it is being walked, but they always come after
        # the commit being processed, so thisindex (the position of the
        # current commit) stays valid.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files touched by this commit
                cfiles = Set.new c.tree.filenames
                # files touched by the following commits we are not merging with
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        # the symbols agree if one set contains the other
                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        STDERR.puts "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                # merge the candidates in order, stopping at the first one that
                # fails; candidates merged before the failure stay merged
                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # k follows c in date order, so the distance
                                # between the two is k.date - c.date: the other
                                # way round the suggested fuzz would never be
                                # positive
                                gap = (k.date - c.date).abs
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                STDERR.puts "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                STDERR.puts "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

# non-zero if some command-line argument could not be handled
exit status