Open files in binary mode
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--help` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24         * Provide an option that marks a file as deleted based on symbolic revisions
25 =end
26
27 require 'pp'
28 require 'set'
29
30 require 'shellwords'
31
# Error class for branched histories that cannot be exported.
# NOTE(review): presumably raised by the multi-file export when a branch is
# found and --skip-branches was not given -- confirm against the raise site.
class NoBranchSupport < NotImplementedError
end
33
# Backport Integer#odd? (available since Ruby 1.8.7) to interpreters
# that do not provide it; newer interpreters keep their own implementation.
unless 2.respond_to?(:odd?)
        class Integer
                # true when dividing the receiver by two leaves a remainder
                def odd?
                        (self % 2) != 0
                end
        end
end
43
44 # Set standard output to binary mode: git fast-import doesn't like Windows
45 # line-endings, and this ensures that the line termination will be a simple 0x0a
46 # on Windows too (it expands to 0x0D 0x0A otherwise).
47 STDOUT.binmode
48
49 =begin
50 RCS fast-export version: set to `git` in the repository, but can be overridden
51 by packagers, e.g. based on the latest tag, git description, custom packager
52 patches or whatever.
53
54 When the version is set to `git`, we make a little effort to find more information
55 about which commit we are at.
56 =end
57
RFE_VERSION="git"

# Print the program name and version on standard error.
#
# When RFE_VERSION is "git", the directory holding this script (following a
# symlink, if the script is one) is inspected: if it contains a `.git` entry,
# the abbreviated commit hash, branch name and commit date are appended to
# the banner, with a `*` after the branch name when the work tree has
# uncommitted changes. Failures while querying git yield " (no info)".
#
# The working directory of the process is left untouched.
def version
        if RFE_VERSION == "git"
                nolinkfile = File.readlink(__FILE__) rescue __FILE__
                info = nil

                # block form: the previous working directory is restored on exit,
                # so relative paths used by the caller keep working
                Dir.chdir(File.expand_path(File.dirname(nolinkfile))) do
                        # File.exist? rather than the removed File.exists? alias
                        if File.exist? '.git' ; begin
                                git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                hash=git_out.first.split.first
                                branch=git_out.first.split('(').last.chomp(')')
                                date=git_out.last.split.first
                                # only the exit status matters here: non-zero means local changes
                                `git diff --no-ext-diff --quiet --exit-code`
                                branch << "*" unless $?.success?
                                info=" [#{branch}] #{hash} (#{date})"
                        rescue
                                info=" (no info)"
                        end ; end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
82
# Print the command-line help text on standard error.
# Standard output is flushed first.
def usage
        $stdout.flush
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics are used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commits must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]author-is-committer
                                use the author name and date as committer identity
        --ignore                ignore the specified files (shell pattern)
        --log-encoding          specify the encoding of log messages, for transcoding to UTF-8
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]warn-missing-authors
                                [do not] warn about usernames missing from the map file
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.authorIsCommitter   for --author-is-committer
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.warnMissingAuthors  for --warn-missing-authors
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
142
# Write +msg+ to standard error.
# Any output pending on standard output is flushed beforehand.
def warning(msg)
        $stdout.flush
        STDERR.puts(msg)
end
147
# Emit a warning stating that +arg+ could not be found.
def not_found(arg)
        message = "Could not find #{arg}"
        warning(message)
end
151
# Print the `committer` line of a commit on standard output.
#
# With opts[:author_is_committer] set, the identity is built from +author+
# and +date+; otherwise it is whatever `git var GIT_COMMITTER_IDENT` reports.
def emit_committer(opts, author, date)
        identity =
                if opts[:author_is_committer]
                        "#{author} #{date}"
                else
                        `git var GIT_COMMITTER_IDENT`.chomp
                end
        puts "committer #{identity}"
end
160
# returns a hash that maps usernames to author names & emails
#
# Each line of the file +fn+ is expected to look like `username = Full Name
# <email>`; text before the first `=` is the key, the rest is the value, both
# stripped of surrounding whitespace. A username defined more than once keeps
# its last definition, and a warning is given.
#
# Blank lines are skipped silently and other lines without an `=` are skipped
# with a warning, so that they do not abort the loading of the whole file.
# If the file cannot be read, a "Could not find" warning is given and the
# entries collected so far (possibly none) are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                uname, author = line.split('=', 2)
                                # no '=' on this line: there is nothing to map
                                if author.nil?
                                        warning "Ignoring malformed authors line #{line.chomp.inspect}" unless line.strip.empty?
                                        next
                                end
                                uname = uname.strip
                                author = author.strip
                                warning "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
179
# Look up the author identity for the RCS username +name+ in opts[:authors].
#
# Unknown usernames get a placeholder identity ("name <empty>") which is
# stored in the map, so the optional warning (opts[:warn_missing_authors])
# is given only the first time a username is missing.
# Raises if opts[:authors] is not a Hash.
def username_to_author(name, opts)
        authors = opts[:authors]
        raise "no authors map defined" unless Hash === authors

        return authors[name] if authors.key?(name)

        warning "no author found for #{name}" if opts[:warn_missing_authors]
        authors[name] = "#{name} <empty>"
        authors[name]
end
191
# Report a (recoverable) error on standard error: a first line with the
# error message +msg+, and an indented second line with +action+.
def alert(msg, action)
        STDERR.puts "ERROR:\t#{msg}", "\t#{action}"
end
197
class Time
        # Build a UTC Time from an RCS date, i.e. six dot-separated fields
        # (year.month.day.hour.minute.second).
        # Raises ArgumentError if the number of fields is not six.
        def Time.rcs(string)
                parts = string.split('.')
                unless parts.length == 6
                        raise ArgumentError, "wrong number of fields for RCS date #{string}"
                end

                # years of one or two digits belong to the 20th century; they are
                # expanded here because Ruby 1.9 reads '99' as year 99, not 1999
                year = parts.first
                year.insert(0, '19') if year.length < 3

                Time.utc(*parts)
        end
end
209
210 module RCS
211         # strip an optional final ;
212         def RCS.clean(arg)
213                 arg.chomp(';')
214         end
215
216         # strip the first and last @, and de-double @@s
217         def RCS.sanitize(arg)
218                 case arg
219                 when Array
220                         ret = arg.dup
221                         raise 'malformed first line' unless ret.first[0,1] == '@'
222                         raise 'malformed last line' unless ret.last[-1,1] == '@'
223                         ret.first.sub!(/^@/,'')
224                         ret.last.sub!(/@$/,'')
225                         ret.map { |l| l.gsub('@@','@') }
226                 when String
227                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
228                 else
229                         raise
230                 end
231         end
232
233         # clean and sanitize
234         def RCS.at_clean(arg)
235                 RCS.sanitize RCS.clean(arg)
236         end
237
238         def RCS.mark(key)
239                 @@marks ||= {}
240                 if @@marks.key? key
241                         @@marks[key]
242                 else
243                         @@marks[key] = @@marks.length + 1
244                 end
245         end
246
247         def RCS.blob(file, rev)
248                 RCS.mark([file, rev])
249         end
250
251         def RCS.commit(commit)
252                 RCS.mark(commit)
253         end
254
255         class File
256                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
257                 def initialize(fname, executable)
258                         @fname = fname.dup
259                         @head = nil
260                         @comment = nil
261                         @desc = []
262                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
263                         @mode = executable ? '755' : '644'
264                 end
265
266                 def has_revision?(rev)
267                         @revision.has_key?(rev) and not @revision[rev].author.nil?
268                 end
269
270                 def export_commits(opts={})
271                         counter = 0
272                         exported = []
273                         log_enc = opts[:log_encoding]
274                         until @revision.empty?
275                                 counter += 1
276
277                                 # a string sort is a very good candidate for
278                                 # export order, getting a miss only for
279                                 # multi-digit revision components
280                                 keys = @revision.keys.sort
281
282                                 warning "commit export loop ##{counter}"
283                                 warning "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
284                                 warning "\t#{keys.size} to export: #{keys.join(', ')}"
285
286                                 keys.each do |key|
287                                         rev = @revision[key]
288                                         # the parent commit is rev.next if we're on the
289                                         # master branch (rev.branch is nil) or
290                                         # rev.diff_base otherwise
291                                         from = rev.branch.nil? ? rev.next : rev.diff_base
292                                         # A commit can only be exported if it has no
293                                         # parent, or if the parent has been exported
294                                         # already. Skip this commit otherwise
295                                         if from and not exported.include? from
296                                                 next
297                                         end
298
299                                         branch = rev.branch || 'master'
300                                         author = username_to_author(rev.author, opts)
301                                         date = "#{rev.date.tv_sec} +0000"
302                                         log = String.new
303                                         if opts[:log_filename]
304                                                 log << @fname << ": "
305                                         end
306                                         if log_enc
307                                                 # git fast-import expects logs to be in UTF-8, so if a different log encoding
308                                                 # is specified for the log we transcode from whatever was specified to UTF-8.
309                                                 # we then mark the string as ASCII-8BIT (as everything else) so that string
310                                                 # lengths are computed in bytes
311                                                 log << rev.log.join.encode('UTF-8', log_enc).force_encoding('ASCII-8BIT')
312                                         else
313                                                 log << rev.log.join
314                                         end
315
316                                         puts "commit refs/heads/#{branch}"
317                                         puts "mark :#{RCS.commit key}"
318                                         puts "author #{author} #{date}"
319                                         emit_committer(opts, author, date)
320                                         puts "data #{log.length}"
321                                         puts log unless log.empty?
322                                         puts "from :#{RCS.commit from}" if from
323                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
324
325                                         # TODO FIXME this *should* be safe, in
326                                         # that it should not unduly move
327                                         # branches back in time, but I'm not
328                                         # 100% sure ...
329                                         rev.branches.each do |sym|
330                                                 puts "reset refs/heads/#{sym}"
331                                                 puts "from :#{RCS.commit key}"
332                                         end
333                                         rev.symbols.each do |sym|
334                                                 puts "reset refs/tags/#{sym}"
335                                                 puts "from :#{RCS.commit key}"
336                                         end
337                                         if opts[:tag_each_rev]
338                                                 puts "reset refs/tags/#{key}"
339                                                 puts "from :#{RCS.commit key}"
340                                         end
341
342                                         exported.push key
343                                 end
344                                 exported.each { |k| @revision.delete(k) }
345                         end
346                 end
347         end
348
349         class Revision
350                 attr_accessor :rev, :author, :state, :next
351                 attr_accessor :branches, :log, :text, :symbols
352                 attr_accessor :branch, :diff_base, :branch_point
353                 attr_reader   :date
354                 def initialize(file, rev)
355                         @file = file
356                         @rev = rev
357                         @author = nil
358                         @date = nil
359                         @state = nil
360                         @next = nil
361                         @branches = Set.new
362                         @branch = nil
363                         @branch_point = nil
364                         @diff_base = nil
365                         @log = []
366                         @text = []
367                         @symbols = Set.new
368                 end
369
370                 def date=(str)
371                         @date = Time.rcs(str)
372                 end
373
374                 def blob
375                         str = @text.join('')
376                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
377                         ret
378                 end
379         end
380
381         # TODO: what if a revision does not end with newline?
382         # TODO this should be done internally, not piping out to RCS
383         def RCS.expand_keywords(rcsfile, revision)
384                 ret = ::File.read("|co -q -p#{revision} #{Shellwords.escape rcsfile}")
385                 lines = []
386                 ret.each_line do |line|
387                         lines << line
388                 end
389                 lines
390         end
391
392         def RCS.parse(fname, rcsfile, opts={})
393                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
394
395                 ::File.open(rcsfile, 'rb') do |file|
396                         status = [:basic]
397                         rev = nil
398                         lines = []
399                         difflines = []
400                         file.each_line do |line|
401                                 case status.last
402                                 when :basic
403                                         command, args = line.split($;,2)
404                                         next if command.empty?
405
406                                         if command.chomp!(';')
407                                                 warning "Skipping empty command #{command.inspect}" if $DEBUG
408                                                 next
409                                         end
410
411                                         case command
412                                         when 'head'
413                                                 rcs.head = RCS.clean(args.chomp)
414                                         when 'symbols'
415                                                 status.push :symbols
416                                                 next if args.empty?
417                                                 line = args; redo
418                                         when 'comment'
419                                                 rcs.comment = RCS.at_clean(args.chomp)
420                                         when /^[0-9.]+$/
421                                                 rev = command.dup
422                                                 if rcs.has_revision?(rev)
423                                                         status.push :revision_data
424                                                 else
425                                                         status.push :new_revision
426                                                 end
427                                         when 'desc'
428                                                 status.push :desc
429                                                 lines.clear
430                                                 status.push :read_lines
431                                         when 'branch', 'access', 'locks', 'expand'
432                                                 warning "Skipping unhandled command #{command.inspect}" if $DEBUG
433                                                 status.push :skipping_lines
434                                                 next if args.empty?
435                                                 line = args; redo
436                                         else
437                                                 raise "Unknown command #{command.inspect}"
438                                         end
439                                 when :skipping_lines
440                                         status.pop if line.strip.chomp!(';')
441                                 when :symbols
442                                         # we can have multiple symbols per line
443                                         pairs = line.strip.split($;)
444                                         pairs.each do |pair|
445                                                 sym, rev = pair.strip.split(':',2);
446                                                 if rev
447                                                         status.pop if rev.chomp!(';')
448                                                         rcs.revision[rev].symbols << sym
449                                                 else
450                                                         status.pop
451                                                 end
452                                         end
453                                 when :desc
454                                         rcs.desc.replace lines.dup
455                                         status.pop
456                                 when :read_lines
457                                         # we sanitize lines as we read them
458
459                                         actual_line = line.dup
460
461                                         # the first line must begin with a @, which we strip
462                                         if lines.empty?
463                                                 ats = line.match(/^@+/)
464                                                 raise 'malformed line' unless ats
465                                                 actual_line.replace line.sub(/^@/,'')
466                                         end
467
468                                         # if the line ends with an ODD number of @, it's the
469                                         # last line -- we work on actual_line so that content
470                                         # such as @\n or @ work correctly (they would be
471                                         # encoded respectively as ['@@@\n','@\n'] and
472                                         # ['@@@@\n']
473                                         ats = actual_line.chomp.match(/@+$/)
474                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
475                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
476                                         end
477                                         lines << actual_line.gsub('@@','@')
478                                         if nomore
479                                                 status.pop
480                                                 redo
481                                         end
482                                 when :new_revision
483                                         case line.chomp
484                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
485                                                 rcs.revision[rev].date = $1
486                                                 rcs.revision[rev].author = $2
487                                                 rcs.revision[rev].state = $3
488                                         when /^branches\s*;/
489                                                 next
490                                         when /^branches(?:\s+|$)/
491                                                 status.push :branches
492                                                 if line.index(';')
493                                                         line = line.sub(/^branches\s+/,'')
494                                                         redo
495                                                 end
496                                         when /^next\s+(\S+)?;$/
497                                                 nxt = rcs.revision[rev].next = $1
498                                                 next unless nxt
499                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
500                                                 rcs.revision[nxt].diff_base = rev
501                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
502                                         else
503                                                 status.pop
504                                         end
505                                 when :branches
506                                         candidate = line.split(';',2)
507                                         candidate.first.strip.split.each do |branch|
508                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
509                                                 rcs.revision[branch].diff_base = rev
510                                                 # we drop the last number from the branch name
511                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
512                                                 rcs.revision[branch].branch_point = rev
513                                         end
514                                         status.pop if candidate.length > 1
515                                 when :revision_data
516                                         case line.chomp
517                                         when 'log'
518                                                 status.push :log
519                                                 lines.clear
520                                                 status.push :read_lines
521                                         when 'text'
522                                                 if rev == rcs.head
523                                                         status.push :head
524                                                 else
525                                                         status.push :diff
526                                                 end
527                                                 lines.clear
528                                                 status.push :read_lines
529                                         else
530                                                 status.pop
531                                         end
532                                 when :log
533                                         rcs.revision[rev].log.replace lines.dup
534                                         status.pop
535                                 when :head
536                                         if opts[:expand_keywords]
537                                                 rcs.revision[rev].text.replace RCS.expand_keywords(rcsfile, rev)
538                                         else
539                                                 rcs.revision[rev].text.replace lines.dup
540                                         end
541                                         puts rcs.revision[rev].blob
542                                         status.pop
543                                 when :diff
544                                         if opts[:expand_keywords]
545                                                 rcs.revision[rev].text.replace RCS.expand_keywords(rcsfile, rev)
546                                         else
547                                                 difflines.replace lines.dup
548                                                 difflines.pop if difflines.last.empty?
549                                                 if difflines.first.chomp.empty?
550                                                         alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
551                                                         difflines.shift
552                                                 end unless difflines.empty?
553                                                 base = rcs.revision[rev].diff_base
554                                                 unless rcs.revision[base].text
555                                                         pp rcs
556                                                         puts rev, base
557                                                         raise 'no diff base!'
558                                                 end
559                                                 # deep copy
560                                                 buffer = []
561                                                 rcs.revision[base].text.each { |l| buffer << [l.dup] }
562
563                                                 adding = false
564                                                 index = nil
565                                                 count = nil
566
567                                                 while l = difflines.shift
568                                                         if adding
569                                                                 raise 'negative index during insertion' if index < 0
570                                                                 raise 'negative count during insertion' if count < 0
571                                                                 adding << l
572                                                                 count -= 1
573                                                                 # collected all the lines, put the before
574                                                                 unless count > 0
575                                                                         unless buffer[index]
576                                                                                 buffer[index] = []
577                                                                         end
578                                                                         buffer[index].unshift(*adding)
579                                                                         adding = false
580                                                                 end
581                                                                 next
582                                                         end
583
584                                                         l.chomp!
585                                                         raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
586                                                         diff_cmd = $1.intern
587                                                         index = $2.to_i
588                                                         count = $3.to_i
589                                                         case diff_cmd
590                                                         when :d
591                                                                 # for deletion, index 1 is the first index, so the Ruby
592                                                                 # index is one less than the diff one
593                                                                 index -= 1
594                                                                 # we replace them with empty string so that 'a' commands
595                                                                 # referring to the same line work properly
596                                                                 while count > 0
597                                                                         buffer[index].clear
598                                                                         index += 1
599                                                                         count -= 1
600                                                                 end
601                                                         when :a
602                                                                 # addition will prepend the appropriate lines
603                                                                 # to the given index, and in this case Ruby
604                                                                 # and diff indices are the same
605                                                                 adding = []
606                                                         end
607                                                 end
608
609                                                 # turn the buffer into an array of lines, deleting the empty ones
610                                                 buffer.delete_if { |l| l.empty? }
611                                                 buffer.flatten!
612
613                                                 rcs.revision[rev].text = buffer
614                                         end
615                                         puts rcs.revision[rev].blob
616                                         status.pop
617                                 else
618                                         raise "Unknown status #{status.last}"
619                                 end
620                         end
621                 end
622
623                 # clean up the symbols/branches: look for revisions that have
624                 # one or more symbols but no dates, and make them into
625                 # branches, pointing to the highest commit with that key
626                 branches = []
627                 keys = rcs.revision.keys
628                 rcs.revision.each do |key, rev|
629                         if rev.date.nil? and not rev.symbols.empty?
630                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
631                                 tr = rcs.revision[top]
632                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
633                                 tr.branches |= rev.symbols
634                                 branches << key
635                         end
636                 end
637                 branches.each { |k| rcs.revision.delete k }
638
639                 return rcs
640         end
641
# The set of files touched by a commit: maps each RCS file object to the
# revision of that file that belongs to the commit.
class Tree
	# commit:: the owning commit; within this class it is only used to
	#          report warnings through its warn_about method
	def initialize(commit)
		@commit = commit
		@files = {}
	end

	# Add every (rcs, rev) pair of +tree+ to this tree.
	#
	# The additions are staged on a copy of the file table, and the copy
	# only replaces the real table once all of them succeeded: if any
	# add raises, this tree is left untouched.
	def merge!(tree)
		staged = @files.dup
		tree.each { |rcs, rev| add(rcs, rev, staged) }
		@files.replace staged
	end

	# Register revision +rev+ for file +rcs+ in +file_list+ (by default
	# the tree's own file table).
	#
	# If the file is already present, the new revision replaces the old
	# one only when both carry the same text (a warning is issued through
	# the commit); differing text raises a RuntimeError instead.
	def add(rcs, rev, file_list=@files)
		if file_list.key?(rcs)
			prev = file_list[rcs]
			# only show the logs when they are what tells the revisions apart
			detail = if prev.log == rev.log
				"(old: #{prev.rev}, new: #{rev.rev})"
			else
				"(old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
			end
			msg = "re-adding existing file #{rcs.fname} #{detail}"
			raise msg if prev.text != rev.text
			@commit.warn_about msg
		end
		file_list[rcs] = rev
	end

	# Iterate over the (rcs, rev) pairs of the tree.
	def each(&block)
		@files.each(&block)
	end

	# One line per file: "D <path>" for revisions whose state is "dead"
	# (case-insensitive), "M <mode> :<mark> <path>" for all the others.
	# The mark comes from RCS.blob, which is defined outside this class.
	def to_a
		@files.map do |rcs, rev|
			if rev.state.downcase == "dead"
				"D #{rcs.fname}"
			else
				"M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
			end
		end
	end

	# Names of all the files in the tree.
	def filenames
		@files.map { |rcs, rev| rcs.fname }
	end

	# The lines of to_a, newline-separated.
	def to_s
		to_a.join("\n")
	end
end
697
# A commit to be exported: starts out as a single revision of a single
# file, and can absorb other commits through merge!.
#
# NOTE: this class defines <=> for sorting but intentionally does not
# include Comparable: that would also redefine ==, and commits with the
# same date and symbol count would then compare as equal.
class Commit
	attr_accessor :date, :log, :symbols, :author, :branch
	attr_accessor :tree
	# earliest and latest date among the revisions merged into this commit
	attr_accessor :min_date, :max_date

	# Build a single-file commit from revision +rev+ of file +rcs+.
	# Raises NoBranchSupport if the revision has a branch set.
	def initialize(rcs, rev)
		raise NoBranchSupport if rev.branch
		self.date = rev.date.dup
		self.min_date = self.max_date = self.date
		self.log = rev.log.dup
		self.symbols = rev.symbols.dup
		self.author = rev.author
		self.branch = rev.branch

		self.tree = Tree.new self
		self.tree.add rcs, rev
	end

	# Array form of the commit, used for debugging dumps.
	def to_a
		[self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
	end

	# Emit a warning that mentions the date of this commit.
	def warn_about(str)
		warn str + " for commit on #{self.date}"
	end

	# Sort by date and then by number of symbols
	def <=>(other)
		by_date = self.date <=> other.date
		return by_date if by_date != 0
		self.symbols.length <=> other.symbols.length
	end

	# Absorb +commit+: merge its tree (which raises, leaving this commit
	# unchanged, if a file conflicts), widen the date range and add its
	# symbols.
	def merge!(commit)
		self.tree.merge! commit.tree
		if commit.max_date > self.max_date
			self.max_date = commit.max_date
		end
		if commit.min_date < self.min_date
			self.min_date = commit.min_date
		end
		self.symbols.merge commit.symbols
	end

	# Write the commit to standard output as a fast-import stream
	# fragment. +opts+ is passed through to username_to_author and
	# emit_committer, which are defined outside this class.
	def export(opts={})
		xbranch = self.branch || 'master'
		xauthor = username_to_author(self.author, opts)
		xlog = self.log.join
		numdate = self.date.tv_sec
		xdate = "#{numdate} +0000"
		key = numdate.to_s

		puts "commit refs/heads/#{xbranch}"
		puts "mark :#{RCS.commit key}"
		puts "author #{xauthor} #{xdate}"
		emit_committer(opts, xauthor, xdate)
		# the count of a `data` command is in bytes, not characters:
		# String#length would be too small for multi-byte log messages
		puts "data #{xlog.bytesize}"
		puts xlog unless xlog.empty?
		# TODO branching support for multi-file export
		# puts "from :#{RCS.commit from}" if self.branch_point
		puts self.tree.to_s

		# TODO branching support for multi-file export
		# rev.branches.each do |sym|
		#       puts "reset refs/heads/#{sym}"
		#       puts "from :#{RCS.commit key}"
		# end

		self.symbols.each do |sym|
			puts "reset refs/tags/#{sym}"
			puts "from :#{RCS.commit key}"
		end

	end
end
775 end
776
777 require 'getoptlong'
778
# Command-line options. Every option handled by the dispatch loop further
# down has to be declared here, otherwise GetoptLong rejects it as invalid.
opts = GetoptLong.new(
	# Authors file, like git-svn and git-cvsimport, more than one can be
	# specified
	['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
	# Use author identity as committer identity?
	['--author-is-committer', GetoptLong::NO_ARGUMENT],
	['--no-author-is-committer', GetoptLong::NO_ARGUMENT],
	# Use "co" to obtain the actual revision with keywords expanded.
	['--expand-keywords', GetoptLong::NO_ARGUMENT],
	# RCS file suffix, like RCS
	['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
	# Shell pattern to identify files to be ignored
	['--ignore', GetoptLong::REQUIRED_ARGUMENT],
	# Encoding of log messages in the RCS files
	['--log-encoding', GetoptLong::REQUIRED_ARGUMENT],
	# Date fuzziness for commits to be considered the same (in seconds)
	['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
	# Date fuzziness for tags (in seconds); this was handled by the
	# dispatch loop but missing from the declarations
	['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
	# warn about usernames missing in authors file map?
	['--warn-missing-authors', GetoptLong::NO_ARGUMENT],
	['--no-warn-missing-authors', GetoptLong::NO_ARGUMENT],
	# check symbols when coalescing?
	['--symbol-check', GetoptLong::NO_ARGUMENT],
	['--no-symbol-check', GetoptLong::NO_ARGUMENT],
	# tag each revision?
	['--tag-each-rev', GetoptLong::NO_ARGUMENT],
	['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
	# prepend filenames to commit logs?
	['--log-filename', GetoptLong::NO_ARGUMENT],
	['--no-log-filename', GetoptLong::NO_ARGUMENT],
	# skip branches when exporting a whole tree?
	['--skip-branches', GetoptLong::NO_ARGUMENT],
	# show current version
	['--version', '-v', GetoptLong::NO_ARGUMENT],
	# show help/usage
	['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# Non-option arguments are returned in place, with an empty option name.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
820
# Non-option command-line arguments (files and directories to export)
file_list = []

# Built-in defaults; overridden first by the git configuration read right
# below, then by the command line
parse_options = {
	:authors => {},
	:ignore => [],
	:commit_fuzz => 300,
	:tag_fuzz => -1,
}

# Read config options

# every configured authors file contributes to the author map
`git config --get-all rcs.authorsfile`.each_line do |fn|
	parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# Booleans that default to true are only turned off by an explicit 'false',
# booleans that default to false are only turned on by an explicit 'true'.
parse_options[:author_is_committer] = `git config --bool rcs.authoriscommitter`.chomp != 'false'

parse_options[:tag_each_rev] = `git config --bool rcs.tageachrev`.chomp == 'true'

parse_options[:log_filename] = `git config --bool rcs.logfilename`.chomp == 'true'

# the fuzz values keep their built-in default when git prints nothing
fuzz = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?

fuzz = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?

parse_options[:symbol_check] = `git config --bool rcs.symbolcheck`.chomp != 'false'

parse_options[:warn_missing_authors] = `git config --bool rcs.warnmissingauthors`.chomp != 'false'
859
# Apply the command line on top of the defaults and the git configuration.
# Non-option arguments come through with an empty option name and are
# collected into file_list.
opts.each do |opt, arg|
	case opt
	when '--authors-file'
		authors = load_authors_file(arg)
		redef = parse_options[:authors].keys & authors.keys
		warning "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
		parse_options[:authors].merge!(authors)
	when '--author-is-committer'
		parse_options[:author_is_committer] = true
	when '--no-author-is-committer'
		parse_options[:author_is_committer] = false
	when '--expand-keywords'
		parse_options[:expand_keywords] = true
	when '--rcs-suffixes'
		# TODO
	when '--ignore'
		parse_options[:ignore] << arg
	when '--log-encoding'
		parse_options[:log_encoding] = Encoding.find(arg)
	when '--rcs-commit-fuzz'
		parse_options[:commit_fuzz] = arg.to_i
	when '--rcs-tag-fuzz'
		parse_options[:tag_fuzz] = arg.to_i
	when '--warn-missing-authors'
		# these two were declared as options but never acted upon
		parse_options[:warn_missing_authors] = true
	when '--no-warn-missing-authors'
		parse_options[:warn_missing_authors] = false
	when '--symbol-check'
		parse_options[:symbol_check] = true
	when '--no-symbol-check'
		parse_options[:symbol_check] = false
	when '--tag-each-rev'
		parse_options[:tag_each_rev] = true
	when '--no-tag-each-rev'
		# this is the default, which is fine since the missing key
		# (default) returns nil which is false in Ruby
		parse_options[:tag_each_rev] = false
	when '--log-filename'
		parse_options[:log_filename] = true
	when '--no-log-filename'
		# this is the default, which is fine since the missing key
		# (default) returns nil which is false in Ruby
		parse_options[:log_filename] = false
	when '--skip-branches'
		parse_options[:skip_branches] = true
	when ''
		file_list << arg
	when '--version'
		version
		exit
	when '--help'
		usage
		exit
	end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
	parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
end
915
916 require 'etc'
917
user = Etc.getlogin || ENV['USER']

# steal username/email data from other init files that may contain the
# information
#
# Returns the first value found (stripped of surrounding whitespace), or
# nil if no file yields one. The value is returned rather than stored
# because a method body cannot see the top-level local variables (`user`,
# `parse_options`) of the script: it is up to the caller to record it.
def steal_username
	[
		# the user's .hgrc file for a username field
		# (the quote group matches an empty string for unquoted values, so
		# that the \1 backreference can always succeed)
		['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
		# the user's .(g)vimrc for a changelog_username setting
		['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
		['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
	].each do |fn, rx, idx|
		file = File.expand_path fn
		if File.readable?(file) and File.read(file) =~ rx
			return Regexp.last_match(idx).strip
		end
	end
	nil
end

# If the current user has no entry in the authors map, try to build one:
# name from GIT_AUTHOR_NAME, git config or the passwd entry, email from
# GIT_AUTHOR_EMAIL or git config. If either is missing, fall back to
# whatever steal_username can find.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
	# values coming from ENV are frozen, so the fallbacks reassign the
	# variable instead of modifying the string in place
	name = ENV['GIT_AUTHOR_NAME'].to_s
	name = `git config user.name`.chomp if name.empty?
	if name.empty?
		begin
			pw = Etc.getpwnam(user)
			name = pw.gecos.to_s if pw
		rescue ArgumentError
			# no passwd entry for this login: leave the name empty
		end
	end

	# the email is only looked up once a name was found
	email = ''
	unless name.empty?
		email = ENV['GIT_AUTHOR_EMAIL'].to_s
		email = `git config user.email`.chomp if email.empty?
	end

	if name.empty? or email.empty?
		# couldn't find a name or an email, try to steal data from
		# other sources
		stolen = steal_username
		parse_options[:authors][user] = stolen if stolen
	else
		# we got both a name and email, fill the info
		parse_options[:authors][user] = "#{name} <#{email}>"
	end
end
960
# nothing to export: show the usage and fail
if file_list.empty?
	usage
	exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status of the script, set to 1 when some argument cannot be handled
status = 0

# parsed RCS files, one entry per file
rcs = []
file_list.each do |arg|
	# File.ftype raises for paths that do not exist. A missing argument is
	# treated as a plain file name, so that the RCS file of a working file
	# that is not checked out can still be looked up, and a missing ",v"
	# file is reported instead of aborting the script.
	ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
	case ftype
	when 'file'
		if arg[-2,2] == SFX
			# the argument is the RCS file itself
			unless File.exist? arg
				not_found "RCS file #{arg}"
				status |= 1
				next
			end
			rcsfile = arg.dup
			filename = File.basename(arg, SFX)
		else
			# the argument is a working file: look for its RCS file in
			# the RCS subdirectory first, then next to the file itself
			filename = File.basename(arg)
			path = File.dirname(arg)
			rcsfile = File.join(path, 'RCS', filename) + SFX
			unless File.exist? rcsfile
				rcsfile = File.join(path, filename) + SFX
				unless File.exist? rcsfile
					not_found "RCS file for #{filename} in #{path}"
					status |= 1
					# there is nothing to parse for this argument
					next
				end
			end
		end
		rcs << RCS.parse(filename, rcsfile, parse_options)
	when 'directory'
		argdirname = arg.chomp(File::SEPARATOR)
		pattern = File.join(argdirname, '**', '*' + SFX)
		Dir.glob(pattern, File::FNM_DOTMATCH).each do |rcsfile|
			filename = File.basename(rcsfile, SFX)
			path = File.dirname(rcsfile)
			# strip trailing "/RCS" if present, or "RCS" if that's
			# the full path
			path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
			# strip off the portion of the path specified
			# on the command line from the front of the path
			# (or delete the path completely if it is the same
			# as the specified directory)
			path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
			filename = File.join(path, filename) unless path.empty?

			# skip file if it's to be ignored
			next if parse_options[:ignore].any? { |pat| File.fnmatch?(pat, filename, File::FNM_PATHNAME) }

			# proceed; the exception is re-raised after pointing out
			# which file was being parsed
			begin
				rcs << RCS.parse(filename, rcsfile, parse_options)
			rescue Exception => e
				warning "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
				raise e
			end
		end
	else
		warning "Cannot handle #{arg} of #{ftype} type"
		status |= 1
	end
end
1035
# A single RCS file is exported directly. With more than one file, every
# revision first becomes a single-file commit, and commits with the same
# log, author and branch that are close enough in time are then coalesced
# before being exported.
if rcs.length == 1
	rcs.first.export_commits(parse_options)
else
	warning "Preparing commits"

	commits = []

	rcs.each do |r|
		r.revision.each do |k, rev|
			begin
				commits << RCS::Commit.new(r, rev)
			rescue NoBranchSupport
				# branch revisions are fatal unless we were told to skip them
				raise unless parse_options[:skip_branches]
				warning "Skipping revision #{rev.rev} for #{r.fname} (branch)"
			end
		end
	end

	warning "Sorting by date"

	commits.sort!

	if $DEBUG
		warning "RAW commits (#{commits.length}):"
		commits.each do |c|
			PP.pp c.to_a, $stderr
		end
	else
		warning "#{commits.length} single-file commits"
	end

	warning "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

	# Walk the commits from the last to the first; for each commit `c`,
	# the candidates for merging are the commits that follow it.
	# thisindex tracks the position of `c` in the array.
	thisindex = commits.size
	commits.reverse_each do |c|
		nextindex = thisindex
		thisindex -= 1

		# files touched by this commit
		cfiles = Set.new c.tree.filenames
		# files touched by the candidates we decided not to merge with
		ofiles = Set.new

		mergeable = []

		# test for mergeable commits by looking at following commits
		while nextindex < commits.size
			k = commits[nextindex]
			nextindex += 1

			# commits are date-sorted, so we know we can quit early if we are too far
			# for coalescing to work
			break if k.min_date > c.max_date + parse_options[:commit_fuzz]

			skipthis = false

			kfiles = Set.new k.tree.filenames

			if c.log != k.log or c.author != k.author or c.branch != k.branch
				skipthis = true
			end

			# the symbols agree when one set contains the other
			unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
				cflist = cfiles.to_a.join(', ')
				kflist = kfiles.to_a.join(', ')
				if parse_options[:symbol_check]
					warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
					warning "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
					warning "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
					skipthis = true
				elsif $DEBUG
					warning "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
					warning "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
				end
			end

			# keep track of filenames touched by commits we are not merging with,
			# since we don't want to merge with commits that touch them, to preserve
			# the monotonicity of history for each file
			# TODO we could forward-merge with them, unless some of our files were
			# touched too.
			if skipthis
				# if the candidate touches any file already in the commit,
				# we can stop looking forward
				break unless cfiles.intersection(kfiles).empty?
				ofiles |= kfiles
				next
			end

			# the candidate has the same log, author, branch and appropriate symbols
			# does it touch anything in ofiles?
			unless ofiles.intersection(kfiles).empty?
				if $DEBUG
					cflist = cfiles.to_a.join(', ')
					kflist = kfiles.to_a.join(', ')
					oflist = ofiles.to_a.join(', ')
					warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
					warning "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
				end
				next
			end

			mergeable << k
		end

		# merge the candidates in order, removing each merged commit from
		# the list; stop at the first one that fails to merge
		mergeable.each do |k|
			begin
				c.merge! k
			rescue RuntimeError => err
				# `k` follows `c` in date order, so take the absolute
				# distance: a negative fuzz would make no sense as a hint
				gap = (k.date - c.date).abs
				warning "Fuzzy commit coalescing failed: #{err}"
				warning "\tretry with commit fuzz < #{gap} if you don't want to see this message"
				break
			end
			commits.delete k
		end
	end

	if $DEBUG
		warning "[1] commits (#{commits.length}):"
		commits.each do |c|
			PP.pp c.to_a, $stderr
		end
	else
		warning "#{commits.length} coalesced commits"
	end

	commits.each { |c| c.export(parse_options) }

end

exit status