Suggest a simple "git reset" after import
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Further coalescing options? (e.g. small logfile differences)
8         * Proper branching support in multi-file export
9         * Optimize memory usage by discarding unneeded text
10 =end
11
12 require 'pp'
13 require 'set'
14
# Interpreters older than Ruby 1.8.7 do not provide Integer#odd?, so
# supply an equivalent definition, but only when the running Ruby
# lacks one
Integer.class_eval do
        # true when the receiver is not a multiple of two
        def odd?
                (self % 2) == 1
        end
end unless 2.respond_to?(:odd?)
24
# Print the command-line help text on standard error.
#
# The text is a heredoc interpolating the program name ($0); nothing is
# returned to the caller beyond what STDERR.puts returns.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics are used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commits must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
70
# Report on standard error that +arg+ could not be located
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
74
# returns a hash that maps usernames to author names & emails
#
# fn is the path of the authors file (it is passed through
# File.expand_path). Every useful line has the form
#     username = Full Name <email>
# and both sides are stripped of surrounding whitespace. Blank lines are
# skipped and lines without a '=' are reported on standard error and
# ignored, so that one stray line no longer discards the rest of the file.
# A redefinition of a username is reported and the last definition wins.
#
# Any error while opening or reading the file is reported through
# not_found and whatever was collected up to that point is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                # nothing to map on an empty line
                                next if line.strip.empty?

                                uname, author = line.split('=', 2)
                                # without a '=' there is no author part at all
                                if author.nil?
                                        STDERR.puts "Ignoring malformed authors line #{line.chomp.inspect}"
                                        next
                                end

                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
93
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string holds six dot-separated fields: year, month, day, hour,
        # minute and second. Raises ArgumentError when the number of
        # fields is not six.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                # RCS stores the years 1900-1999 with their last two digits
                # only; recent Rubies would read '99' as the year 99, so
                # spell the century out before handing the fields to Time.utc
                fields[0] = "19#{fields[0]}" if fields[0].length < 3
                Time.utc(*fields)
        end
end
101
# Reading of RCS files and conversion of their history into a stream of
# git fast-import commands written on standard output
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # arg is either a String or an Array of lines; a new object of the
        # same kind is returned and the argument is left untouched. Raises
        # when an Array does not begin and end with a @, or when arg is
        # neither a String nor an Array.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' if arg.empty? or arg.first[0,1] != '@'
                        raise 'malformed last line' unless arg.last[-1,1] == '@'
                        # copy the lines as well: Array#dup alone would share the
                        # strings, and the in-place stripping below would then
                        # alter the caller's data
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/^@/,'')
                        ret.last.sub!(/@$/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/^@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize a #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # number of bytes in str, which is what the fast-import 'data'
        # command counts; String#bytesize is used where it exists, and
        # String#length on interpreters that lack it
        def RCS.bytesize(str)
                str.respond_to?(:bytesize) ? str.bytesize : str.length
        end

        # text of a log stored as an array of lines (a nil log gives an
        # empty string); Array#to_s cannot be used for this because on
        # recent Rubies it returns the inspected array
        def RCS.log_text(log)
                Array(log).join
        end

        # among keys, the revision with the highest last number that sits
        # directly on the given branch (e.g. 1.1.2.10 for branch 1.1.2), or
        # nil if there is none; the last number is compared as an integer
        def RCS.branch_head(keys, branch)
                direct = /\A#{Regexp.escape(branch)}\.(\d+)\z/
                keys.select { |k| k =~ direct }.sort_by { |k| k[direct, 1].to_i }.last
        end

        # mark number associated with key: the same key always gets the
        # same number, new keys are numbered sequentially from 1
        def RCS.mark(key)
                @@marks ||= {}
                if @@marks.key? key
                        @@marks[key]
                else
                        @@marks[key] = @@marks.length + 1
                end
        end

        # mark of the blob holding revision rev of file
        def RCS.blob(file, rev)
                RCS.mark([file, rev])
        end

        # mark of the commit identified by the given key
        def RCS.commit(commit)
                RCS.mark(commit)
        end

        # An RCS file: head revision, comment leader, description, mode to
        # use in git and the revisions, kept in a hash that creates a
        # Revision the first time a revision number is looked up
        class File
                attr_accessor :head, :comment, :desc, :revision, :fname, :mode
                def initialize(fname, executable)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
                        @mode = executable ? '755' : '644'
                end

                # a revision is considered present once its author is known
                def has_revision?(rev)
                        @revision.has_key?(rev) and not @revision[rev].author.nil?
                end

                # Write one fast-import commit per revision on standard
                # output, parents before children, and empty the revision
                # hash in the process. Progress is reported on standard error.
                #
                # opts may hold :authors (hash from RCS usernames to git
                # identities), :log_filename (prefix each log with the file
                # name) and :tag_each_rev (tag every commit with its
                # revision number).
                #
                # Raises if the remaining revisions all wait for a parent
                # that is never exported.
                def export_commits(opts={})
                        authors = opts[:authors] || {}
                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1
                                already_exported = exported.length

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from and not exported.include? from

                                        branch = rev.branch || 'master'
                                        author = authors[rev.author] || "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        log = String.new
                                        if opts[:log_filename]
                                                log << @fname << ": "
                                        end
                                        log << RCS.log_text(rev.log)

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        # fast-import wants the size in bytes
                                        puts "data #{RCS.bytesize(log)}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"

                                        # TODO FIXME this *should* be safe, in
                                        # that it should not unduly move
                                        # branches back in time, but I'm not
                                        # 100% sure ...
                                        rev.branches.each do |sym|
                                                puts "reset refs/heads/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing will never be followed
                                # by a better one: give up instead of looping forever
                                if exported.length == already_exported
                                        raise "cannot export #{@fname}: revisions #{keys.join(', ')} wait for a parent that is never exported"
                                end
                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # One revision of an RCS file: the data found in the file (author,
        # date, state, next revision, branches, log, text, symbols) plus
        # the branch bookkeeping filled in while parsing
        class Revision
                attr_accessor :rev, :author, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                attr_reader   :date
                def initialize(file, rev)
                        @file = file
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = Set.new
                end

                # the date is assigned as an RCS date string and stored as a Time
                def date=(str)
                        @date = Time.rcs(str)
                end

                # fast-import 'blob' command carrying the text of this
                # revision, with the size expressed in bytes
                def blob
                        str = @text.join('')
                        ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{RCS.bytesize(str)}\n#{str}\n"
                        ret
                end
        end

        # Parse the RCS file rcsfile and return the RCS::File describing
        # it, recorded under the name fname and marked executable if
        # rcsfile is.
        #
        # The blob of each revision is written on standard output as soon
        # as its text is known: the head text is stored whole in the file,
        # every other text is rebuilt by applying the stored 'a'/'d' edit
        # script to the text of the revision it is based on.
        #
        # The parser is line-based and keeps a stack of states in status;
        # :read_lines collects an @-delimited string into lines, and on its
        # last line pops itself and re-runs that line under the state below
        # it, which then consumes the collected lines.
        #
        # Raises on unknown commands, malformed strings or edit scripts and
        # branch structures that cannot be handled.
        def RCS.parse(fname, rcsfile)
                rcs = RCS::File.new(fname, ::File.executable?(rcsfile))

                ::File.open(rcsfile, 'r') do |file|
                        status = [:basic]
                        rev = nil
                        lines = []
                        difflines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        command, args = line.split($;,2)
                                        next if command.empty?

                                        if command.chomp!(';')
                                                STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
                                                next
                                        end

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                                next if args.empty?
                                                line = args; redo
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                # a revision number introduces the revision
                                                # header the first time it is met, and its
                                                # log and text the second time
                                                rev = command.dup
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        when 'branch', 'access', 'locks', 'expand'
                                                STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
                                                status.push :skipping_lines
                                                next if args.empty?
                                                line = args; redo
                                        else
                                                raise "Unknown command #{command.inspect}"
                                        end
                                when :skipping_lines
                                        status.pop if line.strip.chomp!(';')
                                when :symbols
                                        # we can have multiple symbols per line
                                        pairs = line.strip.split($;)
                                        pairs.each do |pair|
                                                sym, rev = pair.strip.split(':',2);
                                                if rev
                                                        status.pop if rev.chomp!(';')
                                                        rcs.revision[rev].symbols << sym
                                                else
                                                        status.pop
                                                end
                                        end
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                ats = line.match(/^@+/)
                                                raise 'malformed line' unless ats
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        if nomore = (ats && Regexp.last_match(0).length.odd?)
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
                                                rcs.revision[rev].date = $1
                                                rcs.revision[rev].author = $2
                                                rcs.revision[rev].state = $3
                                        when 'branches'
                                                status.push :branches
                                        when /branches\s*;/
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        difflines.replace lines.dup
                                        difflines.pop if difflines.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        unless rcs.revision[base].text
                                                pp rcs
                                                puts rev, base
                                                raise 'no diff base!'
                                        end
                                        # deep copy
                                        buffer = []
                                        rcs.revision[base].text.each { |l| buffer << [l.dup] }

                                        adding = false
                                        index = nil
                                        count = nil

                                        while l = difflines.shift
                                                if adding
                                                        raise 'negative index during insertion' if index < 0
                                                        raise 'negative count during insertion' if count < 0
                                                        adding << l
                                                        count -= 1
                                                        # collected all the lines, put the before
                                                        unless count > 0
                                                                unless buffer[index]
                                                                        buffer[index] = []
                                                                end
                                                                buffer[index].unshift(*adding)
                                                                adding = false
                                                        end
                                                        next
                                                end

                                                l.chomp!
                                                raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
                                                diff_cmd = $1.intern
                                                index = $2.to_i
                                                count = $3.to_i
                                                case diff_cmd
                                                when :d
                                                        # for deletion, index 1 is the first index, so the Ruby
                                                        # index is one less than the diff one
                                                        index -= 1
                                                        # we replace them with empty string so that 'a' commands
                                                        # referring to the same line work properly
                                                        while count > 0
                                                                buffer[index].clear
                                                                index += 1
                                                                count -= 1
                                                        end
                                                when :a
                                                        # addition will prepend the appropriate lines
                                                        # to the given index, and in this case Ruby
                                                        # and diff indices are the same
                                                        adding = []
                                                end
                                        end

                                        # turn the buffer into an array of lines, deleting the empty ones
                                        buffer.delete_if { |l| l.empty? }
                                        buffer.flatten!

                                        rcs.revision[rev].text = buffer
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        raise "Unknown status #{status.last}"
                                end
                        end
                end

                # clean up the symbols/branches: look for revisions that have
                # one or more symbols but no dates, and make them into
                # branches, pointing to the highest commit with that key
                branches = []
                keys = rcs.revision.keys
                rcs.revision.each do |key, rev|
                        if rev.date.nil? and not rev.symbols.empty?
                                top = RCS.branch_head(keys, key)
                                # looking up a missing revision would create it
                                # while the hash is being iterated
                                raise "unhandled complex branch structure met: #{rev.inspect} has no revisions" if top.nil?
                                tr = rcs.revision[top]
                                raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
                                # branches is an Array and symbols a Set, which
                                # Array#| refuses unless converted first
                                tr.branches |= rev.symbols.to_a
                                branches << key
                        end
                end
                branches.each { |k| rcs.revision.delete k }

                return rcs
        end

        # The files touched by a commit, as a hash from RCS::File to the
        # Revision of that file included in the commit
        class Tree
                def initialize(commit)
                        @commit = commit
                        @files = Hash.new
                end

                # add all the files of another tree to this one, or none of
                # them if any of the additions fails
                def merge!(tree)
                        testfiles = @files.dup
                        tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
                        # the next line is only reached if all the adds were
                        # successful, so the merge is atomic
                        @files.replace testfiles
                end

                # record revision rev of file rcs in file_list; a file that
                # is already there may only be replaced by a revision with
                # the same text (the commit is warned about it), any other
                # attempt raises
                def add(rcs, rev, file_list=@files)
                        if file_list.key? rcs
                                prev = file_list[rcs]
                                if prev.log == rev.log
                                        str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
                                else
                                        str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, RCS.log_text(prev.log)].inspect}, new: #{[rev.rev, RCS.log_text(rev.log)].inspect})"
                                end
                                if prev.text != rev.text
                                        raise str
                                else
                                        @commit.warn_about str
                                end
                        end
                        file_list[rcs] = rev
                end

                def each(&block)
                        @files.each(&block)
                end

                # one fast-import 'M' (filemodify) line per file
                def to_a
                        @files.map do |rcs, rev|
                                "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
                        end
                end

                def filenames
                        @files.map { |rcs, rev| rcs.fname }
                end

                def to_s
                        self.to_a.join("\n")
                end
        end

        # A commit that may span several files: it starts from one
        # revision of one file and grows by merging other commits into it.
        # Revisions on a branch are not supported (NotImplementedError).
        class Commit
                attr_accessor :date, :log, :symbols, :author, :branch
                attr_accessor :tree
                def initialize(rcs, rev)
                        raise NotImplementedError if rev.branch
                        self.date = rev.date.dup
                        self.log = rev.log.dup
                        self.symbols = rev.symbols.dup
                        self.author = rev.author
                        self.branch = rev.branch

                        self.tree = Tree.new self
                        self.tree.add rcs, rev
                end

                def to_a
                        [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
                end

                def warn_about(str)
                        warn str + " for commit on #{self.date}"
                end

                # Sort by date and then by number of symbols
                def <=>(other)
                        ds = self.date <=> other.date
                        if ds != 0
                                return ds
                        else
                                return self.symbols.length <=> other.symbols.length
                        end
                end

                # absorb another commit: its files and symbols are added to
                # ours and the later of the two dates is kept
                def merge!(commit)
                        self.tree.merge! commit.tree
                        if commit.date > self.date
                                warn_about "updating date to #{commit.date}"
                                self.date = commit.date
                        end
                        self.symbols.merge commit.symbols
                end

                # Write the commit on standard output as fast-import
                # commands, followed by one tag reset per symbol. The commit
                # mark is derived from the commit date in seconds. opts may
                # hold :authors, a hash from RCS usernames to git identities.
                def export(opts={})
                        authors = opts[:authors] || {}
                        xbranch = self.branch || 'master'
                        xauthor = authors[self.author] || "#{self.author} <empty>"
                        xlog = RCS.log_text(self.log)
                        numdate = self.date.tv_sec
                        xdate = "#{numdate} +0000"
                        key = numdate.to_s

                        puts "commit refs/heads/#{xbranch}"
                        puts "mark :#{RCS.commit key}"
                        puts "committer #{xauthor} #{xdate}"
                        # fast-import wants the size in bytes
                        puts "data #{RCS.bytesize(xlog)}"
                        puts xlog unless xlog.empty?
                        # TODO branching support for multi-file export
                        # puts "from :#{RCS.commit from}" if self.branch_point
                        puts self.tree.to_s

                        # TODO branching support for multi-file export
                        # rev.branches.each do |sym|
                        #       puts "reset refs/heads/#{sym}"
                        #       puts "from :#{RCS.commit key}"
                        # end

                        self.symbols.each do |sym|
                                puts "reset refs/tags/#{sym}"
                                puts "from :#{RCS.commit key}"
                        end

                end
        end
end
622
require 'getoptlong'

# Command-line options. Every option handled by the option-processing loop
# must be declared here, otherwise GetoptLong rejects it as invalid.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Fuzziness for tags; the option loop stores it as :tag_fuzz, but
        # it was never declared here, so passing it used to be rejected
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
# With RETURN_IN_ORDER, non-option arguments are yielded with an empty
# option name, interleaved with the options.
opts.ordering = GetoptLong::RETURN_IN_ORDER
649
# Non-option arguments collected from the command line
file_list = []

# Built-in defaults; the git configuration read below and then the
# command-line options can override them.
parse_options = { :authors => {}, :commit_fuzz => 300, :tag_fuzz => -1 }

# Read config options

# every configured authors file contributes to the author map
`git config --get-all rcs.authorsfile`.each_line do |authors_path|
        parse_options[:authors].merge! load_authors_file(authors_path.chomp)
end

# boolean settings that are off unless explicitly set to true
parse_options[:tag_each_rev] = `git config --bool rcs.tageachrev`.chomp == 'true'
parse_options[:log_filename] = `git config --bool rcs.logfilename`.chomp == 'true'

# integer settings replace the default only when configured
commit_fuzz_cfg = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = commit_fuzz_cfg.to_i unless commit_fuzz_cfg.empty?

tag_fuzz_cfg = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = tag_fuzz_cfg.to_i unless tag_fuzz_cfg.empty?

# symbol checking is on unless explicitly set to false
parse_options[:symbol_check] = !(`git config --bool rcs.symbolcheck`.chomp == 'false')
679
# Switches that only store a fixed value under a parse_options key.
# The --no-* forms restore the default explicitly.
toggles = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
}

# Apply the command-line options on top of the configured values
opts.each do |opt, arg|
        if toggles.has_key?(opt)
                setting, value = toggles[opt]
                parse_options[setting] = value
                next
        end

        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                # report authors already known from a previous source
                redef = parse_options[:authors].keys & authors.keys
                unless redef.empty?
                        STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}"
                end
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when ''
                # non-option arguments come through with an empty option name
                file_list << arg
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
tag_fuzz_too_small = parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
parse_options[:tag_fuzz] = parse_options[:commit_fuzz] if tag_fuzz_too_small
720
require 'etc'

user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information.
#
# Returns the first identity found (stripped of surrounding whitespace),
# or nil when none of the files is readable or contains a match. The
# caller decides where to store it: top-level local variables are not
# visible inside a method body, so this method cannot touch them itself.
def steal_username
        # The quote group is written (["']?) rather than (["'])? so that the
        # \1 backreference also matches (as an empty string) when the value
        # is not quoted.
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# Make sure the current user has an author entry: prefer git's own
# identity settings, then fall back to whatever steal_username finds.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # Plain assignment is used instead of String#replace, which fails on
        # frozen strings such as those returned by ENV.
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        name = Etc.getpwnam(user).gecos.to_s
                rescue ArgumentError
                        # the login name has no entry in the password database
                        name = ''
                end
        end

        identity = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?
                # we got both a name and email, fill the info
                identity = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other sources
        identity ||= steal_username

        parse_options[:authors][user] = identity if identity
end
766
# Nothing to export: show the usage text and fail
if file_list.empty?
        usage
        exit 1
end

# Suffix of RCS files
SFX = ',v'

# Exit status of the script; becomes non-zero when an argument could not
# be processed
status = 0

# Parsed RCS files, one entry per ,v file found for the arguments
rcs = []
file_list.each do |arg|
        # File.ftype raises on a path that does not exist (e.g. a working
        # file that is not checked out). Treat such arguments as plain file
        # names so that the RCS file lookup below can run and, if needed,
        # report the problem itself.
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # the argument is the RCS file itself
                        unless File.exist? arg
                                # not_found is defined elsewhere in this file; it is
                                # expected to return, since status is set after it
                                not_found "RCS file #{arg}"
                                status |= 1
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # the argument is a working file: look for its RCS file in
                        # the RCS subdirectory first, then next to the file
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = File.join(path, 'RCS', filename) + SFX
                        unless File.exist? rcsfile
                                rcsfile = File.join(path, filename) + SFX
                                unless File.exist? rcsfile
                                        not_found "RCS file for #{filename} in #{path}"
                                        status |= 1
                                        # there is nothing to parse for this argument
                                        next
                                end
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                # export every RCS file found below the directory
                pattern = File.join(arg, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        path.sub!(/\/?RCS$/, '') # strip final /RCS if present
                        path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # say which file failed, then let the error propagate
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
820
# A single RCS file is exported revision by revision; with several files,
# one commit is created per file revision and commits that look like the
# same change (same log, author and branch, dates within the commit fuzz)
# are coalesced before being exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        commits = []

        rcs.each do |rcs_file|
                rcs_file.revision.each do |_key, rev|
                        commits << RCS::Commit.new(rcs_file, rev)
                end
        end

        # With $DEBUG, dump every commit under the given heading;
        # otherwise print only the one-line summary.
        report = lambda do |heading, summary|
                if $DEBUG
                        STDERR.puts heading
                        commits.each { |entry| PP.pp entry.to_a, $stderr }
                else
                        STDERR.puts summary
                end
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        report.call("RAW commits (#{commits.length}):", "#{commits.length} single-file commits")

        STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the sorted commits from the newest one down; for each commit c,
        # scan the candidates k in the same order and fold matching ones into c.
        commits.reverse_each do |c|
                commits.reverse_each do |k|
                        # past the fuzz window: no earlier commit can match either
                        break if k.date < c.date - parse_options[:commit_fuzz]
                        next if k == c
                        same_change = (c.log == k.log && c.author == k.author && c.branch == k.branch)
                        next unless same_change
                        next if k.date > c.date

                        # one symbol set must be contained in the other
                        symbols_agree = (c.symbols.subset?(k.symbols) || k.symbols.subset?(c.symbols))
                        if !symbols_agree
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}\n\tfor (#{c.tree.filenames.join(', ')})\n\tand (#{k.tree.filenames.join(', ')})"
                                        STDERR.puts "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        next
                                end
                                if $DEBUG
                                        STDERR.puts "Coalescing #{c.log.inspect}\n\tfor (#{c.tree.filenames.join(', ')})\n\tand (#{k.tree.filenames.join(', ')})"
                                        STDERR.puts "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # the merge was refused: report it and stop looking for
                                # further candidates for this commit
                                gap = c.date - k.date
                                STDERR.puts "Fuzzy commit coalescing failed: #{err}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                break
                        end

                        # k now lives on inside c
                        commits.delete k
                end
        end

        report.call("[1] commits (#{commits.length}):", "#{commits.length} coalesced commits")

        commits.each { |commit| commit.export(parse_options) }

end

exit status