ruby 1.9: Array#to_s != Array#join
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 TODO
5         * Refactor commit coalescing
6         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
7         * Add support for commitid for coalescing commits
8         * Further coalescing options? (e.g. small logfile differences)
9         * Proper branching support in multi-file export
10         * Optimize memory usage by discarding unneeded text
11 =end
12
13 require 'pp'
14 require 'set'
15
# Integer#odd? only exists from Ruby 1.8.7 onwards; supply an equivalent
# definition when the running interpreter does not provide one
unless 2.respond_to?(:odd?)
	class Integer
		# true when the integer is not a multiple of two
		def odd?
			(self % 2) != 0
		end
	end
end
25
# Print the command-line help text to standard error.
#
# The text documents the options accepted on the command line and the
# git configuration keys that mirror them. The symbol-check switch is
# spelled --symbol-check, matching the option table passed to GetoptLong.
def usage
	STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
71
# Report on standard error that +arg+ could not be located.
def not_found(arg)
	STDERR.puts(format('Could not find %s', arg))
end
75
# returns a hash that maps usernames to author names & emails
#
# Each line of the file +fn+ is expected to look like
#   username = Full Name <email>
# and is split on the first '=' only, so the author part may itself
# contain '='. Lines without any '=' (blank lines included) are skipped
# instead of aborting the whole load; non-blank ones are reported on
# standard error. A username defined more than once keeps the last
# definition and a notice is printed.
#
# If the file cannot be read, not_found is called and whatever was
# collected so far (possibly an empty hash) is returned.
def load_authors_file(fn)
	hash = {}
	begin
		File.open(File.expand_path(fn)) do |io|
			io.each_line do |line|
				uname, author = line.split('=', 2)
				# no '=' on this line: there is nothing to map
				if author.nil?
					STDERR.puts "Skipping malformed authors line #{line.chomp.inspect}" unless line.strip.empty?
					next
				end
				uname.strip!
				author.strip!
				STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
				hash[uname] = author
			end
		end
	rescue
		not_found(fn)
	end
	return hash
end
94
class Time
	# Build a UTC Time from an RCS date string.
	#
	# RCS dates have six dot-separated fields: year, month, day, hour,
	# minute and second. Years from 1900 to 1999 are stored with their
	# last two digits only, while later years are stored in full. Since
	# Ruby 1.9 Time.utc takes a year such as '99' literally (year 99), so
	# short years are expanded to 19YY here.
	#
	# Raises ArgumentError if +string+ does not have exactly six fields.
	def Time.rcs(string)
		fields = string.split('.')
		raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
		fields[0] = "19#{fields[0]}" if fields[0].length < 3
		Time.utc(*fields)
	end
end
102
# Parsing of RCS history files and export of their content as a
# git fast-import stream written to standard output.
module RCS
	# strip an optional final ;
	def RCS.clean(arg)
		arg.chomp(';')
	end

	# strip the first and last @, and de-double @@s
	#
	# +arg+ is either a String or an Array of lines. The argument and the
	# strings it holds are left untouched; a new String or Array is
	# returned. Anything else raises a RuntimeError.
	def RCS.sanitize(arg)
		case arg
		when Array
			# copy each line: a plain Array#dup would share the strings
			# with the caller and the sub! calls below would alter them
			ret = arg.map { |l| l.dup }
			raise 'malformed first line' unless ret.first[0,1] == '@'
			raise 'malformed last line' unless ret.last[-1,1] == '@'
			ret.first.sub!(/^@/,'')
			ret.last.sub!(/@$/,'')
			ret.map { |l| l.gsub('@@','@') }
		when String
			arg.chomp('@').sub(/^@/,'').gsub('@@','@')
		else
			raise "cannot sanitize #{arg.class}"
		end
	end

	# clean and sanitize
	def RCS.at_clean(arg)
		RCS.sanitize RCS.clean(arg)
	end

	# Return the fast-import mark number associated with +key+,
	# allocating the next free number the first time a key is seen.
	# Blobs and commits share the same numbering.
	def RCS.mark(key)
		@@marks ||= {}
		if @@marks.key? key
			@@marks[key]
		else
			@@marks[key] = @@marks.length + 1
		end
	end

	# mark of the blob holding revision +rev+ of +file+
	def RCS.blob(file, rev)
		RCS.mark([file, rev])
	end

	# mark of the commit identified by +commit+
	def RCS.commit(commit)
		RCS.mark(commit)
	end

	# An RCS-tracked file: its header fields and its revisions, indexed
	# by revision number. Asking for an unknown revision number creates
	# an empty Revision for it.
	class File
		attr_accessor :head, :comment, :desc, :revision, :fname, :mode

		# +fname+ is the name used in the exported tree, +executable+
		# selects between git modes 755 and 644
		def initialize(fname, executable)
			@fname = fname.dup
			@head = nil
			@comment = nil
			@desc = []
			@revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
			@mode = executable ? '755' : '644'
		end

		# a revision counts as present only once its author is known,
		# i.e. after its header has been read
		def has_revision?(rev)
			@revision.has_key?(rev) && !@revision[rev].author.nil?
		end

		# Write one fast-import commit per revision to standard output,
		# parents first, and empty the revision table while doing so.
		#
		# Recognized +opts+ keys:
		# :authors:: hash mapping RCS usernames to "Name <email>"
		#            (defaults to no mapping)
		# :log_filename:: prepend the file name to each log
		# :tag_each_rev:: create a tag named after each revision
		#
		# Raises a RuntimeError if some revisions can never be exported
		# because their parent revision is missing.
		def export_commits(opts={})
			authors = opts[:authors] || {}
			counter = 0
			exported = []
			until @revision.empty?
				counter += 1
				exported_before = exported.length

				# a string sort is a very good candidate for
				# export order, getting a miss only for
				# multi-digit revision components
				keys = @revision.keys.sort

				STDERR.puts "commit export loop ##{counter}"
				STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
				STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

				keys.each do |key|
					rev = @revision[key]
					# the parent commit is rev.next if we're on the
					# master branch (rev.branch is nil) or
					# rev.diff_base otherwise
					from = rev.branch.nil? ? rev.next : rev.diff_base
					# A commit can only be exported if it has no
					# parent, or if the parent has been exported
					# already. Skip this commit otherwise
					if from and not exported.include? from
						next
					end

					branch = rev.branch || 'master'
					author = authors[rev.author] || "#{rev.author} <empty>"
					date = "#{rev.date.tv_sec} +0000"
					log = String.new
					if opts[:log_filename]
						log << @fname << ": "
					end
					log << rev.log.join

					puts "commit refs/heads/#{branch}"
					puts "mark :#{RCS.commit key}"
					puts "committer #{author} #{date}"
					# fast-import counts bytes, not characters
					puts "data #{log.bytesize}"
					puts log unless log.empty?
					puts "from :#{RCS.commit from}" if rev.branch_point
					puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"

					# TODO FIXME this *should* be safe, in
					# that it should not unduly move
					# branches back in time, but I'm not
					# 100% sure ...
					rev.branches.each do |sym|
						puts "reset refs/heads/#{sym}"
						puts "from :#{RCS.commit key}"
					end
					rev.symbols.each do |sym|
						puts "reset refs/tags/#{sym}"
						puts "from :#{RCS.commit key}"
					end
					if opts[:tag_each_rev]
						puts "reset refs/tags/#{key}"
						puts "from :#{RCS.commit key}"
					end

					exported.push key
				end

				# a pass that exports nothing will never make
				# progress: bail out instead of looping forever
				if exported.length == exported_before
					raise "cannot export #{keys.join(', ')}: parent revision never exported"
				end
				exported.each { |k| @revision.delete(k) }
			end
		end
	end

	# A single revision of an RCS::File. The log and text are kept as
	# arrays of lines, the symbols as a Set.
	class Revision
		attr_accessor :rev, :author, :state, :next
		attr_accessor :branches, :log, :text, :symbols
		attr_accessor :branch, :diff_base, :branch_point
		attr_reader   :date
		def initialize(file, rev)
			@file = file
			@rev = rev
			@author = nil
			@date = nil
			@state = nil
			@next = nil
			@branches = []
			@branch = nil
			@branch_point = nil
			@diff_base = nil
			@log = []
			@text = []
			@symbols = Set.new
		end

		# the date is assigned from its RCS string form
		def date=(str)
			@date = Time.rcs(str)
		end

		# fast-import blob command carrying the text of this revision;
		# the data length is expressed in bytes as fast-import requires
		def blob
			str = @text.join('')
			"blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.bytesize}\n#{str}\n"
		end
	end

	# Parse the RCS file +rcsfile+ and return an RCS::File named +fname+.
	#
	# The file is read line by line with a stack of parser states. As a
	# side effect, the blob of every revision is written to standard
	# output as soon as its text is known: the head text is stored in
	# full, the text of every other revision is rebuilt by applying its
	# delta to the text of its diff base.
	#
	# Raises a RuntimeError on unknown commands, malformed @-strings,
	# malformed deltas or branch structures that cannot be handled.
	def RCS.parse(fname, rcsfile)
		rcs = RCS::File.new(fname, ::File.executable?(rcsfile))

		::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
			status = [:basic]
			rev = nil
			lines = []
			difflines = []
			file.each_line do |line|
				case status.last
				when :basic
					command, args = line.split($;,2)
					next if command.empty?

					if command.chomp!(';')
						STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
						next
					end

					case command
					when 'head'
						rcs.head = RCS.clean(args.chomp)
					when 'symbols'
						status.push :symbols
						next if args.empty?
						line = args; redo
					when 'comment'
						rcs.comment = RCS.at_clean(args.chomp)
					when /^[0-9.]+$/
						rev = command.dup
						# the first occurrence of a revision number
						# introduces its header, the second one its
						# log and text
						if rcs.has_revision?(rev)
							status.push :revision_data
						else
							status.push :new_revision
						end
					when 'desc'
						status.push :desc
						lines.clear
						status.push :read_lines
					when 'branch', 'access', 'locks', 'expand'
						STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
						status.push :skipping_lines
						next if args.empty?
						line = args; redo
					else
						raise "Unknown command #{command.inspect}"
					end
				when :skipping_lines
					status.pop if line.strip.chomp!(';')
				when :symbols
					# we can have multiple symbols per line
					pairs = line.strip.split($;)
					pairs.each do |pair|
						# a dedicated name keeps the revision being
						# parsed (rev) untouched
						sym, symrev = pair.strip.split(':',2)
						if symrev
							status.pop if symrev.chomp!(';')
							rcs.revision[symrev].symbols << sym
						else
							status.pop
						end
					end
				when :desc
					rcs.desc.replace lines.dup
					status.pop
				when :read_lines
					# we sanitize lines as we read them

					actual_line = line.dup

					# the first line must begin with a @, which we strip
					if lines.empty?
						ats = line.match(/^@+/)
						raise 'malformed line' unless ats
						actual_line.replace line.sub(/^@/,'')
					end

					# if the line ends with an ODD number of @, it's the
					# last line -- we work on actual_line so that content
					# such as @\n or @ work correctly (they would be
					# encoded respectively as ['@@@\n','@\n'] and
					# ['@@@@\n']
					ats = actual_line.chomp.match(/@+$/)
					if nomore = (ats && Regexp.last_match(0).length.odd?)
						actual_line.replace actual_line.chomp.sub(/@$/,'')
					end
					lines << actual_line.gsub('@@','@')
					if nomore
						# hand the collected lines over to the state
						# that asked for them, on this same input line
						status.pop
						redo
					end
				when :new_revision
					case line.chomp
					when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
						rcs.revision[rev].date = $1
						rcs.revision[rev].author = $2
						rcs.revision[rev].state = $3
					when 'branches'
						status.push :branches
					when /branches\s*;/
						next
					when /^next\s+(\S+)?;$/
						nxt = rcs.revision[rev].next = $1
						next unless nxt
						raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
						rcs.revision[nxt].diff_base = rev
						rcs.revision[nxt].branch = rcs.revision[rev].branch
					else
						status.pop
					end
				when :branches
					candidate = line.split(';',2)
					branch = candidate.first.strip
					rcs.revision[rev].branches.push branch
					raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
					rcs.revision[branch].diff_base = rev
					# we drop the last number from the branch name
					rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
					rcs.revision[branch].branch_point = rev
					status.pop if candidate.length > 1
				when :revision_data
					case line.chomp
					when 'log'
						status.push :log
						lines.clear
						status.push :read_lines
					when 'text'
						if rev == rcs.head
							status.push :head
						else
							status.push :diff
						end
						lines.clear
						status.push :read_lines
					else
						status.pop
					end
				when :log
					rcs.revision[rev].log.replace lines.dup
					status.pop
				when :head
					rcs.revision[rev].text.replace lines.dup
					puts rcs.revision[rev].blob
					status.pop
				when :diff
					difflines.replace lines.dup
					difflines.pop if difflines.last.empty?
					base = rcs.revision[rev].diff_base
					# a delta is meaningless without the revision it
					# applies to; looking up a nil base would also
					# create a bogus revision entry
					raise 'no diff base!' if base.nil?
					# deep copy: one array per base line, so that
					# deleted lines become empty slots and added lines
					# can be put in front of the slot they precede
					buffer = []
					rcs.revision[base].text.each { |l| buffer << [l.dup] }

					adding = false
					index = nil
					count = nil

					while l = difflines.shift
						if adding
							raise 'negative index during insertion' if index < 0
							raise 'negative count during insertion' if count < 0
							adding << l
							count -= 1
							# collected all the lines, put the before
							unless count > 0
								unless buffer[index]
									buffer[index] = []
								end
								buffer[index].unshift(*adding)
								adding = false
							end
							next
						end

						l.chomp!
						raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
						diff_cmd = $1.intern
						index = $2.to_i
						count = $3.to_i
						case diff_cmd
						when :d
							# for deletion, index 1 is the first index, so the Ruby
							# index is one less than the diff one
							index -= 1
							# we replace them with empty string so that 'a' commands
							# referring to the same line work properly
							while count > 0
								buffer[index].clear
								index += 1
								count -= 1
							end
						when :a
							# addition will prepend the appropriate lines
							# to the given index, and in this case Ruby
							# and diff indices are the same
							adding = []
						end
					end

					# turn the buffer into an array of lines, deleting the empty ones
					buffer.delete_if { |l| l.empty? }
					buffer.flatten!

					rcs.revision[rev].text = buffer
					puts rcs.revision[rev].blob
					status.pop
				else
					raise "Unknown status #{status.last}"
				end
			end
		end

		# clean up the symbols/branches: look for revisions that have
		# one or more symbols but no dates, and make them into
		# branches, pointing to the highest commit with that key
		branches = []
		keys = rcs.revision.keys
		rcs.revision.each do |key, rev|
			if rev.date.nil? and not rev.symbols.empty?
				# the key is matched literally (its dots are not
				# wildcards) and candidates are ordered by numeric
				# components, so that e.g. 1.2.2.10 ranks above 1.2.2.9
				prefix = /^#{Regexp.escape(key)}\./
				candidates = keys.select { |k| k =~ prefix }
				top = candidates.sort_by { |k| k.split('.').map { |n| n.to_i } }.last
				raise "unhandled complex branch structure met: #{rev.inspect} refers no revision" if top.nil?
				tr = rcs.revision[top]
				raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
				tr.branches |= rev.symbols
				branches << key
			end
		end
		branches.each { |k| rcs.revision.delete k }

		return rcs
	end

	# The set of file revisions that make up a Commit, indexed by
	# RCS::File.
	class Tree
		def initialize(commit)
			@commit = commit
			@files = Hash.new
		end

		# Add every entry of +tree+ to this tree. Nothing is changed if
		# any of the additions raises.
		def merge!(tree)
			testfiles = @files.dup
			tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
			# the next line is only reached if all the adds were
			# successful, so the merge is atomic
			@files.replace testfiles
		end

		# Record revision +rev+ of file +rcs+ in +file_list+ (the tree
		# itself by default). Adding a file that is already present
		# raises if the two revisions have a different text, and only
		# warns otherwise.
		def add(rcs, rev, file_list=@files)
			if file_list.key? rcs
				prev = file_list[rcs]
				if prev.log == rev.log
					str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
				else
					# logs are arrays of lines: join them, since
					# Array#to_s is the same as inspect from Ruby 1.9
					str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.join].inspect}, new: #{[rev.rev, rev.log.join].inspect})"
				end
				if prev.text != rev.text
					raise str
				else
					@commit.warn_about str
				end
			end
			file_list[rcs] = rev
		end

		# iterate over the (file, revision) pairs
		def each &block
			@files.each(&block)
		end

		# one fast-import "M" (filemodify) line per file
		def to_a
			@files.map do |rcs, rev|
				"M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
			end
		end

		def filenames
			@files.map { |rcs, rev| rcs.fname }
		end

		def to_s
			self.to_a.join("\n")
		end
	end

	# A commit that may span several files, built from one revision and
	# extended by merging other commits into it. Revisions on a branch
	# are not supported and raise NotImplementedError.
	class Commit
		attr_accessor :date, :log, :symbols, :author, :branch
		attr_accessor :tree
		def initialize(rcs, rev)
			raise NotImplementedError if rev.branch
			self.date = rev.date.dup
			self.log = rev.log.dup
			self.symbols = rev.symbols.dup
			self.author = rev.author
			self.branch = rev.branch

			self.tree = Tree.new self
			self.tree.add rcs, rev
		end

		def to_a
			[self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
		end

		# emit a warning that mentions the date of this commit
		def warn_about(str)
			warn str + " for commit on #{self.date}"
		end

		# Sort by date and then by number of symbols
		def <=>(other)
			ds = self.date <=> other.date
			if ds != 0
				return ds
			else
				return self.symbols.length <=> other.symbols.length
			end
		end

		# Absorb +commit+: its files are added to the tree, the later of
		# the two dates is kept and the symbols are united.
		def merge!(commit)
			self.tree.merge! commit.tree
			if commit.date > self.date
				warn_about "updating date to #{commit.date}"
				self.date = commit.date
			end
			self.symbols.merge commit.symbols
		end

		# Write the commit to standard output in fast-import format,
		# followed by a tag reset for each of its symbols.
		#
		# +opts+[:authors] maps RCS usernames to "Name <email>" and
		# defaults to no mapping.
		def export(opts={})
			authors = opts[:authors] || {}
			xbranch = self.branch || 'master'
			xauthor = authors[self.author] || "#{self.author} <empty>"
			# the log is an array of lines: it has to be joined, because
			# Array#to_s returns the inspect form from Ruby 1.9
			xlog = self.log.join
			numdate = self.date.tv_sec
			xdate = "#{numdate} +0000"
			key = numdate.to_s

			puts "commit refs/heads/#{xbranch}"
			puts "mark :#{RCS.commit key}"
			puts "committer #{xauthor} #{xdate}"
			# fast-import counts bytes, not characters
			puts "data #{xlog.bytesize}"
			puts xlog unless xlog.empty?
			# TODO branching support for multi-file export
			# puts "from :#{RCS.commit from}" if self.branch_point
			puts self.tree.to_s

			# TODO branching support for multi-file export
			# rev.branches.each do |sym|
			#       puts "reset refs/heads/#{sym}"
			#       puts "from :#{RCS.commit key}"
			# end

			self.symbols.each do |sym|
				puts "reset refs/tags/#{sym}"
				puts "from :#{RCS.commit key}"
			end

		end
	end
end
623
require 'getoptlong'

# Command-line options understood by the script. Each negatable boolean
# option comes as a --foo / --no-foo pair.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Fuzziness for tags; the option loop stores it as :tag_fuzz, so it
        # has to be declared here or GetoptLong rejects it as invalid
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
# RETURN_IN_ORDER hands non-option arguments to the loop with an empty
# option name.
opts.ordering = GetoptLong::RETURN_IN_ORDER
650
# Non-option command-line arguments, in the order they were given
file_list = []

# Built-in defaults; the git configuration read below overrides them
parse_options = { :authors => {}, :commit_fuzz => 300, :tag_fuzz => -1 }

# Read config options

# every configured rcs.authorsfile contributes to the authors map
`git config --get-all rcs.authorsfile`.each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

# off unless git reports an explicit 'true'
parse_options[:tag_each_rev] = `git config --bool rcs.tageachrev`.chomp == 'true'
parse_options[:log_filename] = `git config --bool rcs.logfilename`.chomp == 'true'

# numeric settings replace the default only when git printed a value
commit_fuzz_cfg = `git config --int rcs.commitFuzz`.chomp
parse_options[:commit_fuzz] = commit_fuzz_cfg.to_i unless commit_fuzz_cfg.empty?

tag_fuzz_cfg = `git config --int rcs.tagFuzz`.chomp
parse_options[:tag_fuzz] = tag_fuzz_cfg.to_i unless tag_fuzz_cfg.empty?

# on unless git reports an explicit 'false'
parse_options[:symbol_check] = `git config --bool rcs.symbolcheck`.chomp != 'false'
680
# Options that just store a fixed boolean into parse_options
flag_settings = {
        '--symbol-check'    => [:symbol_check, true],
        '--no-symbol-check' => [:symbol_check, false],
        '--tag-each-rev'    => [:tag_each_rev, true],
        '--no-tag-each-rev' => [:tag_each_rev, false],
        '--log-filename'    => [:log_filename, true],
        '--no-log-filename' => [:log_filename, false],
}

# Options whose argument is stored as an integer
integer_settings = {
        '--rcs-commit-fuzz' => :commit_fuzz,
        '--rcs-tag-fuzz'    => :tag_fuzz,
}

opts.each do |opt, arg|
        if flag_settings.key?(opt)
                setting, value = flag_settings[opt]
                parse_options[setting] = value
        elsif integer_settings.key?(opt)
                parse_options[integer_settings[opt]] = arg.to_i
        elsif opt == '--authors-file'
                # later files win, but tell the user which names got replaced
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        elsif opt == ''
                # non-option argument: a file or directory to export
                file_list << arg
        elsif opt == '--help'
                usage
                exit
        end
        # '--rcs-suffixes' is accepted but not acted upon yet (TODO)
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
parse_options[:tag_fuzz] = [parse_options[:tag_fuzz], parse_options[:commit_fuzz]].max
721
require 'etc'

user = Etc.getlogin || ENV['USER']

# steal username/email data from other init files that may contain the
# information, and record the first hit as the identity of +user+
#
# authors:: Hash updated in place (authors[user] is set on success)
# user::    key under which the identity is stored
#
# Returns the stored string, or nil when no file yielded a match.
#
# The hash and the user are passed in explicitly because a method body
# cannot see the script's top-level local variables.
def steal_username(authors, user)
        [
                # the user's .hgrc file for a username field
                # (the quote group always participates, possibly matching
                # nothing, so that unquoted values are accepted too)
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return authors[user] = Regexp.last_match(idx).strip
                end
        end
        nil
end

# Work out an identity for the current login unless one is already known
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # ENV values may be frozen, so reassign rather than mutate in place
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        passwd = Etc.getpwnam(user)
                        name = passwd.gecos.to_s if passwd and passwd.respond_to?(:gecos)
                rescue ArgumentError
                        # no passwd entry for this login: keep the name empty
                end
        end

        if name.empty?
                # couldn't find a name, try to steal data from other sources
                steal_username(parse_options[:authors], user)
        else
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                if email.empty?
                        # couldn't find an email, try to steal data too
                        steal_username(parse_options[:authors], user)
                else
                        # we got both a name and email, fill the info
                        parse_options[:authors][user] = "#{name} <#{email}>"
                end
        end
end
767
# Nothing to export: show the help text and fail
if file_list.empty?
        usage
        exit 1
end

# Suffix of RCS history files
SFX = ',v'

# Exit status of the script; set to 1 as soon as an argument cannot be handled
status = 0

# Parsed RCS files, one entry per history file found
rcs = []
file_list.each do |arg|
        # File.ftype raises for paths that do not exist; treat those as plain
        # file names so that the lookups below can report them via not_found
        ftype = File.exist?(arg) ? File.ftype(arg) : 'file'
        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # the argument is the RCS file itself
                        unless File.exist? arg
                                not_found "RCS file #{arg}"
                                status |= 1
                                next
                        end
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # the argument is a working file: look for its history in
                        # the RCS subdirectory first, then next to the file
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        candidates = [
                                File.join(path, 'RCS', filename) + SFX,
                                File.join(path, filename) + SFX,
                        ]
                        rcsfile = candidates.find { |candidate| File.exist? candidate }
                        unless rcsfile
                                not_found "RCS file for #{filename} in #{path}"
                                status |= 1
                                next
                        end
                end
                rcs << RCS.parse(filename, rcsfile)
        when 'directory'
                pattern = File.join(arg, '**', '*' + SFX)
                Dir.glob(pattern).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip a final RCS path component if present (a whole
                        # component only, so a directory like "myRCS" is kept)
                        path.sub!(/(\A|\/)RCS\z/, '')
                        path.sub!(/\A#{Regexp.escape arg}\/?/, '') # strip initial dirname
                        filename = File.join(path, filename) unless path.empty?
                        begin
                                rcs << RCS.parse(filename, rcsfile)
                        rescue Exception
                                # say which file broke the parser, then let the
                                # original exception propagate untouched
                                STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                STDERR.puts "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
821
if rcs.length == 1
        # a single RCS file is exported revision by revision
        rcs.first.export_commits(parse_options)
else
        STDERR.puts "Preparing commits"

        # start with one single-file commit per revision of every RCS file
        commits = []
        rcs.each do |r|
                r.revision.each { |_key, rev| commits << RCS::Commit.new(r, rev) }
        end

        STDERR.puts "Sorting by date"

        commits.sort!

        # Under $DEBUG dump every commit, otherwise only say how many there are
        report = lambda do |debug_label, summary|
                if $DEBUG
                        STDERR.puts "#{debug_label} commits (#{commits.length}):"
                        commits.each { |commit| PP.pp commit.to_a, $stderr }
                else
                        STDERR.puts "#{commits.length} #{summary}"
                end
        end

        report.call('RAW', 'single-file commits')

        window = parse_options[:commit_fuzz]
        STDERR.puts "Coalescing [1] by date with fuzz #{window}"

        # Walk the sorted list backwards; for each commit c, scan backwards again
        # for commits k that are not later than c and share its log, author and
        # branch, merging each into c and dropping it from the list.
        commits.reverse_each do |c|
                commits.reverse_each do |k|
                        # everything further back is outside the fuzz window
                        break if k.date < c.date - window
                        next if k == c
                        next unless c.log == k.log && c.author == k.author && c.branch == k.branch
                        next if k.date > c.date

                        symbols_agree = c.symbols.subset?(k.symbols) || k.symbols.subset?(c.symbols)
                        if !symbols_agree && (parse_options[:symbol_check] || $DEBUG)
                                files = "\n\tfor (#{c.tree.filenames.join(', ')})\n\tand (#{k.tree.filenames.join(', ')})"
                                clash = "\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                if parse_options[:symbol_check]
                                        STDERR.puts "Not coalescing #{c.log.inspect}#{files}"
                                        STDERR.puts "\tbecause their symbols disagree:#{clash}"
                                        STDERR.puts "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        next
                                end
                                STDERR.puts "Coalescing #{c.log.inspect}#{files}"
                                STDERR.puts "\twith disagreeing symbols:#{clash}"
                        end

                        begin
                                c.merge! k
                        rescue RuntimeError => failure
                                gap = c.date - k.date
                                STDERR.puts "Fuzzy commit coalescing failed: #{failure}"
                                STDERR.puts "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                # give up on merging anything else into c
                                break
                        end
                        # k now lives on inside c
                        commits.delete k
                end
        end

        report.call('[1]', 'coalesced commits')

        commits.each { |commit| commit.export(parse_options) }

end

exit status
891
892 exit status