implement --expand-keywords option, which replaces the head/diff text that
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 =begin
4 RCS fast export: run the script with the `--help` option for further
5 information.
6
7 No installation needed: you can run it from anywhere, including the git
8 checkout directory. For extra comfort, symlink it to some directory in
9 your PATH. I myself have this symlink:
10
11         ~/bin/rcs-fast-export -> ~/src/rcs-fast-export/rcs-fast-export.rb
12
13 allowing me to run `rcs-fast-export` from anywhere.
14 =end
15
16 =begin
17 TODO
18         * Refactor commit coalescing
19         * Add --strict-symbol-check to only coalesce commits if their symbol lists are equal
20         * Add support for commitid for coalescing commits
21         * Further coalescing options? (e.g. small logfile differences)
22         * Proper branching support in multi-file export
23         * Optimize memory usage by discarding unneeded text
24 =end
25
26 require 'pp'
27 require 'set'
28
# Error class for history that would need branch support the exporter does not
# provide (presumably the abort that --skip-branches avoids; the raising code
# is not part of this section -- verify against the callers).
class NoBranchSupport < NotImplementedError
end
30
# Integer#odd? only exists from Ruby 1.8.7 onwards: provide our own
# implementation when the running interpreter lacks it
if !2.respond_to?(:odd?)
        class Integer
                # true when dividing by two leaves a remainder of one
                def odd?
                        modulo(2) == 1
                end
        end
end
40
# Put standard output in binary mode: git fast-import doesn't like Windows
# line-endings, and binary mode ensures that the line termination is a bare
# 0x0A on Windows too (it would be expanded to 0x0D 0x0A otherwise).
STDOUT.binmode
45
# RCS fast-export version: set to `git` in the repository, but packagers can
# override it, e.g. based on the latest tag, the git description, custom
# packager patches or whatever.
#
# When the version is set to `git`, a little effort is made to find out more
# about the commit we are at.
RFE_VERSION = "git"
56
# Print the program name and version on standard error.
#
# When RFE_VERSION is the literal "git" and the directory holding the script
# contains a `.git` entry, git is asked for the abbreviated hash, the branch
# name (with a trailing `*` when `git diff` reports local changes) and the
# commit date. If anything goes wrong while collecting them, " (no info)" is
# shown instead; without a `.git` entry no extra information is printed.
def version
        if RFE_VERSION == "git"
                # follow one level of symlink so that the repository is looked
                # for next to the real script
                nolinkfile = File.readlink(__FILE__) rescue __FILE__
                script_dir = File.expand_path(File.dirname(nolinkfile))

                # declared here so that the value assigned inside the chdir
                # block is visible afterwards
                info = nil

                # the block form restores the working directory of the caller
                # once the git queries are done
                Dir.chdir(script_dir) do
                        # File.exist? is available in every Ruby version, while
                        # File.exists? was removed in Ruby 3.2
                        if File.exist?('.git')
                                begin
                                        git_out = `git log -1 --pretty="%h %H%n%ai" | git name-rev --stdin`.split("\n")
                                        hash = git_out.first.split.first
                                        branch = git_out.first.split('(').last.chomp(')')
                                        date = git_out.last.split.first
                                        # only the exit status matters: it is
                                        # non-zero when the tree has changes
                                        `git diff --no-ext-diff --quiet --exit-code`
                                        branch << "*" unless $?.success?
                                        info = " [#{branch}] #{hash} (#{date})"
                                rescue
                                        info = " (no info)"
                                end
                        end
                end

                STDERR.puts "#{$0}: RCS fast-export, #{RFE_VERSION} version#{info}"
        else
                STDERR.puts "#{$0}: RCS fast-export, version #{RFE_VERSION}"
        end
end
79
# Print the help text on standard error, after flushing standard output so
# that the text does not get interleaved with pending export data.
#
# The config section names the command-line option matching each rcs.*
# setting; rcs.symbolCheck is listed with --symbol-check, the spelling used by
# the option list of this same text.
def usage
        $stdout.flush
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files. If a directory is specified,
all RCS-tracked files in the directory and its descendants are exported.

When importing single files, their pathname is discarded during import. When
importing directories, only the specified directory component is discarded.

When importing a single file, RCS commits are converted one by one. Otherwise,
some heuristics is used to determine how to coalesce commits touching different
files.

Currently, commits are coalesced if they share the exact same log and if their
date differs by no more than the user-specified fuzziness. Additionally, the
symbols in one of the commit must be a subset of the symbols in the other
commit, unless --no-symbol-check is specified or rcs.symbolCheck is set to
false in the git configuration.

Typical usage:
    git init && rcs-fast-export.rb . | git fast-import && git reset

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --ignore                ignore the specified files (shell pattern)
        --rcs-commit-fuzz       fuzziness in RCS commits to be considered a single one when
                                importing multiple files
                                (in seconds, defaults to 300, i.e. 5 minutes)
        --[no-]warn-missing-authors
                                [do not] warn about usernames missing from the map file
        --[no-]symbol-check     [do not] check symbols when coalescing commits
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision when
                                importing a single file
        --[no-]log-filename     [do not] prepend the filename to the commit log when importing
                                a single file
        --skip-branches         when exporting multiple files with a branched history, export
                                the main branch only instead of aborting due to the lack of
                                support for branched multi-file history export



Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev
        rcs.logFilename         for --log-filename
        rcs.commitFuzz          for --rcs-commit-fuzz
        rcs.warnMissingAuthors  for --warn-missing-authors
        rcs.symbolCheck         for --symbol-check
        rcs.tagFuzz             for --rcs-tag-fuzz

EOM
end
135
# Show msg on standard error. Standard output is flushed beforehand, so that
# data already written there is not held back behind the message.
def warning(msg)
        $stdout.flush
        STDERR.puts(msg)
end
140
# Warn that arg could not be found
def not_found(arg)
        message = "Could not find #{arg}"
        warning(message)
end
144
145 # returns a hash that maps usernames to author names & emails
# Load an authors file into a hash that maps usernames to author names & emails.
#
# Every mapping line has the form `username = Full Name <email>`; both sides
# are stripped of surrounding whitespace. Blank lines are ignored and lines
# without an `=` are skipped with a warning, so that a stray line does not
# discard the mappings that follow it. When a username is defined more than
# once a warning is given and the last definition wins.
#
# fn:: path of the authors file (expanded with File.expand_path)
#
# Returns the hash. If reading fails, a "Could not find" warning is given and
# the mappings collected up to that point (possibly none) are returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?

                                uname, author = line.split('=', 2)
                                # no '=' on this line: there is nothing to map
                                if author.nil?
                                        warning "Ignoring malformed authors line #{line.chomp.inspect}"
                                        next
                                end

                                uname.strip!
                                author.strip!
                                warning "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue
                not_found(fn)
        end
        return hash
end
163
# Look up the author string for the RCS username `name` in opts[:authors].
#
# An unknown username gets the placeholder "<name> <empty>", which is stored in
# the map: this way the warning enabled by opts[:warn_missing_authors] is given
# only the first time the name is met.
#
# Raises a RuntimeError when opts[:authors] is not a Hash.
def username_to_author(name, opts)
        authors = opts[:authors]
        raise "no authors map defined" unless authors.is_a?(Hash)

        return authors[name] if authors.key?(name)

        warning "no author found for #{name}" if opts[:warn_missing_authors]
        authors[name] = "#{name} <empty>"
end
175
176 # display a message about a (recoverable) error
# Report a (recoverable) error on standard error: msg is shown after an
# "ERROR:" tag, and the action taken about it on the following line
def alert(msg, action)
        STDERR.puts "ERROR:\t#{msg}", "\t#{action}"
end
181
class Time
        # Build a UTC Time from an RCS date, i.e. six dot-separated fields
        # (year.month.day.hour.minute.second). A year of one or two digits
        # is taken to belong to the 1900s.
        #
        # Raises ArgumentError unless there are exactly six fields.
        def self.rcs(string)
                parts = string.split('.')
                unless parts.length == 6
                        raise ArgumentError, "wrong number of fields for RCS date #{string}"
                end

                year = parts.shift
                # in Ruby 1.9, '99' is interpreted as year 99, not year 1999
                year = "19#{year}" if year.length < 3
                Time.utc(year, *parts)
        end
end
193
194 module RCS
195         # strip an optional final ;
196         def RCS.clean(arg)
197                 arg.chomp(';')
198         end
199
200         # strip the first and last @, and de-double @@s
201         def RCS.sanitize(arg)
202                 case arg
203                 when Array
204                         ret = arg.dup
205                         raise 'malformed first line' unless ret.first[0,1] == '@'
206                         raise 'malformed last line' unless ret.last[-1,1] == '@'
207                         ret.first.sub!(/^@/,'')
208                         ret.last.sub!(/@$/,'')
209                         ret.map { |l| l.gsub('@@','@') }
210                 when String
211                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
212                 else
213                         raise
214                 end
215         end
216
217         # clean and sanitize
218         def RCS.at_clean(arg)
219                 RCS.sanitize RCS.clean(arg)
220         end
221
222         def RCS.mark(key)
223                 @@marks ||= {}
224                 if @@marks.key? key
225                         @@marks[key]
226                 else
227                         @@marks[key] = @@marks.length + 1
228                 end
229         end
230
231         def RCS.blob(file, rev)
232                 RCS.mark([file, rev])
233         end
234
235         def RCS.commit(commit)
236                 RCS.mark(commit)
237         end
238
239         class File
240                 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
241                 def initialize(fname, executable)
242                         @fname = fname.dup
243                         @head = nil
244                         @comment = nil
245                         @desc = []
246                         @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
247                         @mode = executable ? '755' : '644'
248                 end
249
250                 def has_revision?(rev)
251                         @revision.has_key?(rev) and not @revision[rev].author.nil?
252                 end
253
254                 def export_commits(opts={})
255                         counter = 0
256                         exported = []
257                         until @revision.empty?
258                                 counter += 1
259
260                                 # a string sort is a very good candidate for
261                                 # export order, getting a miss only for
262                                 # multi-digit revision components
263                                 keys = @revision.keys.sort
264
265                                 warning "commit export loop ##{counter}"
266                                 warning "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
267                                 warning "\t#{keys.size} to export: #{keys.join(', ')}"
268
269                                 keys.each do |key|
270                                         rev = @revision[key]
271                                         # the parent commit is rev.next if we're on the
272                                         # master branch (rev.branch is nil) or
273                                         # rev.diff_base otherwise
274                                         from = rev.branch.nil? ? rev.next : rev.diff_base
275                                         # A commit can only be exported if it has no
276                                         # parent, or if the parent has been exported
277                                         # already. Skip this commit otherwise
278                                         if from and not exported.include? from
279                                                 next
280                                         end
281
282                                         branch = rev.branch || 'master'
283                                         author = username_to_author(rev.author, opts)
284                                         date = "#{rev.date.tv_sec} +0000"
285                                         log = String.new
286                                         if opts[:log_filename]
287                                                 log << @fname << ": "
288                                         end
289                                         log << rev.log.join
290
291                                         puts "commit refs/heads/#{branch}"
292                                         puts "mark :#{RCS.commit key}"
293                                         puts "committer #{author} #{date}"
294                                         puts "data #{log.length}"
295                                         puts log unless log.empty?
296                                         puts "from :#{RCS.commit from}" if from
297                                         puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
298
299                                         # TODO FIXME this *should* be safe, in
300                                         # that it should not unduly move
301                                         # branches back in time, but I'm not
302                                         # 100% sure ...
303                                         rev.branches.each do |sym|
304                                                 puts "reset refs/heads/#{sym}"
305                                                 puts "from :#{RCS.commit key}"
306                                         end
307                                         rev.symbols.each do |sym|
308                                                 puts "reset refs/tags/#{sym}"
309                                                 puts "from :#{RCS.commit key}"
310                                         end
311                                         if opts[:tag_each_rev]
312                                                 puts "reset refs/tags/#{key}"
313                                                 puts "from :#{RCS.commit key}"
314                                         end
315
316                                         exported.push key
317                                 end
318                                 exported.each { |k| @revision.delete(k) }
319                         end
320                 end
321         end
322
323         class Revision
324                 attr_accessor :rev, :author, :state, :next
325                 attr_accessor :branches, :log, :text, :symbols
326                 attr_accessor :branch, :diff_base, :branch_point
327                 attr_reader   :date
328                 def initialize(file, rev)
329                         @file = file
330                         @rev = rev
331                         @author = nil
332                         @date = nil
333                         @state = nil
334                         @next = nil
335                         @branches = Set.new
336                         @branch = nil
337                         @branch_point = nil
338                         @diff_base = nil
339                         @log = []
340                         @text = []
341                         @symbols = Set.new
342                 end
343
344                 def date=(str)
345                         @date = Time.rcs(str)
346                 end
347
348                 def blob
349                         str = @text.join('')
350                         ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
351                         ret
352                 end
353         end
354
355         # TODO: what if a revision does not end with newline?
356         def RCS.expand_keywords(rcsfile, revision)
357                 ret = ::File.read("|co -q -p#{revision} #{rcsfile}")
358                 lines = []
359                 ret.each_line do |line|
360                         lines << line
361                 end
362                 lines
363         end
364
365         def RCS.parse(fname, rcsfile, opts={})
366                 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
367
368                 ::File.open(rcsfile, 'r:ASCII-8BIT') do |file|
369                         status = [:basic]
370                         rev = nil
371                         lines = []
372                         difflines = []
373                         file.each_line do |line|
374                                 case status.last
375                                 when :basic
376                                         command, args = line.split($;,2)
377                                         next if command.empty?
378
379                                         if command.chomp!(';')
380                                                 warning "Skipping empty command #{command.inspect}" if $DEBUG
381                                                 next
382                                         end
383
384                                         case command
385                                         when 'head'
386                                                 rcs.head = RCS.clean(args.chomp)
387                                         when 'symbols'
388                                                 status.push :symbols
389                                                 next if args.empty?
390                                                 line = args; redo
391                                         when 'comment'
392                                                 rcs.comment = RCS.at_clean(args.chomp)
393                                         when /^[0-9.]+$/
394                                                 rev = command.dup
395                                                 if rcs.has_revision?(rev)
396                                                         status.push :revision_data
397                                                 else
398                                                         status.push :new_revision
399                                                 end
400                                         when 'desc'
401                                                 status.push :desc
402                                                 lines.clear
403                                                 status.push :read_lines
404                                         when 'branch', 'access', 'locks', 'expand'
405                                                 warning "Skipping unhandled command #{command.inspect}" if $DEBUG
406                                                 status.push :skipping_lines
407                                                 next if args.empty?
408                                                 line = args; redo
409                                         else
410                                                 raise "Unknown command #{command.inspect}"
411                                         end
412                                 when :skipping_lines
413                                         status.pop if line.strip.chomp!(';')
414                                 when :symbols
415                                         # we can have multiple symbols per line
416                                         pairs = line.strip.split($;)
417                                         pairs.each do |pair|
418                                                 sym, rev = pair.strip.split(':',2);
419                                                 if rev
420                                                         status.pop if rev.chomp!(';')
421                                                         rcs.revision[rev].symbols << sym
422                                                 else
423                                                         status.pop
424                                                 end
425                                         end
426                                 when :desc
427                                         rcs.desc.replace lines.dup
428                                         status.pop
429                                 when :read_lines
430                                         # we sanitize lines as we read them
431
432                                         actual_line = line.dup
433
434                                         # the first line must begin with a @, which we strip
435                                         if lines.empty?
436                                                 ats = line.match(/^@+/)
437                                                 raise 'malformed line' unless ats
438                                                 actual_line.replace line.sub(/^@/,'')
439                                         end
440
441                                         # if the line ends with an ODD number of @, it's the
442                                         # last line -- we work on actual_line so that content
443                                         # such as @\n or @ work correctly (they would be
444                                         # encoded respectively as ['@@@\n','@\n'] and
445                                         # ['@@@@\n']
446                                         ats = actual_line.chomp.match(/@+$/)
447                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
448                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
449                                         end
450                                         lines << actual_line.gsub('@@','@')
451                                         if nomore
452                                                 status.pop
453                                                 redo
454                                         end
455                                 when :new_revision
456                                         case line.chomp
457                                         when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
458                                                 rcs.revision[rev].date = $1
459                                                 rcs.revision[rev].author = $2
460                                                 rcs.revision[rev].state = $3
461                                         when /^branches\s*;/
462                                                 next
463                                         when /^branches(?:\s+|$)/
464                                                 status.push :branches
465                                                 if line.index(';')
466                                                         line = line.sub(/^branches\s+/,'')
467                                                         redo
468                                                 end
469                                         when /^next\s+(\S+)?;$/
470                                                 nxt = rcs.revision[rev].next = $1
471                                                 next unless nxt
472                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
473                                                 rcs.revision[nxt].diff_base = rev
474                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
475                                         else
476                                                 status.pop
477                                         end
478                                 when :branches
479                                         candidate = line.split(';',2)
480                                         candidate.first.strip.split.each do |branch|
481                                                 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
482                                                 rcs.revision[branch].diff_base = rev
483                                                 # we drop the last number from the branch name
484                                                 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
485                                                 rcs.revision[branch].branch_point = rev
486                                         end
487                                         status.pop if candidate.length > 1
488                                 when :revision_data
489                                         case line.chomp
490                                         when 'log'
491                                                 status.push :log
492                                                 lines.clear
493                                                 status.push :read_lines
494                                         when 'text'
495                                                 if rev == rcs.head
496                                                         status.push :head
497                                                 else
498                                                         status.push :diff
499                                                 end
500                                                 lines.clear
501                                                 status.push :read_lines
502                                         else
503                                                 status.pop
504                                         end
505                                 when :log
506                                         rcs.revision[rev].log.replace lines.dup
507                                         status.pop
508                                 when :head
509                                         rcs.revision[rev].text.replace lines.dup
510                                         if opts[:expand_keywords]
511                                                 rcs.revision[rev].text.replace RCS.expand_keywords(rcsfile, rev)
512                                         end
513                                         puts rcs.revision[rev].blob
514                                         status.pop
515                                 when :diff
516                                         difflines.replace lines.dup
517                                         difflines.pop if difflines.last.empty?
518                                         if difflines.first.chomp.empty?
519                                                 alert "malformed diff: empty initial line @ #{rcsfile}:#{file.lineno-difflines.length-1}", "skipping"
520                                                 difflines.shift
521                                         end unless difflines.empty?
522                                         base = rcs.revision[rev].diff_base
523                                         unless rcs.revision[base].text
524                                                 pp rcs
525                                                 puts rev, base
526                                                 raise 'no diff base!'
527                                         end
528                                         # deep copy
529                                         buffer = []
530                                         rcs.revision[base].text.each { |l| buffer << [l.dup] }
531
532                                         adding = false
533                                         index = nil
534                                         count = nil
535
536                                         while l = difflines.shift
537                                                 if adding
538                                                         raise 'negative index during insertion' if index < 0
539                                                         raise 'negative count during insertion' if count < 0
540                                                         adding << l
541                                                         count -= 1
542                                                         # collected all the lines, put the before
543                                                         unless count > 0
544                                                                 unless buffer[index]
545                                                                         buffer[index] = []
546                                                                 end
547                                                                 buffer[index].unshift(*adding)
548                                                                 adding = false
549                                                         end
550                                                         next
551                                                 end
552
553                                                 l.chomp!
554                                                 raise "malformed diff @ #{rcsfile}:#{file.lineno-difflines.length-1} `#{l}`" unless l =~ /^([ad])(\d+) (\d+)$/
555                                                 diff_cmd = $1.intern
556                                                 index = $2.to_i
557                                                 count = $3.to_i
558                                                 case diff_cmd
559                                                 when :d
560                                                         # for deletion, index 1 is the first index, so the Ruby
561                                                         # index is one less than the diff one
562                                                         index -= 1
563                                                         # we replace them with empty string so that 'a' commands
564                                                         # referring to the same line work properly
565                                                         while count > 0
566                                                                 buffer[index].clear
567                                                                 index += 1
568                                                                 count -= 1
569                                                         end
570                                                 when :a
571                                                         # addition will prepend the appropriate lines
572                                                         # to the given index, and in this case Ruby
573                                                         # and diff indices are the same
574                                                         adding = []
575                                                 end
576                                         end
577
578                                         # turn the buffer into an array of lines, deleting the empty ones
579                                         buffer.delete_if { |l| l.empty? }
580                                         buffer.flatten!
581
582                                         rcs.revision[rev].text = buffer
583                                         if opts[:expand_keywords]
584                                                 rcs.revision[rev].text.replace RCS.expand_keywords(rcsfile, rev)
585                                         end
586                                         puts rcs.revision[rev].blob
587                                         status.pop
588                                 else
589                                         raise "Unknown status #{status.last}"
590                                 end
591                         end
592                 end
593
594                 # clean up the symbols/branches: look for revisions that have
595                 # one or more symbols but no dates, and make them into
596                 # branches, pointing to the highest commit with that key
597                 branches = []
598                 keys = rcs.revision.keys
599                 rcs.revision.each do |key, rev|
600                         if rev.date.nil? and not rev.symbols.empty?
601                                 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
602                                 tr = rcs.revision[top]
603                                 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
604                                 tr.branches |= rev.symbols
605                                 branches << key
606                         end
607                 end
608                 branches.each { |k| rcs.revision.delete k }
609
610                 return rcs
611         end
612
613         class Tree
614                 def initialize(commit)
615                         @commit = commit
616                         @files = Hash.new
617                 end
618
619                 def merge!(tree)
620                         testfiles = @files.dup
621                         tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
622                         # the next line is only reached if all the adds were
623                         # successful, so the merge is atomic
624                         @files.replace testfiles
625                 end
626
627                 def add(rcs, rev, file_list=@files)
628                         if file_list.key? rcs
629                                 prev = file_list[rcs]
630                                 if prev.log == rev.log
631                                         str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
632                                 else
633                                         str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
634                                 end
635                                 if prev.text != rev.text
636                                         raise str
637                                 else
638                                         @commit.warn_about str
639                                 end
640                         end
641                         file_list[rcs] = rev
642                 end
643
644                 def each &block
645                         @files.each &block
646                 end
647
648                 def to_a
649                         files = []
650                         @files.map do |rcs, rev|
651                                 if rev.state.downcase == "dead"
652                                         files << "D #{rcs.fname}"
653                                 else
654                                         files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
655                                 end
656                         end
657                         files
658                 end
659
660                 def filenames
661                         @files.map { |rcs, rev| rcs.fname }
662                 end
663
664                 def to_s
665                         self.to_a.join("\n")
666                 end
667         end
668
669         class Commit
670                 attr_accessor :date, :log, :symbols, :author, :branch
671                 attr_accessor :tree
672                 attr_accessor :min_date, :max_date
673                 def initialize(rcs, rev)
674                         raise NoBranchSupport if rev.branch
675                         self.date = rev.date.dup
676                         self.min_date = self.max_date = self.date
677                         self.log = rev.log.dup
678                         self.symbols = rev.symbols.dup
679                         self.author = rev.author
680                         self.branch = rev.branch
681
682                         self.tree = Tree.new self
683                         self.tree.add rcs, rev
684                 end
685
686                 def to_a
687                         [self.min_date, self.date, self.max_date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
688                 end
689
690                 def warn_about(str)
691                         warn str + " for commit on #{self.date}"
692                 end
693
694                 # Sort by date and then by number of symbols
695                 def <=>(other)
696                         ds = self.date <=> other.date
697                         if ds != 0
698                                 return ds
699                         else
700                                 return self.symbols.length <=> other.symbols.length
701                         end
702                 end
703
704                 def merge!(commit)
705                         self.tree.merge! commit.tree
706                         if commit.max_date > self.max_date
707                                 self.max_date = commit.max_date
708                         end
709                         if commit.min_date < self.min_date
710                                 self.min_date = commit.min_date
711                         end
712                         self.symbols.merge commit.symbols
713                 end
714
715                 def export(opts={})
716                         xbranch = self.branch || 'master'
717                         xauthor = username_to_author(self.author, opts)
718                         xlog = self.log.join
719                         numdate = self.date.tv_sec
720                         xdate = "#{numdate} +0000"
721                         key = numdate.to_s
722
723                         puts "commit refs/heads/#{xbranch}"
724                         puts "mark :#{RCS.commit key}"
725                         puts "committer #{xauthor} #{xdate}"
726                         puts "data #{xlog.length}"
727                         puts xlog unless xlog.empty?
728                         # TODO branching support for multi-file export
729                         # puts "from :#{RCS.commit from}" if self.branch_point
730                         puts self.tree.to_s
731
732                         # TODO branching support for multi-file export
733                         # rev.branches.each do |sym|
734                         #       puts "reset refs/heads/#{sym}"
735                         #       puts "from :#{RCS.commit key}"
736                         # end
737
738                         self.symbols.each do |sym|
739                                 puts "reset refs/tags/#{sym}"
740                                 puts "from :#{RCS.commit key}"
741                         end
742
743                 end
744         end
745 end
746
747 require 'getoptlong'
748
# Command-line options accepted by the script. Every option handled in the
# option loop has to be declared here, otherwise GetoptLong rejects it as
# invalid.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # Use "co" to obtain the actual revision with keywords expanded.
        ['--expand-keywords', GetoptLong::NO_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # Shell pattern to identify files to be ignored
        ['--ignore', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for commits to be considered the same (in seconds)
        ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # Date fuzziness for tags, in the same unit as the commit fuzz (it is
        # raised to the commit fuzz if smaller)
        ['--rcs-tag-fuzz', GetoptLong::REQUIRED_ARGUMENT],
        # warn about usernames missing in authors file map?
        ['--warn-missing-authors', GetoptLong::NO_ARGUMENT],
        ['--no-warn-missing-authors', GetoptLong::NO_ARGUMENT],
        # check symbols when coalescing?
        ['--symbol-check', GetoptLong::NO_ARGUMENT],
        ['--no-symbol-check', GetoptLong::NO_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        # prepend filenames to commit logs?
        ['--log-filename', GetoptLong::NO_ARGUMENT],
        ['--no-log-filename', GetoptLong::NO_ARGUMENT],
        # skip branches when exporting a whole tree?
        ['--skip-branches', GetoptLong::NO_ARGUMENT],
        # show current version
        ['--version', '-v', GetoptLong::NO_ARGUMENT],
        # show help/usage
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# With this ordering the non-option arguments (the files) are returned too,
# as arguments of an option with an empty name.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER
785
# Files and directories named on the command line
file_list = []

# Built-in defaults for the options that have one
parse_options = {
        :authors => {},
        :ignore => [],
        :commit_fuzz => 300,
        :tag_fuzz => -1,
}

# Pick up the settings stored in the git configuration, starting from the
# authors files (there can be more than one)
`git config --get-all rcs.authorsfile`.each_line do |authors_path|
        parse_options[:authors].merge!(load_authors_file(authors_path.chomp))
end

# Booleans that are enabled only if the configuration says 'true'
[
        [:tag_each_rev, 'rcs.tageachrev'],
        [:log_filename, 'rcs.logfilename'],
].each do |key, setting|
        parse_options[key] = (`git config --bool #{setting}`.chomp == 'true')
end

# Integers that keep their default if the configuration has no value
[
        [:commit_fuzz, 'rcs.commitFuzz'],
        [:tag_fuzz, 'rcs.tagFuzz'],
].each do |key, setting|
        configured = `git config --int #{setting}`.chomp
        parse_options[key] = configured.to_i unless configured.empty?
end

# Booleans that are disabled only if the configuration says 'false'
[
        [:symbol_check, 'rcs.symbolcheck'],
        [:warn_missing_authors, 'rcs.warnmissingauthors'],
].each do |key, setting|
        parse_options[key] = (`git config --bool #{setting}`.chomp != 'false')
end
820
# Apply the command-line options on top of the values read from the git
# configuration. Arguments that are not options arrive with an empty option
# name and are collected in file_list.
opts.each do |opt, arg|
        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                # let the user know if this file overrides known authors
                redef = parse_options[:authors].keys & authors.keys
                warning "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--expand-keywords'
                parse_options[:expand_keywords] = true
        when '--rcs-suffixes'
                # TODO
        when '--ignore'
                parse_options[:ignore] << arg
        when '--rcs-commit-fuzz'
                parse_options[:commit_fuzz] = arg.to_i
        when '--rcs-tag-fuzz'
                parse_options[:tag_fuzz] = arg.to_i
        when '--warn-missing-authors'
                parse_options[:warn_missing_authors] = true
        when '--no-warn-missing-authors'
                parse_options[:warn_missing_authors] = false
        when '--symbol-check'
                parse_options[:symbol_check] = true
        when '--no-symbol-check'
                parse_options[:symbol_check] = false
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                # set explicitly, to override a value coming from the git
                # configuration
                parse_options[:tag_each_rev] = false
        when '--log-filename'
                parse_options[:log_filename] = true
        when '--no-log-filename'
                # set explicitly, to override a value coming from the git
                # configuration
                parse_options[:log_filename] = false
        when '--skip-branches'
                parse_options[:skip_branches] = true
        when ''
                file_list << arg
        when '--version'
                version
                exit
        when '--help'
                usage
                exit
        end
end

# the tag fuzz is never allowed to be smaller than the commit fuzz
if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
        parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
end
870
require 'etc'

user = Etc.getlogin || ENV['USER']

# Look for username/email data in other init files that may contain the
# information. Returns the first value found, stripped of surrounding
# whitespace, or nil if no file is readable and has a matching line.
#
# The method cannot see the local variables of the script, so it is up to
# the caller to store the result.
def steal_username
        [
                # the user's .hgrc file for a username field
                # (the quote group is always taken, possibly empty: a
                # backreference to a group that was skipped never matches,
                # which would rule out unquoted values)
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                if File.readable?(file) and File.read(file) =~ rx
                        return Regexp.last_match(idx).strip
                end
        end
        nil
end

# If the authors map has no entry for the user running the script, try to
# build one from the environment, the git configuration and the password
# database, falling back to other init files when that is not enough.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        name = Etc.getpwnam(user).gecos.to_s if name.empty?

        author = nil
        unless name.empty?
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                # we need both a name and email to fill the info
                author = "#{name} <#{email}>" unless email.empty?
        end

        # couldn't find a name or an email, try to steal data from other
        # sources
        author ||= steal_username

        parse_options[:authors][user] = author if author
end
915
# Nothing to do without at least one file or directory
if file_list.empty?
        usage
        exit 1
end

# Suffix that identifies an RCS file
SFX = ',v'

# Exit status of the script: non-zero if some argument could not be handled
status = 0

# Parse every file given on the command line, and every RCS file found under
# the directories given on the command line, collecting the results
rcs = []
file_list.each do |arg|
        # File.ftype raises Errno::ENOENT for a missing path: report it and
        # go on with the other arguments instead of aborting
        begin
                ftype = File.ftype(arg)
        rescue Errno::ENOENT
                not_found arg
                status |= 1
                next
        end

        case ftype
        when 'file'
                if arg[-2,2] == SFX
                        # we were given the RCS file itself
                        rcsfile = arg.dup
                        filename = File.basename(arg, SFX)
                else
                        # we were given the working file: look for its RCS file
                        # in the RCS subdirectory first, then next to the file
                        filename = File.basename(arg)
                        path = File.dirname(arg)
                        rcsfile = [
                                File.join(path, 'RCS', filename) + SFX,
                                File.join(path, filename) + SFX,
                        ].find { |candidate| File.exist? candidate }
                        unless rcsfile
                                # there is nothing to parse for this argument
                                not_found "RCS file for #{filename} in #{path}"
                                status |= 1
                                next
                        end
                end
                rcs << RCS.parse(filename, rcsfile, parse_options)
        when 'directory'
                argdirname = arg.chomp(File::SEPARATOR)
                pattern = File.join(argdirname, '**', '*' + SFX)
                Dir.glob(pattern, File::FNM_DOTMATCH).each do |rcsfile|
                        filename = File.basename(rcsfile, SFX)
                        path = File.dirname(rcsfile)
                        # strip trailing "/RCS" if present, or "RCS" if that's
                        # the full path
                        path.sub!(/(^|#{File::SEPARATOR})RCS$/, '')
                        # strip off the portion of the path specified
                        # on the command line from the front of the path
                        # (or delete the path completely if it is the same
                        # as the specified directory)
                        path.sub!(/^#{Regexp.escape argdirname}(#{File::SEPARATOR}|$)/, '')
                        filename = File.join(path, filename) unless path.empty?

                        # skip file if it's to be ignored
                        next if parse_options[:ignore].any? { |pat|
                                File.fnmatch?(pat, filename, File::FNM_PATHNAME)
                        }

                        # proceed
                        begin
                                rcs << RCS.parse(filename, rcsfile, parse_options)
                        rescue Exception
                                # tell which file was being parsed, then let the
                                # error through untouched
                                warning "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
                                raise
                        end
                end
        else
                warning "Cannot handle #{arg} of #{ftype} type"
                status |= 1
        end
end
990
# Export what has been parsed: a single RCS file is handed to its own
# export_commits, while multiple files are turned into single-file commits
# that are then sorted by date, coalesced and exported.
if rcs.length == 1
        rcs.first.export_commits(parse_options)
else
        warning "Preparing commits"

        commits = []

        # one commit for each revision of each file
        rcs.each do |r|
                r.revision.each do |k, rev|
                        begin
                                commits << RCS::Commit.new(r, rev)
                        rescue NoBranchSupport
                                if parse_options[:skip_branches]
                                        warning "Skipping revision #{rev.rev} for #{r.fname} (branch)"
                                else raise
                                end
                        end
                end
        end

        warning "Sorting by date"

        commits.sort!

        if $DEBUG
                warning "RAW commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                warning "#{commits.length} single-file commits"
        end

        warning "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"

        # Walk the commits from the last to the first, merging into each of
        # them the following commits that qualify. The commits that get merged
        # are removed from the list: they all come after the current one, so
        # the index of the current commit is not affected.
        thisindex = commits.size
        commits.reverse_each do |c|
                nextindex = thisindex
                thisindex -= 1

                # files touched by this commit
                cfiles = Set.new c.tree.filenames
                # files touched by the candidates that were skipped
                ofiles = Set.new

                mergeable = []

                # test for mergeable commits by looking at following commits
                while nextindex < commits.size
                        k = commits[nextindex]
                        nextindex += 1

                        # commits are date-sorted, so we know we can quit early if we are too far
                        # for coalescing to work
                        break if k.min_date > c.max_date + parse_options[:commit_fuzz]

                        skipthis = false

                        kfiles = Set.new k.tree.filenames

                        if c.log != k.log or c.author != k.author or c.branch != k.branch
                                skipthis = true
                        end

                        # the symbols agree if either set includes the other
                        unless c.symbols.subset?(k.symbols) or k.symbols.subset?(c.symbols)
                                cflist = cfiles.to_a.join(', ')
                                kflist = kfiles.to_a.join(', ')
                                if parse_options[:symbol_check]
                                        warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\tbecause their symbols disagree:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                        warning "\tretry with the --no-symbol-check option if you want to merge these commits anyway"
                                        skipthis = true
                                elsif $DEBUG
                                        warning "Coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\twith disagreeing symbols:\n\t#{c.symbols.to_a.inspect} and #{k.symbols.to_a.inspect} disagree on #{(c.symbols ^ k.symbols).to_a.inspect}"
                                end
                        end

                        # keep track of filenames touched by commits we are not merging with,
                        # since we don't want to merge with commits that touch them, to preserve
                        # the monotonicity of history for each file
                        # TODO we could forward-merge with them, unless some of our files were
                        # touched too.
                        if skipthis
                                # if the candidate touches any file already in the commit,
                                # we can stop looking forward
                                break unless cfiles.intersection(kfiles).empty?
                                ofiles |= kfiles
                                next
                        end

                        # the candidate has the same log, author, branch and appropriate symbols
                        # does it touch anything in ofiles?
                        unless ofiles.intersection(kfiles).empty?
                                if $DEBUG
                                        cflist = cfiles.to_a.join(', ')
                                        kflist = kfiles.to_a.join(', ')
                                        oflist = ofiles.to_a.join(', ')
                                        warning "Not coalescing #{c.log.inspect}\n\tfor (#{cflist})\n\tand (#{kflist})"
                                        warning "\tbecause the latter intersects #{oflist} in #{(ofiles & kfiles).to_a.inspect}"
                                end
                                next
                        end

                        mergeable << k
                end

                mergeable.each do |k|
                        begin
                                c.merge! k
                        rescue RuntimeError => err
                                # distance in time between the two commits, as a
                                # positive number (k follows c in the sorted list)
                                gap = (k.date - c.date).abs
                                warning "Fuzzy commit coalescing failed: #{err}"
                                warning "\tretry with commit fuzz < #{gap} if you don't want to see this message"
                                # give up on the remaining candidates for this commit
                                break
                        end
                        commits.delete k
                end
        end

        if $DEBUG
                warning "[1] commits (#{commits.length}):"
                commits.each do |c|
                        PP.pp c.to_a, $stderr
                end
        else
                warning "#{commits.length} coalesced commits"
        end

        commits.each { |c| c.export(parse_options) }

end

exit status