5 * Option to coalesce commits that only differ by having a symbol or not
6 * Further coalescing options? (e.g. small logfile differences)
7 * Proper branching support in multi-file export
8 * Optimize memory usage by discarding unneeded text
13 # Integer#odd? was introduced in Ruby 1.8.7, backport it to
15 unless 2.respond_to? :odd?
25 #{$0} [options] file [file ...]
27 Fast-export the RCS history of one or more files. If a directory is specified,
28 all RCS-tracked files in the directory and its descendants are exported.
30 When importing single files, their pathname is discarded during import. When
31 importing directories, only the specified directory component is discarded.
33 When importing a single file, RCS commits are converted one by one. Otherwise,
34 some heuristics is used to determine how to coalesce commits of different.
36 Currently, commits are coalesced if they share the exact same log and symbols,
37 and if their date differs by no more than a the user-specified fuzziness.
40 --help, -h, -? display this help text
41 --authors-file, -A specify a file containing username = Full Name <email> mappings
42 --rcs-commit-fuzz fuzziness in RCS commits to be considered a single one when
43 importing multiple files
44 (in seconds, defaults to 300, i.e. 5 minutes)
45 --[no-]tag-each-rev [do not] create a lightweight tag for each RCS revision when
46 importing a single file
47 --[no-]log-filename [do not] prepend the filename to the commit log when importing
51 rcs.authorsFile for --authors-file
52 rcs.tagEachRev for --tag-each-rev
53 rcs.logFilename for --log-filename
54 rcs.commitFuzz for --rcs-commit-fuzz
55 rcs.tagFuzz for --rcs-tag-fuzz
61 STDERR.puts "Could not find #{arg}"
64 # returns a hash that maps usernames to author names & emails
65 def load_authors_file(fn)
68 File.open(File.expand_path(fn)) do |io|
69 io.each_line do |line|
70 uname, author = line.split('=', 2)
73 STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
85 fields = string.split('.')
86 raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
92 # strip an optional final ;
97 # strip the first and last @, and de-double @@s
102 raise 'malformed first line' unless ret.first[0,1] == '@'
103 raise 'malformed last line' unless ret.last[-1,1] == '@'
104 ret.first.sub!(/^@/,'')
105 ret.last.sub!(/@$/,'')
106 ret.map { |l| l.gsub('@@','@') }
108 arg.chomp('@').sub(/^@/,'').gsub('@@','@')
115 def RCS.at_clean(arg)
116 RCS.sanitize RCS.clean(arg)
124 @@marks[key] = @@marks.length + 1
128 def RCS.blob(file, rev)
129 RCS.mark([file, rev])
132 def RCS.commit(commit)
137 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
138 def initialize(fname, executable)
143 @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
144 @mode = executable ? '755' : '644'
147 def has_revision?(rev)
148 @revision.has_key?(rev) and not @revision[rev].author.nil?
151 def export_commits(opts={})
154 until @revision.empty?
157 # a string sort is a very good candidate for
158 # export order, getting a miss only for
159 # multi-digit revision components
160 keys = @revision.keys.sort
162 STDERR.puts "commit export loop ##{counter}"
163 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
164 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
168 # the parent commit is rev.next if we're on the
169 # master branch (rev.branch is nil) or
170 # rev.diff_base otherwise
171 from = rev.branch.nil? ? rev.next : rev.diff_base
172 # A commit can only be exported if it has no
173 # parent, or if the parent has been exported
174 # already. Skip this commit otherwise
175 if from and not exported.include? from
179 branch = rev.branch || 'master'
180 author = opts[:authors][rev.author] || "#{rev.author} <empty>"
181 date = "#{rev.date.tv_sec} +0000"
183 if opts[:log_filename]
184 log << @fname << ": "
188 puts "commit refs/heads/#{branch}"
189 puts "mark :#{RCS.commit key}"
190 puts "committer #{author} #{date}"
191 puts "data #{log.length}"
192 puts log unless log.empty?
193 puts "from :#{RCS.commit from}" if rev.branch_point
194 puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
196 # TODO FIXME this *should* be safe, in
197 # that it should not unduly move
198 # branches back in time, but I'm not
200 rev.branches.each do |sym|
201 puts "reset refs/heads/#{sym}"
202 puts "from :#{RCS.commit key}"
204 rev.symbols.each do |sym|
205 puts "reset refs/tags/#{sym}"
206 puts "from :#{RCS.commit key}"
208 if opts[:tag_each_rev]
209 puts "reset refs/tags/#{key}"
210 puts "from :#{RCS.commit key}"
215 exported.each { |k| @revision.delete(k) }
221 attr_accessor :rev, :author, :state, :next
222 attr_accessor :branches, :log, :text, :symbols
223 attr_accessor :branch, :diff_base, :branch_point
225 def initialize(file, rev)
242 @date = Time.rcs(str)
247 ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
252 def RCS.parse(fname, rcsfile)
253 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
255 ::File.open(rcsfile, 'r') do |file|
260 file.each_line do |line|
263 command, args = line.split($;,2)
264 next if command.empty?
266 if command.chomp!(';')
267 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
273 rcs.head = RCS.clean(args.chomp)
279 rcs.comment = RCS.at_clean(args.chomp)
282 if rcs.has_revision?(rev)
283 status.push :revision_data
285 status.push :new_revision
290 status.push :read_lines
291 when 'branch', 'access', 'locks'
292 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
294 raise "Unknown command #{command.inspect}"
297 # we can have multiple symbols per line
298 pairs = line.strip.split($;)
300 sym, rev = pair.strip.split(':',2);
302 status.pop if rev.chomp!(';')
303 rcs.revision[rev].symbols << sym
309 rcs.desc.replace lines.dup
312 # we sanitize lines as we read them
314 actual_line = line.dup
316 # the first line must begin with a @, which we strip
318 ats = line.match(/^@+/)
319 raise 'malformed line' unless ats
320 actual_line.replace line.sub(/^@/,'')
323 # if the line ends with an ODD number of @, it's the
324 # last line -- we work on actual_line so that content
325 # such as @\n or @ work correctly (they would be
326 # encoded respectively as ['@@@\n','@\n'] and
328 ats = actual_line.chomp.match(/@+$/)
329 if nomore = (ats && Regexp.last_match(0).length.odd?)
330 actual_line.replace actual_line.chomp.sub(/@$/,'')
332 lines << actual_line.gsub('@@','@')
339 when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
340 rcs.revision[rev].date = $1
341 rcs.revision[rev].author = $2
342 rcs.revision[rev].state = $3
344 status.push :branches
347 when /^next\s+(\S+)?;$/
348 nxt = rcs.revision[rev].next = $1
350 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
351 rcs.revision[nxt].diff_base = rev
352 rcs.revision[nxt].branch = rcs.revision[rev].branch
357 candidate = line.split(';',2)
358 branch = candidate.first.strip
359 rcs.revision[rev].branches.push branch
360 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
361 rcs.revision[branch].diff_base = rev
362 # we drop the last number from the branch name
363 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
364 rcs.revision[branch].branch_point = rev
365 status.pop if candidate.length > 1
371 status.push :read_lines
379 status.push :read_lines
384 rcs.revision[rev].log.replace lines.dup
387 rcs.revision[rev].text.replace lines.dup
388 puts rcs.revision[rev].blob
391 difflines.replace lines.dup
392 difflines.pop if difflines.last.empty?
393 base = rcs.revision[rev].diff_base
394 unless rcs.revision[base].text
397 raise 'no diff base!'
401 rcs.revision[base].text.each { |l| buffer << [l.dup] }
407 while l = difflines.shift
409 raise 'negative index during insertion' if index < 0
410 raise 'negative count during insertion' if count < 0
413 # collected all the lines, put them before
415 buffer[index].unshift(*adding)
422 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
428 # for deletion, index 1 is the first index, so the Ruby
429 # index is one less than the diff one
431 # we replace them with empty string so that 'a' commands
432 # referring to the same line work properly
439 # addition will prepend the appropriate lines
440 # to the given index, and in this case Ruby
441 # and diff indices are the same
446 # turn the buffer into an array of lines, deleting the empty ones
447 buffer.delete_if { |l| l.empty? }
450 rcs.revision[rev].text = buffer
451 puts rcs.revision[rev].blob
454 raise "Unknown status #{status.last}"
459 # clean up the symbols/branches: look for revisions that have
460 # one or more symbols but no dates, and make them into
461 # branches, pointing to the highest commit with that key
463 keys = rcs.revision.keys
464 rcs.revision.each do |key, rev|
465 if rev.date.nil? and not rev.symbols.empty?
466 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
467 tr = rcs.revision[top]
468 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
469 tr.branches |= rev.symbols
473 branches.each { |k| rcs.revision.delete k }
479 def initialize(commit)
487 if prev.log == rev.log
488 str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
490 str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
492 if prev.text != rev.text
495 @commit.warn_about str
507 @files.map do |rcs, rev|
508 files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
519 attr_accessor :date, :log, :symbols, :author, :branch
521 def initialize(rcs, rev)
522 raise NotImplementedError if rev.branch
523 self.date = rev.date.dup
524 self.log = rev.log.dup
525 self.symbols = rev.symbols.dup
526 self.author = rev.author
527 self.branch = rev.branch
529 self.tree = Tree.new self
530 self.tree.add rcs, rev
534 [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
538 warn str + " for commit on #{self.date}"
541 # Sort by date and then by number of symbols
543 ds = self.date <=> other.date
547 return self.symbols.length <=> other.symbols.length
552 commit.tree.each do |rcs, rev|
553 self.tree.add rcs, rev
555 if commit.date > self.date
556 warn_about "updating date to #{commit.date}"
557 self.date = commit.date
559 # TODO this is a possible option when merging commits with differing symbols
560 # self.symbols |= commit.symbols
564 xbranch = self.branch || 'master'
565 xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
567 numdate = self.date.tv_sec
568 xdate = "#{numdate} +0000"
571 puts "commit refs/heads/#{xbranch}"
572 puts "mark :#{RCS.commit key}"
573 puts "committer #{xauthor} #{xdate}"
574 puts "data #{xlog.length}"
575 puts xlog unless xlog.empty?
576 # TODO branching support for multi-file export
577 # puts "from :#{RCS.commit from}" if self.branch_point
580 # TODO branching support for multi-file export
581 # rev.branches.each do |sym|
582 # puts "reset refs/heads/#{sym}"
583 # puts "from :#{RCS.commit key}"
586 self.symbols.each do |sym|
587 puts "reset refs/tags/#{sym}"
588 puts "from :#{RCS.commit key}"
597 opts = GetoptLong.new(
598 # Authors file, like git-svn and git-cvsimport, more than one can be
600 ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
601 # RCS file suffix, like RCS
602 ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
603 # Date fuzziness for commits to be considered the same (in seconds)
604 ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
606 ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
607 ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
608 # prepend filenames to commit logs?
609 ['--log-filename', GetoptLong::NO_ARGUMENT],
610 ['--no-log-filename', GetoptLong::NO_ARGUMENT],
611 ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
614 # We read options in order, but they apply to all passed parameters.
615 # TODO maybe they should only apply to the following, unless there's only one
617 opts.ordering = GetoptLong::RETURN_IN_ORDER
621 :authors => Hash.new,
626 # Read config options
627 `git config --get-all rcs.authorsfile`.each_line do |fn|
628 parse_options[:authors].merge! load_authors_file(fn.chomp)
631 parse_options[:tag_each_rev] = (
632 `git config --bool rcs.tageachrev`.chomp == 'true'
635 parse_options[:log_filename] = (
636 `git config --bool rcs.logfilename`.chomp == 'true'
639 fuzz = `git config --int rcs.commitFuzz`.chomp
640 parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?
642 fuzz = `git config --int rcs.tagFuzz`.chomp
643 parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?
645 opts.each do |opt, arg|
647 when '--authors-file'
648 authors = load_authors_file(arg)
649 redef = parse_options[:authors].keys & authors.keys
650 STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
651 parse_options[:authors].merge!(authors)
652 when '--rcs-suffixes'
654 when '--rcs-commit-fuzz'
655 parse_options[:commit_fuzz] = arg.to_i
656 when '--rcs-tag-fuzz'
657 parse_options[:tag_fuzz] = arg.to_i
658 when '--tag-each-rev'
659 parse_options[:tag_each_rev] = true
660 when '--no-tag-each-rev'
661 # this is the default, which is fine since the missing key
662 # (default) returns nil which is false in Ruby
663 parse_options[:tag_each_rev] = false
664 when '--log-filename'
665 parse_options[:log_filename] = true
666 when '--no-log-filename'
667 # this is the default, which is fine since the missing key
668 # (default) returns nil which is false in Ruby
669 parse_options[:log_filename] = false
678 if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
679 parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
684 user = Etc.getlogin || ENV['USER']
686 # steal username/email data from other init files that may contain the
690 # the user's .hgrc file for a username field
691 ['~/.hgrc', /^\s*username\s*=\s*(["'])?(.*)\1$/, 2],
692 # the user's .(g)vimrc for a changelog_username setting
693 ['~/.vimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
694 ['~/.gvimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
696 ].each do |fn, rx, idx|
697 file = File.expand_path fn
698 if File.readable?(file) and File.read(file) =~ rx
699 parse_options[:authors][user] = Regexp.last_match(idx).strip
705 if user and not user.empty? and not parse_options[:authors].has_key?(user)
706 name = ENV['GIT_AUTHOR_NAME'] || ''
707 name.replace(`git config user.name`.chomp) if name.empty?
708 name.replace(Etc.getpwnam(user).gecos) if name.empty?
711 # couldn't find a name, try to steal data from other sources
714 # if we found a name, try to find an email too
715 email = ENV['GIT_AUTHOR_EMAIL'] || ''
716 email.replace(`git config user.email`.chomp) if email.empty?
719 # couldn't find an email, try to steal data too
722 # we got both a name and email, fill the info
723 parse_options[:authors][user] = "#{name} <#{email}>"
738 file_list.each do |arg|
739 case ftype = File.ftype(arg)
745 not_found "RCS file #{arg}"
748 filename = File.basename(arg, SFX)
750 filename = File.basename(arg)
751 path = File.dirname(arg)
752 rcsfile = File.join(path, 'RCS', filename) + SFX
753 unless File.exists? rcsfile
754 rcsfile.replace File.join(path, filename) + SFX
755 unless File.exists? rcsfile
756 not_found "RCS file for #{filename} in #{path}"
760 rcs << RCS.parse(filename, rcsfile)
762 pattern = File.join(arg, '**', '*' + SFX)
763 Dir.glob(pattern).each do |rcsfile|
764 filename = File.basename(rcsfile, SFX)
765 path = File.dirname(rcsfile)
766 path.sub!(/\/?RCS$/, '') # strip final /RCS if present
767 path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
768 filename = File.join(path, filename) unless path.empty?
770 rcs << RCS.parse(filename, rcsfile)
771 rescue Exception => e
772 STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
777 STDERR.puts "Cannot handle #{arg} of #{ftype} type"
783 rcs.first.export_commits(parse_options)
785 STDERR.puts "Preparing commits"
790 r.revision.each do |k, rev|
791 commits << RCS::Commit.new(r, rev)
795 STDERR.puts "Sorting by date"
800 STDERR.puts "RAW commits (#{commits.length}):"
802 PP.pp c.to_a, $stderr
805 STDERR.puts "#{commits.length} single-file commits"
808 STDERR.puts "Coalescing [1] by date fuzz"
810 commits.reverse_each do |c|
811 commits.reverse_each do |k|
812 break if k.date < c.date - parse_options[:commit_fuzz]
814 next if c.log != k.log or c.symbols != k.symbols or c.author != k.author or c.branch != k.branch
815 next if k.date > c.date
822 STDERR.puts "[1] commits (#{commits.length}):"
824 PP.pp c.to_a, $stderr
827 STDERR.puts "#{commits.length} coalesced commits"
830 commits.each { |c| c.export(parse_options) }