5 * Option to coalesce commits that only differ by having a symbol or not
6 * Further coalescing options? (e.g. small logfile differences)
7 * Proper branching support in multi-file export
8 * Optimize memory usage by discarding unneeded text
13 # Integer#odd? was introduced in Ruby 1.8.7, backport it to
15 unless 2.respond_to? :odd?
25 #{$0} [options] file [file ...]
27 Fast-export the RCS history of one or more files. If a directory is specified,
28 all RCS-tracked files in the directory and its descendants are exported.
30 When importing single files, their pathname is discarded during import. When
31 importing directories, only the specified directory component is discarded.
33 When importing a single file, RCS commits are converted one by one. Otherwise,
34 some heuristics is used to determine how to coalesce commits of different.
36 Currently, commits are coalesced if they share the exact same log and symbols,
37 and if their date differs by no more than a the user-specified fuzziness.
40 --help, -h, -? display this help text
41 --authors-file, -A specify a file containing username = Full Name <email> mappings
42 --rcs-commit-fuzz fuzziness in RCS commits to be considered a single one when
43 importing multiple files
44 (in seconds, defaults to 300, i.e. 5 minutes)
45 --[no-]tag-each-rev [do not] create a lightweight tag for each RCS revision when
46 importing a single file
47 --[no-]log-filename [do not] prepend the filename to the commit log when importing
51 rcs.authorsFile for --authors-file
52 rcs.tagEachRev for --tag-each-rev
53 rcs.logFilename for --log-filename
54 rcs.commitFuzz for --rcs-commit-fuzz
55 rcs.tagFuzz for --rcs-tag-fuzz
61 STDERR.puts "Could not find #{arg}"
64 # returns a hash that maps usernames to author names & emails
65 def load_authors_file(fn)
68 File.open(File.expand_path(fn)) do |io|
69 io.each_line do |line|
70 uname, author = line.split('=', 2)
73 STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
85 fields = string.split('.')
86 raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
92 # strip an optional final ;
97 # strip the first and last @, and de-double @@s
102 raise 'malformed first line' unless ret.first[0,1] == '@'
103 raise 'malformed last line' unless ret.last[-1,1] == '@'
104 ret.first.sub!(/^@/,'')
105 ret.last.sub!(/@$/,'')
106 ret.map { |l| l.gsub('@@','@') }
108 arg.chomp('@').sub(/^@/,'').gsub('@@','@')
115 def RCS.at_clean(arg)
116 RCS.sanitize RCS.clean(arg)
124 @@marks[key] = @@marks.length + 1
128 def RCS.blob(file, rev)
129 RCS.mark([file, rev])
132 def RCS.commit(commit)
137 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
138 def initialize(fname, executable)
143 @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
144 @mode = executable ? '755' : '644'
147 def has_revision?(rev)
148 @revision.has_key?(rev) and not @revision[rev].author.nil?
151 def export_commits(opts={})
154 until @revision.empty?
157 # a string sort is a very good candidate for
158 # export order, getting a miss only for
159 # multi-digit revision components
160 keys = @revision.keys.sort
162 STDERR.puts "commit export loop ##{counter}"
163 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
164 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
168 # the parent commit is rev.next if we're on the
169 # master branch (rev.branch is nil) or
170 # rev.diff_base otherwise
171 from = rev.branch.nil? ? rev.next : rev.diff_base
172 # A commit can only be exported if it has no
173 # parent, or if the parent has been exported
174 # already. Skip this commit otherwise
175 if from and not exported.include? from
179 branch = rev.branch || 'master'
180 author = opts[:authors][rev.author] || "#{rev.author} <empty>"
181 date = "#{rev.date.tv_sec} +0000"
183 if opts[:log_filename]
184 log << @fname << ": "
188 puts "commit refs/heads/#{branch}"
189 puts "mark :#{RCS.commit key}"
190 puts "committer #{author} #{date}"
191 puts "data #{log.length}"
192 puts log unless log.empty?
193 puts "from :#{RCS.commit from}" if rev.branch_point
194 puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
196 # TODO FIXME this *should* be safe, in
197 # that it should not unduly move
198 # branches back in time, but I'm not
200 rev.branches.each do |sym|
201 puts "reset refs/heads/#{sym}"
202 puts "from :#{RCS.commit key}"
204 rev.symbols.each do |sym|
205 puts "reset refs/tags/#{sym}"
206 puts "from :#{RCS.commit key}"
208 if opts[:tag_each_rev]
209 puts "reset refs/tags/#{key}"
210 puts "from :#{RCS.commit key}"
215 exported.each { |k| @revision.delete(k) }
221 attr_accessor :rev, :author, :state, :next
222 attr_accessor :branches, :log, :text, :symbols
223 attr_accessor :branch, :diff_base, :branch_point
225 def initialize(file, rev)
242 @date = Time.rcs(str)
247 ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
252 def RCS.parse(fname, rcsfile)
253 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
255 ::File.open(rcsfile, 'r') do |file|
260 file.each_line do |line|
263 command, args = line.split($;,2)
264 next if command.empty?
266 if command.chomp!(';')
267 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
273 rcs.head = RCS.clean(args.chomp)
279 rcs.comment = RCS.at_clean(args.chomp)
282 if rcs.has_revision?(rev)
283 status.push :revision_data
285 status.push :new_revision
290 status.push :read_lines
291 when 'branch', 'access', 'locks', 'expand'
292 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
293 status.push :skipping_lines
297 raise "Unknown command #{command.inspect}"
300 status.pop if line.strip.chomp!(';')
302 # we can have multiple symbols per line
303 pairs = line.strip.split($;)
305 sym, rev = pair.strip.split(':',2);
307 status.pop if rev.chomp!(';')
308 rcs.revision[rev].symbols << sym
314 rcs.desc.replace lines.dup
317 # we sanitize lines as we read them
319 actual_line = line.dup
321 # the first line must begin with a @, which we strip
323 ats = line.match(/^@+/)
324 raise 'malformed line' unless ats
325 actual_line.replace line.sub(/^@/,'')
328 # if the line ends with an ODD number of @, it's the
329 # last line -- we work on actual_line so that content
330 # such as @\n or @ work correctly (they would be
331 # encoded respectively as ['@@@\n','@\n'] and
333 ats = actual_line.chomp.match(/@+$/)
334 if nomore = (ats && Regexp.last_match(0).length.odd?)
335 actual_line.replace actual_line.chomp.sub(/@$/,'')
337 lines << actual_line.gsub('@@','@')
344 when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
345 rcs.revision[rev].date = $1
346 rcs.revision[rev].author = $2
347 rcs.revision[rev].state = $3
349 status.push :branches
352 when /^next\s+(\S+)?;$/
353 nxt = rcs.revision[rev].next = $1
355 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
356 rcs.revision[nxt].diff_base = rev
357 rcs.revision[nxt].branch = rcs.revision[rev].branch
362 candidate = line.split(';',2)
363 branch = candidate.first.strip
364 rcs.revision[rev].branches.push branch
365 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
366 rcs.revision[branch].diff_base = rev
367 # we drop the last number from the branch name
368 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
369 rcs.revision[branch].branch_point = rev
370 status.pop if candidate.length > 1
376 status.push :read_lines
384 status.push :read_lines
389 rcs.revision[rev].log.replace lines.dup
392 rcs.revision[rev].text.replace lines.dup
393 puts rcs.revision[rev].blob
396 difflines.replace lines.dup
397 difflines.pop if difflines.last.empty?
398 base = rcs.revision[rev].diff_base
399 unless rcs.revision[base].text
402 raise 'no diff base!'
406 rcs.revision[base].text.each { |l| buffer << [l.dup] }
412 while l = difflines.shift
414 raise 'negative index during insertion' if index < 0
415 raise 'negative count during insertion' if count < 0
418 # collected all the lines, put them before
423 buffer[index].unshift(*adding)
430 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
436 # for deletion, index 1 is the first index, so the Ruby
437 # index is one less than the diff one
439 # we replace them with empty string so that 'a' commands
440 # referring to the same line work properly
447 # addition will prepend the appropriate lines
448 # to the given index, and in this case Ruby
449 # and diff indices are the same
454 # turn the buffer into an array of lines, deleting the empty ones
455 buffer.delete_if { |l| l.empty? }
458 rcs.revision[rev].text = buffer
459 puts rcs.revision[rev].blob
462 raise "Unknown status #{status.last}"
467 # clean up the symbols/branches: look for revisions that have
468 # one or more symbols but no dates, and make them into
469 # branches, pointing to the highest commit with that key
471 keys = rcs.revision.keys
472 rcs.revision.each do |key, rev|
473 if rev.date.nil? and not rev.symbols.empty?
474 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
475 tr = rcs.revision[top]
476 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
477 tr.branches |= rev.symbols
481 branches.each { |k| rcs.revision.delete k }
487 def initialize(commit)
493 testfiles = @files.dup
494 tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
495 # the next line is only reached if all the adds were
496 # successful, so the merge is atomic
497 @files.replace testfiles
500 def add(rcs, rev, file_list=@files)
501 if file_list.key? rcs
502 prev = file_list[rcs]
503 if prev.log == rev.log
504 str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
506 str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
508 if prev.text != rev.text
511 @commit.warn_about str
523 @files.map do |rcs, rev|
524 files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
535 attr_accessor :date, :log, :symbols, :author, :branch
537 def initialize(rcs, rev)
538 raise NotImplementedError if rev.branch
539 self.date = rev.date.dup
540 self.log = rev.log.dup
541 self.symbols = rev.symbols.dup
542 self.author = rev.author
543 self.branch = rev.branch
545 self.tree = Tree.new self
546 self.tree.add rcs, rev
550 [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
554 warn str + " for commit on #{self.date}"
557 # Sort by date and then by number of symbols
559 ds = self.date <=> other.date
563 return self.symbols.length <=> other.symbols.length
568 self.tree.merge! commit.tree
569 if commit.date > self.date
570 warn_about "updating date to #{commit.date}"
571 self.date = commit.date
573 # TODO this is a possible option when merging commits with differing symbols
574 # self.symbols |= commit.symbols
578 xbranch = self.branch || 'master'
579 xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
581 numdate = self.date.tv_sec
582 xdate = "#{numdate} +0000"
585 puts "commit refs/heads/#{xbranch}"
586 puts "mark :#{RCS.commit key}"
587 puts "committer #{xauthor} #{xdate}"
588 puts "data #{xlog.length}"
589 puts xlog unless xlog.empty?
590 # TODO branching support for multi-file export
591 # puts "from :#{RCS.commit from}" if self.branch_point
594 # TODO branching support for multi-file export
595 # rev.branches.each do |sym|
596 # puts "reset refs/heads/#{sym}"
597 # puts "from :#{RCS.commit key}"
600 self.symbols.each do |sym|
601 puts "reset refs/tags/#{sym}"
602 puts "from :#{RCS.commit key}"
611 opts = GetoptLong.new(
612 # Authors file, like git-svn and git-cvsimport, more than one can be
614 ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
615 # RCS file suffix, like RCS
616 ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
617 # Date fuzziness for commits to be considered the same (in seconds)
618 ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
620 ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
621 ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
622 # prepend filenames to commit logs?
623 ['--log-filename', GetoptLong::NO_ARGUMENT],
624 ['--no-log-filename', GetoptLong::NO_ARGUMENT],
625 ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
628 # We read options in order, but they apply to all passed parameters.
629 # TODO maybe they should only apply to the following, unless there's only one
631 opts.ordering = GetoptLong::RETURN_IN_ORDER
635 :authors => Hash.new,
640 # Read config options
641 `git config --get-all rcs.authorsfile`.each_line do |fn|
642 parse_options[:authors].merge! load_authors_file(fn.chomp)
645 parse_options[:tag_each_rev] = (
646 `git config --bool rcs.tageachrev`.chomp == 'true'
649 parse_options[:log_filename] = (
650 `git config --bool rcs.logfilename`.chomp == 'true'
653 fuzz = `git config --int rcs.commitFuzz`.chomp
654 parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?
656 fuzz = `git config --int rcs.tagFuzz`.chomp
657 parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?
659 opts.each do |opt, arg|
661 when '--authors-file'
662 authors = load_authors_file(arg)
663 redef = parse_options[:authors].keys & authors.keys
664 STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
665 parse_options[:authors].merge!(authors)
666 when '--rcs-suffixes'
668 when '--rcs-commit-fuzz'
669 parse_options[:commit_fuzz] = arg.to_i
670 when '--rcs-tag-fuzz'
671 parse_options[:tag_fuzz] = arg.to_i
672 when '--tag-each-rev'
673 parse_options[:tag_each_rev] = true
674 when '--no-tag-each-rev'
675 # this is the default, which is fine since the missing key
676 # (default) returns nil which is false in Ruby
677 parse_options[:tag_each_rev] = false
678 when '--log-filename'
679 parse_options[:log_filename] = true
680 when '--no-log-filename'
681 # this is the default, which is fine since the missing key
682 # (default) returns nil which is false in Ruby
683 parse_options[:log_filename] = false
692 if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
693 parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
698 user = Etc.getlogin || ENV['USER']
700 # steal username/email data from other init files that may contain the
704 # the user's .hgrc file for a username field
705 ['~/.hgrc', /^\s*username\s*=\s*(["'])?(.*)\1$/, 2],
706 # the user's .(g)vimrc for a changelog_username setting
707 ['~/.vimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
708 ['~/.gvimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
710 ].each do |fn, rx, idx|
711 file = File.expand_path fn
712 if File.readable?(file) and File.read(file) =~ rx
713 parse_options[:authors][user] = Regexp.last_match(idx).strip
719 if user and not user.empty? and not parse_options[:authors].has_key?(user)
720 name = ENV['GIT_AUTHOR_NAME'] || ''
721 name.replace(`git config user.name`.chomp) if name.empty?
722 name.replace(Etc.getpwnam(user).gecos) if name.empty?
725 # couldn't find a name, try to steal data from other sources
728 # if we found a name, try to find an email too
729 email = ENV['GIT_AUTHOR_EMAIL'] || ''
730 email.replace(`git config user.email`.chomp) if email.empty?
733 # couldn't find an email, try to steal data too
736 # we got both a name and email, fill the info
737 parse_options[:authors][user] = "#{name} <#{email}>"
752 file_list.each do |arg|
753 case ftype = File.ftype(arg)
759 not_found "RCS file #{arg}"
762 filename = File.basename(arg, SFX)
764 filename = File.basename(arg)
765 path = File.dirname(arg)
766 rcsfile = File.join(path, 'RCS', filename) + SFX
767 unless File.exists? rcsfile
768 rcsfile.replace File.join(path, filename) + SFX
769 unless File.exists? rcsfile
770 not_found "RCS file for #{filename} in #{path}"
774 rcs << RCS.parse(filename, rcsfile)
776 pattern = File.join(arg, '**', '*' + SFX)
777 Dir.glob(pattern).each do |rcsfile|
778 filename = File.basename(rcsfile, SFX)
779 path = File.dirname(rcsfile)
780 path.sub!(/\/?RCS$/, '') # strip final /RCS if present
781 path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
782 filename = File.join(path, filename) unless path.empty?
784 rcs << RCS.parse(filename, rcsfile)
785 rescue Exception => e
786 STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
791 STDERR.puts "Cannot handle #{arg} of #{ftype} type"
797 rcs.first.export_commits(parse_options)
799 STDERR.puts "Preparing commits"
804 r.revision.each do |k, rev|
805 commits << RCS::Commit.new(r, rev)
809 STDERR.puts "Sorting by date"
814 STDERR.puts "RAW commits (#{commits.length}):"
816 PP.pp c.to_a, $stderr
819 STDERR.puts "#{commits.length} single-file commits"
822 STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"
824 commits.reverse_each do |c|
825 commits.reverse_each do |k|
826 break if k.date < c.date - parse_options[:commit_fuzz]
828 next if c.log != k.log or c.symbols != k.symbols or c.author != k.author or c.branch != k.branch
829 next if k.date > c.date
832 rescue RuntimeError => err
833 fuzz = c.date - k.date
834 STDERR.puts "Fuzzy commit coalescing failed: #{err}"
835 STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
843 STDERR.puts "[1] commits (#{commits.length}):"
845 PP.pp c.to_a, $stderr
848 STDERR.puts "#{commits.length} coalesced commits"
851 commits.each { |c| c.export(parse_options) }