5 * Option to coalesce commits that only differ by having a symbol or not
6 * Further coalescing options? (e.g. small logfile differences)
7 * Proper branching support in multi-file export
8 * Optimize memory usage by discarding unneeded text
13 # Integer#odd? was introduced in Ruby 1.8.7, backport it to
15 unless 2.respond_to? :odd?
25 #{$0} [options] file [file ...]
27 Fast-export the RCS history of one or more files. If a directory is specified,
28 all RCS-tracked files in the directory and its descendants are exported.
30 When importing single files, their pathname is discarded during import. When
31 importing directories, only the specified directory component is discarded.
33 When importing a single file, RCS commits are converted one by one. Otherwise,
34 some heuristics is used to determine how to coalesce commits of different.
36 Currently, commits are coalesced if they share the exact same log and symbols,
37 and if their date differs by no more than the user-specified fuzziness.
40 git init && rcs-fast-export.rb . | git fast-import && git reset --hard
43 --help, -h, -? display this help text
44 --authors-file, -A specify a file containing username = Full Name <email> mappings
45 --rcs-commit-fuzz fuzziness in RCS commits to be considered a single one when
46 importing multiple files
47 (in seconds, defaults to 300, i.e. 5 minutes)
48 --[no-]tag-each-rev [do not] create a lightweight tag for each RCS revision when
49 importing a single file
50 --[no-]log-filename [do not] prepend the filename to the commit log when importing
54 rcs.authorsFile for --authors-file
55 rcs.tagEachRev for --tag-each-rev
56 rcs.logFilename for --log-filename
57 rcs.commitFuzz for --rcs-commit-fuzz
58 rcs.tagFuzz for --rcs-tag-fuzz
64 STDERR.puts "Could not find #{arg}"
67 # returns a hash that maps usernames to author names & emails
68 def load_authors_file(fn)
71 File.open(File.expand_path(fn)) do |io|
72 io.each_line do |line|
73 uname, author = line.split('=', 2)
76 STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
88 fields = string.split('.')
89 raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
95 # strip an optional final ;
100 # strip the first and last @, and de-double @@s
101 def RCS.sanitize(arg)
105 raise 'malformed first line' unless ret.first[0,1] == '@'
106 raise 'malformed last line' unless ret.last[-1,1] == '@'
107 ret.first.sub!(/^@/,'')
108 ret.last.sub!(/@$/,'')
109 ret.map { |l| l.gsub('@@','@') }
111 arg.chomp('@').sub(/^@/,'').gsub('@@','@')
118 def RCS.at_clean(arg)
119 RCS.sanitize RCS.clean(arg)
127 @@marks[key] = @@marks.length + 1
131 def RCS.blob(file, rev)
132 RCS.mark([file, rev])
135 def RCS.commit(commit)
140 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
141 def initialize(fname, executable)
146 @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
147 @mode = executable ? '755' : '644'
150 def has_revision?(rev)
151 @revision.has_key?(rev) and not @revision[rev].author.nil?
154 def export_commits(opts={})
157 until @revision.empty?
160 # a string sort is a very good candidate for
161 # export order, getting a miss only for
162 # multi-digit revision components
163 keys = @revision.keys.sort
165 STDERR.puts "commit export loop ##{counter}"
166 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
167 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
171 # the parent commit is rev.next if we're on the
172 # master branch (rev.branch is nil) or
173 # rev.diff_base otherwise
174 from = rev.branch.nil? ? rev.next : rev.diff_base
175 # A commit can only be exported if it has no
176 # parent, or if the parent has been exported
177 # already. Skip this commit otherwise
178 if from and not exported.include? from
182 branch = rev.branch || 'master'
183 author = opts[:authors][rev.author] || "#{rev.author} <empty>"
184 date = "#{rev.date.tv_sec} +0000"
186 if opts[:log_filename]
187 log << @fname << ": "
191 puts "commit refs/heads/#{branch}"
192 puts "mark :#{RCS.commit key}"
193 puts "committer #{author} #{date}"
194 puts "data #{log.length}"
195 puts log unless log.empty?
196 puts "from :#{RCS.commit from}" if rev.branch_point
197 puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
199 # TODO FIXME this *should* be safe, in
200 # that it should not unduly move
201 # branches back in time, but I'm not
203 rev.branches.each do |sym|
204 puts "reset refs/heads/#{sym}"
205 puts "from :#{RCS.commit key}"
207 rev.symbols.each do |sym|
208 puts "reset refs/tags/#{sym}"
209 puts "from :#{RCS.commit key}"
211 if opts[:tag_each_rev]
212 puts "reset refs/tags/#{key}"
213 puts "from :#{RCS.commit key}"
218 exported.each { |k| @revision.delete(k) }
224 attr_accessor :rev, :author, :state, :next
225 attr_accessor :branches, :log, :text, :symbols
226 attr_accessor :branch, :diff_base, :branch_point
228 def initialize(file, rev)
245 @date = Time.rcs(str)
250 ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
255 def RCS.parse(fname, rcsfile)
256 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
258 ::File.open(rcsfile, 'r') do |file|
263 file.each_line do |line|
266 command, args = line.split($;,2)
267 next if command.empty?
269 if command.chomp!(';')
270 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
276 rcs.head = RCS.clean(args.chomp)
282 rcs.comment = RCS.at_clean(args.chomp)
285 if rcs.has_revision?(rev)
286 status.push :revision_data
288 status.push :new_revision
293 status.push :read_lines
294 when 'branch', 'access', 'locks', 'expand'
295 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
296 status.push :skipping_lines
300 raise "Unknown command #{command.inspect}"
303 status.pop if line.strip.chomp!(';')
305 # we can have multiple symbols per line
306 pairs = line.strip.split($;)
308 sym, rev = pair.strip.split(':',2);
310 status.pop if rev.chomp!(';')
311 rcs.revision[rev].symbols << sym
317 rcs.desc.replace lines.dup
320 # we sanitize lines as we read them
322 actual_line = line.dup
324 # the first line must begin with a @, which we strip
326 ats = line.match(/^@+/)
327 raise 'malformed line' unless ats
328 actual_line.replace line.sub(/^@/,'')
331 # if the line ends with an ODD number of @, it's the
332 # last line -- we work on actual_line so that content
333 # such as @\n or @ work correctly (they would be
334 # encoded respectively as ['@@@\n','@\n'] and
336 ats = actual_line.chomp.match(/@+$/)
337 if nomore = (ats && Regexp.last_match(0).length.odd?)
338 actual_line.replace actual_line.chomp.sub(/@$/,'')
340 lines << actual_line.gsub('@@','@')
347 when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
348 rcs.revision[rev].date = $1
349 rcs.revision[rev].author = $2
350 rcs.revision[rev].state = $3
352 status.push :branches
355 when /^next\s+(\S+)?;$/
356 nxt = rcs.revision[rev].next = $1
358 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
359 rcs.revision[nxt].diff_base = rev
360 rcs.revision[nxt].branch = rcs.revision[rev].branch
365 candidate = line.split(';',2)
366 branch = candidate.first.strip
367 rcs.revision[rev].branches.push branch
368 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
369 rcs.revision[branch].diff_base = rev
370 # we drop the last number from the branch name
371 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
372 rcs.revision[branch].branch_point = rev
373 status.pop if candidate.length > 1
379 status.push :read_lines
387 status.push :read_lines
392 rcs.revision[rev].log.replace lines.dup
395 rcs.revision[rev].text.replace lines.dup
396 puts rcs.revision[rev].blob
399 difflines.replace lines.dup
400 difflines.pop if difflines.last.empty?
401 base = rcs.revision[rev].diff_base
402 unless rcs.revision[base].text
405 raise 'no diff base!'
409 rcs.revision[base].text.each { |l| buffer << [l.dup] }
415 while l = difflines.shift
417 raise 'negative index during insertion' if index < 0
418 raise 'negative count during insertion' if count < 0
421 # collected all the lines, put them before
426 buffer[index].unshift(*adding)
433 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
439 # for deletion, index 1 is the first index, so the Ruby
440 # index is one less than the diff one
442 # we replace them with empty string so that 'a' commands
443 # referring to the same line work properly
450 # addition will prepend the appropriate lines
451 # to the given index, and in this case Ruby
452 # and diff indices are the same
457 # turn the buffer into an array of lines, deleting the empty ones
458 buffer.delete_if { |l| l.empty? }
461 rcs.revision[rev].text = buffer
462 puts rcs.revision[rev].blob
465 raise "Unknown status #{status.last}"
470 # clean up the symbols/branches: look for revisions that have
471 # one or more symbols but no dates, and make them into
472 # branches, pointing to the highest commit with that key
474 keys = rcs.revision.keys
475 rcs.revision.each do |key, rev|
476 if rev.date.nil? and not rev.symbols.empty?
477 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
478 tr = rcs.revision[top]
479 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
480 tr.branches |= rev.symbols
484 branches.each { |k| rcs.revision.delete k }
490 def initialize(commit)
496 testfiles = @files.dup
497 tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
498 # the next line is only reached if all the adds were
499 # successful, so the merge is atomic
500 @files.replace testfiles
503 def add(rcs, rev, file_list=@files)
504 if file_list.key? rcs
505 prev = file_list[rcs]
506 if prev.log == rev.log
507 str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
509 str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
511 if prev.text != rev.text
514 @commit.warn_about str
526 @files.map do |rcs, rev|
527 files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
538 attr_accessor :date, :log, :symbols, :author, :branch
540 def initialize(rcs, rev)
541 raise NotImplementedError if rev.branch
542 self.date = rev.date.dup
543 self.log = rev.log.dup
544 self.symbols = rev.symbols.dup
545 self.author = rev.author
546 self.branch = rev.branch
548 self.tree = Tree.new self
549 self.tree.add rcs, rev
553 [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
557 warn str + " for commit on #{self.date}"
560 # Sort by date and then by number of symbols
562 ds = self.date <=> other.date
566 return self.symbols.length <=> other.symbols.length
571 self.tree.merge! commit.tree
572 if commit.date > self.date
573 warn_about "updating date to #{commit.date}"
574 self.date = commit.date
576 # TODO this is a possible option when merging commits with differing symbols
577 # self.symbols |= commit.symbols
581 xbranch = self.branch || 'master'
582 xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
584 numdate = self.date.tv_sec
585 xdate = "#{numdate} +0000"
588 puts "commit refs/heads/#{xbranch}"
589 puts "mark :#{RCS.commit key}"
590 puts "committer #{xauthor} #{xdate}"
591 puts "data #{xlog.length}"
592 puts xlog unless xlog.empty?
593 # TODO branching support for multi-file export
594 # puts "from :#{RCS.commit from}" if self.branch_point
597 # TODO branching support for multi-file export
598 # rev.branches.each do |sym|
599 # puts "reset refs/heads/#{sym}"
600 # puts "from :#{RCS.commit key}"
603 self.symbols.each do |sym|
604 puts "reset refs/tags/#{sym}"
605 puts "from :#{RCS.commit key}"
614 opts = GetoptLong.new(
615 # Authors file, like git-svn and git-cvsimport, more than one can be
617 ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
618 # RCS file suffix, like RCS
619 ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
620 # Date fuzziness for commits to be considered the same (in seconds)
621 ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
623 ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
624 ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
625 # prepend filenames to commit logs?
626 ['--log-filename', GetoptLong::NO_ARGUMENT],
627 ['--no-log-filename', GetoptLong::NO_ARGUMENT],
628 ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
631 # We read options in order, but they apply to all passed parameters.
632 # TODO maybe they should only apply to the following, unless there's only one
634 opts.ordering = GetoptLong::RETURN_IN_ORDER
638 :authors => Hash.new,
643 # Read config options
644 `git config --get-all rcs.authorsfile`.each_line do |fn|
645 parse_options[:authors].merge! load_authors_file(fn.chomp)
648 parse_options[:tag_each_rev] = (
649 `git config --bool rcs.tageachrev`.chomp == 'true'
652 parse_options[:log_filename] = (
653 `git config --bool rcs.logfilename`.chomp == 'true'
656 fuzz = `git config --int rcs.commitFuzz`.chomp
657 parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?
659 fuzz = `git config --int rcs.tagFuzz`.chomp
660 parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?
662 opts.each do |opt, arg|
664 when '--authors-file'
665 authors = load_authors_file(arg)
666 redef = parse_options[:authors].keys & authors.keys
667 STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
668 parse_options[:authors].merge!(authors)
669 when '--rcs-suffixes'
671 when '--rcs-commit-fuzz'
672 parse_options[:commit_fuzz] = arg.to_i
673 when '--rcs-tag-fuzz'
674 parse_options[:tag_fuzz] = arg.to_i
675 when '--tag-each-rev'
676 parse_options[:tag_each_rev] = true
677 when '--no-tag-each-rev'
678 # this is the default, which is fine since the missing key
679 # (default) returns nil, which is falsy in Ruby
680 parse_options[:tag_each_rev] = false
681 when '--log-filename'
682 parse_options[:log_filename] = true
683 when '--no-log-filename'
684 # this is the default, which is fine since the missing key
685 # (default) returns nil, which is falsy in Ruby
686 parse_options[:log_filename] = false
695 if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
696 parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
701 user = Etc.getlogin || ENV['USER']
703 # steal username/email data from other init files that may contain the
707 # the user's .hgrc file for a username field
708 ['~/.hgrc', /^\s*username\s*=\s*(["'])?(.*)\1$/, 2],
709 # the user's .(g)vimrc for a changelog_username setting
710 ['~/.vimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
711 ['~/.gvimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
713 ].each do |fn, rx, idx|
714 file = File.expand_path fn
715 if File.readable?(file) and File.read(file) =~ rx
716 parse_options[:authors][user] = Regexp.last_match(idx).strip
722 if user and not user.empty? and not parse_options[:authors].has_key?(user)
723 name = ENV['GIT_AUTHOR_NAME'] || ''
724 name.replace(`git config user.name`.chomp) if name.empty?
725 name.replace(Etc.getpwnam(user).gecos) if name.empty?
728 # couldn't find a name, try to steal data from other sources
731 # if we found a name, try to find an email too
732 email = ENV['GIT_AUTHOR_EMAIL'] || ''
733 email.replace(`git config user.email`.chomp) if email.empty?
736 # couldn't find an email, try to steal data too
739 # we got both a name and email, fill the info
740 parse_options[:authors][user] = "#{name} <#{email}>"
755 file_list.each do |arg|
756 case ftype = File.ftype(arg)
762 not_found "RCS file #{arg}"
765 filename = File.basename(arg, SFX)
767 filename = File.basename(arg)
768 path = File.dirname(arg)
769 rcsfile = File.join(path, 'RCS', filename) + SFX
770 unless File.exists? rcsfile
771 rcsfile.replace File.join(path, filename) + SFX
772 unless File.exists? rcsfile
773 not_found "RCS file for #{filename} in #{path}"
777 rcs << RCS.parse(filename, rcsfile)
779 pattern = File.join(arg, '**', '*' + SFX)
780 Dir.glob(pattern).each do |rcsfile|
781 filename = File.basename(rcsfile, SFX)
782 path = File.dirname(rcsfile)
783 path.sub!(/\/?RCS$/, '') # strip final /RCS if present
784 path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
785 filename = File.join(path, filename) unless path.empty?
787 rcs << RCS.parse(filename, rcsfile)
788 rescue Exception => e
789 STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
794 STDERR.puts "Cannot handle #{arg} of #{ftype} type"
800 rcs.first.export_commits(parse_options)
802 STDERR.puts "Preparing commits"
807 r.revision.each do |k, rev|
808 commits << RCS::Commit.new(r, rev)
812 STDERR.puts "Sorting by date"
817 STDERR.puts "RAW commits (#{commits.length}):"
819 PP.pp c.to_a, $stderr
822 STDERR.puts "#{commits.length} single-file commits"
825 STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"
827 commits.reverse_each do |c|
828 commits.reverse_each do |k|
829 break if k.date < c.date - parse_options[:commit_fuzz]
831 next if c.log != k.log or c.symbols != k.symbols or c.author != k.author or c.branch != k.branch
832 next if k.date > c.date
835 rescue RuntimeError => err
836 fuzz = c.date - k.date
837 STDERR.puts "Fuzzy commit coalescing failed: #{err}"
838 STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
846 STDERR.puts "[1] commits (#{commits.length}):"
848 PP.pp c.to_a, $stderr
851 STDERR.puts "#{commits.length} coalesced commits"
854 commits.each { |c| c.export(parse_options) }