5 * Option to coalesce commits that only differ by having a symbol or not
6 * Further coalescing options? (e.g. small logfile differences)
7 * Proper branching support in multi-file export
8 * Optimize memory usage by discarding unneeded text
13 # Integer#odd? was introduced in Ruby 1.8.7, backport it to
15 unless 2.respond_to? :odd?
25 #{$0} [options] file [file ...]
27 Fast-export the RCS history of one or more files. If a directory is specified,
28 all RCS-tracked files in the directory and its descendants are exported.
30 When importing single files, their pathname is discarded during import. When
31 importing directories, only the specified directory component is discarded.
33 When importing a single file, RCS commits are converted one by one. Otherwise,
34 some heuristics is used to determine how to coalesce commits touching different
37 Currently, commits are coalesced if they share the exact same log and symbols,
38 and if their date differs by no more than the user-specified fuzziness.
41 git init && rcs-fast-export.rb . | git fast-import && git reset --hard
44 --help, -h, -? display this help text
45 --authors-file, -A specify a file containing username = Full Name <email> mappings
46 --rcs-commit-fuzz fuzziness in RCS commits to be considered a single one when
47 importing multiple files
48 (in seconds, defaults to 300, i.e. 5 minutes)
49 --[no-]tag-each-rev [do not] create a lightweight tag for each RCS revision when
50 importing a single file
51 --[no-]log-filename [do not] prepend the filename to the commit log when importing
55 rcs.authorsFile for --authors-file
56 rcs.tagEachRev for --tag-each-rev
57 rcs.logFilename for --log-filename
58 rcs.commitFuzz for --rcs-commit-fuzz
59 rcs.tagFuzz for --rcs-tag-fuzz
65 STDERR.puts "Could not find #{arg}"
68 # returns a hash that maps usernames to author names & emails
69 def load_authors_file(fn)
72 File.open(File.expand_path(fn)) do |io|
73 io.each_line do |line|
74 uname, author = line.split('=', 2)
77 STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
89 fields = string.split('.')
90 raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
96 # strip an optional final ;
101 # strip the first and last @, and de-double @@s
102 def RCS.sanitize(arg)
106 raise 'malformed first line' unless ret.first[0,1] == '@'
107 raise 'malformed last line' unless ret.last[-1,1] == '@'
108 ret.first.sub!(/^@/,'')
109 ret.last.sub!(/@$/,'')
110 ret.map { |l| l.gsub('@@','@') }
112 arg.chomp('@').sub(/^@/,'').gsub('@@','@')
119 def RCS.at_clean(arg)
120 RCS.sanitize RCS.clean(arg)
128 @@marks[key] = @@marks.length + 1
132 def RCS.blob(file, rev)
133 RCS.mark([file, rev])
136 def RCS.commit(commit)
141 attr_accessor :head, :comment, :desc, :revision, :fname, :mode
142 def initialize(fname, executable)
147 @revision = Hash.new { |h, r| h[r] = Revision.new(self, r) }
148 @mode = executable ? '755' : '644'
151 def has_revision?(rev)
152 @revision.has_key?(rev) and not @revision[rev].author.nil?
155 def export_commits(opts={})
158 until @revision.empty?
161 # a string sort is a very good candidate for
162 # export order, getting a miss only for
163 # multi-digit revision components
164 keys = @revision.keys.sort
166 STDERR.puts "commit export loop ##{counter}"
167 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
168 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
172 # the parent commit is rev.next if we're on the
173 # master branch (rev.branch is nil) or
174 # rev.diff_base otherwise
175 from = rev.branch.nil? ? rev.next : rev.diff_base
176 # A commit can only be exported if it has no
177 # parent, or if the parent has been exported
178 # already. Skip this commit otherwise
179 if from and not exported.include? from
183 branch = rev.branch || 'master'
184 author = opts[:authors][rev.author] || "#{rev.author} <empty>"
185 date = "#{rev.date.tv_sec} +0000"
187 if opts[:log_filename]
188 log << @fname << ": "
192 puts "commit refs/heads/#{branch}"
193 puts "mark :#{RCS.commit key}"
194 puts "committer #{author} #{date}"
195 puts "data #{log.length}"
196 puts log unless log.empty?
197 puts "from :#{RCS.commit from}" if rev.branch_point
198 puts "M #{@mode} :#{RCS.blob @fname, key} #{@fname}"
200 # TODO FIXME this *should* be safe, in
201 # that it should not unduly move
202 # branches back in time, but I'm not
204 rev.branches.each do |sym|
205 puts "reset refs/heads/#{sym}"
206 puts "from :#{RCS.commit key}"
208 rev.symbols.each do |sym|
209 puts "reset refs/tags/#{sym}"
210 puts "from :#{RCS.commit key}"
212 if opts[:tag_each_rev]
213 puts "reset refs/tags/#{key}"
214 puts "from :#{RCS.commit key}"
219 exported.each { |k| @revision.delete(k) }
225 attr_accessor :rev, :author, :state, :next
226 attr_accessor :branches, :log, :text, :symbols
227 attr_accessor :branch, :diff_base, :branch_point
229 def initialize(file, rev)
246 @date = Time.rcs(str)
251 ret = "blob\nmark :#{RCS.blob @file.fname, @rev}\ndata #{str.length}\n#{str}\n"
256 def RCS.parse(fname, rcsfile)
257 rcs = RCS::File.new(fname, ::File.executable?(rcsfile))
259 ::File.open(rcsfile, 'r') do |file|
264 file.each_line do |line|
267 command, args = line.split($;,2)
268 next if command.empty?
270 if command.chomp!(';')
271 STDERR.puts "Skipping empty command #{command.inspect}" if $DEBUG
277 rcs.head = RCS.clean(args.chomp)
283 rcs.comment = RCS.at_clean(args.chomp)
286 if rcs.has_revision?(rev)
287 status.push :revision_data
289 status.push :new_revision
294 status.push :read_lines
295 when 'branch', 'access', 'locks', 'expand'
296 STDERR.puts "Skipping unhandled command #{command.inspect}" if $DEBUG
297 status.push :skipping_lines
301 raise "Unknown command #{command.inspect}"
304 status.pop if line.strip.chomp!(';')
306 # we can have multiple symbols per line
307 pairs = line.strip.split($;)
309 sym, rev = pair.strip.split(':',2);
311 status.pop if rev.chomp!(';')
312 rcs.revision[rev].symbols << sym
318 rcs.desc.replace lines.dup
321 # we sanitize lines as we read them
323 actual_line = line.dup
325 # the first line must begin with a @, which we strip
327 ats = line.match(/^@+/)
328 raise 'malformed line' unless ats
329 actual_line.replace line.sub(/^@/,'')
332 # if the line ends with an ODD number of @, it's the
333 # last line -- we work on actual_line so that content
334 # such as @\n or @ work correctly (they would be
335 # encoded respectively as ['@@@\n','@\n'] and
337 ats = actual_line.chomp.match(/@+$/)
338 if nomore = (ats && Regexp.last_match(0).length.odd?)
339 actual_line.replace actual_line.chomp.sub(/@$/,'')
341 lines << actual_line.gsub('@@','@')
348 when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s+(\S+);$/
349 rcs.revision[rev].date = $1
350 rcs.revision[rev].author = $2
351 rcs.revision[rev].state = $3
353 status.push :branches
356 when /^next\s+(\S+)?;$/
357 nxt = rcs.revision[rev].next = $1
359 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
360 rcs.revision[nxt].diff_base = rev
361 rcs.revision[nxt].branch = rcs.revision[rev].branch
366 candidate = line.split(';',2)
367 branch = candidate.first.strip
368 rcs.revision[rev].branches.push branch
369 raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
370 rcs.revision[branch].diff_base = rev
371 # we replace the last number of the branch name with an 'x'
372 rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
373 rcs.revision[branch].branch_point = rev
374 status.pop if candidate.length > 1
380 status.push :read_lines
388 status.push :read_lines
393 rcs.revision[rev].log.replace lines.dup
396 rcs.revision[rev].text.replace lines.dup
397 puts rcs.revision[rev].blob
400 difflines.replace lines.dup
401 difflines.pop if difflines.last.empty?
402 base = rcs.revision[rev].diff_base
403 unless rcs.revision[base].text
406 raise 'no diff base!'
410 rcs.revision[base].text.each { |l| buffer << [l.dup] }
416 while l = difflines.shift
418 raise 'negative index during insertion' if index < 0
419 raise 'negative count during insertion' if count < 0
422 # collected all the lines, put them before
427 buffer[index].unshift(*adding)
434 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
440 # for deletion, index 1 is the first index, so the Ruby
441 # index is one less than the diff one
443 # we replace them with empty string so that 'a' commands
444 # referring to the same line work properly
451 # addition will prepend the appropriate lines
452 # to the given index, and in this case Ruby
453 # and diff indices are the same
458 # turn the buffer into an array of lines, deleting the empty ones
459 buffer.delete_if { |l| l.empty? }
462 rcs.revision[rev].text = buffer
463 puts rcs.revision[rev].blob
466 raise "Unknown status #{status.last}"
471 # clean up the symbols/branches: look for revisions that have
472 # one or more symbols but no dates, and make them into
473 # branches, pointing to the highest commit with that key
475 keys = rcs.revision.keys
476 rcs.revision.each do |key, rev|
477 if rev.date.nil? and not rev.symbols.empty?
478 top = keys.select { |k| k.match(/^#{key}\./) }.sort.last
479 tr = rcs.revision[top]
480 raise "unhandled complex branch structure met: #{rev.inspect} refers #{tr.inspect}" if tr.date.nil?
481 tr.branches |= rev.symbols
485 branches.each { |k| rcs.revision.delete k }
491 def initialize(commit)
497 testfiles = @files.dup
498 tree.each { |rcs, rev| self.add(rcs, rev, testfiles) }
499 # the next line is only reached if all the adds were
500 # successful, so the merge is atomic
501 @files.replace testfiles
504 def add(rcs, rev, file_list=@files)
505 if file_list.key? rcs
506 prev = file_list[rcs]
507 if prev.log == rev.log
508 str = "re-adding existing file #{rcs.fname} (old: #{prev.rev}, new: #{rev.rev})"
510 str = "re-adding existing file #{rcs.fname} (old: #{[prev.rev, prev.log.to_s].inspect}, new: #{[rev.rev, rev.log.to_s].inspect})"
512 if prev.text != rev.text
515 @commit.warn_about str
527 @files.map do |rcs, rev|
528 files << "M #{rcs.mode} :#{RCS.blob rcs.fname, rev.rev} #{rcs.fname}"
539 attr_accessor :date, :log, :symbols, :author, :branch
541 def initialize(rcs, rev)
542 raise NotImplementedError if rev.branch
543 self.date = rev.date.dup
544 self.log = rev.log.dup
545 self.symbols = rev.symbols.dup
546 self.author = rev.author
547 self.branch = rev.branch
549 self.tree = Tree.new self
550 self.tree.add rcs, rev
554 [self.date, self.branch, self.symbols, self.author, self.log, self.tree.to_a]
558 warn str + " for commit on #{self.date}"
561 # Sort by date and then by number of symbols
563 ds = self.date <=> other.date
567 return self.symbols.length <=> other.symbols.length
572 self.tree.merge! commit.tree
573 if commit.date > self.date
574 warn_about "updating date to #{commit.date}"
575 self.date = commit.date
577 # TODO this is a possible option when merging commits with differing symbols
578 # self.symbols |= commit.symbols
582 xbranch = self.branch || 'master'
583 xauthor = opts[:authors][self.author] || "#{self.author} <empty>"
585 numdate = self.date.tv_sec
586 xdate = "#{numdate} +0000"
589 puts "commit refs/heads/#{xbranch}"
590 puts "mark :#{RCS.commit key}"
591 puts "committer #{xauthor} #{xdate}"
592 puts "data #{xlog.length}"
593 puts xlog unless xlog.empty?
594 # TODO branching support for multi-file export
595 # puts "from :#{RCS.commit from}" if self.branch_point
598 # TODO branching support for multi-file export
599 # rev.branches.each do |sym|
600 # puts "reset refs/heads/#{sym}"
601 # puts "from :#{RCS.commit key}"
604 self.symbols.each do |sym|
605 puts "reset refs/tags/#{sym}"
606 puts "from :#{RCS.commit key}"
615 opts = GetoptLong.new(
616 # Authors file, like git-svn and git-cvsimport, more than one can be
618 ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
619 # RCS file suffix, like RCS
620 ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
621 # Date fuzziness for commits to be considered the same (in seconds)
622 ['--rcs-commit-fuzz', GetoptLong::REQUIRED_ARGUMENT],
624 ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
625 ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
626 # prepend filenames to commit logs?
627 ['--log-filename', GetoptLong::NO_ARGUMENT],
628 ['--no-log-filename', GetoptLong::NO_ARGUMENT],
629 ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
632 # We read options in order, but they apply to all passed parameters.
633 # TODO maybe they should only apply to the following, unless there's only one
635 opts.ordering = GetoptLong::RETURN_IN_ORDER
639 :authors => Hash.new,
644 # Read config options
645 `git config --get-all rcs.authorsfile`.each_line do |fn|
646 parse_options[:authors].merge! load_authors_file(fn.chomp)
649 parse_options[:tag_each_rev] = (
650 `git config --bool rcs.tageachrev`.chomp == 'true'
653 parse_options[:log_filename] = (
654 `git config --bool rcs.logfilename`.chomp == 'true'
657 fuzz = `git config --int rcs.commitFuzz`.chomp
658 parse_options[:commit_fuzz] = fuzz.to_i unless fuzz.empty?
660 fuzz = `git config --int rcs.tagFuzz`.chomp
661 parse_options[:tag_fuzz] = fuzz.to_i unless fuzz.empty?
663 opts.each do |opt, arg|
665 when '--authors-file'
666 authors = load_authors_file(arg)
667 redef = parse_options[:authors].keys & authors.keys
668 STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
669 parse_options[:authors].merge!(authors)
670 when '--rcs-suffixes'
672 when '--rcs-commit-fuzz'
673 parse_options[:commit_fuzz] = arg.to_i
674 when '--rcs-tag-fuzz'
675 parse_options[:tag_fuzz] = arg.to_i
676 when '--tag-each-rev'
677 parse_options[:tag_each_rev] = true
678 when '--no-tag-each-rev'
679 # this is the default, which is fine since the missing key
680 # (default) returns nil which is false in Ruby
681 parse_options[:tag_each_rev] = false
682 when '--log-filename'
683 parse_options[:log_filename] = true
684 when '--no-log-filename'
685 # this is the default, which is fine since the missing key
686 # (default) returns nil which is false in Ruby
687 parse_options[:log_filename] = false
696 if parse_options[:tag_fuzz] < parse_options[:commit_fuzz]
697 parse_options[:tag_fuzz] = parse_options[:commit_fuzz]
702 user = Etc.getlogin || ENV['USER']
704 # steal username/email data from other init files that may contain the
708 # the user's .hgrc file for a username field
709 ['~/.hgrc', /^\s*username\s*=\s*(["'])?(.*)\1$/, 2],
710 # the user's .(g)vimrc for a changelog_username setting
711 ['~/.vimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
712 ['~/.gvimrc', /changelog_username\s*=\s*(["'])?(.*)\1$/, 2],
714 ].each do |fn, rx, idx|
715 file = File.expand_path fn
716 if File.readable?(file) and File.read(file) =~ rx
717 parse_options[:authors][user] = Regexp.last_match(idx).strip
723 if user and not user.empty? and not parse_options[:authors].has_key?(user)
724 name = ENV['GIT_AUTHOR_NAME'] || ''
725 name.replace(`git config user.name`.chomp) if name.empty?
726 name.replace(Etc.getpwnam(user).gecos) if name.empty?
729 # couldn't find a name, try to steal data from other sources
732 # if we found a name, try to find an email too
733 email = ENV['GIT_AUTHOR_EMAIL'] || ''
734 email.replace(`git config user.email`.chomp) if email.empty?
737 # couldn't find an email, try to steal data too
740 # we got both a name and email, fill the info
741 parse_options[:authors][user] = "#{name} <#{email}>"
756 file_list.each do |arg|
757 case ftype = File.ftype(arg)
763 not_found "RCS file #{arg}"
766 filename = File.basename(arg, SFX)
768 filename = File.basename(arg)
769 path = File.dirname(arg)
770 rcsfile = File.join(path, 'RCS', filename) + SFX
771 unless File.exists? rcsfile
772 rcsfile.replace File.join(path, filename) + SFX
773 unless File.exists? rcsfile
774 not_found "RCS file for #{filename} in #{path}"
778 rcs << RCS.parse(filename, rcsfile)
780 pattern = File.join(arg, '**', '*' + SFX)
781 Dir.glob(pattern).each do |rcsfile|
782 filename = File.basename(rcsfile, SFX)
783 path = File.dirname(rcsfile)
784 path.sub!(/\/?RCS$/, '') # strip final /RCS if present
785 path.sub!(/^#{Regexp.escape arg}\/?/, '') # strip initial dirname
786 filename = File.join(path, filename) unless path.empty?
788 rcs << RCS.parse(filename, rcsfile)
789 rescue Exception => e
790 STDERR.puts "Failed to parse #{filename} @ #{rcsfile}:#{$.}"
795 STDERR.puts "Cannot handle #{arg} of #{ftype} type"
801 rcs.first.export_commits(parse_options)
803 STDERR.puts "Preparing commits"
808 r.revision.each do |k, rev|
809 commits << RCS::Commit.new(r, rev)
813 STDERR.puts "Sorting by date"
818 STDERR.puts "RAW commits (#{commits.length}):"
820 PP.pp c.to_a, $stderr
823 STDERR.puts "#{commits.length} single-file commits"
826 STDERR.puts "Coalescing [1] by date with fuzz #{parse_options[:commit_fuzz]}"
828 commits.reverse_each do |c|
829 commits.reverse_each do |k|
830 break if k.date < c.date - parse_options[:commit_fuzz]
832 next if c.log != k.log or c.symbols != k.symbols or c.author != k.author or c.branch != k.branch
833 next if k.date > c.date
836 rescue RuntimeError => err
837 fuzz = c.date - k.date
838 STDERR.puts "Fuzzy commit coalescing failed: #{err}"
839 STDERR.puts "\tretry with commit fuzz < #{fuzz} if you don't want to see this message"
847 STDERR.puts "[1] commits (#{commits.length}):"
849 PP.pp c.to_a, $stderr
852 STDERR.puts "#{commits.length} coalesced commits"
855 commits.each { |c| c.export(parse_options) }