Try to guess the user's name and email
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print the command-line help text on standard error.
#
# The text starts with the running script's name ($0) and lists the
# supported options and the git config keys that mirror them.
def usage
        help_text = <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more file.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

Config options:
        rcs.authorsFile         for --authors-file
        rcs.tagEachRev          for --tag-each-rev

EOM
        STDERR.puts help_text
end
22
# Report on standard error that +arg+ (a file name or a short description
# of what was being looked for) could not be found.
def not_found(arg)
        STDERR.puts format('Could not find %s', arg)
end
26
# Returns a hash that maps usernames to author names & emails.
#
# +fn+ is the path of a file made of "username = Full Name <email>" lines
# (a leading ~ is expanded). Blank lines are ignored; lines without a '='
# are reported on standard error and skipped instead of aborting the whole
# file. When a username appears more than once the last definition wins
# and a notice is printed.
#
# If the file cannot be opened or read, not_found(fn) is called and
# whatever was collected so far (possibly an empty hash) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.foreach(File.expand_path(fn)) do |line|
                        uname, author = line.split('=', 2)
                        if author.nil?
                                # no '=' on this line: nothing to map
                                STDERR.puts "Ignoring malformed authors line #{line.chomp.inspect} in #{fn}" unless line.strip.empty?
                                next
                        end
                        uname.strip!
                        author.strip!
                        STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                        hash[uname] = author
                end
        rescue SystemCallError, IOError, ArgumentError
                # SystemCallError covers missing/unreadable files; ArgumentError
                # covers a ~user prefix that cannot be expanded
                not_found(fn)
        end
        hash
end
45
class Time
        # Build a UTC Time from an RCS date string.
        #
        # +string+ has six dot-separated numeric fields:
        # year.month.day.hour.minute.second. RCS writes the years 1900-1999
        # with two digits only and later years in full, so a year of at most
        # two digits is taken to be 19YY.
        #
        # Raises ArgumentError if the string does not have exactly six
        # fields or if a field is not a decimal number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                # explicit base 10 so that fields such as "08" are not
                # rejected as malformed octal
                numbers = fields.map { |f| Integer(f, 10) }
                numbers[0] += 1900 if fields.first.length <= 2
                Time.utc(*numbers)
        end
end
53
# Parser for RCS ",v" files and writer of the matching git fast-import
# stream (blobs and commits are printed on standard output, progress and
# diagnostics on standard error).
module RCS
        # Strip an optional final ';' from +arg+ and return the result.
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # Strip the first and last '@' and de-double every '@@'.
        #
        # +arg+ is either a String or an Array of Strings (one per line).
        # A new object is returned; the argument and the strings it holds
        # are left untouched. Raises RuntimeError for a malformed array or
        # for an argument of any other class.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' unless arg.first.to_s[0,1] == '@'
                        raise 'malformed last line' unless arg.last.to_s[-1,1] == '@'
                        # copy every line so that the in-place edits below
                        # cannot leak into the caller's strings
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/^@/,'')
                        ret.last.sub!(/@$/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/^@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # Mark used for the blob of revision +arg+: the revision number
        # with every '.' turned into '0', followed by '90' five times.
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # Mark used for the commit of revision +arg+: the revision number
        # with every '.' turned into '0', followed by '09' five times.
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # Apply an RCS delta to a text and return the resulting lines.
        #
        # +base+ is the array of lines (one line per element) of the
        # revision the delta refers to. +difflines+ holds the delta:
        # "dN C" deletes C lines starting at line N, "aN C" inserts the C
        # lines that follow the command after line N (N = 0 inserts before
        # the first line). Line numbers always refer to +base+. Neither
        # argument is modified.
        #
        # Raises RuntimeError ('malformed diff') for an unknown command or
        # for a line number outside of +base+.
        def RCS.apply_diff(base, difflines)
                keep = Array.new(base.length, true)
                # inserted[n] collects the lines added after base line n
                inserted = Array.new(base.length + 1) { [] }

                pos = 0
                while pos < difflines.length
                        m = /^([ad])(\d+) (\d+)$/.match(difflines[pos].chomp)
                        raise 'malformed diff' unless m
                        pos += 1
                        line_no = m[2].to_i
                        count = m[3].to_i
                        if m[1] == 'd'
                                first = line_no - 1
                                raise 'malformed diff' if first < 0 || first + count > base.length
                                count.times { |i| keep[first + i] = false }
                        else
                                raise 'malformed diff' if line_no > base.length
                                inserted[line_no].concat difflines[pos, count]
                                pos += count
                        end
                end

                result = inserted.first.dup
                base.each_with_index do |l, i|
                        result << l if keep[i]
                        result.concat inserted[i + 1]
                end
                result
        end

        # An RCS file: its administrative data and its revisions.
        class File
                attr_accessor :head, :comment, :desc, :revision

                # +fname+ is the name given to the file in the exported commits.
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        # looking up an unknown revision number creates it
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # true if +rev+ is known and its author has been read, i.e.
                # the revision was defined and not merely referenced
                def has_revision?(rev)
                        @revision.has_key?(rev) && !@revision[rev].author.nil?
                end

                # Print one fast-import commit per revision on standard
                # output, parents before children, and empty the revision
                # table while doing so.
                #
                # +opts+ may contain :authors (hash mapping RCS user names to
                # "Name <email>" strings; unknown users are exported as
                # "user <empty>") and :tag_each_rev (when true, a tag named
                # after the revision number is emitted for every commit).
                #
                # Revisions that were only referenced (for example by a
                # symbol) but never defined are skipped with a notice.
                # Raises RuntimeError if some revisions can never be
                # exported because their parent is not available.
                def export_commits(opts={})
                        authors = opts[:authors] || {}

                        @revision.keys.each do |key|
                                next if has_revision?(key)
                                STDERR.puts "Skipping revision #{key}: referenced but never defined"
                                @revision.delete(key)
                        end

                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                exported_before = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from && !exported.include?(from)

                                        branch = rev.branch || 'master'
                                        author = authors[rev.author] || "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is kept as an array of lines
                                        log = Array(rev.log).join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        # fast-import counts bytes, not characters
                                        puts "data #{log.bytesize}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing would repeat forever
                                if exported.length == exported_before
                                        raise "cannot export #{keys.join(', ')}: parent revision never exported"
                                end

                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # A single revision of an RCS file.
        class Revision
                attr_accessor :rev, :author, :date, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # +str+ is a date in RCS format; it is stored as a Time
                def date=(str)
                        @date = Time.rcs(str)
                end

                # The fast-import 'blob' command holding the text of this
                # revision; the data length is given in bytes.
                def blob
                        str = @text.join('')
                        "blob\nmark :#{RCS.blob @rev}\ndata #{str.bytesize}\n#{str}\n"
                end
        end

        # Parse the RCS file +rcsfile+ and print its history as a
        # fast-import stream on standard output: one blob per revision as
        # soon as its text is known, then the commits (see
        # RCS::File#export_commits, which receives +opts+ unchanged).
        # +fname+ is the name the file gets in the exported commits.
        #
        # The file is read as binary data so that content which is not
        # valid in the default encoding does not abort the parsing.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'rb') do |file|
                        # stack of parser states, the last one is current
                        status = [:basic]
                        rev = nil
                        lines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        command, args = line.split(' ', 2)
                                        next if command.nil? || command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.to_s.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.to_s.chomp)
                                        when /^[0-9.]+$/
                                                rev = command.dup
                                                # the first time a revision number shows up it
                                                # introduces the metadata, the second time the
                                                # log and text
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        entry = line.strip
                                        # the list of symbols ends with a ;
                                        done = (entry[-1,1] == ';')
                                        entry.chomp(';').split.each do |pair|
                                                sym, symrev = pair.split(':', 2)
                                                rcs.revision[symrev].symbols << sym if symrev
                                        end
                                        status.pop if done
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                raise 'malformed line' unless line =~ /^@/
                                                actual_line.sub!(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        trailing = actual_line.chomp[/@+$/]
                                        nomore = (!trailing.nil? && trailing.length.odd?)
                                        actual_line = actual_line.chomp.sub(/@$/,'') if nomore
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                # hand the same input line to the state that
                                                # asked for the text, so it can store it
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\s+state\s*(\S*);$/
                                                m = Regexp.last_match
                                                current = rcs.revision[rev]
                                                current.date = m[1]
                                                current.author = m[2]
                                                current.state = m[3]
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        unless branch.empty?
                                                rcs.revision[rev].branches.push branch
                                                raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                                rcs.revision[branch].diff_base = rev
                                                # we drop the last number from the branch name
                                                rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                                rcs.revision[branch].branch_point = rev
                                        end
                                        # a ; ends the list of branches
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # the head is stored in full, everything
                                                # else as a delta
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        # keep exactly one line per element: the empty
                                        # string left by the closing @ is not a line
                                        rcs.revision[rev].text.replace lines.reject { |l| l.empty? }
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        difflines = lines.dup
                                        # drop the empty string left by the closing @
                                        difflines.pop if difflines.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        raise 'no diff base!' if base.nil?

                                        rcs.revision[rev].text = RCS.apply_diff(rcs.revision[base].text, difflines)
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end
                rcs.export_commits(opts)
        end
end
370
371 require 'getoptlong'
372
# Command-line switches understood by the script.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# Options and file names are handed over in the order they were given, but
# the options apply to every file.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = { :authors => {} }

# Defaults come from the git configuration: every rcs.authorsfile entry is
# loaded, later files overriding earlier ones.
`git config --get-all rcs.authorsfile`.each_line do |configured|
        parse_options[:authors].merge!(load_authors_file(configured.chomp))
end

parse_options[:tag_each_rev] = (`git config --bool rcs.tageachrev`.chomp == 'true')

# The command line overrides the configuration.
opts.each do |opt, arg|
        case opt
        when ''
                # with RETURN_IN_ORDER a non-option argument comes with an
                # empty option name: it is a file to export
                file_list << arg
        when '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--tag-each-rev', '--no-tag-each-rev'
                parse_options[:tag_each_rev] = (opt == '--tag-each-rev')
        when '--rcs-suffixes'
                # TODO
        when '--help'
                usage
                exit
        end
end
426
427 require 'etc'
428
user = Etc.getlogin || ENV['USER']

# Steal username/email data from other init files that may contain the
# information.
#
# The files are tried in order and the first match wins: the value found
# is stored as authors[user]. +authors+ is the hash mapping user names to
# author strings; it has to be passed in because top-level local variables
# are not visible inside a method. Returns true if something was found,
# false otherwise.
def steal_username(user, authors)
        [
                # the user's .hgrc file for a username field
                ['~/.hgrc',   /^\s*username\s*=\s*(["']?)(.*)\1$/,       2],
                # the user's .(g)vimrc for a changelog_username setting
                ['~/.vimrc',  /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
                ['~/.gvimrc', /changelog_username\s*=\s*(["']?)(.*)\1$/, 2],
        ].each do |fn, rx, idx|
                file = File.expand_path fn
                next unless File.readable?(file)
                # read as binary data so that a file in an unexpected encoding
                # cannot make the match raise
                found = rx.match(File.open(file, 'rb') { |io| io.read })
                next unless found
                authors[user] = found[idx].strip
                return true
        end
        false
end

# Guess an author string for the current user, unless the authors files
# already provide one.
if user and not user.empty? and not parse_options[:authors].has_key?(user)
        # ENV values are not modified in place, they may be frozen
        name = ENV['GIT_AUTHOR_NAME'].to_s
        name = `git config user.name`.chomp if name.empty?
        if name.empty?
                begin
                        entry = Etc.getpwnam(user)
                        name = entry.gecos.to_s if entry
                rescue ArgumentError
                        # no passwd entry for this user, leave the name empty
                end
        end

        if name.empty?
                # couldn't find a name, try to steal data from other sources
                steal_username(user, parse_options[:authors])
        else
                # if we found a name, try to find an email too
                email = ENV['GIT_AUTHOR_EMAIL'].to_s
                email = `git config user.email`.chomp if email.empty?

                if email.empty?
                        # couldn't find an email, try to steal data too
                        steal_username(user, parse_options[:authors])
                else
                        # we got both a name and email, fill the info
                        parse_options[:authors][user] = "#{name} <#{email}>"
                end
        end
end
472
if file_list.empty?
        usage
        exit 1
end

# suffix of RCS files
SFX = ',v'

# exit status: set to 1 as soon as one of the requested files has no RCS
# file
status = 0

# Every argument is either an RCS file (name ending in ,v) or a working
# file, whose RCS file is looked for in the RCS subdirectory first and next
# to the working file otherwise. Arguments without an RCS file are reported
# and skipped.
file_list.each do |arg|
        if arg[-2,2] == SFX
                rcsfile = arg.dup
                filename = File.basename(arg, SFX)
                unless File.exist? rcsfile
                        not_found "RCS file #{arg}"
                        status |= 1
                        next
                end
        else
                filename = File.basename(arg)
                path = File.dirname(arg)
                candidates = [
                        File.join(path, 'RCS', filename) + SFX,
                        File.join(path, filename) + SFX
                ]
                rcsfile = candidates.find { |candidate| File.exist? candidate }
                unless rcsfile
                        not_found "RCS file for #{filename} in #{path}"
                        status |= 1
                        next
                end
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status