Also read config option rcs.tageachrev
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print the command-line help text to standard error.
#
# The first line of the text names the running script through $0.
def usage
        STDERR.puts <<EOM
#{$0} [options] file [file ...]

Fast-export the RCS history of one or more files.

Options:
        --help, -h, -?          display this help text
        --authors-file, -A      specify a file containing username = Full Name <email> mappings
        --[no-]tag-each-rev     [do not] create a lightweight tag for each RCS revision

EOM
end
18
# Tell the user, on standard error, that +arg+ could not be located.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
22
# Load a username-to-author mapping from an authors file.
#
# Every mapping line has the form
#     username = Full Name <email>
# and is split on its first '='; both halves are stripped of surrounding
# whitespace. Blank lines are ignored, and other lines without a '=' are
# skipped with a warning on standard error. A username that appears more
# than once keeps its last definition, and the redefinition is reported on
# standard error.
#
# fn:: path of the authors file (expanded with File.expand_path)
#
# Returns the hash of username => author string. When the file cannot be
# opened or read, the failure is reported through not_found and whatever
# was collected so far (possibly nothing) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                uname, author = line.split('=', 2)
                                if author.nil?
                                        # no '=' on this line, so there is nothing to map
                                        STDERR.puts "Skipping malformed authors line #{line.strip.inspect}" unless line.strip.empty?
                                        next
                                end
                                uname = uname.strip
                                author = author.strip
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError, ArgumentError
                # only failures to locate or read the file end up here;
                # ArgumentError covers paths File.expand_path cannot expand
                not_found(fn)
        end
        hash
end
41
class Time
        # Build a UTC Time from an RCS date string.
        #
        # string:: six dot-separated numeric fields,
        #          year.month.day.hour.minute.second
        #
        # RCS stores the years 1900 through 1999 with their last two digits
        # only, so a year below 100 has 1900 added to it.
        #
        # Raises ArgumentError when the string does not have exactly six
        # fields or when a field is not a decimal number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                # explicit base 10 so that zero-padded fields such as "08" parse as 8
                numbers = fields.map { |field| Integer(field, 10) }
                numbers[0] += 1900 if numbers[0] < 100
                Time.utc(*numbers)
        end
end
49
50 module RCS
51         # strip an optional final ;
52         def RCS.clean(arg)
53                 arg.chomp(';')
54         end
55
56         # strip the first and last @, and de-double @@s
57         def RCS.sanitize(arg)
58                 case arg
59                 when Array
60                         ret = arg.dup
61                         raise 'malformed first line' unless ret.first[0,1] == '@'
62                         raise 'malformed last line' unless ret.last[-1,1] == '@'
63                         ret.first.sub!(/^@/,'')
64                         ret.last.sub!(/@$/,'')
65                         ret.map { |l| l.gsub('@@','@') }
66                 when String
67                         arg.chomp('@').sub(/^@/,'').gsub('@@','@')
68                 else
69                         raise
70                 end
71         end
72
73         # clean and sanitize
74         def RCS.at_clean(arg)
75                 RCS.sanitize RCS.clean(arg)
76         end
77
78         def RCS.blob(arg)
79                 arg.gsub('.', '0') + ('90'*5)
80         end
81
82         def RCS.commit(arg)
83                 arg.gsub('.', '0') + ('09'*5)
84         end
85
86         class File
87                 attr_accessor :head, :comment, :desc, :revision
88                 def initialize(fname)
89                         @fname = fname.dup
90                         @head = nil
91                         @comment = nil
92                         @desc = []
93                         @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
94                 end
95
96                 def has_revision?(rev)
97                         @revision.has_key?(rev) and not @revision[rev].author.nil?
98                 end
99
100                 def export_commits(opts={})
101                         counter = 0
102                         exported = []
103                         until @revision.empty?
104                                 counter += 1
105
106                                 # a string sort is a very good candidate for
107                                 # export order, getting a miss only for
108                                 # multi-digit revision components
109                                 keys = @revision.keys.sort
110
111                                 STDERR.puts "commit export loop ##{counter}"
112                                 STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
113                                 STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"
114
115                                 keys.each do |key|
116                                         rev = @revision[key]
117                                         # the parent commit is rev.next if we're on the
118                                         # master branch (rev.branch is nil) or
119                                         # rev.diff_base otherwise
120                                         from = rev.branch.nil? ? rev.next : rev.diff_base
121                                         # A commit can only be exported if it has no
122                                         # parent, or if the parent has been exported
123                                         # already. Skip this commit otherwise
124                                         if from and not exported.include? from
125                                                 next
126                                         end
127
128                                         branch = rev.branch || 'master'
129                                         author = opts[:authors][rev.author] || "#{rev.author} <empty>"
130                                         date = "#{rev.date.tv_sec} +0000"
131                                         log = rev.log.to_s
132
133                                         puts "commit refs/heads/#{branch}"
134                                         puts "mark :#{RCS.commit key}"
135                                         puts "committer #{author} #{date}"
136                                         puts "data #{log.length}"
137                                         puts log unless log.empty?
138                                         puts "from :#{RCS.commit from}" if rev.branch_point
139                                         puts "M 644 :#{RCS.blob key} #{@fname}"
140
141                                         rev.symbols.each do |sym|
142                                                 puts "reset refs/tags/#{sym}"
143                                                 puts "from :#{RCS.commit key}"
144                                         end
145                                         if opts[:tag_each_rev]
146                                                 puts "reset refs/tags/#{key}"
147                                                 puts "from :#{RCS.commit key}"
148                                         end
149
150                                         exported.push key
151                                 end
152                                 exported.each { |k| @revision.delete(k) }
153                         end
154                 end
155         end
156
157         class Revision
158                 attr_accessor :rev, :author, :date, :state, :next
159                 attr_accessor :branches, :log, :text, :symbols
160                 attr_accessor :branch, :diff_base, :branch_point
161                 def initialize(rev)
162                         @rev = rev
163                         @author = nil
164                         @date = nil
165                         @state = nil
166                         @next = nil
167                         @branches = []
168                         @branch = nil
169                         @branch_point = nil
170                         @diff_base = nil
171                         @log = []
172                         @text = []
173                         @symbols = []
174                 end
175
176                 def date=(str)
177                         @date = Time.rcs(str)
178                 end
179
180                 def blob
181                         str = @text.join('')
182                         ret = "blob\nmark :#{RCS.blob @rev}\ndata #{str.length}\n#{str}\n"
183                         ret
184                 end
185         end
186
187         def RCS.parse(fname, rcsfile, opts={})
188                 rcs = RCS::File.new(fname)
189
190                 ::File.open(rcsfile, 'r') do |file|
191                         status = [:basic]
192                         rev = nil
193                         lines = []
194                         difflines = []
195                         file.each_line do |line|
196                                 case status.last
197                                 when :basic
198                                         command, args = line.split($;,2)
199                                         next if command.empty?
200
201                                         case command
202                                         when 'head'
203                                                 rcs.head = RCS.clean(args.chomp)
204                                         when 'symbols'
205                                                 status.push :symbols
206                                         when 'comment'
207                                                 rcs.comment = RCS.at_clean(args.chomp)
208                                         when /^[0-9.]+$/
209                                                 rev = command.dup
210                                                 if rcs.has_revision?(rev)
211                                                         status.push :revision_data
212                                                 else
213                                                         status.push :new_revision
214                                                 end
215                                         when 'desc'
216                                                 status.push :desc
217                                                 lines.clear
218                                                 status.push :read_lines
219                                         else
220                                                 STDERR.puts "Skipping unhandled command #{command.inspect}"
221                                         end
222                                 when :symbols
223                                         sym, rev = line.strip.split(':',2);
224                                         status.pop if rev.chomp!(';')
225                                         rcs.revision[rev].symbols << sym
226                                 when :desc
227                                         rcs.desc.replace lines.dup
228                                         status.pop
229                                 when :read_lines
230                                         # we sanitize lines as we read them
231
232                                         actual_line = line.dup
233
234                                         # the first line must begin with a @, which we strip
235                                         if lines.empty?
236                                                 ats = line.match(/^@+/)
237                                                 raise 'malformed line' unless ats
238                                                 actual_line.replace line.sub(/^@/,'')
239                                         end
240
241                                         # if the line ends with an ODD number of @, it's the
242                                         # last line -- we work on actual_line so that content
243                                         # such as @\n or @ work correctly (they would be
244                                         # encoded respectively as ['@@@\n','@\n'] and
245                                         # ['@@@@\n']
246                                         ats = actual_line.chomp.match(/@+$/)
247                                         if nomore = (ats && Regexp.last_match(0).length.odd?)
248                                                 actual_line.replace actual_line.chomp.sub(/@$/,'')
249                                         end
250                                         lines << actual_line.gsub('@@','@')
251                                         if nomore
252                                                 status.pop
253                                                 redo
254                                         end
255                                 when :new_revision
256                                         case line.chomp
257                                         when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
258                                                 rcs.revision[rev].date = $1
259                                                 rcs.revision[rev].author = $2
260                                                 rcs.revision[rev].state = $3
261                                         when 'branches'
262                                                 status.push :branches
263                                         when 'branches;'
264                                                 next
265                                         when /^next\s+(\S+)?;$/
266                                                 nxt = rcs.revision[rev].next = $1
267                                                 next unless nxt
268                                                 raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
269                                                 rcs.revision[nxt].diff_base = rev
270                                                 rcs.revision[nxt].branch = rcs.revision[rev].branch
271                                         else
272                                                 status.pop
273                                         end
274                                 when :branches
275                                         candidate = line.split(';',2)
276                                         branch = candidate.first.strip
277                                         rcs.revision[rev].branches.push branch
278                                         raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
279                                         rcs.revision[branch].diff_base = rev
280                                         # we drop the last number from the branch name
281                                         rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
282                                         rcs.revision[branch].branch_point = rev
283                                         status.pop if candidate.length > 1
284                                 when :revision_data
285                                         case line.chomp
286                                         when 'log'
287                                                 status.push :log
288                                                 lines.clear
289                                                 status.push :read_lines
290                                         when 'text'
291                                                 if rev == rcs.head
292                                                         status.push :head
293                                                 else
294                                                         status.push :diff
295                                                 end
296                                                 lines.clear
297                                                 status.push :read_lines
298                                         else
299                                                 status.pop
300                                         end
301                                 when :log
302                                         rcs.revision[rev].log.replace lines.dup
303                                         status.pop
304                                 when :head
305                                         rcs.revision[rev].text.replace lines.dup
306                                         puts rcs.revision[rev].blob
307                                         status.pop
308                                 when :diff
309                                         difflines.replace lines.dup
310                                         difflines.pop if difflines.last.empty?
311                                         base = rcs.revision[rev].diff_base
312                                         unless rcs.revision[base].text
313                                                 pp rcs
314                                                 puts rev, base
315                                                 raise 'no diff base!'
316                                         end
317                                         # deep copy
318                                         buffer = []
319                                         rcs.revision[base].text.each { |l| buffer << l.dup }
320
321                                         adding = false
322                                         index = -1
323                                         count = -1
324
325                                         while l = difflines.shift
326                                                 if adding
327                                                         buffer[index] << l
328                                                         count -= 1
329                                                         adding = false unless count > 0
330                                                         next
331                                                 end
332
333                                                 l.chomp!
334                                                 raise 'malformed diff' unless l =~ /^([ad])(\d+) (\d+)$/
335                                                 index = $2.to_i-1
336                                                 count = $3.to_i
337                                                 case $1.intern
338                                                 when :d
339                                                         # we replace them with empty string so that 'a' commands
340                                                         # referring to the same line work properly
341                                                         while count > 0
342                                                                 buffer[index].replace ''
343                                                                 index += 1
344                                                                 count -= 1
345                                                         end
346                                                 when :a
347                                                         adding = true
348                                                 end
349                                         end
350
351                                         # remove empty lines
352                                         buffer.delete_if { |l| l.empty? }
353
354                                         rcs.revision[rev].text = buffer
355                                         puts rcs.revision[rev].blob
356                                         status.pop
357                                 else
358                                         STDERR.puts "Unknown status #{status.last}"
359                                         exit 1
360                                 end
361                         end
362                 end
363                 rcs.export_commits(opts)
364         end
365 end
366
367 require 'getoptlong'
368
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS (accepted, but not acted upon yet)
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--help', '-h', '-?', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = {
        :authors => {},
}

# Run `git config` with the given arguments and return its output; the
# output is empty when the git command cannot be started at all
git_config = lambda do |args|
        begin
                `git config #{args}`
        rescue SystemCallError
                ''
        end
end

# Read config options: the command-line options handled below are applied
# after these
git_config.call('--get-all rcs.authorsfile').each_line do |fn|
        parse_options[:authors].merge! load_authors_file(fn.chomp)
end

parse_options[:tag_each_rev] = git_config.call('--bool rcs.tageachrev').chomp == 'true'

opts.each do |opt, arg|
        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                # turns tagging off again when rcs.tageachrev or an earlier
                # --tag-each-rev turned it on
                parse_options[:tag_each_rev] = false
        when ''
                # with RETURN_IN_ORDER, non-option arguments come with an
                # empty option name
                file_list << arg
        when '--help'
                usage
                exit
        end
end

if file_list.empty?
        usage
        exit 1
end
427
# suffix of RCS files
SFX = ',v'

# exit status of the script: non-zero once an RCS file could not be found
status = 0

file_list.each do |arg|
        if arg[-2,2] == SFX
                # the RCS file itself was named
                filename = File.basename(arg, SFX)
                rcsfile = arg.dup
                description = "RCS file #{arg}"
        else
                # a working file was named: its RCS file is looked for in
                # the RCS subdirectory first, then next to the file
                filename = File.basename(arg)
                path = File.dirname(arg)
                rcsfile = File.join(path, 'RCS', filename) + SFX
                rcsfile = File.join(path, filename) + SFX unless File.exist? rcsfile
                description = "RCS file for #{filename} in #{path}"
        end

        unless File.exist? rcsfile
                # there is nothing to parse: report it and go on with the
                # other files
                not_found description
                status |= 1
                next
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status