Expand authorsfile path
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print a one-line synopsis of how to invoke this script on standard error.
def usage
        synopsis = "#{$0} filename -- fast-export filename's RCS history"
        STDERR.puts(synopsis)
end
8
# Report on standard error that +arg+ (a file name or a short description of
# what was being looked for) could not be found.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
12
# Load an authors file and return a hash that maps usernames to author
# names & emails.
#
# Every meaningful line has the form "username = Author Name <email>": the
# text before the first '=' is the username, everything after it is the
# author string, and both are stripped of surrounding whitespace. Blank lines
# are ignored; a line without '=' is reported on STDERR and skipped, so one
# bad line no longer aborts the whole file. A username that appears twice is
# reported on STDERR and the later definition wins.
#
# fn:: path of the authors file (expanded with File.expand_path, so a leading
#      "~" is honoured)
#
# Returns the hash of the entries read. If the file cannot be opened or read,
# the problem is reported through not_found and whatever was collected so far
# (possibly nothing) is returned.
def load_authors_file(fn)
        hash = {}
        begin
                File.open(File.expand_path(fn)) do |io|
                        io.each_line do |line|
                                next if line.strip.empty?
                                uname, author = line.split('=', 2)
                                if author.nil?
                                        STDERR.puts "Ignoring malformed line in authors file #{fn}: #{line.chomp}"
                                        next
                                end
                                uname.strip!
                                author.strip!
                                STDERR.puts "Username #{uname} redefined to #{author}" if hash.has_key? uname
                                hash[uname] = author
                        end
                end
        rescue SystemCallError, IOError, ArgumentError
                # SystemCallError covers the Errno::* family (missing file,
                # permissions, ...); ArgumentError is what File.expand_path
                # raises for an unknown "~user".
                not_found(fn)
        end
        hash
end
31
class Time
        # Build a UTC Time from an RCS date string of the form
        # "YY.MM.DD.hh.mm.ss" or "YYYY.MM.DD.hh.mm.ss".
        #
        # RCS writes the years 1900-1999 with two digits only and later years
        # in full, so a year below 100 is taken to be 19YY.
        #
        # Raises ArgumentError if the string does not have exactly six
        # dot-separated fields or if the year is not a decimal number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                year = Integer(fields.first, 10)
                year += 1900 if year < 100
                Time.utc(year, *fields[1..-1])
        end
end
39
# Parser for RCS ",v" files. RCS.parse reads one file, prints a blob for
# every revision while parsing and finally prints the commits that tie the
# blobs together (see RCS::File#export_commits).
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # +arg+ is either a String or an Array of lines. A new object is
        # returned; the argument (and the strings inside an Array argument)
        # are left untouched. Raises if an Array does not start and end with
        # an @, or if +arg+ is neither a String nor an Array.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' unless arg.first && arg.first[0,1] == '@'
                        raise 'malformed last line' unless arg.last[-1,1] == '@'
                        # copy every line: the in-place edits below must not
                        # leak into the caller's strings
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/\A@/,'')
                        ret.last.sub!(/@\z/,'')
                        ret.map { |l| l.gsub('@@','@') }
                when String
                        arg.chomp('@').sub(/\A@/,'').gsub('@@','@')
                else
                        raise "cannot sanitize #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # mark used for the blob of revision +arg+: the revision number with
        # its dots turned into zeros, followed by '90' five times
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # mark used for the commit of revision +arg+: the revision number with
        # its dots turned into zeros, followed by '09' five times
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # Apply an RCS delta to +base_lines+ and return the resulting text as
        # an array of lines.
        #
        # +difflines+ holds the delta: "aN C" adds the C lines that follow it
        # after line N of the base text (N == 0 means before the first line),
        # "dN C" deletes C lines starting at line N. Line numbers always refer
        # to the base text. Neither argument is modified. Raises
        # 'malformed diff' for anything that does not fit this scheme.
        def RCS.apply_diff(base_lines, difflines)
                # slot 0 collects what is added before the first line, slot N
                # holds line N of the base text plus whatever is added after it
                buffer = ['']
                base_lines.each { |l| buffer << l.dup }

                pending = difflines.dup
                while l = pending.shift
                        m = /\A([ad])(\d+) (\d+)\z/.match(l.chomp)
                        raise 'malformed diff' unless m
                        index = m[2].to_i
                        count = m[3].to_i
                        if m[1] == 'd'
                                # deleted lines are emptied rather than removed
                                # so that later commands still find every base
                                # line in its original slot
                                raise 'malformed diff' if index < 1
                                count.times do |i|
                                        raise 'malformed diff' if buffer[index + i].nil?
                                        buffer[index + i].replace ''
                                end
                        else
                                raise 'malformed diff' if buffer[index].nil?
                                count.times do
                                        added = pending.shift
                                        raise 'malformed diff' if added.nil?
                                        buffer[index] << added
                                end
                        end
                end

                # split again so that every element is exactly one line, which
                # is what the next delta in the chain relies on
                buffer.join.each_line.to_a
        end
        private_class_method :apply_diff

        # One RCS file: its head revision, comment leader, description and
        # the revisions found in it, indexed by revision number.
        class File
                attr_accessor :head, :comment, :desc, :revision
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        # looking up an unknown revision creates an empty
                        # placeholder for it
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # true if +rev+ is known and its author has been read, i.e. it
                # is more than a placeholder
                def has_revision?(rev)
                        @revision.has_key?(rev) && !@revision[rev].author.nil?
                end

                # Print the commits for all revisions on standard output and
                # remove them from the revision table.
                #
                # Recognized options:
                # :authors::      hash mapping RCS usernames to author strings;
                #                 users missing from it (or all users, if the
                #                 option is absent) become "username <empty>"
                # :tag_each_rev:: if true, every revision is also tagged with
                #                 its revision number
                #
                # Revisions that were referenced (for instance by a symbol)
                # but never defined are reported on STDERR and skipped. Raises
                # if some revisions can never be exported because their parent
                # is missing.
                def export_commits(opts={})
                        authors = opts[:authors] || {}
                        counter = 0
                        exported = []

                        # placeholders have no date, author or text: there is
                        # nothing to commit for them
                        @revision.keys.each do |key|
                                next if has_revision?(key)
                                STDERR.puts "Skipping revision #{key}: referenced but not defined"
                                @revision.delete(key)
                        end

                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                exported_before = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from && !exported.include?(from)

                                        branch = rev.branch || 'master'
                                        author = authors[rev.author] || "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is kept as an array of lines
                                        log = Array(rev.log).join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        # the data command takes a byte count
                                        puts "data #{log.bytesize}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        if opts[:tag_each_rev]
                                                puts "reset refs/tags/#{key}"
                                                puts "from :#{RCS.commit key}"
                                        end

                                        exported.push key
                                end

                                # a pass that exports nothing would repeat forever
                                if exported.length == exported_before
                                        raise "cannot export #{keys.join(', ')}: parent revision missing"
                                end

                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # A single revision: the metadata read from the RCS file plus its
        # full text as an array of lines.
        class Revision
                attr_accessor :rev, :author, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                attr_reader :date
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # set the date from an RCS date string (see Time.rcs)
                def date=(str)
                        @date = Time.rcs(str)
                end

                # the blob command for this revision's text; the size given
                # to data is a byte count
                def blob
                        str = @text.join('')
                        "blob\nmark :#{RCS.blob @rev}\ndata #{str.bytesize}\n#{str}\n"
                end
        end

        # Parse the RCS file +rcsfile+, printing the blob of every revision
        # as soon as its text is known, then print the commits through
        # RCS::File#export_commits, to which +opts+ is handed over.
        #
        # +fname+ is the name the file gets in the exported history.
        #
        # The file is read line by line by a small state machine: +status+ is
        # a stack of states and its last element decides how the current line
        # is interpreted.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'r') do |file|
                        status = [:basic]
                        rev = nil
                        lines = []
                        difflines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        # first word and rest of the line
                                        command, args = line.split(' ', 2)
                                        next if command.nil? || command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                # a revision number opens the metadata
                                                # of a revision the first time it is
                                                # seen, its log and text the second
                                                rev = command.dup
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        sym, symrev = line.strip.split(':',2)
                                        raise "malformed symbol line #{line.inspect}" if symrev.nil?
                                        # the last symbol is followed by a ;
                                        status.pop if symrev.chomp!(';')
                                        rcs.revision[symrev].symbols << sym
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                ats = line.match(/^@+/)
                                                raise 'malformed line' unless ats
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        if nomore = (ats && Regexp.last_match(0).length.odd?)
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                # hand the collected lines to the state
                                                # that asked for them
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
                                                rcs.revision[rev].date = $1
                                                rcs.revision[rev].author = $2
                                                rcs.revision[rev].state = $3
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # only the head is stored in full, all
                                                # other revisions are stored as deltas
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        difflines.replace lines.dup
                                        difflines.pop if difflines.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        if base.nil?
                                                # standard output carries the export
                                                # stream, so diagnostics go to STDERR
                                                STDERR.puts "revision #{rev} has no diff base"
                                                raise 'no diff base!'
                                        end

                                        rcs.revision[rev].text = apply_diff(rcs.revision[base].text, difflines)
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end
                rcs.export_commits(opts)
        end
end
356
357 require 'getoptlong'
358
# Command line driver: collect the options and the files to export, load the
# author maps, then export every file in turn. The exit status is non-zero if
# the RCS file of some argument could not be found.
opts = GetoptLong.new(
        # Authors file, like git-svn and git-cvsimport, more than one can be
        # specified
        ['--authors-file', '-A', GetoptLong::REQUIRED_ARGUMENT],
        # RCS file suffix, like RCS
        ['--rcs-suffixes', '-x', GetoptLong::REQUIRED_ARGUMENT],
        # tag each revision?
        ['--tag-each-rev', GetoptLong::NO_ARGUMENT],
        ['--no-tag-each-rev', GetoptLong::NO_ARGUMENT]
)

# We read options in order, but they apply to all passed parameters.
# TODO maybe they should only apply to the following, unless there's only one
# file?
opts.ordering = GetoptLong::RETURN_IN_ORDER

file_list = []
parse_options = {
        :authors => Hash.new,
}

opts.each do |opt, arg|
        case opt
        when '--authors-file'
                authors = load_authors_file(arg)
                redef = parse_options[:authors].keys & authors.keys
                STDERR.puts "Authors file #{arg} redefines #{redef.join(', ')}" unless redef.empty?
                parse_options[:authors].merge!(authors)
        when '--rcs-suffixes'
                # TODO
        when '--tag-each-rev'
                parse_options[:tag_each_rev] = true
        when '--no-tag-each-rev'
                # this is the default, which is fine since the missing key
                # (default) returns nil which is false in Ruby
                parse_options[:tag_each_rev] = false
        when ''
                # with RETURN_IN_ORDER, non-option arguments are returned
                # under an empty option name
                file_list << arg
        end
end

if file_list.empty?
        usage
        exit 1
end

SFX = ',v'

status = 0

# Read config options; a git that cannot be run simply contributes no
# authors files
configured_authors = begin
        `git config --get-all rcs.authorsfile`
rescue SystemCallError
        ''
end

configured_authors.each_line do |fn|
        authors = load_authors_file(fn.chomp)
        # Add but don't overwrite
        authors.each do |k, v|
                parse_options[:authors][k] ||= v
        end
end

file_list.each do |arg|
        if arg.end_with?(SFX)
                # the argument names the RCS file itself
                unless File.exist? arg
                        not_found "RCS file #{arg}"
                        status |= 1
                        next
                end
                rcsfile = arg.dup
                filename = File.basename(arg, SFX)
        else
                # the argument names the working file: look for its RCS file
                # in the RCS subdirectory first, then next to the file
                filename = File.basename(arg)
                path = File.dirname(arg)
                rcsfile = File.join(path, 'RCS', filename) + SFX
                unless File.exist? rcsfile
                        rcsfile.replace File.join(path, filename) + SFX
                        unless File.exist? rcsfile
                                not_found "RCS file for #{filename} in #{path}"
                                status |= 1
                                next
                        end
                end
        end

        RCS.parse(filename, rcsfile, parse_options)
end

exit status