Import RCS symbols as lightweight git tags
[rcs-fast-export] / rcs-fast-export.rb
1 #!/usr/bin/ruby
2
3 require 'pp'
4
# Print a one-line synopsis of the command line to standard error.
def usage
        message = "#{$0} filename -- fast-export filename's RCS history"
        STDERR.puts(message)
end
8
# Report on standard error that +arg+ (a description of what was being
# looked for) could not be located.
def not_found(arg)
        message = "Could not find #{arg}"
        STDERR.puts(message)
end
12
class Time
        # Build a UTC Time from an RCS date stamp of the form
        # Y.mm.dd.hh.mm.ss.
        #
        # RCS writes only the last two digits of the year for 1900-1999,
        # so a year field shorter than three digits is taken to be 19YY.
        #
        # Raises ArgumentError if +string+ does not have exactly six
        # dot-separated fields or if a field is not a decimal number.
        def Time.rcs(string)
                fields = string.split('.')
                raise ArgumentError, "wrong number of fields for RCS date #{string}" unless fields.length == 6
                # explicit base 10 so that zero-padded fields such as "08"
                # are never mistaken for octal
                numbers = fields.map { |f| Integer(f, 10) }
                numbers[0] += 1900 if fields.first.length < 3
                Time.utc(*numbers)
        end
end
20
# Reader for RCS ",v" files that prints their history on standard output
# as a git fast-import stream (blobs first, then commits and tags).
module RCS
        # strip an optional final ;
        def RCS.clean(arg)
                arg.chomp(';')
        end

        # strip the first and last @, and de-double @@s
        #
        # +arg+ is either a String or an Array of lines; a new object of
        # the same kind is returned and the argument is left untouched.
        # Raises RuntimeError for malformed arrays and unsupported types.
        def RCS.sanitize(arg)
                case arg
                when Array
                        raise 'malformed first line' unless arg.first && arg.first[0,1] == '@'
                        raise 'malformed last line' unless arg.last[-1,1] == '@'
                        # copy every line: a shallow dup of the array would
                        # let sub! modify the caller's strings
                        ret = arg.map { |l| l.dup }
                        ret.first.sub!(/\A@/, '')
                        ret.last.sub!(/@\z/, '')
                        ret.map { |l| l.gsub('@@', '@') }
                when String
                        arg.chomp('@').sub(/\A@/, '').gsub('@@', '@')
                else
                        raise "cannot sanitize #{arg.class}"
                end
        end

        # clean and sanitize
        def RCS.at_clean(arg)
                RCS.sanitize RCS.clean(arg)
        end

        # fast-import mark used for the blob of revision +arg+
        def RCS.blob(arg)
                arg.gsub('.', '0') + ('90'*5)
        end

        # fast-import mark used for the commit of revision +arg+
        def RCS.commit(arg)
                arg.gsub('.', '0') + ('09'*5)
        end

        # The parsed contents of one RCS file: head revision, comment
        # leader, description and a table of revisions keyed by number.
        class File
                attr_accessor :head, :comment, :desc, :revision
                def initialize(fname)
                        @fname = fname.dup
                        @head = nil
                        @comment = nil
                        @desc = []
                        # looking up an unknown revision creates an empty one
                        @revision = Hash.new { |h, r| h[r] = Revision.new(r) }
                end

                # true if +rev+ is known and its admin data (author) has
                # already been read; a revision that was only mentioned
                # by a symbol or a next/branches field does not count
                def has_revision?(rev)
                        @revision.has_key?(rev) && !@revision[rev].author.nil?
                end

                # Print one fast-import commit per revision, each followed
                # by a lightweight tag for every symbol attached to it.
                # A revision is printed only after its parent, so several
                # passes over the table may be needed. Exported revisions
                # are removed from the table.
                #
                # Raises RuntimeError if a pass cannot export anything,
                # which means some parent revision is missing.
                def export_commits
                        counter = 0
                        exported = []
                        until @revision.empty?
                                counter += 1

                                # a string sort is a very good candidate for
                                # export order, getting a miss only for
                                # multi-digit revision components
                                keys = @revision.keys.sort

                                STDERR.puts "commit export loop ##{counter}"
                                STDERR.puts "\t#{exported.length} commits exported so far: #{exported.join(', ')}" unless exported.empty?
                                STDERR.puts "\t#{keys.size} to export: #{keys.join(', ')}"

                                done_before = exported.length

                                keys.each do |key|
                                        rev = @revision[key]
                                        # the parent commit is rev.next if we're on the
                                        # master branch (rev.branch is nil) or
                                        # rev.diff_base otherwise
                                        from = rev.branch.nil? ? rev.next : rev.diff_base
                                        # A commit can only be exported if it has no
                                        # parent, or if the parent has been exported
                                        # already. Skip this commit otherwise
                                        next if from && !exported.include?(from)

                                        branch = rev.branch || 'master'
                                        # TODO map authors to author/email
                                        author = "#{rev.author} <empty>"
                                        date = "#{rev.date.tv_sec} +0000"
                                        # the log is an array of lines; Array#to_s would
                                        # give its inspect form on Ruby >= 1.9
                                        log = rev.log.join

                                        puts "commit refs/heads/#{branch}"
                                        puts "mark :#{RCS.commit key}"
                                        puts "committer #{author} #{date}"
                                        # fast-import wants the size in bytes
                                        puts "data #{log.bytesize}"
                                        puts log unless log.empty?
                                        puts "from :#{RCS.commit from}" if rev.branch_point
                                        puts "M 644 :#{RCS.blob key} #{@fname}"

                                        rev.symbols.each do |sym|
                                                puts "reset refs/tags/#{sym}"
                                                puts "from :#{RCS.commit key}"
                                        end
                                        # TODO option to tag every revision with its revision number

                                        exported.push key
                                end

                                # without this check a missing parent would
                                # keep the loop spinning forever
                                if exported.length == done_before
                                        raise "cannot export #{keys.join(', ')}: parent revision never exported"
                                end

                                exported.each { |k| @revision.delete(k) }
                        end
                end
        end

        # One revision of an RCS file: admin data, log, symbols, the
        # reconstructed text (an array of lines) and its place in the
        # revision graph.
        class Revision
                attr_accessor :rev, :author, :date, :state, :next
                attr_accessor :branches, :log, :text, :symbols
                attr_accessor :branch, :diff_base, :branch_point
                def initialize(rev)
                        @rev = rev
                        @author = nil
                        @date = nil
                        @state = nil
                        @next = nil
                        @branches = []
                        @branch = nil
                        @branch_point = nil
                        @diff_base = nil
                        @log = []
                        @text = []
                        @symbols = []
                end

                # store the date, given as an RCS date string, as a Time
                def date=(str)
                        @date = Time.rcs(str)
                end

                # the fast-import blob command carrying this revision's text
                def blob
                        str = @text.join('')
                        # fast-import wants the size in bytes
                        "blob\nmark :#{RCS.blob @rev}\ndata #{str.bytesize}\n#{str}\n"
                end
        end

        # Read the RCS file +rcsfile+ and print the history of +fname+ as
        # a fast-import stream: a blob for each revision as soon as its
        # text is known, then the commits (see File#export_commits).
        # +opts+ is accepted but currently unused.
        #
        # The reader is a line-driven state machine; +status+ is a stack
        # whose top selects how the current line is handled. @-quoted
        # strings are collected by the :read_lines state, which then pops
        # itself and re-runs the same line so that the state underneath
        # (:desc, :log, :head or :diff) can consume the collected lines.
        def RCS.parse(fname, rcsfile, opts={})
                rcs = RCS::File.new(fname)

                ::File.open(rcsfile, 'r') do |file|
                        status = [:basic]
                        rev = nil
                        lines = []
                        difflines = []
                        file.each_line do |line|
                                case status.last
                                when :basic
                                        command, args = line.split(' ', 2)
                                        next if command.nil? || command.empty?

                                        case command
                                        when 'head'
                                                rcs.head = RCS.clean(args.chomp)
                                        when 'symbols'
                                                status.push :symbols
                                        when 'comment'
                                                rcs.comment = RCS.at_clean(args.chomp)
                                        when /^[0-9.]+$/
                                                rev = command.dup
                                                # the first time a revision number shows up it
                                                # introduces admin data, the second time its
                                                # log and text
                                                if rcs.has_revision?(rev)
                                                        status.push :revision_data
                                                else
                                                        status.push :new_revision
                                                end
                                        when 'desc'
                                                status.push :desc
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                STDERR.puts "Skipping unhandled command #{command.inspect}"
                                        end
                                when :symbols
                                        # own variable, so the current revision is not clobbered
                                        sym, symrev = line.strip.split(':',2)
                                        # the last symbol carries the closing ;
                                        status.pop if symrev.chomp!(';')
                                        rcs.revision[symrev].symbols << sym
                                when :desc
                                        rcs.desc.replace lines.dup
                                        status.pop
                                when :read_lines
                                        # we sanitize lines as we read them

                                        actual_line = line.dup

                                        # the first line must begin with a @, which we strip
                                        if lines.empty?
                                                ats = line.match(/^@+/)
                                                raise 'malformed line' unless ats
                                                actual_line.replace line.sub(/^@/,'')
                                        end

                                        # if the line ends with an ODD number of @, it's the
                                        # last line -- we work on actual_line so that content
                                        # such as @\n or @ work correctly (they would be
                                        # encoded respectively as ['@@@\n','@\n'] and
                                        # ['@@@@\n']
                                        ats = actual_line.chomp.match(/@+$/)
                                        nomore = ats && ats[0].length.odd?
                                        if nomore
                                                actual_line.replace actual_line.chomp.sub(/@$/,'')
                                        end
                                        lines << actual_line.gsub('@@','@')
                                        if nomore
                                                # hand the same line to the state that asked
                                                # for the string
                                                status.pop
                                                redo
                                        end
                                when :new_revision
                                        case line.chomp
                                        when /^date\s+(\S+);\s+author\s+(\S+);\sstate\s(\S+);$/
                                                rcs.revision[rev].date = $1
                                                rcs.revision[rev].author = $2
                                                rcs.revision[rev].state = $3
                                        when 'branches'
                                                status.push :branches
                                        when 'branches;'
                                                next
                                        when /^next\s+(\S+)?;$/
                                                nxt = rcs.revision[rev].next = $1
                                                next unless nxt
                                                raise "multiple diff_bases for #{nxt}" unless rcs.revision[nxt].diff_base.nil?
                                                rcs.revision[nxt].diff_base = rev
                                                rcs.revision[nxt].branch = rcs.revision[rev].branch
                                        else
                                                status.pop
                                        end
                                when :branches
                                        candidate = line.split(';',2)
                                        branch = candidate.first.strip
                                        rcs.revision[rev].branches.push branch
                                        raise "multiple diff_bases for #{branch}" unless rcs.revision[branch].diff_base.nil?
                                        rcs.revision[branch].diff_base = rev
                                        # we drop the last number from the branch name
                                        rcs.revision[branch].branch = branch.sub(/\.\d+$/,'.x')
                                        rcs.revision[branch].branch_point = rev
                                        # a ; on the line closes the branches list
                                        status.pop if candidate.length > 1
                                when :revision_data
                                        case line.chomp
                                        when 'log'
                                                status.push :log
                                                lines.clear
                                                status.push :read_lines
                                        when 'text'
                                                # only the head stores full text, every other
                                                # revision stores a delta against its diff base
                                                if rev == rcs.head
                                                        status.push :head
                                                else
                                                        status.push :diff
                                                end
                                                lines.clear
                                                status.push :read_lines
                                        else
                                                status.pop
                                        end
                                when :log
                                        rcs.revision[rev].log.replace lines.dup
                                        status.pop
                                when :head
                                        rcs.revision[rev].text.replace lines.dup
                                        puts rcs.revision[rev].blob
                                        status.pop
                                when :diff
                                        difflines.replace lines.dup
                                        difflines.pop if difflines.last.empty?
                                        base = rcs.revision[rev].diff_base
                                        if base.nil?
                                                # diagnostics go to stderr: stdout carries the
                                                # fast-import stream
                                                STDERR.puts "revision #{rev} has no diff base"
                                                raise 'no diff base!'
                                        end
                                        # deep copy; slot 0 stands for "before the first
                                        # line", so RCS line numbers index the buffer
                                        # directly and 'a0' adds at the top
                                        buffer = ['']
                                        rcs.revision[base].text.each { |l| buffer << l.dup }

                                        adding = false
                                        index = -1
                                        count = -1

                                        while l = difflines.shift
                                                if adding
                                                        # added text goes right after line +index+
                                                        buffer[index] << l
                                                        count -= 1
                                                        adding = false unless count > 0
                                                        next
                                                end

                                                l.chomp!
                                                md = /^([ad])(\d+) (\d+)$/.match(l)
                                                raise 'malformed diff' unless md
                                                index = md[2].to_i
                                                count = md[3].to_i
                                                case md[1].intern
                                                when :d
                                                        # we replace them with empty string so that 'a' commands
                                                        # referring to the same line work properly
                                                        while count > 0
                                                                buffer[index].replace ''
                                                                index += 1
                                                                count -= 1
                                                        end
                                                when :a
                                                        adding = true
                                                end
                                        end

                                        # split again into one element per line, so that
                                        # deltas based on this revision see correct line
                                        # numbers (this also drops the emptied slots)
                                        rcs.revision[rev].text = buffer.join.lines.to_a
                                        puts rcs.revision[rev].blob
                                        status.pop
                                else
                                        STDERR.puts "Unknown status #{status.last}"
                                        exit 1
                                end
                        end
                end
                rcs.export_commits
        end
end
335
# Command line entry point: the single argument is either an RCS file
# (name ending in ,v) or a working file whose RCS file is looked up.
arg = ARGV[0]

if arg.nil?
        usage
        exit 1
end

SFX = ',v'

if arg[-2,2] == SFX
        # File.exist? -- the File.exists? alias is gone since Ruby 3.2
        unless File.exist? arg
                not_found "RCS file #{arg}"
                exit 1
        end
        rcsfile = arg.dup
        filename = File.basename(arg, SFX)
else
        filename = File.basename(arg)
        path = File.dirname(arg)
        # look in the RCS subdirectory first, then next to the working file
        candidates = [
                File.join(path, 'RCS', filename) + SFX,
                File.join(path, filename) + SFX
        ]
        rcsfile = candidates.find { |candidate| File.exist? candidate }
        unless rcsfile
                not_found "RCS file for #{filename} in #{path}"
                # stop here instead of failing later while opening the file
                exit 1
        end
end

RCS.parse(filename, rcsfile)