Commit | Line | Data |
---|---|---|
3328aced | 1 | #!/usr/bin/perl |
2b72ccb2 PK |
2 | # |
3 | # Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se> | |
4 | # | |
5 | # ------------------------------------------------------------------------ | |
6 | # | |
7 | # This program is free software; you can redistribute it and/or modify | |
8 | # it under the terms of the GNU General Public License as published by | |
9 | # the Free Software Foundation. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, | |
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | # GNU General Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License | |
17 | # along with this program; if not, write to the Free Software | |
18 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
19 | # | |
20 | # ------------------------------------------------------------------------ | |
21 | ||
22 | =pod | |
23 | ||
24 | =head1 NAME | |
25 | ||
26 | import-directories - Import bits and pieces to Git. | |
27 | ||
28 | =head1 SYNOPSIS | |
29 | ||
30 | B<import-directories.perl> F<configfile> F<outputfile> | |
31 | ||
32 | =head1 DESCRIPTION | |
33 | ||
34 | Script to import projects that have been "version controlled" by copying | |
35 | the source directory to a new location and editing it there into a real | |
36 | version control system. Handles projects with arbitrary branching | |
37 | and version trees, taking a file describing the inputs and generating a | |
38 | file compatible with the L<git-fast-import(1)> format. | |
39 | ||
40 | =head1 CONFIGURATION FILE | |
41 | ||
42 | =head2 Format | |
43 | ||
44 | The configuration file is based on the standard I<.ini> format. | |
45 | ||
46 | ; Comments start with semi-colons | |
47 | [section] | |
48 | key=value | |
49 | ||
50 | Please see below for information on how to escape special characters. | |
51 | ||
52 | =head2 Global configuration | |
53 | ||
54 | Global configuration is done in the B<[config]> section, which should be | |
55 | the first section in the file. Configuration can be changed by | |
56 | repeating configuration sections later on. | |
57 | ||
58 | [config] | |
59 | ; configure conversion of CRLFs. "convert" means that all CRLFs | |
60 | ; should be converted into LFs (suitable for the core.autocrlf | |
61 | ; setting set to true in Git). "none" means that all data is | |
62 | ; treated as binary. | |
63 | crlf=convert | |
64 | ||
65 | =head2 Revision configuration | |
66 | ||
67 | Each revision that is to be imported is described in three | |
68 | sections. Revisions should be defined in topological order, so | |
69 | that a revision's parent has always been defined when a new revision | |
70 | is introduced. All the sections for one revision must be defined | |
71 | before defining the next revision. | |
72 | ||
73 | Each revision is assigned a unique numerical identifier. The | |
74 | numbers do not need to be consecutive, nor monotonically | |
75 | increasing. | |
76 | ||
77 | For instance, if your configuration file contains only the two | |
78 | revisions 4711 and 42, where 4711 is the initial commit, the | |
79 | only requirement is that 4711 is completely defined before 42. | |
80 | ||
81 | =pod | |
82 | ||
83 | =head3 Revision description section | |
84 | ||
85 | A section whose section name is just an integer gives meta-data | |
86 | about the revision. | |
87 | ||
88 | [3] | |
89 | ; author sets the author of the revisions | |
90 | author=Peter Krefting <peter@softwolves.pp.se> | |
91 | ; branch sets the branch that the revision should be committed to | |
92 | branch=master | |
93 | ; parent describes the revision that is the parent of this commit | |
94 | ; (optional) | |
95 | parent=1 | |
96 | ; merges describes a revision that is merged into this commit | |
97 | ; (optional; can be repeated) | |
98 | merges=2 | |
99 | ; selects one file to take the timestamp from, named by the source | |
100 | ; path given for it in the .files section (optional; if unspecified, | |
101 | ; the most recent file from the .files section is used) | |
102 | timestamp=ver-3.00/source.c | |
103 | ||
104 | =head3 Revision contents section | |
105 | ||
106 | A section whose section name is an integer followed by B<.files> | |
107 | describes all the files included in this revision. If a file that | |
108 | was available previously is not included in this revision, it will | |
109 | be removed. | |
110 | ||
111 | If an on-disk revision is incomplete, you can point to files from | |
01689909 JL |
112 | a previous revision. There are no restrictions on where the source |
113 | files are located, nor on their names. | |
2b72ccb2 PK |
114 | |
115 | [3.files] | |
116 | ; the key is the path inside the repository, the value is the path | |
117 | ; as seen from the importer script. | |
118 | source.c=ver-3.00/source.c | |
119 | source.h=ver-2.99/source.h | |
120 | readme.txt=ver-3.00/introduction to the project.txt | |
121 | ||
122 | File names are treated as byte strings (but please see below on | |
123 | quoting rules), and should be stored in the configuration file in | |
124 | the encoding that should be used in the generated repository. | |
125 | ||
126 | =head3 Revision commit message section | |
127 | ||
128 | A section whose section name is an integer followed by B<.message> | |
129 | gives the commit message. This section is read verbatim, up until | |
130 | the beginning of the next section. As such, a commit message may not | |
131 | contain a line that begins with an opening square bracket ("[") and | |
132 | ends with a closing square bracket ("]"), unless they are surrounded | |
133 | by whitespace or other characters. | |
134 | ||
135 | [3.message] | |
136 | Implement foobar. | |
137 | ; trailing blank lines are ignored. | |
138 | ||
139 | =cut | |
140 | ||
141 | # Globals | |
142 | use strict; | |
3328aced | 143 | use warnings; |
2b72ccb2 PK |
144 | use integer; |
# Parser and importer state shared by the whole script.

# 1 when CRLF line endings are converted to LF on import, 0 when file
# contents are copied unmodified.
my $crlfmode = 0;

# Revision identifiers in the order in which they were defined.
my @revs;

# Revision identifier -> fast-import mark of its commit.
my %revmap;

# Per-commit data, all keyed by the commit mark.
my %message;        # commit message text
my %files;          # hash reference: path in repository -> blob mark
my %author;         # value of the "author" key
my %branch;         # value of the "branch" key
my %parent;         # revision identifier given by the "parent" key
my %merges;         # array reference of revision identifiers from "merges"
my %time;           # timestamp used for the commit
my %timesource;     # value of the "timestamp" key

# Section being parsed: 0 = none yet, 1 = [config], 2 = [N],
# 3 = [N.files], 4 = [N.message].
my $sectiontype = 0;

# Revision identifier of the section being parsed.
my $rev = 0;

# Next unused fast-import mark (shared by commits and blobs).
my $mark = 1;
151 | ||
# Both a configuration file and an output file are required.  When
# either is missing, or when the first argument looks like a help switch
# (-h, --h, --help, ...), show the embedded documentation instead.
my $wants_help = @ARGV < 2 || $ARGV[0] =~ /^--?h/;
if ($wants_help)
{
    # exec only returns if perldoc could not be started.
    exec('perldoc', $0);
    exit 1;
}
158 | ||
# Open the configuration file (first argument) for reading.  The bareword
# handles CFG and OUT are kept because the rest of the script refers to
# them by name.  The system error ($!) is included so that the user can
# tell a missing file from a permission problem.
my $config = $ARGV[0];
open CFG, '<', $config
    or die "Cannot open configuration file \"$config\": $!\n";

# Open the output file (second argument) for writing.  It is put in
# binary mode since file contents are copied into it byte for byte.
my $output = $ARGV[1];
open OUT, '>', $output
    or die "Cannot create output file \"$output\": $!\n";
binmode OUT;
167 | ||
# Read the configuration file line by line.  Section headers switch the
# parser state ($sectiontype, $rev); all other lines are interpreted
# according to the section they appear in.  File blobs are written to the
# output as soon as a [N.files] entry is read; commits are written later.

# Marks of revisions whose timestamp was taken from the file named by
# their "timestamp" directive.  Once set, files read later must not
# replace that timestamp, even if they are more recent.
my %timepinned;

LINE: while (my $line = <CFG>)
{
    $line =~ s/\r?\n$//;

    # Empty lines are only significant inside a commit message; lines
    # starting with a semicolon are comments in every section.
    next LINE if $sectiontype != 4 && $line eq '';
    next LINE if $line =~ /^;/;
    my $oldrev = $rev;

    # Section headers: [config], [N], [N.files] or [N.message]
    if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
    {
        my ($name, $number, $suffix) = ($1, $2, $3);
        if ($name eq 'config')
        {
            $sectiontype = 1;
        }
        elsif ($suffix eq '')
        {
            # A new revision: allocate its commit mark
            $sectiontype = 2;
            $rev = $number;
            die "Duplicate rev: $line\n " if defined $revmap{$rev};
            print "Reading revision $rev\n";
            push @revs, $rev;
            $revmap{$rev} = $mark++;
            $time{$revmap{$rev}} = 0;
        }
        elsif ($suffix eq '.files')
        {
            # Must belong to the revision currently being defined
            $sectiontype = 3;
            $rev = $number;
            die "Revision mismatch: $line\n " unless $rev == $oldrev;
        }
        elsif ($suffix eq '.message')
        {
            # Must belong to the revision currently being defined
            $sectiontype = 4;
            $rev = $number;
            die "Revision mismatch: $line\n " unless $rev == $oldrev;
        }
        else
        {
            die "Internal parse error: $line\n ";
        }
        next LINE;
    }

    # Commit message: collected verbatim, lines joined by newlines
    if ($sectiontype == 4)
    {
        my $current = $revmap{$rev};
        $message{$current} .= "\n" if defined $message{$current};
        $message{$current} .= $line;
        next LINE;
    }

    # Everything else must be a key=value pair (surrounding whitespace
    # is dropped here, quoting and escaping is handled by the parser)
    die "Parse error: $line\n "
        unless $line =~ m"^\s*([^\s].*=.*[^\s])\s*$";
    my ($key, $value) = parsekeyvaluepair($1);

    # Global configuration
    if (1 == $sectiontype)
    {
        if ($key eq 'crlf')
        {
            if ($value eq 'convert')
            {
                $crlfmode = 1;
                next LINE;
            }
            if ($value eq 'none')
            {
                $crlfmode = 0;
                next LINE;
            }
        }
        die "Unknown configuration option: $line\n ";
    }

    # Revision specification
    if (2 == $sectiontype)
    {
        my $current = $revmap{$rev};
        if ($key eq 'author')
        {
            $author{$current} = $value;
        }
        elsif ($key eq 'branch')
        {
            $branch{$current} = $value;
        }
        elsif ($key eq 'parent')
        {
            $parent{$current} = $value;
        }
        elsif ($key eq 'timestamp')
        {
            $timesource{$current} = $value;
        }
        elsif ($key eq 'merges')
        {
            push @{$merges{$current}}, $value;
        }
        else
        {
            die "Unknown revision option: $line\n ";
        }
        next LINE;
    }

    # Filespecs
    if (3 == $sectiontype)
    {
        # Add the file and create a marker for its blob
        die "File not found: $line\n " unless -f $value;
        my $current = $revmap{$rev};
        $files{$current}{$key} = $mark;
        my $time = fileblob($value, $crlfmode, $mark++);

        # The file selected by the "timestamp" directive decides the
        # revision timestamp and pins it.  Otherwise the most recent
        # file seen so far is used, unless the timestamp is pinned.
        if (defined $timesource{$current} && $timesource{$current} eq $value)
        {
            $time{$current} = $time;
            $timepinned{$current} = 1;
        }
        elsif (!$timepinned{$current} && $time > $time{$current})
        {
            $time{$current} = $time;
        }
    }
}
close CFG;
278 | ||
# Write one fast-import "commit" command per revision, in the order the
# revisions were defined.  The blobs the commits refer to have already
# been written while the configuration was read.
foreach my $commit (@revs)
{
    # Progress
    print OUT "progress Creating revision $commit\n";

    # All per-revision data is keyed by the commit mark
    my $id = $revmap{$commit};

    # Branch and commit id
    die "No branch defined for $commit\n" unless defined $branch{$id};
    print OUT "commit refs/heads/", $branch{$id}, "\nmark :", $id, "\n";

    # Author and timestamp.  The time zone is always written as +0100.
    die "No author defined for $commit\n" unless defined $author{$id};
    die "No timestamp defined for $commit (no files?)\n" unless defined $time{$id};
    print OUT "committer ", $author{$id}, " ", $time{$id}, " +0100\n";

    # Commit message.  Blank lines at the end of a [N.message] section
    # are collected as trailing newlines; drop all of them.
    die "No message defined for $commit\n" unless defined $message{$id};
    my $message = $message{$id};
    $message =~ s/\n+\z//;
    print OUT "data ", length($message), "\n", $message, "\n";

    # Parent and any merges.  These name revisions, which must have been
    # defined in the configuration to be translated into marks.
    if (defined $parent{$id})
    {
        my $parentmark = $revmap{$parent{$id}};
        die "Unknown parent revision $parent{$id} for $commit\n"
            unless defined $parentmark;
        print OUT "from :", $parentmark, "\n";
    }
    if (defined $merges{$id})
    {
        foreach my $merge (@{$merges{$id}})
        {
            my $mergemark = $revmap{$merge};
            die "Unknown merged revision $merge for $commit\n"
                unless defined $mergemark;
            print OUT "merge :", $mergemark, "\n";
        }
    }

    # Output file marks.  Every commit lists its complete contents, so
    # files not listed for this revision are removed.
    print OUT "deleteall\n";
    foreach my $file (sort keys %{$files{$id}})
    {
        print OUT "M 644 :", $files{$id}{$file}, " $file\n";
    }
    print OUT "\n";
}

# Buffered write errors (for instance a full disk) only show up here.
close OUT or die "Cannot write output file \"$output\": $!\n";
319 | ||
# Create one file blob.
#
# Reads $filename in binary mode and writes it to the OUT file handle as
# a fast-import "blob" command carrying the mark $mark, preceded by a
# "progress" line.  If $crlfmode is true, every CRLF pair in the contents
# is converted to LF first.
#
# Returns the modification time of the file.  Dies if the file cannot be
# opened or read; nothing is written to OUT in that case.
sub fileblob
{
    my ($filename, $crlfmode, $mark) = @_;

    # Read the whole file through a lexical handle, so that no global
    # handle is clobbered and the handle is closed on every exit path.
    open my $fh, '<', $filename or die "Cannot read $filename: $!\n";
    binmode $fh;
    my $mtime = (stat($fh))[9];
    my $file = do { local $/; <$fh> };
    die "Cannot read $filename: $!\n" unless defined $file;
    close $fh;

    $file =~ s/\r\n/\n/g if $crlfmode;

    # The length is computed after conversion, as fast-import expects
    # the exact number of bytes that follow the "data" line.
    print OUT "progress Importing $filename\nblob\nmark :$mark\n";
    print OUT "data ", length($file), "\n", $file, "\n";

    return $mtime;
}
338 | ||
# Parse a key=value pair.
#
# Takes one string of the form key=value (without surrounding
# whitespace) and returns the list (key, value), both unquoted and
# unescaped.  Dies with "Parse error" if no separating equal sign is
# found.
sub parsekeyvaluepair
{

=pod

=head2 Escaping special characters

Key and value strings may be enclosed in quotes, in which case
whitespace inside the quotes is preserved. Additionally, an equal
sign may be included in the key by preceding it with a backslash.
For example:

 "key1 "=value1
 key2=" value2"
 key\=3=value3
 key4=value=4
 "key5""=value5

Here the first key is "key1 " (note the trailing white-space) and the
second value is " value2" (note the leading white-space). The third
key contains an equal sign "key=3" and so does the fourth value, which
does not need to be escaped. The fifth key contains a trailing quote,
which does not need to be escaped since it is inside a surrounding
quote.

=cut

    my $pair = shift;

    # Separate key and value by the first equal sign that ends the key.
    # A key that starts with a quote extends to the first unescaped
    # quote that is directly followed by an equal sign, so it may
    # contain equal signs itself.  Any other key extends to the first
    # equal sign that is not preceded by a backslash.  The matches are
    # non-greedy so that equal signs in the value are left alone.
    my ($key, $value);
    if ($pair =~ /^(".*?[^\\]")=(.*)$/ || $pair =~ /^(.*?[^\\])=(.*)$/)
    {
        ($key, $value) = ($1, $2);
    }
    else
    {
        die "Parse error: $pair\n ";
    }

    # Unquote and unescape the key and value separately
    return (unescape($key), unescape($value));
}

# Unquote and unescape.
#
# Takes one key or value string and returns it with a pair of enclosing
# quotes removed and every backslash-escaped character replaced by the
# character itself.
sub unescape
{
    my $string = shift;

    # First remove enclosing quotes. Backslash before the trailing
    # quote leaves both.
    if ($string =~ /^"(.*[^\\])"$/)
    {
        $string = $1;
    }

    # Second remove any backslashes inside the unquoted string.
    # For later: Handle special sequences like \t ?
    $string =~ s/\\(.)/$1/g;

    return $string;
}
400 | ||
401 | __END__ | |
402 | ||
403 | =pod | |
404 | ||
405 | =head1 EXAMPLES | |
406 | ||
407 | B<import-directories.perl> F<project.import> F<project.fastimport> | |
408 | ||
409 | =head1 AUTHOR | |
410 | ||
411 | Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.seE<gt> | |
412 | ||
413 | This program is free software; you can redistribute it and/or modify | |
414 | it under the terms of the GNU General Public License as published by | |
415 | the Free Software Foundation. | |
416 | ||
417 | =cut |