3 # Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
5 # ------------------------------------------------------------------------
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 # ------------------------------------------------------------------------
26 import-directories - Import bits and pieces to Git.
30 B<import-directories.perl> F<configfile> F<outputfile>
34 Script to import arbitrary projects that were maintained using the "copy the
35 source directory to a new location and edit it there" method
36 into real version control. Handles projects with arbitrary branching
37 and version trees, taking a file describing the inputs and generating a
38 file compatible with the L<git-fast-import(1)> format.
40 =head1 CONFIGURATION FILE
44 The configuration file is based on the standard I<.ini> format.
46 ; Comments start with semi-colons
50 Please see below for information on how to escape special characters.
52 =head2 Global configuration
54 Global configuration is done in the B<[config]> section, which should be
55 the first section in the file. Configuration can be changed by
56 repeating configuration sections later on.
59 ; configure conversion of CRLFs. "convert" means that all CRLFs
60 ; should be converted into LFs (suitable for the core.autocrlf
61 ; setting set to true in Git). "none" means that all data is
65 =head2 Revision configuration
67 Each revision that is to be imported is described in three
68 sections. Revisions should be defined in topological order, so
69 that a revision's parent has always been defined when a new revision
70 is introduced. All the sections for one revision must be defined
71 before defining the next revision.
73 Each revision is assigned a unique numerical identifier. The
74 numbers do not need to be consecutive, nor monotonically
77 For instance, if your configuration file contains only the two
78 revisions 4711 and 42, where 4711 is the initial commit, the
79 only requirement is that 4711 is completely defined before 42.
83 =head3 Revision description section
85 A section whose section name is just an integer gives meta-data
89 ; author sets the author of the revision
90 author=Peter Krefting <peter@softwolves.pp.se>
91 ; branch sets the branch that the revision should be committed to
93 ; parent describes the revision that is the parent of this commit
96 ; merges describes a revision that is merged into this commit
97 ; (optional; can be repeated)
99 ; selects one file to take the timestamp from
100 ; (optional; if unspecified, the most recent file from the .files
104 =head3 Revision contents section
106 A section whose section name is an integer followed by B<.files>
107 describes all the files included in this revision. If a file that
108 was available previously is not included in this revision, it will
111 If an on-disk revision is incomplete, you can point to files from
112 a previous revision. There are no restrictions on where the source
113 files are located, nor on their names.
116 ; the key is the path inside the repository, the value is the path
117 ; as seen from the importer script.
118 source.c=ver-3.00/source.c
119 source.h=ver-2.99/source.h
120 readme.txt=ver-3.00/introduction to the project.txt
122 File names are treated as byte strings (but please see below on
123 quoting rules), and should be stored in the configuration file in
124 the encoding that should be used in the generated repository.
126 =head3 Revision commit message section
128 A section whose section name is an integer followed by B<.message>
129 gives the commit message. This section is read verbatim, up until
130 the beginning of the next section. As such, a commit message may not
131 contain a line that begins with an opening square bracket ("[") and
132 ends with a closing square bracket ("]"), unless they are surrounded
133 by whitespace or other characters.
137 ; trailing blank lines are ignored.
# Per-revision state filled in while the configuration is read. %revmap
# translates a revision number from the configuration file into a
# fast-import mark; the remaining hashes are keyed by that mark.
147 my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
# Taken when fewer than two arguments are given, or when the first
# argument starts with "-h" or "--h".
153 if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
# First argument: the configuration file to read. $! is included so the
# reason for a failed open (missing file, permissions, ...) is reported.
160 my $config = $ARGV[0];
161 open CFG, '<', $config or die "Cannot open configuration file \"$config\": $!";
# Second argument: the file that receives the generated stream.
164 my $output = $ARGV[1];
165 open OUT, '>', $output or die "Cannot create output file \"$output\": $!";
# Read the configuration file one line at a time. $sectiontype records
# which kind of section the current line belongs to: 1 is the global
# configuration, 2 a revision description, 3 a revision's file list, and
# 4 is the remaining kind, whose lines are appended to the revision's
# commit message (presumably the ".message" section).
168 LINE: while (my $line = <CFG>)
# Empty lines are skipped everywhere except in section type 4.
# (assumes $line has had its line ending removed by a statement not
# visible here -- TODO confirm)
171 next LINE if $sectiontype != 4 && $line eq '';
# Lines starting with a semicolon are dropped in every section type,
# including type 4.
172 next LINE if $line =~ /^;/;
173 my $oldsectiontype = $sectiontype;
# Section headers: [config], [N], [N.files] or [N.message], where N is a
# revision number.
177 if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
187 # Create a new revision
188 die "Duplicate rev: $line\n " if defined $revmap{$rev};
189 print "Reading revision $rev\n";
# Hand out the next free mark to this revision and start its timestamp
# at zero.
191 $revmap{$rev} = $mark ++;
192 $time{$revmap{$rev}} = 0;
194 elsif ($3 eq '.files')
# The sub-section has to carry the same revision number as $oldrev.
198 die "Revision mismatch: $line\n " unless $rev == $oldrev;
200 elsif ($3 eq '.message')
204 die "Revision mismatch: $line\n " unless $rev == $oldrev;
208 die "Internal parse error: $line\n ";
214 if ($sectiontype != 4)
# key=value line; whitespace around the pair is trimmed before parsing.
217 if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
219 my ($key, $value) = &parsekeyvaluepair($1);
220 # Global configuration
221 if (1 == $sectiontype)
225 $crlfmode = 1, next LINE if $value eq 'convert';
226 $crlfmode = 0, next LINE if $value eq 'none';
228 die "Unknown configuration option: $line\n ";
230 # Revision specification
231 if (2 == $sectiontype)
# Everything is stored under the revision's mark, not its number.
233 my $current = $revmap{$rev};
234 $author{$current} = $value, next LINE if $key eq 'author';
235 $branch{$current} = $value, next LINE if $key eq 'branch';
236 $parent{$current} = $value, next LINE if $key eq 'parent';
237 $timesource{$current} = $value, next LINE if $key eq 'timestamp';
# "merges" may be repeated, so the values accumulate in a list.
238 push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
239 die "Unknown revision option: $line\n ";
242 if (3 == $sectiontype)
244 # Add the file and create a marker
245 die "File not found: $line\n " unless -f $value;
246 my $current = $revmap{$rev};
# Record the mark under the in-repository path ($key), then emit the blob
# for the on-disk path ($value) under that same mark. The value returned
# by fileblob is used as the file's timestamp.
247 ${$files{$current}}{$key} = $mark;
248 my $time = &fileblob($value, $crlfmode, $mark ++);
250 # Update revision timestamp if more recent than other
251 # files seen, or if this is the file we have selected
252 # to take the time stamp from using the "timestamp"
# When a "timestamp" file has been selected for this revision, only that
# file sets the time, so a newer file listed elsewhere in the section can
# no longer override it. Without a selection the newest file seen wins.
254 if (defined $timesource{$current}
255 ? $timesource{$current} eq $value : $time > $time{$current})
257 $time{$current} = $time;
263 die "Parse error: $line\n ";
# Message text: lines are joined with a newline between them.
269 my $current = $revmap{$rev};
270 if (defined $message{$current})
272 $message{$current} .= "\n";
274 $message{$current} .= $line;
279 # Start spewing out data for git-fast-import
# For every entry of @revs one "commit" command is written to OUT: branch
# and mark, committer line, commit message, parent and merge links, and
# finally the complete file list.
280 foreach my $commit (@revs)
283 print OUT "progress Creating revision $commit\n";
285 # Create commit header
286 my $mark = $revmap{$commit};
288 # Branch and commit id
289 print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
291 # Author and timestamp
# NOTE(review): $time{...} is set to 0 when a revision is created in the
# configuration-reading loop, so this check looks like it cannot fire for
# a revision without files -- confirm.
292 die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
# The configured author is written as the committer; the time zone
# offset is fixed at +0100.
293 print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
296 die "No message defined for $commit\n" unless defined $message{$mark};
297 my $message = $message{$mark};
298 $message =~ s/\n$//; # Kill trailing empty line
# The "data" command carries the exact length of the text that follows.
299 print OUT "data ", length($message), "\n", $message, "\n";
301 # Parent and any merges
# Every referenced revision must have been defined in the configuration;
# otherwise "from :" / "merge :" would be written with an empty mark.
# Fail with a clear message instead.
die "Unknown parent revision $parent{$mark} for $commit\n"
    if defined $parent{$mark} && !defined $revmap{$parent{$mark}};
foreach my $merge (@{$merges{$mark} || []})
{
    die "Unknown merge revision $merge for $commit\n" unless defined $revmap{$merge};
}
302 print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
303 if (defined $merges{$mark})
305 foreach my $merge (@{$merges{$mark}})
307 print OUT "merge :", $revmap{$merge}, "\n";
# Each commit lists its complete contents: wipe the tree, then add every
# file of this revision by referring to the blob mark recorded for it.
312 print OUT "deleteall\n"; # start from scratch
313 foreach my $file (sort keys %{$files{$mark}})
315 print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
320 # Create one file blob
# Arguments: the on-disk file name, the CRLF conversion flag and the mark
# to assign to the blob. Writes a "blob" command with the file contents
# to OUT.
323 my ($filename, $crlfmode, $mark) = @_;
326 print OUT "progress Importing $filename\nblob\nmark :$mark\n";
# Report why the file could not be opened.
327 open FILE, '<', $filename or die "Cannot read $filename: $!\n ";
# Element 7 of stat is the size in bytes, element 9 the modification time.
329 my ($size, $mtime) = (stat(FILE))[7,9];
# read returns undef on error; stop rather than emit a broken data record.
331 defined(read(FILE, $file, $size)) or die "Cannot read $filename: $!\n ";
# The length is taken after the optional CRLF -> LF conversion so that it
# matches the bytes actually written.
333 $file =~ s/\r\n/\n/g if $crlfmode;
334 print OUT "data ", length($file), "\n", $file, "\n";
339 # Parse a key=value pair
# Takes one "key=value" string and returns the list (key, value), each
# passed through unescape. Dies with "Parse error" when the string
# cannot be split.
340 sub parsekeyvaluepair
344 =head2 Escaping special characters
346 Key and value strings may be enclosed in quotes, in which case
347 whitespace inside the quotes is preserved. Additionally, an equal
348 sign may be included in the key by preceding it with a backslash.
357 Here the first key is "key1 " (note the trailing white-space) and the
358 second value is " value2" (note the leading white-space). The third
359 key contains an equal sign "key=3" and so does the fourth value, which
360 does not need to be escaped. The fifth key contains a trailing quote,
361 which does not need to be escaped since it is inside a surrounding
367 # Separate key and value by the first equal sign not preceded by a backslash
# The key part is matched non-greedily: a greedy match would split at the
# last such equal sign and move equal signs that belong to the value
# into the key.
369 if ($pair =~ /^(.*?[^\\])=(.*)$/)
371 ($key, $value) = ($1, $2)
375 die "Parse error: $pair\n ";
378 # Unquote and unescape the key and value separately
379 return (&unescape($key), &unescape($value));
382 # Unquote and unescape
# Works on $string: strips one pair of enclosing double quotes and then
# drops the backslash from every backslash-escaped character.
387 # First remove enclosing quotes. Backslash before the trailing
# The pattern needs at least one character between the quotes, and that
# last character must not be a backslash; otherwise the quotes are left
# in place.
389 if ($string =~ /^"(.*[^\\])"$/)
394 # Second remove any backslashes inside the unquoted string.
395 # For later: Handle special sequences like \t ?
# Each backslash followed by a character is replaced by that character.
396 $string =~ s/\\(.)/$1/g;
407 B<import-directories.perl> F<project.import> F<project.fast-import>
411 Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.seE<gt>
413 This program is free software; you can redistribute it and/or modify
414 it under the terms of the GNU General Public License as published by
415 the Free Software Foundation.