Merge branch 'rh/prompt-pcmode-avoid-eval-on-refname' into maint
[git] / contrib / fast-import / import-directories.perl
CommitLineData
3328aced 1#!/usr/bin/perl
2b72ccb2
PK
2#
3# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
4#
5# ------------------------------------------------------------------------
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, write to the Free Software
18# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19#
20# ------------------------------------------------------------------------
21
22=pod
23
24=head1 NAME
25
26import-directories - Import bits and pieces to Git.
27
28=head1 SYNOPSIS
29
30B<import-directories.perl> F<configfile> F<outputfile>
31
32=head1 DESCRIPTION
33
Script to import projects that have been "version controlled" by copying
the source directory to a new location and editing it there into a real
version control system. Handles projects with arbitrary branching
and version trees, taking a file describing the inputs and generating a
file compatible with the L<git-fast-import(1)> format.
39
40=head1 CONFIGURATION FILE
41
42=head2 Format
43
44The configuration file is based on the standard I<.ini> format.
45
46 ; Comments start with semi-colons
47 [section]
48 key=value
49
50Please see below for information on how to escape special characters.
51
52=head2 Global configuration
53
54Global configuration is done in the B<[config]> section, which should be
55the first section in the file. Configuration can be changed by
56repeating configuration sections later on.
57
58 [config]
59 ; configure conversion of CRLFs. "convert" means that all CRLFs
60 ; should be converted into LFs (suitable for the core.autocrlf
61 ; setting set to true in Git). "none" means that all data is
62 ; treated as binary.
63 crlf=convert
64
65=head2 Revision configuration
66
67Each revision that is to be imported is described in three
68sections. Revisions should be defined in topological order, so
69that a revision's parent has always been defined when a new revision
70is introduced. All the sections for one revision must be defined
71before defining the next revision.
72
73Each revision is assigned a unique numerical identifier. The
74numbers do not need to be consecutive, nor monotonically
75increasing.
76
77For instance, if your configuration file contains only the two
78revisions 4711 and 42, where 4711 is the initial commit, the
79only requirement is that 4711 is completely defined before 42.
80
81=pod
82
83=head3 Revision description section
84
85A section whose section name is just an integer gives meta-data
86about the revision.
87
88 [3]
89 ; author sets the author of the revisions
90 author=Peter Krefting <peter@softwolves.pp.se>
91 ; branch sets the branch that the revision should be committed to
92 branch=master
93 ; parent describes the revision that is the parent of this commit
94 ; (optional)
95 parent=1
96 ; merges describes a revision that is merged into this commit
97 ; (optional; can be repeated)
98 merges=2
99 ; selects one file to take the timestamp from
100 ; (optional; if unspecified, the most recent file from the .files
101 ; section is used)
102 timestamp=3/source.c
103
104=head3 Revision contents section
105
A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
was available previously is not included in this revision, it will
be removed.
110
111If an on-disk revision is incomplete, you can point to files from
01689909
JL
112a previous revision. There are no restrictions on where the source
113files are located, nor on their names.
2b72ccb2
PK
114
115 [3.files]
116 ; the key is the path inside the repository, the value is the path
117 ; as seen from the importer script.
118 source.c=ver-3.00/source.c
119 source.h=ver-2.99/source.h
120 readme.txt=ver-3.00/introduction to the project.txt
121
122File names are treated as byte strings (but please see below on
123quoting rules), and should be stored in the configuration file in
124the encoding that should be used in the generated repository.
125
126=head3 Revision commit message section
127
128A section whose section name is an integer followed by B<.message>
129gives the commit message. This section is read verbatim, up until
130the beginning of the next section. As such, a commit message may not
131contain a line that begins with an opening square bracket ("[") and
132ends with a closing square bracket ("]"), unless they are surrounded
133by whitespace or other characters.
134
135 [3.message]
136 Implement foobar.
137 ; trailing blank lines are ignored.
138
139=cut
140
141# Globals
142use strict;
3328aced 143use warnings;
2b72ccb2
PK
144use integer;
# Parser and importer state shared by the whole script.
my $crlfmode = 0;       # 1 = convert CRLF to LF in blobs, 0 = leave data alone
my @revs;               # revision identifiers, in the order they were defined
# %revmap maps a revision identifier to the mark of its commit; the other
# tables are keyed by that mark.
my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
my $sectiontype = 0;    # 0 = none yet, 1 = [config], 2 = [N], 3 = [N.files], 4 = [N.message]
my $rev = 0;            # identifier of the revision currently being read
my $mark = 1;           # next unused git-fast-import mark

# Check command line: two arguments are required; show the manual if they
# are missing or if help was requested.
if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
{
    exec('perldoc', $0);
    exit 1;
}

# Open configuration
my $config = $ARGV[0];
open CFG, '<', $config or die "Cannot open configuration file \"$config\": $!";

# Open output
my $output = $ARGV[1];
open OUT, '>', $output or die "Cannot create output file \"$output\": $!";
# Blob data is written verbatim, so no newline translation on output.
binmode OUT;
167
# Marks of the revisions whose timestamp has already been taken from the
# file selected with the "timestamp" option.  Once a revision is listed
# here, later (possibly newer) files must not override its timestamp.
my %timepinned;

# Read the configuration file line by line.  Section headers switch the
# parser state; every other line is interpreted according to the section
# it appears in.  File blobs are written to OUT as soon as they are seen.
LINE: while (my $line = <CFG>)
{
    $line =~ s/\r?\n$//;
    # Blank lines are significant only inside a commit message.
    next LINE if $sectiontype != 4 && $line eq '';
    # Comments are skipped everywhere, including inside commit messages.
    next LINE if $line =~ /^;/;
    my $oldrev = $rev;

    # Sections
    if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
    {
        if ($1 eq 'config')
        {
            $sectiontype = 1;
        }
        elsif ($3 eq '')
        {
            $sectiontype = 2;
            $rev = $2;
            # Create a new revision
            die "Duplicate rev: $line\n " if defined $revmap{$rev};
            print "Reading revision $rev\n";
            push @revs, $rev;
            $revmap{$rev} = $mark ++;
            $time{$revmap{$rev}} = 0;
        }
        elsif ($3 eq '.files')
        {
            $sectiontype = 3;
            $rev = $2;
            # The numeric comparison alone would accept a revision that was
            # never declared (e.g. "[0.files]" first, or "[03.files]" after
            # "[3]"), so also require that the revision has a mark.
            die "Revision mismatch: $line\n "
                unless $rev == $oldrev && defined $revmap{$rev};
        }
        elsif ($3 eq '.message')
        {
            $sectiontype = 4;
            $rev = $2;
            die "Revision mismatch: $line\n "
                unless $rev == $oldrev && defined $revmap{$rev};
        }
        else
        {
            die "Internal parse error: $line\n ";
        }
        next LINE;
    }

    # Parse data
    if ($sectiontype != 4)
    {
        # Key and value
        if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
        {
            my ($key, $value) = parsekeyvaluepair($1);

            # Global configuration
            if (1 == $sectiontype)
            {
                if ($key eq 'crlf')
                {
                    if ($value eq 'convert')
                    {
                        $crlfmode = 1;
                        next LINE;
                    }
                    if ($value eq 'none')
                    {
                        $crlfmode = 0;
                        next LINE;
                    }
                }
                die "Unknown configuration option: $line\n ";
            }

            # Revision specification
            if (2 == $sectiontype)
            {
                my $current = $revmap{$rev};
                if ($key eq 'author')
                {
                    $author{$current} = $value;
                }
                elsif ($key eq 'branch')
                {
                    $branch{$current} = $value;
                }
                elsif ($key eq 'parent')
                {
                    $parent{$current} = $value;
                }
                elsif ($key eq 'timestamp')
                {
                    $timesource{$current} = $value;
                }
                elsif ($key eq 'merges')
                {
                    # "merges" may be repeated, so collect every value.
                    push @{$merges{$current}}, $value;
                }
                else
                {
                    die "Unknown revision option: $line\n ";
                }
                next LINE;
            }

            # Filespecs
            if (3 == $sectiontype)
            {
                # Add the file and create a marker
                die "File not found: $line\n " unless -f $value;
                my $current = $revmap{$rev};
                $files{$current}{$key} = $mark;
                my $time = fileblob($value, $crlfmode, $mark ++);

                if (defined $timesource{$current} && $timesource{$current} eq $value)
                {
                    # This is the file selected with the "timestamp"
                    # option: use its time and keep it, whatever the
                    # other files of the revision say.
                    $time{$current} = $time;
                    $timepinned{$current} = 1;
                }
                elsif (!$timepinned{$current} && $time > $time{$current})
                {
                    # No file has pinned the timestamp (yet): track the
                    # most recent file seen so far.
                    $time{$current} = $time;
                }
            }
        }
        else
        {
            die "Parse error: $line\n ";
        }
    }
    else
    {
        # Commit message: lines are joined with newlines, verbatim.
        my $current = $revmap{$rev};
        if (defined $message{$current})
        {
            $message{$current} .= "\n";
        }
        $message{$current} .= $line;
    }
}
close CFG;
278
# Start spewing out data for git-fast-import: one "commit" command per
# revision, in the order the revisions were defined.
foreach my $commit (@revs)
{
    # Progress
    print OUT "progress Creating revision $commit\n";

    # All per-revision tables are keyed by the commit mark
    my $commitmark = $revmap{$commit};

    # Branch and commit id
    die "No branch defined for $commit\n" unless defined $branch{$commitmark};
    print OUT "commit refs/heads/", $branch{$commitmark}, "\nmark :", $commitmark, "\n";

    # Author and timestamp.  The time zone is hard-coded to +0100.
    # NOTE(review): the parser initialises the timestamp to 0 when a
    # revision is created, so a revision without files is emitted with
    # timestamp 0 instead of failing the check below - confirm whether
    # that is intended.
    die "No author defined for $commit\n" unless defined $author{$commitmark};
    die "No timestamp defined for $commit (no files?)\n" unless defined $time{$commitmark};
    print OUT "committer ", $author{$commitmark}, " ", $time{$commitmark}, " +0100\n";

    # Commit message
    die "No message defined for $commit\n" unless defined $message{$commitmark};
    my $message = $message{$commitmark};
    $message =~ s/\n+\z//; # Kill all trailing empty lines
    print OUT "data ", length($message), "\n", $message, "\n";

    # Parent and any merges; both must refer to revisions that exist,
    # otherwise the stream would reference an empty mark.
    if (defined $parent{$commitmark})
    {
        my $parentrev = $parent{$commitmark};
        die "Unknown parent revision $parentrev for $commit\n"
            unless defined $revmap{$parentrev};
        print OUT "from :", $revmap{$parentrev}, "\n";
    }
    if (defined $merges{$commitmark})
    {
        foreach my $merge (@{$merges{$commitmark}})
        {
            die "Unknown merged revision $merge for $commit\n"
                unless defined $revmap{$merge};
            print OUT "merge :", $revmap{$merge}, "\n";
        }
    }

    # Output file marks
    print OUT "deleteall\n"; # start from scratch
    foreach my $file (sort keys %{$files{$commitmark}})
    {
        print OUT "M 644 :", $files{$commitmark}{$file}, " $file\n";
    }
    print OUT "\n";
}

# Buffered write errors (e.g. a full disk) only surface when closing.
close OUT or die "Cannot write output file \"$output\": $!\n";
319
# Create one file blob
#
# Writes a "progress" line and a "blob" command carrying mark $mark for the
# file $filename to the OUT handle.  If $crlfmode is true, every CRLF in
# the file contents is converted to LF before the data is written.
#
# Returns the modification time of the file, as reported by stat().
# Dies if the file cannot be opened or read.
sub fileblob
{
    my ($filename, $crlfmode, $mark) = @_;

    # Import the file
    print OUT "progress Importing $filename\nblob\nmark :$mark\n";
    open my $fh, '<', $filename or die "Cannot read $filename: $!\n ";
    binmode $fh;
    my ($size, $mtime) = (stat($fh))[7,9];
    my $file = '';
    defined read($fh, $file, $size) or die "Cannot read $filename: $!\n ";
    close $fh;
    $file =~ s/\r\n/\n/g if $crlfmode;
    # The length is computed after the conversion, so it matches the data
    # that is actually written.
    print OUT "data ", length($file), "\n", $file, "\n";

    return $mtime;
}
338
# Parse a key=value pair
#
# Takes one "key=value" string, splits it at the first equal sign that is
# not preceded by a backslash, and returns the list (key, value) with
# quotes and backslash escapes resolved.  Dies with "Parse error" if no
# such equal sign is found.
sub parsekeyvaluepair
{
=pod

=head2 Escaping special characters

Key and value strings may be enclosed in quotes, in which case
whitespace inside the quotes is preserved. Additionally, an equal
sign may be included in the key by preceding it with a backslash.
For example:

 "key1 "=value1
 key2=" value2"
 key\=3=value3
 key4=value=4
 "key5""=value5

Here the first key is "key1 " (note the trailing white-space) and the
second value is " value2" (note the leading white-space). The third
key contains an equal sign "key=3" and so does the fourth value, which
does not need to be escaped. The fifth key contains a trailing quote,
which does not need to be escaped since it is inside a surrounding
quote.

=cut
    my $pair = shift;

    # Separate key and value by the first equal sign that is not preceded
    # by a backslash.  The key part must be matched non-greedily: a greedy
    # match would split at the LAST such equal sign and turn
    # "key4=value=4" into the key "key4=value" and the value "4".
    my ($key, $value);
    if ($pair =~ /^(.*?[^\\])=(.*)$/)
    {
        ($key, $value) = ($1, $2);
    }
    else
    {
        die "Parse error: $pair\n ";
    }

    # Unquote and unescape the key and value separately
    return (unescape($key), unescape($value));
}
381
# Unquote and unescape
#
# Takes one string, strips a single pair of enclosing double quotes (if
# present) and then replaces every backslash-escaped character by the
# character itself.  Returns the resulting string.
sub unescape
{
    my ($text) = @_;

    # Strip the enclosing quotes, if any.  When the closing quote is
    # preceded by a backslash the pattern does not match, so both quotes
    # are kept.
    my ($inner) = $text =~ /^"(.*[^\\])"$/;
    $text = $inner if defined $inner;

    # Drop the backslash of every escape sequence.
    # For later: Handle special sequences like \t ?
    $text =~ s/\\(.)/$1/g;

    return $text;
}
400
401__END__
402
403=pod
404
405=head1 EXAMPLES
406
B<import-directories.perl> F<project.import> F<project.fi>
408
409=head1 AUTHOR
410
411Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
412
413This program is free software; you can redistribute it and/or modify
414it under the terms of the GNU General Public License as published by
415the Free Software Foundation.
416
417=cut