3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
11 git-archimport-script -i <archive>/<branch> [<archive>/<branch>]
12 [ <archive>/<branch> ]
14 The script expects you to provide the key roots where it can start the
15 import from an 'initial import' or 'tag' type of Arch commit. It will
16 then follow all the branching and tagging within the provided roots.
18 It will die if it sees branches that have different roots.
22 - keep track of merged patches, and mark a git merge when it happens
23 - smarter rules to parse the archive history "up" and "down"
24 - be able to continue an import where we left off
25 - audit shell-escaping of filenames
29 Add print in front of the shell commands invoked via backticks.
37 use File::Temp qw(tempfile);
38 use File::Path qw(mkpath);
39 use File::Basename qw(basename dirname);
40 use String::ShellQuote;
44 use POSIX qw(strftime dup2);
45 use Data::Dumper qw/ Dumper /;
48 $SIG{'PIPE'}="IGNORE";
51 our($opt_h,$opt_v, $opt_T,
56 Usage: ${\basename $0} # fetch/update GIT from Arch
58 [ -C GIT_repository ] [ -t tempdir ]
59 repository/arch-branch [ repository/arch-branch] ...
64 getopts("hviC:t:") or usage();
67 @ARGV >= 1 or usage();
68 my @arch_roots = @ARGV;
72 $tmp .= '/git-archimport/';
74 my $git_tree = $opt_C;
78 my @psets = (); # the collection
80 foreach my $root (@arch_roots) {
81 my ($arepo, $abranch) = split(m!/!, $root);
82 open ABROWSE, "tla abrowse -f -A $arepo --desc --merges $abranch |"
83 or die "Problems with tla abrowse: $!";
85 my %ps = (); # the current one
92 # first record padded w 8 spaces
95 # store the record we just captured
97 my %temp = %ps; # break references
98 push (@psets, \%temp);
102 my ($id, $type) = split(m/\s{3}/, $_);
107 if ($type =~ m/^\(simple changeset\)/) {
109 } elsif ($type eq '(initial import)') {
111 } elsif ($type =~ m/^\(tag revision of (.+)\)/) {
115 warn "Unknown type $type";
121 # 10 leading spaces or more
122 # indicate commit metadata
125 if ($lastseen eq 'id' && m/^\d{4}-\d{2}-\d{2}/) {
127 my ($date, $authoremail) = split(m/\s{2,}/, $_);
129 $ps{date} =~ s/\bGMT$//; # strip off trailign GMT
130 if ($ps{date} =~ m/\b\w+$/) {
131 warn 'Arch dates not in GMT?! - imported dates will be wrong';
134 $authoremail =~ m/^(.+)\s(\S+)$/;
140 } elsif ($lastseen eq 'date') {
141 # the only hint is position
142 # subject is after date
146 } elsif ($lastseen eq 'subj' && $_ eq 'merges in:') {
148 $lastseen = 'merges';
150 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
151 push (@{$ps{merges}}, $_);
153 warn 'more metadata after merges!?';
160 my %temp = %ps; # break references
161 push (@psets, \%temp);
165 } # end foreach $root
167 ## Order patches by time
168 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
170 #print Dumper \@psets;
173 ## TODO cleanup irrelevant patches
174 ## and put an initial import
177 unless (-d '.git') { # initial import
178 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
179 print "Starting import from $psets[0]{id}\n";
184 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
189 foreach my $ps (@psets) {
191 $ps->{branch} = branchname($ps->{id});
194 # ensure we have a clean state
196 if (`git diff-files`) {
197 die "Unclean tree when about to process $ps->{id} " .
198 " - did we fail to commit cleanly before?";
203 # skip commits already in repo
205 if (ptag($ps->{id})) {
206 $opt_v && print "Skipping already imported: $ps->{id}\n";
211 # create the branch if needed
213 if ($ps->{type} eq 'i' && !$import) {
214 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
217 unless ($import) { # skip for import
218 if ( -e ".git/refs/heads/$ps->{branch}") {
219 # we know about this branch
220 `git checkout $ps->{branch}`;
222 # new branch! we need to verify a few things
223 die "Branch on a non-tag!" unless $ps->{type} eq 't';
224 my $branchpoint = ptag($ps->{tag});
225 die "Tagging from unknown id unsupported: $ps->{tag}"
228 # find where we are supposed to branch from
229 `git checkout -b $ps->{branch} $branchpoint`;
231 # If we trust Arch with the fact that this is just
232 # a tag, and it does not affect the state of the tree
233 # then we just tag and move on
234 tag($ps->{id}, $branchpoint);
235 ptag($ps->{id}, $branchpoint);
236 print " * Tagged $ps->{id} at $branchpoint\n";
243 # Apply the import/changeset/merge into the working tree
245 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
246 apply_import($ps) or die $!;
248 } elsif ($ps->{type} eq 's') {
253 # prepare update git's index, based on what arch knows
254 # about the pset, resolve parents, etc
258 my $commitlog = `tla cat-archive-log -A $ps->{repo} $ps->{id}`;
259 die "Error in cat-archive-log: $!" if $?;
261 # parselog will git-add/rm files
262 # and generally prepare things for the commit
263 # NOTE: parselog will shell-quote filenames!
264 my ($sum, $msg, $add, $del, $mod, $ren) = parselog($commitlog);
265 my $logmessage = "$sum\n$msg";
268 # imports don't give us good info
269 # on added files. Shame on them
270 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
271 `find . -type f -print0 | grep -zv '^./.git' | xargs -0 -l100 git-update-cache --add`;
272 `git-ls-files --deleted -z | xargs --no-run-if-empty -0 -l100 git-update-cache --remove`;
277 my @slice = splice(@$add, 0, 100);
278 my $slice = join(' ', @slice);
279 `git-update-cache --add $slice`;
280 die "Error in git-update-cache --add: $!" if $?;
284 foreach my $file (@$del) {
285 unlink $file or die "Problems deleting $file : $!";
288 my @slice = splice(@$del, 0, 100);
289 my $slice = join(' ', @slice);
290 `git-update-cache --remove $slice`;
291 die "Error in git-update-cache --remove: $!" if $?;
294 if (@$ren) { # renamed
296 die "Odd number of entries in rename!?";
300 my $from = pop @$ren;
303 unless (-d dirname($to)) {
304 mkpath(dirname($to)); # will die on err
306 #print "moving $from $to";
308 die "Error renaming $from $to : $!" if $?;
309 `git-update-cache --remove $from`;
310 die "Error in git-update-cache --remove: $!" if $?;
311 `git-update-cache --add $to`;
312 die "Error in git-update-cache --add: $!" if $?;
316 if (@$mod) { # must be _after_ renames
318 my @slice = splice(@$mod, 0, 100);
319 my $slice = join(' ', @slice);
320 `git-update-cache $slice`;
321 die "Error in git-update-cache: $!" if $?;
325 # warn "errors when running git-update-cache! $!";
326 $tree = `git-write-tree`;
327 die "cannot write tree $!" if $?;
335 if ( -e ".git/refs/heads/$ps->{branch}") {
336 if (open HEAD, "<.git/refs/heads/$ps->{branch}") {
342 if ($ps->{type} eq 's') {
343 warn "Could not find the right head for the branch $ps->{branch}";
348 my $par = join (' ', @par);
351 # Commit, tag and clean state
354 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
355 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
356 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
357 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
358 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
359 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
361 my ($pid, $commit_rh, $commit_wh);
362 $commit_rh = 'commit_rh';
363 $commit_wh = 'commit_wh';
365 $pid = open2(*READER, *WRITER, "git-commit-tree $tree $par")
367 print WRITER $logmessage; # write
369 my $commitid = <READER>; # read
372 waitpid $pid,0; # close;
374 if (length $commitid != 40) {
375 die "Something went wrong with the commit! $! $commitid";
380 open HEAD, ">.git/refs/heads/$ps->{branch}";
381 print HEAD $commitid;
383 unlink ('.git/HEAD');
384 symlink("refs/heads/$ps->{branch}",".git/HEAD");
387 ptag($ps->{id}, $commitid); # private tag
388 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
389 tag($ps->{id}, $commitid);
391 print " * Committed $ps->{id}\n";
392 print " + tree $tree\n";
393 print " + commit $commitid\n";
394 # print " + commit date is $ps->{date} \n";
400 my @parts = split(m/--/, $id);
401 return join('--', @parts[0..1]);
406 my $bname = branchname($ps->{id});
410 `tla get -s --no-pristine -A $ps->{repo} $ps->{id} $tmp/import`;
411 die "Cannot get import: $!" if $?;
412 `rsync -v --archive --delete --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/import/* ./`;
413 die "Cannot rsync import:$!" if $?;
415 `rm -fr $tmp/import`;
416 die "Cannot remove tempdir: $!" if $?;
428 `tla get-changeset -A $ps->{repo} $ps->{id} $tmp/changeset`;
429 die "Cannot get changeset: $!" if $?;
432 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
433 # this can be sped up considerably by doing
434 # (find | xargs cat) | patch
435 # but that cna get mucked up by patches
436 # with missing trailing newlines or the standard
437 # 'missing newline' flag in the patch - possibly
438 # produced with an old/buggy diff.
439 # slow and safe, we invoke patch once per patchfile
440 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
441 die "Problem applying patches! $!" if $?;
444 # apply changed binary files
445 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
446 foreach my $mod (@modified) {
449 $orig =~ s/\.modified$//; # lazy
450 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
451 #print "rsync -p '$mod' '$orig'";
452 `rsync -p $mod ./$orig`;
453 die "Problem applying binary changes! $!" if $?;
458 `rsync --archive --exclude '.git' --exclude '.arch-ids' --exclude '{arch}' $tmp/changeset/new-files-archive/* ./`;
460 # deleted files are hinted from the commitlog processing
462 `rm -fr $tmp/changeset`;
467 # A log entry looks like
468 # Revision: moodle-org--moodle--1.3.3--patch-15
469 # Archive: arch-eduforge@catalyst.net.nz--2004
470 # Creator: Penny Leach <penny@catalyst.net.nz>
471 # Date: Wed May 25 14:15:34 NZST 2005
472 # Standard-date: 2005-05-25 02:15:34 GMT
473 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
474 # lang/de/.arch-ids/block_html.php.id
475 # New-directories: lang/de/help/questionnaire
476 # lang/de/help/questionnaire/.arch-ids
477 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
478 # db_sears.sql db/db_sears.sql
479 # Removed-files: lang/be/docs/.arch-ids/release.html.id
480 # lang/be/docs/.arch-ids/releaseold.html.id
481 # Modified-files: admin/cron.php admin/delete.php
482 # admin/editor.html backup/lib.php backup/restore.php
483 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
484 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
487 # Updating yadda tadda tadda madda
492 my (@add, @del, @mod, @ren, @kw, $sum, $msg );
494 if ($log =~ m/(?:\n|^)New-files:(.*?)(?=\n\w)/s ) {
496 @add = split(m/\s+/s, $files);
499 if ($log =~ m/(?:\n|^)Removed-files:(.*?)(?=\n\w)/s ) {
501 @del = split(m/\s+/s, $files);
504 if ($log =~ m/(?:\n|^)Modified-files:(.*?)(?=\n\w)/s ) {
506 @mod = split(m/\s+/s, $files);
509 if ($log =~ m/(?:\n|^)Renamed-files:(.*?)(?=\n\w)/s ) {
511 @ren = split(m/\s+/s, $files);
515 if ($log =~ m/^Summary:(.+?)$/m ) {
522 if ($log =~ m/\n\n(.+)$/s) {
530 foreach my $ref ( (\@add, \@del, \@mod, \@ren) ) {
532 while (my $t = pop @$ref) {
533 next unless length ($t);
534 next if $t =~ m!\{arch\}/!;
535 next if $t =~ m!\.arch-ids/!;
536 next if $t =~ m!\.arch-inventory$!;
537 push (@tmp, shell_quote($t));
542 #print Dumper [$sum, $msg, \@add, \@del, \@mod, \@ren];
543 return ($sum, $msg, \@add, \@del, \@mod, \@ren);
548 my ($tag, $commit) = @_;
550 $tag = shell_quote($tag);
553 open(C,">.git/refs/tags/$tag")
554 or die "Cannot create tag $tag: $!\n";
556 or die "Cannot write tag $tag: $!\n";
558 or die "Cannot write tag $tag: $!\n";
559 print "Created tag '$tag' on '$commit'\n" if $opt_v;
561 open(C,"<.git/refs/tags/$tag")
562 or die "Cannot read tag $tag: $!\n";
565 die "Error reading tag $tag: $!\n" unless length $commit == 40;
567 or die "Cannot read tag $tag: $!\n";
572 # write/read a private tag
573 # reads fail softly if the tag isn't there
575 my ($tag, $commit) = @_;
577 $tag = shell_quote($tag);
579 unless (-d '.git/archimport/tags') {
580 mkpath('.git/archimport/tags');
583 if ($commit) { # write
584 open(C,">.git/archimport/tags/$tag")
585 or die "Cannot create tag $tag: $!\n";
587 or die "Cannot write tag $tag: $!\n";
589 or die "Cannot write tag $tag: $!\n";
591 # if the tag isn't there, return 0
592 unless ( -s ".git/archimport/tags/$tag") {
595 open(C,"<.git/archimport/tags/$tag")
596 or die "Cannot read tag $tag: $!\n";
599 die "Error reading tag $tag: $!\n" unless length $commit == 40;
601 or die "Cannot read tag $tag: $!\n";