3 # This tool is copyright (c) 2005, Martin Langhoff.
4 # It is released under the Gnu Public License, version 2.
6 # The basic idea is to walk the output of tla abrowse,
7 # fetch the changesets and apply them.
12 git-archimport [ -h ] [ -v ] [ -T ] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14 Imports a project from one or more Arch repositories. It will follow branches
15 and repositories within the namespaces defined by the <archive/branch>
16 parameters suppplied. If it cannot find the remote branch a merge comes from
17 it will just import it as a regular commit. If it can find it, it will mark it
18 as a merge whenever possible.
20 See man (1) git-archimport for more details.
24 - create tag objects instead of ref tags
25 - audit shell-escaping of filenames
26 - hide our private tags somewhere smarter
27 - find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
28 - sort and apply patches by graphing ancestry relations instead of just
29 relying in dates supplied in the changeset itself.
30 tla ancestry-graph -m could be helpful here...
34 Add print in front of the shell commands invoked via backticks.
38 There are several places where Arch and git terminology are intermixed
39 and potentially confused.
41 The notion of a "branch" in git is approximately equivalent to
42 a "archive/category--branch--version" in Arch. Also, it should be noted
43 that the "--branch" portion of "archive/category--branch--version" is really
44 optional in Arch although not many people (nor tools!) seem to know this.
45 This means that "archive/category--version" is also a valid "branch"
48 We always refer to Arch names by their fully qualified variant (which
49 means the "archive" name is prefixed.
51 For people unfamiliar with Arch, an "archive" is the term for "repository",
52 and can contain multiple, unrelated branches.
59 use File::Temp qw(tempdir);
60 use File::Path qw(mkpath rmtree);
61 use File::Basename qw(basename dirname);
62 use Data::Dumper qw/ Dumper /;
65 $SIG{'PIPE'}="IGNORE";
68 my $git_dir = $ENV{"GIT_DIR"} || ".git";
69 $ENV{"GIT_DIR"} = $git_dir;
70 my $ptag_dir = "$git_dir/archimport/tags";
72 our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
76 Usage: ${\basename $0} # fetch/update GIT from Arch
77 [ -f ] [ -o ] [ -h ] [ -v ] [ -T ] [ -a ] [ -D depth ] [ -t tempdir ]
78 repository/arch-branch [ repository/arch-branch] ...
83 getopts("fThvat:D:") or usage();
86 @ARGV >= 1 or usage();
88 # values associated with keys:
89 # =1 - Arch version / git 'branch' detected via abrowse on a limit
90 # >1 - Arch version / git 'branch' of an auxilliary branch we've merged
91 my %arch_branches = map { $_ => 1 } @ARGV;
93 $ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
94 my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
95 $opt_v && print "+ Using $tmp as temporary directory\n";
97 my %reachable = (); # Arch repositories we can access
98 my %unreachable = (); # Arch repositories we can't access :<
99 my @psets = (); # the collection
100 my %psets = (); # the collection, by name
101 my %stats = ( # Track which strategy we used to import:
102 get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
103 simple_changeset => 0, import_or_tag => 0
106 my %rptags = (); # my reverse private tags
107 # to map a SHA1 to a commitid
108 my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
112 while (my ($limit, $level) = each %arch_branches) {
113 next unless $level == $stage;
115 open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
116 or die "Problems with tla abrowse: $!";
118 my %ps = (); # the current one
124 # first record padded w 8 spaces
126 my ($id, $type) = split(m/\s+/, $_, 2);
129 # store the record we just captured
130 if (%ps && !exists $psets{ $ps{id} }) {
131 %last_ps = %ps; # break references
132 push (@psets, \%last_ps);
133 $psets{ $last_ps{id} } = \%last_ps;
136 my $branch = extract_versionname($id);
137 %ps = ( id => $id, branch => $branch );
138 if (%last_ps && ($last_ps{branch} eq $branch)) {
139 $ps{parent_id} = $last_ps{id};
142 $arch_branches{$branch} = 1;
145 # deal with types (should work with baz or tla):
146 if ($type =~ m/\(.*changeset\)/) {
148 } elsif ($type =~ /\(.*import\)/) {
150 } elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
152 # read which revision we've tagged when we parse the log
155 warn "Unknown type $type";
158 $arch_branches{$branch} = 1;
160 } elsif (s/^\s{10}//) {
161 # 10 leading spaces or more
162 # indicate commit metadata
165 if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
168 } elsif ($_ eq 'merges in:') {
170 $lastseen = 'merges';
171 } elsif ($lastseen eq 'merges' && s/^\s{2}//) {
173 push (@{$ps{merges}}, $id);
175 # aggressive branch finding:
177 my $branch = extract_versionname($id);
178 my $repo = extract_reponame($branch);
180 if (archive_reachable($repo) &&
181 !defined $arch_branches{$branch}) {
182 $arch_branches{$branch} = $stage + 1;
186 warn "more metadata after merges!?: $_\n" unless /^\s*$/;
191 if (%ps && !exists $psets{ $ps{id} }) {
192 my %temp = %ps; # break references
193 if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
194 $temp{parent_id} = $psets[$#psets]{id};
196 push (@psets, \%temp);
197 $psets{ $temp{id} } = \%temp;
200 close ABROWSE or die "$TLA abrowse failed on $limit\n";
202 } # end foreach $root
207 while ($depth <= $opt_D) {
212 ## Order patches by time
213 # FIXME see if we can find a more optimal way to do this by graphing
214 # the ancestry data and walking it, that way we won't have to rely on
215 # client-supplied dates
216 @psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
218 #print Dumper \@psets;
221 ## TODO cleanup irrelevant patches
222 ## and put an initial import
225 unless (-d $git_dir) { # initial import
226 if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
227 print "Starting import from $psets[0]{id}\n";
232 die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
234 } else { # progressing an import
236 opendir(DIR, $ptag_dir)
237 || die "can't opendir: $!";
238 while (my $file = readdir(DIR)) {
239 # skip non-interesting-files
240 next unless -f "$ptag_dir/$file";
242 # convert first '--' to '/' from old git-archimport to use
243 # as an archivename/c--b--v private tag
247 print STDERR "converting old tag $oldfile to $file\n";
248 rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
250 my $sha = ptag($file);
252 $rptags{$sha} = $file;
258 # extract the Arch repository name (Arch "archive" in Arch-speak)
259 sub extract_reponame {
260 my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
261 return (split(/\//, $fq_cvbr))[0];
264 sub extract_versionname {
266 $name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
270 # convert a fully-qualified revision or version to a unique dirname:
271 # normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
272 # becomes: normalperson@yhbt.net-05,mpd--uclinux--1
274 # the git notion of a branch is closer to
275 # archive/category--branch--version than archive/category--branch, so we
276 # use this to convert to git branch names.
277 # Also, keep archive names but replace '/' with ',' since it won't require
278 # subdirectories, and is safer than swapping '--' which could confuse
279 # reverse-mapping when dealing with bastard branches that
280 # are just archive/category--version (no --branch)
282 my $revision = shift;
283 my $name = extract_versionname($revision);
288 # old versions of git-archimport just use the <category--branch> part:
289 sub old_style_branchname {
291 my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
296 *git_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
298 sub process_patchset_accurate {
301 # switch to that branch if we're not already in that branch:
302 if (-e "$git_dir/refs/heads/$ps->{branch}") {
303 system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
305 # remove any old stuff that got leftover:
306 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
307 rmtree(split(/\0/,$rm)) if $rm;
310 # Apply the import/changeset/merge into the working tree
311 my $dir = sync_to_ps($ps);
312 # read the new log entry:
313 my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
314 die "Error in cat-log: $!" if $?;
317 # grab variables we want from the log, new fields get added to $ps:
318 # (author, date, email, summary, message body ...)
319 parselog($ps, \@commitlog);
321 if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
322 # this should work when importing continuations
323 if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
325 # find where we are supposed to branch from
326 system('git-checkout','-f','-b',$ps->{branch},
327 $branchpoint) == 0 or die "$! $?\n";
329 # remove any old stuff that got leftover:
330 my $rm = safe_pipe_capture('git-ls-files','--others','-z');
331 rmtree(split(/\0/,$rm)) if $rm;
333 # If we trust Arch with the fact that this is just
334 # a tag, and it does not affect the state of the tree
335 # then we just tag and move on
336 tag($ps->{id}, $branchpoint);
337 ptag($ps->{id}, $branchpoint);
338 print " * Tagged $ps->{id} at $branchpoint\n";
341 warn "Tagging from unknown id unsupported\n" if $ps->{tag};
343 # allow multiple bases/imports here since Arch supports cherry-picks
344 # from unrelated trees
347 # update the index with all the changes we got
348 system('git-ls-files --others -z | '.
349 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
350 system('git-ls-files --deleted -z | '.
351 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
352 system('git-ls-files -z | '.
353 'git-update-index -z --stdin') == 0 or die "$! $?\n";
357 # the native changeset processing strategy. This is very fast, but
358 # does not handle permissions or any renames involving directories
359 sub process_patchset_fast {
362 # create the branch if needed
364 if ($ps->{type} eq 'i' && !$import) {
365 die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
368 unless ($import) { # skip for import
369 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
370 # we know about this branch
371 system('git-checkout',$ps->{branch});
373 # new branch! we need to verify a few things
374 die "Branch on a non-tag!" unless $ps->{type} eq 't';
375 my $branchpoint = ptag($ps->{tag});
376 die "Tagging from unknown id unsupported: $ps->{tag}"
379 # find where we are supposed to branch from
380 system('git-checkout','-b',$ps->{branch},$branchpoint);
382 # If we trust Arch with the fact that this is just
383 # a tag, and it does not affect the state of the tree
384 # then we just tag and move on
385 tag($ps->{id}, $branchpoint);
386 ptag($ps->{id}, $branchpoint);
387 print " * Tagged $ps->{id} at $branchpoint\n";
394 # Apply the import/changeset/merge into the working tree
396 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
397 apply_import($ps) or die $!;
398 $stats{import_or_tag}++;
400 } elsif ($ps->{type} eq 's') {
402 $stats{simple_changeset}++;
406 # prepare update git's index, based on what arch knows
407 # about the pset, resolve parents, etc
410 my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
411 die "Error in cat-archive-log: $!" if $?;
413 parselog($ps,\@commitlog);
415 # imports don't give us good info
416 # on added files. Shame on them
417 if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
418 system('git-ls-files --others -z | '.
419 'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
420 system('git-ls-files --deleted -z | '.
421 'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
424 # TODO: handle removed_directories and renamed_directories:
426 if (my $add = $ps->{new_files}) {
428 my @slice = splice(@$add, 0, 100);
429 system('git-update-index','--add','--',@slice) == 0 or
430 die "Error in git-update-index --add: $! $?\n";
434 if (my $del = $ps->{removed_files}) {
437 my @slice = splice(@$del, 0, 100);
438 system('git-update-index','--remove','--',@slice) == 0 or
439 die "Error in git-update-index --remove: $! $?\n";
443 if (my $ren = $ps->{renamed_files}) { # renamed
445 die "Odd number of entries in rename!?";
449 my $from = shift @$ren;
450 my $to = shift @$ren;
452 unless (-d dirname($to)) {
453 mkpath(dirname($to)); # will die on err
455 # print "moving $from $to";
456 rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
457 system('git-update-index','--remove','--',$from) == 0 or
458 die "Error in git-update-index --remove: $! $?\n";
459 system('git-update-index','--add','--',$to) == 0 or
460 die "Error in git-update-index --add: $! $?\n";
464 if (my $mod = $ps->{modified_files}) {
466 my @slice = splice(@$mod, 0, 100);
467 system('git-update-index','--',@slice) == 0 or
468 die "Error in git-update-index: $! $?\n";
471 return 1; # we successfully applied the changeset
475 print "Will import patchsets using the fast strategy\n",
476 "Renamed directories and permission changes will be missed\n";
477 *process_patchset = *process_patchset_fast;
479 print "Using the default (accurate) import strategy.\n",
480 "Things may be a bit slow\n";
481 *process_patchset = *process_patchset_accurate;
484 foreach my $ps (@psets) {
486 $ps->{branch} = git_branchname($ps->{id});
489 # ensure we have a clean state
491 if (my $dirty = `git-diff-files`) {
492 die "Unclean tree when about to process $ps->{id} " .
493 " - did we fail to commit cleanly before?\n$dirty";
498 # skip commits already in repo
500 if (ptag($ps->{id})) {
501 $opt_v && print " * Skipping already imported: $ps->{id}\n";
505 print " * Starting to work on $ps->{id}\n";
507 process_patchset($ps) or next;
509 # warn "errors when running git-update-index! $!";
510 my $tree = `git-write-tree`;
511 die "cannot write tree $!" if $?;
518 if ( -e "$git_dir/refs/heads/$ps->{branch}") {
519 if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
525 if ($ps->{type} eq 's') {
526 warn "Could not find the right head for the branch $ps->{branch}";
532 push @par, find_parents($ps);
536 # Commit, tag and clean state
539 $ENV{GIT_AUTHOR_NAME} = $ps->{author};
540 $ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
541 $ENV{GIT_AUTHOR_DATE} = $ps->{date};
542 $ENV{GIT_COMMITTER_NAME} = $ps->{author};
543 $ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
544 $ENV{GIT_COMMITTER_DATE} = $ps->{date};
546 my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
548 print WRITER $ps->{summary},"\n";
549 print WRITER $ps->{message},"\n";
551 # make it easy to backtrack and figure out which Arch revision this was:
552 print WRITER 'git-archimport-id: ',$ps->{id},"\n";
555 my $commitid = <READER>; # read
558 waitpid $pid,0; # close;
560 if (length $commitid != 40) {
561 die "Something went wrong with the commit! $! $commitid";
566 open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
567 print HEAD $commitid;
569 system('git-update-ref', 'HEAD', "$ps->{branch}");
572 ptag($ps->{id}, $commitid); # private tag
573 if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
574 tag($ps->{id}, $commitid);
576 print " * Committed $ps->{id}\n";
577 print " + tree $tree\n";
578 print " + commit $commitid\n";
579 $opt_v && print " + commit date is $ps->{date} \n";
580 $opt_v && print " + parents: ",join(' ',@par),"\n";
581 if (my $dirty = `git-diff-files`) {
582 die "22 Unclean tree when about to process $ps->{id} " .
583 " - did we fail to commit cleanly before?\n$dirty";
588 foreach (sort keys %stats) {
589 print" $_: $stats{$_}\n";
594 # used by the accurate strategy:
597 my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
599 $opt_v && print "sync_to_ps($ps->{id}) method: ";
602 if ($ps->{type} eq 't') {
603 $opt_v && print "get (tag)\n";
604 # looks like a tag-only or (worse,) a mixed tags/changeset branch,
605 # can't rely on replay to work correctly on these
607 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
610 my $tree_id = arch_tree_id($tree_dir);
611 if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
612 # the common case (hopefully)
613 $opt_v && print "replay\n";
614 safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
617 # getting one tree is usually faster than getting two trees
618 # and applying the delta ...
620 $opt_v && print "apply-delta\n";
621 safe_pipe_capture($TLA,'get','--no-pristine',
622 $ps->{id},$tree_dir);
628 $opt_v && print "get (new tree)\n";
629 safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
633 # added -I flag to rsync since we're going to fast! AIEEEEE!!!!
634 system('rsync','-aI','--delete','--exclude',$git_dir,
635 # '--exclude','.arch-inventory',
636 '--exclude','.arch-ids','--exclude','{arch}',
637 '--exclude','+*','--exclude',',*',
638 "$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
644 my $bname = git_branchname($ps->{id});
648 safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
649 die "Cannot get import: $!" if $?;
650 system('rsync','-aI','--delete', '--exclude',$git_dir,
651 '--exclude','.arch-ids','--exclude','{arch}',
652 "$tmp/import/", './');
653 die "Cannot rsync import:$!" if $?;
655 rmtree("$tmp/import");
656 die "Cannot remove tempdir: $!" if $?;
668 safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
669 die "Cannot get changeset: $!" if $?;
672 if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
673 # this can be sped up considerably by doing
674 # (find | xargs cat) | patch
675 # but that cna get mucked up by patches
676 # with missing trailing newlines or the standard
677 # 'missing newline' flag in the patch - possibly
678 # produced with an old/buggy diff.
679 # slow and safe, we invoke patch once per patchfile
680 `find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
681 die "Problem applying patches! $!" if $?;
684 # apply changed binary files
685 if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
686 foreach my $mod (@modified) {
689 $orig =~ s/\.modified$//; # lazy
690 $orig =~ s!^\Q$tmp\E/changeset/patches/!!;
691 #print "rsync -p '$mod' '$orig'";
692 system('rsync','-p',$mod,"./$orig");
693 die "Problem applying binary changes! $!" if $?;
698 system('rsync','-aI','--exclude',$git_dir,
699 '--exclude','.arch-ids',
700 '--exclude', '{arch}',
701 "$tmp/changeset/new-files-archive/",'./');
703 # deleted files are hinted from the commitlog processing
705 rmtree("$tmp/changeset");
710 # notes: *-files/-directories keys cannot have spaces, they're always
711 # pika-escaped. Everything after the first newline
712 # A log entry looks like:
713 # Revision: moodle-org--moodle--1.3.3--patch-15
714 # Archive: arch-eduforge@catalyst.net.nz--2004
715 # Creator: Penny Leach <penny@catalyst.net.nz>
716 # Date: Wed May 25 14:15:34 NZST 2005
717 # Standard-date: 2005-05-25 02:15:34 GMT
718 # New-files: lang/de/.arch-ids/block_glossary_random.php.id
719 # lang/de/.arch-ids/block_html.php.id
720 # New-directories: lang/de/help/questionnaire
721 # lang/de/help/questionnaire/.arch-ids
722 # Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
723 # db_sears.sql db/db_sears.sql
724 # Removed-files: lang/be/docs/.arch-ids/release.html.id
725 # lang/be/docs/.arch-ids/releaseold.html.id
726 # Modified-files: admin/cron.php admin/delete.php
727 # admin/editor.html backup/lib.php backup/restore.php
728 # New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
729 # Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
730 # summary can be multiline with a leading space just like the above fields
733 # Updating yadda tadda tadda madda
738 # headers we want that contain filenames:
743 renamed_directories => 1,
745 removed_directories => 1,
749 while ($_ = shift @$log) {
750 if (/^Continuation-of:\s*(.*)/) {
753 } elsif (/^Summary:\s*(.*)$/ ) {
754 # summary can be multiline as long as it has a leading space
755 $ps->{summary} = [ $1 ];
757 } elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
761 # any *-files or *-directories can be read here:
762 } elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
765 $key =~ tr/-/_/; # too lazy to quote :P
766 if ($want_headers{$key}) {
767 push @{$ps->{$key}}, split(/\s+/, $val);
772 last; # remainder of @$log that didn't get shifted off is message
775 if ($key eq 'summary') {
776 push @{$ps->{$key}}, $1;
777 } else { # files/directories:
778 push @{$ps->{$key}}, split(/\s+/, $1);
787 $ps->{summary} = join("\n",@{$ps->{summary}})."\n";
788 $ps->{message} = join("\n",@$log);
790 # skip Arch control files, unescape pika-escaped files
791 foreach my $k (keys %want_headers) {
792 next unless (defined $ps->{$k});
794 foreach my $t (@{$ps->{$k}}) {
795 next unless length ($t);
796 next if $t =~ m!\{arch\}/!;
797 next if $t =~ m!\.arch-ids/!;
798 # should we skip this?
799 next if $t =~ m!\.arch-inventory$!;
800 # tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
801 # we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
803 $t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
807 $ps->{$k} = \@tmp if scalar @tmp;
813 my ($tag, $commit) = @_;
818 # don't use subdirs for tags yet, it could screw up other porcelains
823 open(C,">","$git_dir/refs/tags/$tag")
824 or die "Cannot create tag $tag: $!\n";
826 or die "Cannot write tag $tag: $!\n";
828 or die "Cannot write tag $tag: $!\n";
829 print " * Created tag '$tag' on '$commit'\n" if $opt_v;
831 open(C,"<","$git_dir/refs/tags/$tag")
832 or die "Cannot read tag $tag: $!\n";
835 die "Error reading tag $tag: $!\n" unless length $commit == 40;
837 or die "Cannot read tag $tag: $!\n";
842 # write/read a private tag
843 # reads fail softly if the tag isn't there
845 my ($tag, $commit) = @_;
847 # don't use subdirs for tags yet, it could screw up other porcelains
850 my $tag_file = "$ptag_dir/$tag";
851 my $tag_branch_dir = dirname($tag_file);
852 mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
854 if ($commit) { # write
855 open(C,">",$tag_file)
856 or die "Cannot create tag $tag: $!\n";
858 or die "Cannot write tag $tag: $!\n";
860 or die "Cannot write tag $tag: $!\n";
861 $rptags{$commit} = $tag
862 unless $tag =~ m/--base-0$/;
864 # if the tag isn't there, return 0
865 unless ( -s $tag_file) {
868 open(C,"<",$tag_file)
869 or die "Cannot read tag $tag: $!\n";
872 die "Error reading tag $tag: $!\n" unless length $commit == 40;
874 or die "Cannot read tag $tag: $!\n";
875 unless (defined $rptags{$commit}) {
876 $rptags{$commit} = $tag;
884 # Identify what branches are merging into me
885 # and whether we are fully merged
886 # git-merge-base <headsha> <headsha> should tell
887 # me what the base of the merge should be
891 my %branches; # holds an arrayref per branch
892 # the arrayref contains a list of
893 # merged patches between the base
894 # of the merge and the current head
896 my @parents; # parents found for this commit
898 # simple loop to split the merges
900 foreach my $merge (@{$ps->{merges}}) {
901 my $branch = git_branchname($merge);
902 unless (defined $branches{$branch} ){
903 $branches{$branch} = [];
905 push @{$branches{$branch}}, $merge;
909 # foreach branch find a merge base and walk it to the
910 # head where we are, collecting the merged patchsets that
911 # Arch has recorded. Keep that in @have
912 # Compare that with the commits on the other branch
913 # between merge-base and the tip of the branch (@need)
914 # and see if we have a series of consecutive patches
915 # starting from the merge base. The tip of the series
916 # of consecutive patches merged is our new parent for
919 foreach my $branch (keys %branches) {
921 # check that we actually know about the branch
922 next unless -e "$git_dir/refs/heads/$branch";
924 my $mergebase = `git-merge-base $branch $ps->{branch}`;
926 # Don't die here, Arch supports one-way cherry-picking
927 # between branches with no common base (or any relationship
929 warn "Cannot find merge base for $branch and $ps->{branch}";
934 # now walk up to the mergepoint collecting what patches we have
935 my $branchtip = git_rev_parse($ps->{branch});
936 my @ancestors = `git-rev-list --merge-order $branchtip ^$mergebase`;
937 my %have; # collected merges this branch has
938 foreach my $merge (@{$ps->{merges}}) {
942 foreach my $par (@ancestors) {
943 $par = commitid2pset($par);
944 if (defined $par->{merges}) {
945 foreach my $merge (@{$par->{merges}}) {
946 $ancestorshave{$merge}=1;
950 # print "++++ Merges in $ps->{id} are....\n";
951 # my @have = sort keys %have; print Dumper(\@have);
953 # merge what we have with what ancestors have
954 %have = (%have, %ancestorshave);
956 # see what the remote branch has - these are the merges we
957 # will want to have in a consecutive series from the mergebase
958 my $otherbranchtip = git_rev_parse($branch);
959 my @needraw = `git-rev-list --merge-order $otherbranchtip ^$mergebase`;
961 foreach my $needps (@needraw) { # get the psets
962 $needps = commitid2pset($needps);
963 # git-rev-list will also
964 # list commits merged in via earlier
965 # merges. we are only interested in commits
966 # from the branch we're looking at
967 if ($branch eq $needps->{branch}) {
968 push @need, $needps->{id};
972 # print "++++ Merges from $branch we want are....\n";
973 # print Dumper(\@need);
976 while (my $needed_commit = pop @need) {
977 if ($have{$needed_commit}) {
978 $newparent = $needed_commit;
980 last; # break out of the while
984 push @parents, $newparent;
988 } # end foreach branch
990 # prune redundant parents
992 foreach my $p (@parents) {
995 foreach my $p (@parents) {
996 next unless exists $psets{$p}{merges};
997 next unless ref $psets{$p}{merges};
998 my @merges = @{$psets{$p}{merges}};
999 foreach my $merge (@merges) {
1000 if ($parents{$merge}) {
1001 delete $parents{$merge};
1007 foreach (keys %parents) {
1008 push @parents, '-p', ptag($_);
1015 my $val = `git-rev-parse $name`;
1016 die "Error: git-rev-parse $name" if $?;
1021 # resolve a SHA1 to a known patchset
1023 my $commitid = shift;
1025 my $name = $rptags{$commitid}
1026 || die "Cannot find reverse tag mapping for $commitid";
1028 my $ps = $psets{$name}
1029 || (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1034 # an alterative to `command` that allows input to be passed as an array
1035 # to work around shell problems with weird characters in arguments
1036 sub safe_pipe_capture {
1038 if (my $pid = open my $child, '-|') {
1039 @output = (<$child>);
1040 close $child or die join(' ',@_).": $! $?";
1042 exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1044 return wantarray ? @output : join('',@output);
1047 # `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1050 chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1054 sub archive_reachable {
1055 my $archive = shift;
1056 return 1 if $reachable{$archive};
1057 return 0 if $unreachable{$archive};
1059 if (system "$TLA whereis-archive $archive >/dev/null") {
1060 if ($opt_a && (system($TLA,'register-archive',
1061 "http://mirrors.sourcecontrol.net/$archive") == 0)) {
1062 $reachable{$archive} = 1;
1065 print STDERR "Archive is unreachable: $archive\n";
1066 $unreachable{$archive} = 1;
1069 $reachable{$archive} = 1;