gitweb: Finish restoring "blob" links in git_difftree_body
[git] / git-svnimport.perl
1 #!/usr/bin/perl -w
2
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to pull and analyze SVN changes.
7 #
8 # Checking out the files is done by a single long-running SVN connection.
9 #
10 # The head revision is on branch "origin" by default.
11 # You can change that with the '-o' option.
12
13 use strict;
14 use warnings;
15 use Getopt::Std;
16 use File::Copy;
17 use File::Spec;
18 use File::Temp qw(tempfile);
19 use File::Path qw(mkpath);
20 use File::Basename qw(basename dirname);
21 use Time::Local;
22 use IO::Pipe;
23 use POSIX qw(strftime dup2);
24 use IPC::Open2;
25 use SVN::Core;
26 use SVN::Ra;
27
# The Subversion Perl bindings must be at least version 1.2.1.  The dotted
# version is compared component by component as numbers; a plain string
# comparison would wrongly rank "1.10.0" below "1.2.1".
{
        my @need = (1, 2, 1);
        my @have = map { /^(\d+)/ ? $1 : 0 } split /\./, $SVN::Core::VERSION;
        my $cmp = 0;
        foreach my $i (0 .. $#need) {
                $cmp = ($have[$i] || 0) <=> $need[$i];
                last if $cmp;
        }
        die "Need SVN::Core 1.2.1 or better" if $cmp < 0;
}

# Ignore SIGPIPE and run with TZ set to UTC.
$SIG{'PIPE'}="IGNORE";
$ENV{'TZ'}="UTC";

# One package variable per command-line switch; filled in by getopts().
our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,
    $opt_b,$opt_r,$opt_I,$opt_A,$opt_s,$opt_l,$opt_d,$opt_D,$opt_S,$opt_F,$opt_P);
35
# Write the command-line synopsis to STDERR and terminate with exit status 1.
sub usage() {
        my $synopsis = <<END;
Usage: ${\basename $0}     # fetch/update GIT from SVN
       [-o branch-for-HEAD] [-h] [-v] [-l max_rev]
       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
       [-d|-D] [-i] [-u] [-r] [-I ignorefilename] [-s start_chg]
       [-m] [-M regex] [-A author_file] [-S] [-F] [-P project_name] [SVN_URL]
END
        print STDERR $synopsis;
        exit(1);
}
46
# Parse the switches; an unknown switch or -h prints the synopsis and exits.
usage() unless getopts("A:b:C:dDFhiI:l:mM:o:rs:t:T:SP:uv");
usage() if $opt_h;

# Names of the tags, trunk and branches directories, with their defaults,
# plus the optional project sub-path (stored with a leading slash).
my $tag_name     = $opt_t ? $opt_t : "tags";
my $trunk_name   = $opt_T ? $opt_T : "trunk";
my $branch_name  = $opt_b ? $opt_b : "branches";
my $project_name = $opt_P ? "/" . $opt_P : "";

# Exactly one or two positional arguments are accepted.
usage() unless @ARGV == 1 || @ARGV == 2;

$opt_o = "origin" unless $opt_o;
$opt_s = 1 unless $opt_s;
my $git_tree = $opt_C || ".";

my ($svn_url, $svn_dir) = @ARGV;

# Patterns applied to commit messages to spot merges (-m adds the built-in
# ones, -M puts a user-supplied pattern in front of them).
our @mergerx = ();
if ($opt_m) {
        my ($branch_esc, $trunk_esc) =
                map { quotemeta($_) } ($branch_name, $trunk_name);
        push @mergerx,
                qr!\b(?:merg(?:ed?|ing))\b.*?\b((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
                qr!\b(?:from|of)\W+((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
                qr!\b(?:from|of)\W+(?:the )?([\w\.\-]+)[-\s]branch\b!i;
}
unshift @mergerx, qr/$opt_M/ if $opt_M;

# The authors file name must be made absolute here, because the script has
# changed directory by the time the file is opened.
$opt_A = File::Spec->rel2abs($opt_A) if $opt_A;
84
# Map from SVN user name to [full name, e-mail address], and the absolute
# name of the file that map was loaded from (undef until read_users() runs).
our %users = ();
our $users_file = undef;

# read_users($file)
#
# Load the author map from $file into %users and remember the absolute file
# name in $users_file.  Each useful line has the form
#     svnuser = Full Name <email@address>
# and lines that do not match are skipped.
# Dies if $file is not a plain file or cannot be opened for reading.
sub read_users($) {
        my ($file) = @_;
        $users_file = File::Spec->rel2abs($file);
        die "Cannot open $users_file\n" unless -f $users_file;
        # Three-argument open with an explicit check: an unreadable file must
        # not be mistaken for an empty author list.
        open(my $authors, '<', $users_file)
                or die "Cannot open $users_file: $!\n";
        # A lexical line variable keeps the caller's $_ intact.
        while (my $line = <$authors>) {
                chomp $line;
                next unless $line =~ /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
                my ($user, $name, $email) = ($1, $2, $3);
                $users{$user} = [$name, $email];
        }
        close($authors);
}
99
# Turn on autoflush for STDERR, then make STDOUT the selected handle again.
select(STDERR);
$| = 1;
select(STDOUT);
101
102
103 package SVNconn;
104 # Basic SVN connection.
105 # We're only interested in connecting and downloading, so ...
106
107 use File::Spec;
108 use File::Temp qw(tempfile);
109 use POSIX qw(strftime dup2);
110 use Fcntl qw(SEEK_SET);
111
# Constructor.  May be invoked on the class or on an existing instance.
# $repo is the repository URL; trailing slashes are removed before it is
# stored as 'fullrep'.  The connection is opened via conn() before the new
# object is returned.
sub new {
        my ($proto, $repo) = @_;
        my $class = ref($proto) || $proto;

        $repo =~ s#/+$##;
        my $self = bless { 'buffer' => "", 'fullrep' => $repo }, $class;
        $self->conn();

        return $self;
}
126
# Open an SVN::Ra session for the URL stored in 'fullrep' and record the
# session ('svn'), the URL ('repo') and the latest revision number
# ('maxrev') on the object.  Dies if no session object is returned.
sub conn {
        my ($self) = @_;
        my $repo = $self->{'fullrep'};

        my $auth = SVN::Core::auth_open([
                SVN::Client::get_simple_provider(),
                SVN::Client::get_ssl_server_trust_file_provider(),
                SVN::Client::get_username_provider(),
        ]);
        my $s = SVN::Ra->new(url => $repo, auth => $auth);
        defined $s
                or die "SVN connection to $repo: $!\n";

        @{$self}{'svn', 'repo'} = ($s, $repo);
        $self->{'maxrev'} = $s->get_latest_revnum();
}
139
# file($path, $rev)
#
# Fetch $path at revision $rev into a temporary file.
# Returns (temp file name, git mode string) where the mode is '100755' for
# svn:executable files, '120000' for svn:special "link " files (the temp
# file then holds only the link target) and '100644' otherwise.
# Returns undef when the fetch fails with "Attempted to get checksum";
# any other fetch error is re-thrown.  Dies on an svn:special file that is
# not a link and when the temp file cannot be written out.
sub file {
        my($self,$path,$rev) = @_;

        my ($fh, $name) = tempfile('gitsvn.XXXXXX',
                    DIR => File::Spec->tmpdir(), UNLINK => 1);

        print "... $rev $path ...\n" if $opt_v;
        my $properties;
        my $pool = SVN::Pool->new();
        eval { (undef, $properties)
                   = $self->{'svn'}->get_file($path,$rev,$fh,$pool); };
        # Save the error before doing anything else: the pool cleanup below
        # may run code that resets $@.
        my $err = $@;
        $pool->clear;
        if($err) {
                # Nothing useful was fetched; do not leave the temp file
                # lying around until the end of the run.
                close($fh);
                unlink $name;
                return undef if $err =~ /Attempted to get checksum/;
                die $err;
        }
        my $mode;
        if (exists $properties->{'svn:executable'}) {
                $mode = '100755';
        } elsif (exists $properties->{'svn:special'}) {
                # Read back what was just fetched; a symlink is stored as
                # the text "link " followed by the target.
                my ($special_content, $filesize);
                $filesize = tell $fh;
                seek $fh, 0, SEEK_SET;
                read $fh, $special_content, $filesize;
                if ($special_content =~ s/^link //) {
                        $mode = '120000';
                        # Replace the file content with the bare target.
                        seek $fh, 0, SEEK_SET;
                        truncate $fh, 0;
                        print $fh $special_content;
                } else {
                        die "unexpected svn:special file encountered";
                }
        } else {
                $mode = '100644';
        }
        # Buffered write errors only show up here.
        close ($fh) or die "Could not write $name: $!\n";

        return ($name, $mode);
}
179
# ignore($path, $rev)
#
# Look up the svn:ignore property of directory $path at revision $rev.
# Returns the name of a temporary file holding the property value, or
# undef when the directory has no svn:ignore property.
# Dies if the temporary file cannot be written.
sub ignore {
        my($self,$path,$rev) = @_;

        print "... $rev $path ...\n" if $opt_v;
        my (undef,undef,$properties)
            = $self->{'svn'}->get_dir($path,$rev,undef);
        return undef unless exists $properties->{'svn:ignore'};

        my ($fh, $name) = tempfile('gitsvn.XXXXXX',
                                   DIR => File::Spec->tmpdir(),
                                   UNLINK => 1);
        # A short or failed write must not be imported as a truncated
        # ignore file, so both print and close are checked.
        print $fh $properties->{'svn:ignore'}
                or die "Could not write $name: $!\n";
        close($fh)
                or die "Could not write $name: $!\n";
        return $name;
}
197
# dir_list($path, $rev)
#
# Return the first value get_dir() yields for directory $path at revision
# $rev (the directory entries); the remaining values are discarded.
sub dir_list {
        my ($self, $path, $rev) = @_;
        my @reply = $self->{'svn'}->get_dir($path, $rev, undef);
        return $reply[0];
}
204
205 package main;
206 use URI;
207
# Open two independent connections to the same location: the base URL,
# extended by the optional sub-directory argument.
our $svn = defined $svn_dir ? "$svn_url/$svn_dir" : $svn_url;
my $svn2 = SVNconn->new($svn);
$svn     = SVNconn->new($svn);

# With -d or -D, prepare a user agent for fetching files directly; this is
# only set up for plain "http" URLs.
my $lwp_ua;
if ($opt_d || $opt_D) {
        $svn_url = URI->new($svn_url)->canonical;
        $svn_dir = "" unless $opt_D;
        $svn_dir =~ s#/*$#/# if $opt_D;

        if ($svn_url->scheme ne "http") {
                print STDERR "Warning: not HTTP; turning off direct file access\n";
                $opt_d=0;
        } else {
                use LWP::UserAgent;
                $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
        }
}
229
# pdate($date)
#
# Convert a timestamp containing "YYYY-MM-DDTHH:MM:SS" into seconds since
# the epoch, interpreting the fields as UTC.  Anything after the seconds is
# ignored.  Dies with "Unparseable date" when the pattern is not found.
sub pdate($) {
        my($d) = @_;
        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
                or die "Unparseable date: $d\n";
        my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
        # Hand the four-digit year to timegm() unchanged.  Subtracting 1900
        # would turn 1901-1999 into a two-digit year, which Time::Local
        # resolves with a rolling century and may place in the 2000s.
        return timegm($sec, $min, $hour, $mday, $mon - 1, $year);
}
237
# Return the current working directory as printed by the external `pwd`
# command, without the trailing newline.
sub getwd() {
        chomp(my $cwd = `pwd`);
        return $cwd;
}
243
244
# get_headref($name, $git_dir)
#
# Read the commit id stored in $git_dir/refs/heads/$name.
# Returns the 40-character id, or undef when the ref file cannot be opened.
# Dies when the file exists but its first line is not 40 characters long.
sub get_headref($$) {
    my ($name, $git_dir) = @_;
    my $sha;

    # Three-argument open on a lexical handle: the branch name is never
    # parsed as an open mode, and no global bareword handle is disturbed.
    if (open(my $ref_fh, '<', "$git_dir/refs/heads/$name")) {
        $sha = <$ref_fh>;
        close($ref_fh);
        $sha = '' unless defined $sha;   # empty ref file
        chomp($sha);
        length($sha) == 40
            or die "Cannot get head id for $name ($sha): $!\n";
    }
    return $sha;
}
258
259
# Make sure the target directory exists, then work from inside it.
-d $git_tree
        or mkdir($git_tree,0777)
        or die "Could not create $git_tree: $!";
# Everything below uses relative paths; carrying on after a failed chdir
# would operate on whatever directory the script was started in.
chdir($git_tree)
        or die "Could not change to $git_tree: $!";
264
# Bookkeeping for the import run.
my $orig_branch    = "";
my $forward_master = 0;
my %branches;
my $maxnum         = 0;
my $last_rev       = "";
my $last_branch;
my $current_rev    = $opt_s || 1;

# Absolute location of the git directory, exported through GIT_DIR.
my $git_dir = $ENV{"GIT_DIR"} || ".git";
if ($git_dir !~ m#^/#) {
        $git_dir = getwd()."/".$git_dir;
}
$ENV{"GIT_DIR"} = $git_dir;

# Remember any index file the caller had configured, then point
# GIT_INDEX_FILE at a fresh temporary index.
my $orig_git_index = exists $ENV{GIT_INDEX_FILE} ? $ENV{GIT_INDEX_FILE} : undef;
my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
                                    DIR => File::Spec->tmpdir());
close ($git_ih);
$ENV{GIT_INDEX_FILE} = $git_index;
# Either create a brand-new repository, or pick up where a previous import
# into an existing repository stopped.
if (! -d $git_dir) {
        system("git-init-db");
        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
        system("git-read-tree");
        die "Cannot init an empty tree: $?\n" if $?;

        $last_branch = $opt_o;
        $orig_branch = "";
} else {
        -f "$git_dir/refs/heads/$opt_o"
                or die "Branch '$opt_o' does not exist.\n".
                       "Either use the correct '-o branch' option,\n".
                       "or import to a new repository.\n";

        -f "$git_dir/svn2git"
                or die "'$git_dir/svn2git' does not exist.\n".
                       "You need that file for incremental imports.\n";
        # The branch HEAD currently points at becomes the starting branch.
        open(F, "git-symbolic-ref HEAD |") or
                die "Cannot run git-symbolic-ref: $!\n";
        chomp ($last_branch = <F>);
        $last_branch = basename($last_branch);
        close(F);
        unless($last_branch) {
                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
                $last_branch = "master";
        }
        $orig_branch = $last_branch;
        $last_rev = get_headref($orig_branch, $git_dir);
        # A leftover SVN2GIT_HEAD file stops the run until it is removed.
        if (-f "$git_dir/SVN2GIT_HEAD") {
                die <<EOM;
SVN2GIT_HEAD exists.
Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
You may need to run

    git-read-tree -m -u SVN2GIT_HEAD HEAD
EOM
        }
        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");

        # True when 'master' exists and is byte-identical to the import
        # branch (and the import branch is not 'master' itself).
        $forward_master =
            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
            system('cmp', '-s', "$git_dir/refs/heads/master",
                                "$git_dir/refs/heads/$opt_o") == 0;

        # populate index
        system('git-read-tree', $last_rev);
        die "read-tree failed: $?\n" if $?;

        # Get the last import timestamps
        # Each line of svn2git is "<number> <branch> <ref>".  The file must
        # be readable: silently skipping it would restart the import from
        # revision $opt_s with no branch history.
        open my $B,"<", "$git_dir/svn2git"
                or die "Cannot read $git_dir/svn2git: $!\n";
        while(<$B>) {
                chomp;
                my($num,$branch,$ref) = split;
                $branches{$branch}{$num} = $ref;
                $branches{$branch}{"LAST"} = $ref;
                $current_rev = $num+1 if $current_rev <= $num;
        }
        close($B);
}
-d $git_dir
        or die "Could not create git subdir ($git_dir).\n";
343
# Author map: an explicit -A file is loaded and copied into the git
# directory; otherwise the copy saved there earlier is used when present.
my $default_authors = "$git_dir/svn-authors";
if ($opt_A) {
        read_users($opt_A);
        copy($opt_A,$default_authors) or die "Copy failed: $!";
} else {
        read_users($default_authors) if -f $default_authors;
}

# svn2git is opened for appending and is required for incremental imports,
# so failing to open it is fatal rather than silently ignored.
open BRANCHES,">>", "$git_dir/svn2git"
        or die "Cannot append to $git_dir/svn2git: $!\n";
353
# node_kind($svnpath, $revision)
#
# Ask the main SVN connection what kind of node $svnpath is at $revision
# and return whatever check_path() reports.  A scratch pool is created for
# the call and cleared afterwards.
sub node_kind($$) {
        my ($svnpath, $revision) = @_;
        my $scratch = SVN::Pool->new;
        my $kind = $svn->{'svn'}->check_path($svnpath, $revision, $scratch);
        $scratch->clear;
        return $kind;
}
361
# get_file($svnpath, $rev, $path)
#
# Fetch $svnpath at revision $rev (directly over HTTP when $opt_d is set,
# through the SVN connection otherwise), store the content as a git blob
# with git-hash-object, and remove the temporary file.
# Returns [mode, sha, $path], or undef when the file could not be fetched
# (HTTP 301, or the SVN connection returned nothing).
# Dies on other HTTP errors and when git-hash-object fails.
sub get_file($$$) {
        my($svnpath,$rev,$path) = @_;

        # now get it
        my ($name,$mode);
        if($opt_d) {
                my($req,$res);

                # /svn/!svn/bc/2/django/trunk/django-docs/build.py
                my $url=$svn_url->clone();
                $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
                print "... $path...\n" if $opt_v;
                $req = HTTP::Request->new(GET => $url);
                $res = $lwp_ua->request($req);
                if ($res->is_success) {
                        my $fh;
                        ($fh, $name) = tempfile('gitsvn.XXXXXX',
                        DIR => File::Spec->tmpdir(), UNLINK => 1);
                        print $fh $res->content;
                        close($fh) or die "Could not write $name: $!\n";
                } else {
                        return undef if $res->code == 301; # directory?
                        die $res->status_line." at $url\n";
                }
                $mode = '0644'; # can't obtain mode via direct http request?
        } else {
                ($name,$mode) = $svn->file("$svnpath",$rev);
                return undef unless defined $name;
        }

        # Fork; the child becomes git-hash-object and the parent reads the
        # object id from its standard output.
        my $pid = open(my $F, '-|');
        die $! unless defined $pid;
        if (!$pid) {
            exec("git-hash-object", "-w", $name)
                or die "Cannot create object: $!\n";
        }
        my $sha = <$F>;
        $sha = '' unless defined $sha;
        chomp $sha;
        # close() on a piped handle reports the child's exit status.
        my $hashed = close $F;
        unlink $name;
        # Stop here instead of handing an empty or bogus id to
        # git-update-index later on.
        die "Cannot create object for $path: git-hash-object failed\n"
                unless $hashed && length($sha) == 40;
        return [$mode, $sha, $path];
}
404
# get_ignore($new, $old, $rev, $path, $svnpath)
#
# Does nothing unless $opt_I (the ignore file name) is set.  Otherwise the
# svn:ignore property of $svnpath at $rev is turned into a git blob and
# ['0644', sha, ignore file path] is pushed onto @$new.  When the directory
# has no svn:ignore property and $old is defined, the ignore file path is
# pushed onto @$old instead.
# Dies when git-hash-object fails.
sub get_ignore($$$$$) {
        my($new,$old,$rev,$path,$svnpath) = @_;

        return unless $opt_I;
        my $name = $svn->ignore("$svnpath",$rev);
        # The ignore file sits directly in $path; for the root that is just
        # the bare file name.
        if ($path eq '/') {
                $path = $opt_I;
        } else {
                $path = File::Spec->catfile($path,$opt_I);
        }
        if (defined $name) {
                # Fork; the child becomes git-hash-object and the parent
                # reads the object id from its standard output.
                my $pid = open(my $F, '-|');
                die $! unless defined $pid;
                if (!$pid) {
                        exec("git-hash-object", "-w", $name)
                            or die "Cannot create object: $!\n";
                }
                my $sha = <$F>;
                $sha = '' unless defined $sha;
                chomp $sha;
                # close() on a piped handle reports the child's exit status.
                my $hashed = close $F;
                unlink $name;
                die "Cannot create object for $path: git-hash-object failed\n"
                        unless $hashed && length($sha) == 40;
                push(@$new,['0644',$sha,$path]);
        } elsif (defined $old) {
                push(@$old,$path);
        }
}
431
# project_path($path, $project)
#
# Relate $path (a leading slash is added when missing) to the project
# prefix $project:
#   - $path lies below "$project/": returns the part after that prefix;
#   - $project itself starts with $path: returns "/";
#   - otherwise: returns undef.
# Both strings are compared literally; characters such as '.', '+' or '('
# in either of them have no special meaning.
sub project_path($$)
{
        my ($path, $project) = @_;

        $path = "/".$path unless ($path =~ m#^/#) ;
        return $1 if ($path =~ m#^\Q$project\E/(.*)$#);

        return "/" if ($project =~ m#^\Q$path\E#);

        return undef;
}
445
# split_path($rev, $path)
#
# Split an SVN path into (branch, path-within-branch).
# The branch is "/<name>" for a path under the tags directory, "/" for the
# trunk and "<name>" for a path under the branches directory.  The rest of
# the path becomes "/" when empty, and is passed through project_path()
# when a project name is configured.
# For any other path an empty list is returned, after a note on STDERR
# unless the path is "/", the tags directory or the branches directory.
sub split_path($$) {
        my ($rev, $path) = @_;
        my $branch;

        if ($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
                $branch = "/$1";
        }
        elsif ($path =~ s#^/\Q$trunk_name\E/?##) {
                $branch = "/";
        }
        elsif ($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
                $branch = $1;
        }
        else {
                my @quiet = ("/", "/$tag_name", "/$branch_name");
                my $expected = grep { $path eq $_ } @quiet;
                print STDERR "$rev: Unrecognized path: $path\n" unless $expected;
                return ();
        }

        $path = $path eq ""   ? "/"
              : $project_name ? project_path($path, $project_name)
              :                 $path;
        return ($branch, $path);
}
472
# branch_rev($srcbranch, $uptorev)
#
# Among the revision numbers recorded for $srcbranch in %branches (the
# 'LAST' entry is not a revision and is skipped), return the highest one
# that does not exceed $uptorev, or undef when there is none.
sub branch_rev($$) {
        my ($srcbranch, $uptorev) = @_;

        my $bbranches = $branches{$srcbranch};
        my @numbered = grep { $_ ne 'LAST' } keys %$bbranches;
        # Walk from the newest revision down and stop at the first fit.
        foreach my $arev (reverse sort { $a <=> $b } @numbered) {
                return $arev if $arev <= $uptorev;
        }
        return undef;
}
489
# Declared ahead of the definition because the function calls itself.
sub expand_svndir($$$);

# expand_svndir($svnpath, $rev, $path)
#
# Recursively collect index entries for everything below the SVN directory
# $svnpath at revision $rev, placing them under $path.  The list starts
# with whatever get_ignore() adds for the directory; files contribute the
# entry returned by get_file() and sub-directories are expanded in turn.
sub expand_svndir($$$)
{
        my ($svnpath, $rev, $path) = @_;
        my @entries;
        get_ignore(\@entries, undef, $rev, $path, $svnpath);

        my $dirents = $svn->dir_list($svnpath, $rev);
        for my $child (keys %$dirents) {
                my $svnchild = $svnpath.'/'.$child;
                my $gitchild = $path.'/'.$child;
                my $kind = node_kind($svnchild, $rev);
                if ($kind eq $SVN::Node::file) {
                        my $f = get_file($svnchild, $rev, $gitchild);
                        push(@entries, $f) if $f;
                }
                elsif ($kind eq $SVN::Node::dir) {
                        push(@entries,
                             expand_svndir($svnchild, $rev, $gitchild));
                }
        }
        return @entries;
}
510
# copy_path($newrev, $newbranch, $path, $oldpath, $rev, $node_kind,
#           $new, $parents)
#
# Handle an SVN copy of $oldpath@$rev to $path: look up the matching
# entries in the already-imported git tree and push them, re-rooted at
# $path, onto @$new as [mode, sha1, path].
# When the source cannot be split into branch and path, the content is
# fetched from SVN through expand_svndir() instead.  When the source lives
# on a different branch, that branch's 'LAST' commit is pushed onto
# @$parents.
sub copy_path($$$$$$$$) {
        my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;

        my($srcbranch,$srcpath) = split_path($rev,$oldpath);
        if (!defined $srcbranch || !defined $srcpath) {
                print "Path not found when copying from $oldpath @ $rev.\n".
                        "Will try to copy from original SVN location...\n"
                        if $opt_v;
                push (@$new, expand_svndir($oldpath, $rev, $path));
                return;
        }

        # Find the imported commit that corresponds to the copy source.
        my $therev = branch_rev($srcbranch, $rev);
        my $gitrev = $branches{$srcbranch}{$therev};
        if (!$gitrev) {
                print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
                return;
        }

        push(@$parents, $branches{$srcbranch}{'LAST'})
                if $srcbranch ne $newbranch;
        print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;

        # A directory source is normalised to end in exactly one slash.
        my $copying_dir = ($node_kind eq $SVN::Node::dir);
        $srcpath =~ s#/*$#/# if $copying_dir;

        # Fork; the child lists the source tree, NUL-separated.
        my $pid = open my $f,'-|';
        die $! unless defined $pid;
        unless ($pid) {
                exec("git-ls-tree","-r","-z",$gitrev,$srcpath)
                        or die $!;
        }
        local $/ = "\0";
        while(<$f>) {
                chomp;
                # Each record is "<mode> <type> <sha1>\t<name>".
                my($m,$p) = split(/\t/,$_,2);
                my($mode,$type,$sha1) = split(/ /,$m);
                next if $type ne "blob";
                # Swap the source prefix for the destination path.
                $p = $copying_dir
                        ? $path . substr($p,length($srcpath)-1)
                        : $path;
                push(@$new,[$mode,$sha1,$p]);
        }
        close($f) or
                print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
}
562
563 sub commit {
564         my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
565         my($committer_name,$committer_email,$dest);
566         my($author_name,$author_email);
567         my(@old,@new,@parents);
568
569         if (not defined $author or $author eq "") {
570                 $committer_name = $committer_email = "unknown";
571         } elsif (defined $users_file) {
572                 die "User $author is not listed in $users_file\n"
573                     unless exists $users{$author};
574                 ($committer_name,$committer_email) = @{$users{$author}};
575         } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
576                 ($committer_name, $committer_email) = ($1, $2);
577         } else {
578                 $author =~ s/^<(.*)>$/$1/;
579                 $committer_name = $committer_email = $author;
580         }
581
582         if ($opt_F && $message =~ /From:\s+(.*?)\s+<(.*)>\s*\n/) {
583                 ($author_name, $author_email) = ($1, $2);
584                 print "Author from From: $1 <$2>\n" if ($opt_v);;
585         } elsif ($opt_S && $message =~ /Signed-off-by:\s+(.*?)\s+<(.*)>\s*\n/) {
586                 ($author_name, $author_email) = ($1, $2);
587                 print "Author from Signed-off-by: $1 <$2>\n" if ($opt_v);;
588         } else {
589                 $author_name = $committer_name;
590                 $author_email = $committer_email;
591         }
592
593         $date = pdate($date);
594
595         my $tag;
596         my $parent;
597         if($branch eq "/") { # trunk
598                 $parent = $opt_o;
599         } elsif($branch =~ m#^/(.+)#) { # tag
600                 $tag = 1;
601                 $parent = $1;
602         } else { # "normal" branch
603                 # nothing to do
604                 $parent = $branch;
605         }
606         $dest = $parent;
607
608         my $prev = $changed_paths->{"/"};
609         if($prev and $prev->[0] eq "A") {
610                 delete $changed_paths->{"/"};
611                 my $oldpath = $prev->[1];
612                 my $rev;
613                 if(defined $oldpath) {
614                         my $p;
615                         ($parent,$p) = split_path($revision,$oldpath);
616                         if(defined $parent) {
617                                 if($parent eq "/") {
618                                         $parent = $opt_o;
619                                 } else {
620                                         $parent =~ s#^/##; # if it's a tag
621                                 }
622                         }
623                 } else {
624                         $parent = undef;
625                 }
626         }
627
628         my $rev;
629         if($revision > $opt_s and defined $parent) {
630                 open(H,"git-rev-parse --verify $parent |");
631                 $rev = <H>;
632                 close(H) or do {
633                         print STDERR "$revision: cannot find commit '$parent'!\n";
634                         return;
635                 };
636                 chop $rev;
637                 if(length($rev) != 40) {
638                         print STDERR "$revision: cannot find commit '$parent'!\n";
639                         return;
640                 }
641                 $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
642                 if($revision != $opt_s and not $rev) {
643                         print STDERR "$revision: do not know ancestor for '$parent'!\n";
644                         return;
645                 }
646         } else {
647                 $rev = undef;
648         }
649
650 #       if($prev and $prev->[0] eq "A") {
651 #               if(not $tag) {
652 #                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
653 #                               print STDERR "$revision: Could not create branch $branch: $!\n";
654 #                               $state=11;
655 #                               next;
656 #                       }
657 #                       print H "$rev\n"
658 #                               or die "Could not write branch $branch: $!";
659 #                       close(H)
660 #                               or die "Could not write branch $branch: $!";
661 #               }
662 #       }
663         if(not defined $rev) {
664                 unlink($git_index);
665         } elsif ($rev ne $last_rev) {
666                 print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
667                 system("git-read-tree", $rev);
668                 die "read-tree failed for $rev: $?\n" if $?;
669                 $last_rev = $rev;
670         }
671
672         push (@parents, $rev) if defined $rev;
673
674         my $cid;
675         if($tag and not %$changed_paths) {
676                 $cid = $rev;
677         } else {
678                 my @paths = sort keys %$changed_paths;
679                 foreach my $path(@paths) {
680                         my $action = $changed_paths->{$path};
681
682                         if ($action->[0] eq "R") {
683                                 # refer to a file/tree in an earlier commit
684                                 push(@old,$path); # remove any old stuff
685                         }
686                         if(($action->[0] eq "A") || ($action->[0] eq "R")) {
687                                 my $node_kind = node_kind($action->[3], $revision);
688                                 if ($node_kind eq $SVN::Node::file) {
689                                         my $f = get_file($action->[3],
690                                                          $revision, $path);
691                                         if ($f) {
692                                                 push(@new,$f) if $f;
693                                         } else {
694                                                 my $opath = $action->[3];
695                                                 print STDERR "$revision: $branch: could not fetch '$opath'\n";
696                                         }
697                                 } elsif ($node_kind eq $SVN::Node::dir) {
698                                         if($action->[1]) {
699                                                 copy_path($revision, $branch,
700                                                           $path, $action->[1],
701                                                           $action->[2], $node_kind,
702                                                           \@new, \@parents);
703                                         } else {
704                                                 get_ignore(\@new, \@old, $revision,
705                                                            $path, $action->[3]);
706                                         }
707                                 }
708                         } elsif ($action->[0] eq "D") {
709                                 push(@old,$path);
710                         } elsif ($action->[0] eq "M") {
711                                 my $node_kind = node_kind($action->[3], $revision);
712                                 if ($node_kind eq $SVN::Node::file) {
713                                         my $f = get_file($action->[3],
714                                                          $revision, $path);
715                                         push(@new,$f) if $f;
716                                 } elsif ($node_kind eq $SVN::Node::dir) {
717                                         get_ignore(\@new, \@old, $revision,
718                                                    $path, $action->[3]);
719                                 }
720                         } else {
721                                 die "$revision: unknown action '".$action->[0]."' for $path\n";
722                         }
723                 }
724
725                 while(@old) {
726                         my @o1;
727                         if(@old > 55) {
728                                 @o1 = splice(@old,0,50);
729                         } else {
730                                 @o1 = @old;
731                                 @old = ();
732                         }
733                         my $pid = open my $F, "-|";
734                         die "$!" unless defined $pid;
735                         if (!$pid) {
736                                 exec("git-ls-files", "-z", @o1) or die $!;
737                         }
738                         @o1 = ();
739                         local $/ = "\0";
740                         while(<$F>) {
741                                 chomp;
742                                 push(@o1,$_);
743                         }
744                         close($F);
745
746                         while(@o1) {
747                                 my @o2;
748                                 if(@o1 > 55) {
749                                         @o2 = splice(@o1,0,50);
750                                 } else {
751                                         @o2 = @o1;
752                                         @o1 = ();
753                                 }
754                                 system("git-update-index","--force-remove","--",@o2);
755                                 die "Cannot remove files: $?\n" if $?;
756                         }
757                 }
758                 while(@new) {
759                         my @n2;
760                         if(@new > 12) {
761                                 @n2 = splice(@new,0,10);
762                         } else {
763                                 @n2 = @new;
764                                 @new = ();
765                         }
766                         system("git-update-index","--add",
767                                 (map { ('--cacheinfo', @$_) } @n2));
768                         die "Cannot add files: $?\n" if $?;
769                 }
770
771                 my $pid = open(C,"-|");
772                 die "Cannot fork: $!" unless defined $pid;
773                 unless($pid) {
774                         exec("git-write-tree");
775                         die "Cannot exec git-write-tree: $!\n";
776                 }
777                 chomp(my $tree = <C>);
778                 length($tree) == 40
779                         or die "Cannot get tree id ($tree): $!\n";
780                 close(C)
781                         or die "Error running git-write-tree: $?\n";
782                 print "Tree ID $tree\n" if $opt_v;
783
784                 my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
785                 my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
786                 $pid = fork();
787                 die "Fork: $!\n" unless defined $pid;
788                 unless($pid) {
789                         $pr->writer();
790                         $pw->reader();
791                         open(OUT,">&STDOUT");
792                         dup2($pw->fileno(),0);
793                         dup2($pr->fileno(),1);
794                         $pr->close();
795                         $pw->close();
796
797                         my @par = ();
798
799                         # loose detection of merges
800                         # based on the commit msg
801                         foreach my $rx (@mergerx) {
802                                 if ($message =~ $rx) {
803                                         my $mparent = $1;
804                                         if ($mparent eq 'HEAD') { $mparent = $opt_o };
805                                         if ( -e "$git_dir/refs/heads/$mparent") {
806                                                 $mparent = get_headref($mparent, $git_dir);
807                                                 push (@parents, $mparent);
808                                                 print OUT "Merge parent branch: $mparent\n" if $opt_v;
809                                         }
810                                 }
811                         }
812                         my %seen_parents = ();
813                         my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
814                         foreach my $bparent (@unique_parents) {
815                                 push @par, '-p', $bparent;
816                                 print OUT "Merge parent branch: $bparent\n" if $opt_v;
817                         }
818
819                         exec("env",
820                                 "GIT_AUTHOR_NAME=$author_name",
821                                 "GIT_AUTHOR_EMAIL=$author_email",
822                                 "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
823                                 "GIT_COMMITTER_NAME=$committer_name",
824                                 "GIT_COMMITTER_EMAIL=$committer_email",
825                                 "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
826                                 "git-commit-tree", $tree,@par);
827                         die "Cannot exec git-commit-tree: $!\n";
828                 }
829                 $pw->writer();
830                 $pr->reader();
831
832                 $message =~ s/[\s\n]+\z//;
833                 $message = "r$revision: $message" if $opt_r;
834
835                 print $pw "$message\n"
836                         or die "Error writing to git-commit-tree: $!\n";
837                 $pw->close();
838
839                 print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
840                 chomp($cid = <$pr>);
841                 length($cid) == 40
842                         or die "Cannot get commit id ($cid): $!\n";
843                 print "Commit ID $cid\n" if $opt_v;
844                 $pr->close();
845
846                 waitpid($pid,0);
847                 die "Error running git-commit-tree: $?\n" if $?;
848         }
849
850         if (not defined $cid) {
851                 $cid = $branches{"/"}{"LAST"};
852         }
853
854         if(not defined $dest) {
855                 print "... no known parent\n" if $opt_v;
856         } elsif(not $tag) {
857                 print "Writing to refs/heads/$dest\n" if $opt_v;
858                 open(C,">$git_dir/refs/heads/$dest") and
859                 print C ("$cid\n") and
860                 close(C)
861                         or die "Cannot write branch $dest for update: $!\n";
862         }
863
864         if($tag) {
865                 my($in, $out) = ('','');
866                 $last_rev = "-" if %$changed_paths;
867                 # the tag was 'complex', i.e. did not refer to a "real" revision
868
869                 $dest =~ tr/_/\./ if $opt_u;
870                 $branch = $dest;
871
872                 my $pid = open2($in, $out, 'git-mktag');
873                 print $out ("object $cid\n".
874                     "type commit\n".
875                     "tag $dest\n".
876                     "tagger $committer_name <$committer_email> 0 +0000\n") and
877                 close($out)
878                     or die "Cannot create tag object $dest: $!\n";
879
880                 my $tagobj = <$in>;
881                 chomp $tagobj;
882
883                 if ( !close($in) or waitpid($pid, 0) != $pid or
884                                 $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) {
885                         die "Cannot create tag object $dest: $!\n";
886                 }
887
888                 open(C,">$git_dir/refs/tags/$dest") and
889                 print C ("$tagobj\n") and
890                 close(C)
891                         or die "Cannot create tag $branch: $!\n";
892
893                 print "Created tag '$dest' on '$branch'\n" if $opt_v;
894         }
895         $branches{$branch}{"LAST"} = $cid;
896         $branches{$branch}{$revision} = $cid;
897         $last_rev = $cid;
898         print BRANCHES "$revision $branch $cid\n";
899         print "DONE: $revision $dest $cid\n" if $opt_v;
900 }
901
# Log callback passed to get_log(); called with the data of one SVN revision.
#
# Arguments:
#   $changed_paths - hash ref mapping a repository path to an object that
#                    provides action, copyfrom_path and copyfrom_rev
#   $revision      - revision number; handed to split_path() and commit()
#   $author, $date, $message
#                  - passed through to commit() unchanged
#   (a sixth argument, the pool, is accepted but not used here)
#
# Every changed path is assigned to a branch by split_path(); commit() is
# then called once per branch with that branch's changes.  Paths for which
# split_path() yields no branch or no path are ignored.
sub commit_all {
        # Recursive use of the SVN connection does not work
        local $svn = $svn2;

        my ($changed_paths, $revision, $author, $date, $message) = @_;

        # Phase 1: snapshot every change into a plain array
        # [ action, copyfrom_path, copyfrom_rev, full path ]
        # before split_path() or commit() are invoked.
        my %change_for;
        foreach my $full_path (keys %$changed_paths) {
                my $entry = $changed_paths->{$full_path};
                $change_for{$full_path} = [ $entry->action, $entry->copyfrom_path, $entry->copyfrom_rev, $full_path ];
        }

        # Phase 2: group the snapshots by branch.  Paths are visited in
        # sorted order so that the result does not depend on hash order
        # if two paths map to the same (branch, path) pair.
        my %changes_by_branch;
        foreach my $full_path (sort keys %change_for) {
                my ($branch, $path) = split_path($revision, $full_path);
                next if not defined $branch;
                next if not defined $path;
                $changes_by_branch{$branch}{$path} = $change_for{$full_path};
        }

        # Phase 3: one commit per branch, in a stable (sorted) order so that
        # repeated imports of the same revision behave identically.
        foreach my $branch (sort keys %changes_by_branch) {
                commit($branch, $changes_by_branch{$branch}, $revision, $author, $date, $message);
        }
}
928
# Main driver: fetch the requested revision range, then restore the
# environment and bring HEAD / the working tree up to date.

# Cap the upper revision limit at $svn->{'maxrev'}; also used as the
# default when -l was not given.
$opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'};

if ($opt_l < $current_rev) {
        print "Up to date: no new revisions to fetch!\n" if $opt_v;
        unlink("$git_dir/SVN2GIT_HEAD");
        # The normal path below removes $git_index after fetching; do the
        # same here so an early exit does not leave that file behind.
        unlink($git_index) if defined $git_index;
        exit;
}

print "Fetching from $current_rev to $opt_l ...\n" if $opt_v;

# commit_all() is invoked as the log callback for the revisions in range.
my $pool=SVN::Pool->new;
$svn->{'svn'}->get_log("/",$current_rev,$opt_l,0,1,1,\&commit_all,$pool);
$pool->clear;


unlink($git_index);

# Put GIT_INDEX_FILE back to what it was before this run (or remove it).
if (defined $orig_git_index) {
        $ENV{GIT_INDEX_FILE} = $orig_git_index;
} else {
        delete $ENV{GIT_INDEX_FILE};
}

# Now switch back to the branch we were in before all of this happened
if($orig_branch) {
        print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
        if ($forward_master) {
                # copy() is File::Copy's; a failure is reported but, as
                # before, does not abort the run.
                copy("$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
                        or warn "Cannot copy refs/heads/$opt_o to refs/heads/master: $!\n";
        }
        unless ($opt_i) {
                system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
                die "read-tree failed: $?\n" if $?;
        }
} else {
        $orig_branch = "master";
        print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
        # Only create refs/heads/master if it does not exist yet.
        unless (-f "$git_dir/refs/heads/master") {
                copy("$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
                        or warn "Cannot copy refs/heads/$opt_o to refs/heads/master: $!\n";
        }
        system('git-update-ref', 'HEAD', "$orig_branch") == 0
                or warn "git-update-ref failed: $?\n";
        unless ($opt_i) {
                # List form, like the other system() calls: no shell involved.
                system('git', 'checkout');
                die "checkout failed: $?\n" if $?;
        }
}
unlink("$git_dir/SVN2GIT_HEAD");
# BRANCHES is written to during commits; buffered write errors only show
# up when the handle is closed, so report them.
close(BRANCHES)
        or warn "Error closing BRANCHES: $!\n";