git-svn: avoid crashing svnserve when creating new directories
[git] / git-svnimport.perl
1 #!/usr/bin/perl -w
2
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to pull and analyze SVN changes.
7 #
8 # Checking out the files is done by a single long-running SVN connection.
9 #
10 # The head revision is on branch "origin" by default.
11 # You can change that with the '-o' option.
12
13 use strict;
14 use warnings;
15 use Getopt::Std;
16 use File::Copy;
17 use File::Spec;
18 use File::Temp qw(tempfile);
19 use File::Path qw(mkpath);
20 use File::Basename qw(basename dirname);
21 use Time::Local;
22 use IO::Pipe;
23 use POSIX qw(strftime dup2);
24 use IPC::Open2;
25 use SVN::Core;
26 use SVN::Ra;
27
28 die "Need SVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
29
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
32
33 our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,
34     $opt_b,$opt_r,$opt_I,$opt_A,$opt_s,$opt_l,$opt_d,$opt_D,$opt_S,$opt_F,
35     $opt_P,$opt_R);
36
# usage()
# Print the command-line synopsis to STDERR and terminate with exit status 1.
# The empty prototype lets callers write a bare `usage` as well as `usage()`.
# The synopsis lists the positional arguments the script actually accepts:
# a mandatory repository URL followed by an optional path inside it.
sub usage() {
        my $prog = basename $0;
        print STDERR <<END;
Usage: $prog     # fetch/update GIT from SVN
       [-o branch-for-HEAD] [-h] [-v] [-l max_rev] [-R repack_each_revs]
       [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
       [-d|-D] [-i] [-u] [-r] [-I ignorefilename] [-s start_chg]
       [-m] [-M regex] [-A author_file] [-S] [-F] [-P project_name]
       SVN_URL [SVN_path]
END
        exit(1);
}
47
# Parse the command-line switches; an unknown switch or -h prints the
# synopsis and exits.
getopts("A:b:C:dDFhiI:l:mM:o:rs:t:T:SP:R:uv") or usage();
usage() if $opt_h;

# Names of the tags/trunk/branches directories inside the repository,
# falling back to the conventional SVN layout when not overridden.
my $tag_name    = $opt_t ? $opt_t : "tags";
my $trunk_name  = $opt_T ? $opt_T : "trunk";
my $branch_name = $opt_b ? $opt_b : "branches";
# A project name, when given, is stored with a leading slash.
my $project_name = $opt_P ? "/" . $opt_P : "";
my $repack_after = $opt_R ? $opt_R : 1000;

# Exactly one or two positional arguments are accepted.
usage() unless @ARGV == 1 or @ARGV == 2;

$opt_o = "origin" unless $opt_o;
$opt_s = 1 unless $opt_s;
my $git_tree = $opt_C || ".";

my $svn_url = $ARGV[0];
my $svn_dir = $ARGV[1];

# Patterns applied to commit messages to guess merge parents.  -m installs
# the built-in set; -M puts a user-supplied pattern in front of it.
our @mergerx = ();
if ($opt_m) {
        my $branch_esc = quotemeta ($branch_name);
        my $trunk_esc  = quotemeta ($trunk_name);
        my $merged_rx =
                qr!\b(?:merg(?:ed?|ing))\b.*?\b((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i;
        my $from_path_rx =
                qr!\b(?:from|of)\W+((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i;
        my $from_branch_rx =
                qr!\b(?:from|of)\W+(?:the )?([\w\.\-]+)[-\s]branch\b!i;
        @mergerx = ($merged_rx, $from_path_rx, $from_branch_rx);
}
unshift (@mergerx, qr/$opt_M/) if $opt_M;

# Absolutize filename now, since we will have chdir'ed by the time we
# get around to opening it.
if ($opt_A) {
        $opt_A = File::Spec->rel2abs($opt_A);
}

# Map of SVN user name => [ real name, e-mail address ], filled by read_users().
our %users = ();
# Absolute path of the authors file most recently loaded, or undef if none.
our $users_file = undef;

# read_users($file)
# Load an SVN authors file into %users and remember its absolute path in
# $users_file.  Each useful line has the form
#     svnuser = Real Name <email@example.org>
# Lines that do not match that form are silently skipped.
# Dies if $file is not a plain file or cannot be opened for reading.
sub read_users($) {
        $users_file = File::Spec->rel2abs(@_);
        die "Cannot open $users_file\n" unless -f $users_file;
        # Three-argument open so the file name is never parsed for a mode,
        # and a failed open is reported instead of silently reading nothing.
        open(my $authors, '<', $users_file)
                or die "Cannot open $users_file: $!\n";
        # A lexical line variable keeps the caller's $_ untouched.
        while (my $line = <$authors>) {
                chomp $line;
                next unless $line =~ /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
                my ($user, $name, $email) = ($1, $2, $3);
                $users{$user} = [$name, $email];
        }
        close($authors);
}
101
102 select(STDERR); $|=1; select(STDOUT);
103
104
105 package SVNconn;
106 # Basic SVN connection.
107 # We're only interested in connecting and downloading, so ...
108
109 use File::Spec;
110 use File::Temp qw(tempfile);
111 use POSIX qw(strftime dup2);
112 use Fcntl qw(SEEK_SET);
113
# Constructor: SVNconn->new($repo_url).
# Strips trailing slashes from the URL, records it under 'fullrep', opens the
# connection through conn() and returns the blessed object.  When invoked on
# an existing instance, that instance's class is used for the new object.
sub new {
        my ($proto, $repo) = @_;
        my $class = ref($proto) || $proto;

        my $self = bless { 'buffer' => "" }, $class;

        $repo =~ s#/+$##;
        $self->{'fullrep'} = $repo;
        $self->conn();

        return $self;
}
128
# conn()
# Open an SVN::Ra session to the URL stored in 'fullrep' and cache on the
# object: the session ('svn'), the URL ('repo') and the value returned by
# get_latest_revnum() ('maxrev').  Dies if no session object is returned.
sub conn {
        my ($self) = @_;
        my $repo = $self->{'fullrep'};
        my $auth = SVN::Core::auth_open ([SVN::Client::get_simple_provider,
                          SVN::Client::get_ssl_server_trust_file_provider,
                          SVN::Client::get_username_provider]);
        my $session = SVN::Ra->new(url => $repo, auth => $auth);
        defined $session
                or die "SVN connection to $repo: $!\n";
        @{$self}{'svn', 'repo'} = ($session, $repo);
        $self->{'maxrev'} = $session->get_latest_revnum();
}
141
# file($path, $rev)
# Fetch $path at revision $rev into a temporary file.
# Returns ($tempfile_name, $git_mode) where the mode is
#   '100755' if the node has an svn:executable property,
#   '120000' if it has svn:special and its content starts with "link "
#            (the prefix is stripped so the file holds only the target),
#   '100644' otherwise.
# Returns undef when the fetch fails with an "Attempted to get checksum"
# error; any other fetch error is re-thrown.  Dies on an svn:special file
# that is not a link, or if the temporary file cannot be written.
sub file {
        my($self,$path,$rev) = @_;

        my ($fh, $name) = tempfile('gitsvn.XXXXXX',
                    DIR => File::Spec->tmpdir(), UNLINK => 1);

        print "... $rev $path ...\n" if $opt_v;
        my $properties;
        my $pool = SVN::Pool->new();
        $path =~ s#^/*##;
        eval { (undef, $properties)
                   = $self->{'svn'}->get_file($path,$rev,$fh,$pool); };
        # Save the error before doing anything else: $@ is a global and any
        # eval run during pool cleanup would overwrite it.
        my $err = $@;
        $pool->clear;
        if($err) {
                if ($err =~ /Attempted to get checksum/) {
                        # Nothing usable was fetched; drop the temp file now
                        # rather than keeping it until the program exits.
                        close($fh);
                        unlink $name;
                        return undef;
                }
                die $err;
        }
        my $mode;
        if (exists $properties->{'svn:executable'}) {
                $mode = '100755';
        } elsif (exists $properties->{'svn:special'}) {
                my ($special_content, $filesize);
                # The current position of the handle is taken as the length
                # of the fetched content.
                $filesize = tell $fh;
                seek $fh, 0, SEEK_SET;
                read $fh, $special_content, $filesize;
                if ($special_content =~ s/^link //) {
                        $mode = '120000';
                        # Replace the file content with the bare link target.
                        seek $fh, 0, SEEK_SET;
                        truncate $fh, 0;
                        print $fh $special_content;
                } else {
                        die "unexpected svn:special file encountered";
                }
        } else {
                $mode = '100644';
        }
        # Buffered write errors only surface at close time.
        close ($fh) or die "Could not write $name: $!\n";

        return ($name, $mode);
}
182
# ignore($path, $rev)
# Look up the svn:ignore property of directory $path at revision $rev.
# If the property exists, its value is written to a temporary file and the
# file name is returned; otherwise undef is returned.
sub ignore {
        my ($self, $path, $rev) = @_;

        print "... $rev $path ...\n" if $opt_v;
        $path =~ s#^/*##;
        # Only the third value returned by get_dir (the properties) is used.
        my @dir_info = $self->{'svn'}->get_dir($path, $rev, undef);
        my $props = $dir_info[2];
        return undef unless exists $props->{'svn:ignore'};

        my ($tmp_fh, $tmp_name) = tempfile('gitsvn.XXXXXX',
                                           DIR => File::Spec->tmpdir(),
                                           UNLINK => 1);
        print $tmp_fh $props->{'svn:ignore'};
        close($tmp_fh);
        return $tmp_name;
}
201
# dir_list($path, $rev)
# Return the first value produced by get_dir for $path at revision $rev
# (the directory entries); leading slashes are removed from $path first.
sub dir_list {
        my ($self, $path, $rev) = @_;
        $path =~ s#^/*##;
        my ($entries) = $self->{'svn'}->get_dir($path, $rev, undef);
        return $entries;
}
209
210 package main;
211 use URI;
212
# Location to import from: the repository URL plus the optional sub-path.
our $svn = defined $svn_dir ? "$svn_url/$svn_dir" : $svn_url;
# Two separate connections are opened to that location; $svn itself is
# rebound from the URL string to its connection object.
my $svn2 = SVNconn->new($svn);
$svn = SVNconn->new($svn);

# Direct file access (-d / -D): only set up when the repository URL uses
# the "http" scheme.
my $lwp_ua;
if ($opt_d or $opt_D) {
        $svn_url = URI->new($svn_url)->canonical;
        # With -D the sub-path is kept and normalized to end in one slash;
        # otherwise it is emptied.
        if (not $opt_D) {
                $svn_dir = "";
        } else {
                $svn_dir =~ s#/*$#/#;
        }
        if ($svn_url->scheme ne "http") {
                print STDERR "Warning: not HTTP; turning off direct file access\n";
                $opt_d=0;
        } else {
                use LWP::UserAgent;
                $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
        }
}
234
# pdate($svn_date)
# Convert a timestamp of the form "YYYY-MM-DDThh:mm:ss..." into seconds since
# the Unix epoch, interpreting the fields as UTC.  Anything following the
# seconds field (fractional seconds, zone suffix) is ignored.
# Dies with "Unparseable date" if no such timestamp is found in the string.
sub pdate($) {
        my($d) = @_;
        $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
                or die "Unparseable date: $d\n";
        my ($year, $mon, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
        # Pass the four-digit year through unchanged.  Subtracting 1900 would
        # turn 1901..1999 into a two-digit value, which Time::Local resolves
        # with a rolling century relative to today and can place a century
        # late.
        return timegm($sec, $min, $hour, $mday, $mon - 1, $year);
}
242
# getwd()
# Return the current working directory as printed by the external `pwd`
# command, without the trailing newline.
sub getwd() {
        chomp(my $cwd = `pwd`);
        return $cwd;
}
248
249
# get_headref($name, $git_dir)
# Read the commit id stored in "$git_dir/refs/heads/$name".
# Returns the 40-character id, or undef if the ref file cannot be opened.
# Dies if the file exists but its first line is not 40 characters long
# (an empty file included).
sub get_headref($$) {
    my $name    = shift;
    my $git_dir = shift;
    my $sha;

    # Lexical handle and three-argument open: no global bareword handle is
    # shared with other code, and the path is never parsed for a mode.
    if (open(my $ref_fh, '<', "$git_dir/refs/heads/$name")) {
        $sha = <$ref_fh>;
        close($ref_fh);
        # An empty file yields undef; treat it as an empty id so the
        # length check below reports it cleanly.
        $sha = "" unless defined $sha;
        chomp $sha;
        length($sha) == 40
            or die "Cannot get head id for $name ($sha): $!\n";
    }
    return $sha;
}
263
264
# Create the target working tree if it does not exist yet and make it the
# current directory.
-d $git_tree
        or mkdir($git_tree,0777)
        or die "Could not create $git_tree: $!";
# A failed chdir must be fatal: a relative GIT_DIR is resolved against the
# current directory just below.
chdir($git_tree)
        or die "Could not change to $git_tree: $!\n";

my $orig_branch = "";
my $forward_master = 0;
# Per-branch map of SVN revision number => commit id, plus a "LAST" entry.
my %branches;

# Absolute path of the repository directory, exported as GIT_DIR.
my $git_dir = $ENV{"GIT_DIR"} || ".git";
$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
$ENV{"GIT_DIR"} = $git_dir;
# Remember any index file the caller had configured, then point
# GIT_INDEX_FILE at a private temporary index.
my $orig_git_index;
$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
                                    DIR => File::Spec->tmpdir());
close ($git_ih);
$ENV{GIT_INDEX_FILE} = $git_index;
my $maxnum = 0;
my $last_rev = "";
my $last_branch;
# First SVN revision to import; raised later when earlier imports are found.
my $current_rev = $opt_s || 1;
unless(-d $git_dir) {
        # Fresh import: create the repository and start from an empty index.
        system("git-init");
        die "Cannot init the GIT db at $git_tree: $?\n" if $?;
        system("git-read-tree");
        die "Cannot init an empty tree: $?\n" if $?;

        $last_branch = $opt_o;
        $orig_branch = "";
} else {
        # Incremental import into an existing repository: the head branch
        # and the svn2git revision map must already be there.
        -f "$git_dir/refs/heads/$opt_o"
                or die "Branch '$opt_o' does not exist.\n".
                       "Either use the correct '-o branch' option,\n".
                       "or import to a new repository.\n";

        -f "$git_dir/svn2git"
                or die "'$git_dir/svn2git' does not exist.\n".
                       "You need that file for incremental imports.\n";
        open(my $head_fh, '-|', "git-symbolic-ref HEAD") or
                die "Cannot run git-symbolic-ref: $!\n";
        $last_branch = <$head_fh>;
        # No output at all yields undef; normalize it so the fallback below
        # triggers without uninitialized-value warnings.
        $last_branch = "" unless defined $last_branch;
        chomp $last_branch;
        $last_branch = basename($last_branch);
        close($head_fh);
        unless($last_branch) {
                warn "Cannot read the last branch name: $! -- assuming 'master'\n";
                $last_branch = "master";
        }
        $orig_branch = $last_branch;
        $last_rev = get_headref($orig_branch, $git_dir);
        if (-f "$git_dir/SVN2GIT_HEAD") {
                die <<EOM;
SVN2GIT_HEAD exists.
Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
You may need to run

    git-read-tree -m -u SVN2GIT_HEAD HEAD
EOM
        }
        # SVN2GIT_HEAD is the marker the message above tells the user to
        # recover from, so failing to create it must not go unnoticed.
        system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD") == 0
                or die "Cannot copy HEAD to SVN2GIT_HEAD: $?\n";

        # True when 'master' exists, is not the import branch, and holds
        # the same content as the import branch's ref file.
        $forward_master =
            $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
            system('cmp', '-s', "$git_dir/refs/heads/master",
                                "$git_dir/refs/heads/$opt_o") == 0;

        # populate index
        system('git-read-tree', $last_rev);
        die "read-tree failed: $?\n" if $?;

        # Get the last import timestamps
        open(my $B, "<", "$git_dir/svn2git")
                or die "Cannot read $git_dir/svn2git: $!\n";
        while(my $line = <$B>) {
                chomp $line;
                my($num,$branch,$ref) = split(' ', $line);
                # Skip blank or truncated lines instead of recording an
                # undefined branch or commit id.
                next unless defined $ref;
                $branches{$branch}{$num} = $ref;
                $branches{$branch}{"LAST"} = $ref;
                # Resume with the revision after the highest one recorded.
                $current_rev = $num+1 if $current_rev <= $num;
        }
        close($B);
}
-d $git_dir
        or die "Could not create git subdir ($git_dir).\n";

# Authors mapping: an explicit -A file is loaded and copied into the
# repository; otherwise a previously stored copy is loaded if present.
my $default_authors = "$git_dir/svn-authors";
if ($opt_A) {
        read_users($opt_A);
        copy($opt_A,$default_authors) or die "Copy failed: $!";
} else {
        read_users($default_authors) if -f $default_authors;
}

# Revision map, opened for appending.  A failed open is fatal here so that
# later writes to BRANCHES cannot be lost silently.
open BRANCHES,">>", "$git_dir/svn2git"
        or die "Cannot append to $git_dir/svn2git: $!\n";
358
# node_kind($svnpath, $revision)
# Return the result of check_path on the main connection for $svnpath
# (leading slashes removed) at $revision.  Callers compare the result
# against $SVN::Node::file and $SVN::Node::dir.
sub node_kind($$) {
        my ($svnpath, $revision) = @_;
        (my $relpath = $svnpath) =~ s#^/*##;
        my $pool = SVN::Pool->new;
        my $kind = $svn->{'svn'}->check_path($relpath, $revision, $pool);
        $pool->clear;
        return $kind;
}
367
# get_file($svnpath, $rev, $path)
# Fetch the content of $svnpath at revision $rev, store it as a git blob and
# return [ $mode, $sha, $path ] for use as an index entry.
# With -d the content is downloaded over HTTP and the mode is always '0644';
# otherwise it comes from the SVN connection together with its mode.
# Returns undef if the file could not be fetched (HTTP 301, or the SVN
# connection returned nothing).  Dies on other HTTP errors, on fork failure,
# or if git-hash-object does not produce a 40-character object id.
sub get_file($$$) {
        my($svnpath,$rev,$path) = @_;

        # now get it
        my ($name,$mode);
        if($opt_d) {
                my($req,$res);

                # /svn/!svn/bc/2/django/trunk/django-docs/build.py
                my $url=$svn_url->clone();
                $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
                print "... $path...\n" if $opt_v;
                $req = HTTP::Request->new(GET => $url);
                $res = $lwp_ua->request($req);
                if ($res->is_success) {
                        my $fh;
                        ($fh, $name) = tempfile('gitsvn.XXXXXX',
                        DIR => File::Spec->tmpdir(), UNLINK => 1);
                        print $fh $res->content;
                        close($fh) or die "Could not write $name: $!\n";
                } else {
                        return undef if $res->code == 301; # directory?
                        die $res->status_line." at $url\n";
                }
                $mode = '0644'; # can't obtain mode via direct http request?
        } else {
                ($name,$mode) = $svn->file("$svnpath",$rev);
                return undef unless defined $name;
        }

        # Fork a child whose stdout we read; it runs git-hash-object on the
        # temporary file and prints the resulting object id.
        my $pid = open(my $F, '-|');
        die $! unless defined $pid;
        if (!$pid) {
            exec("git-hash-object", "-w", $name)
                or die "Cannot create object: $!\n";
        }
        my $sha = <$F>;
        close $F;
        unlink $name;
        # Without this check a failed git-hash-object would hand an undefined
        # or truncated id to the index update much later.
        defined $sha
                or die "git-hash-object produced no output for $path\n";
        chomp $sha;
        length($sha) == 40
                or die "Cannot get object id for $path ($sha)\n";
        return [$mode, $sha, $path];
}
410
# get_ignore(\@new, \@old, $rev, $path, $svnpath)
# Does nothing unless -I (ignore file name) was given.  Otherwise asks the
# SVN connection for the svn:ignore property of $svnpath at $rev:
#   - if present, its content is stored as a git blob and an index entry
#     ['0644', $sha, "$path/<ignore file>"] is appended to @new;
#   - if absent and \@old is defined, the ignore-file path is appended to
#     @old (the list of paths to remove).
# For $path eq '/' the ignore file sits at the top level.
# Dies on fork failure or if git-hash-object does not produce a
# 40-character object id.
sub get_ignore($$$$$) {
        my($new,$old,$rev,$path,$svnpath) = @_;

        return unless $opt_I;
        my $name = $svn->ignore("$svnpath",$rev);
        if ($path eq '/') {
                $path = $opt_I;
        } else {
                $path = File::Spec->catfile($path,$opt_I);
        }
        if (defined $name) {
                # Child process prints the blob id on the pipe we read.
                my $pid = open(my $F, '-|');
                die $! unless defined $pid;
                if (!$pid) {
                        exec("git-hash-object", "-w", $name)
                            or die "Cannot create object: $!\n";
                }
                my $sha = <$F>;
                close $F;
                unlink $name;
                # Refuse to queue an index entry with a missing or
                # truncated object id.
                defined $sha
                        or die "git-hash-object produced no output for $path\n";
                chomp $sha;
                length($sha) == 40
                        or die "Cannot get object id for $path ($sha)\n";
                push(@$new,['0644',$sha,$path]);
        } elsif (defined $old) {
                push(@$old,$path);
        }
}
437
# project_path($path, $project)
# Relate $path to the project prefix $project (which carries a leading
# slash).  A leading slash is added to $path if missing, then:
#   - if $path lies below "$project/", the remainder after that prefix is
#     returned;
#   - if $project itself starts with $path (i.e. $path is the project
#     directory or one of its leading parts), "/" is returned;
#   - otherwise undef is returned.
# Both names are matched literally, so characters such as '.', '+' or '('
# in either of them have no special meaning.
sub project_path($$)
{
        my ($path, $project) = @_;

        $path = "/".$path unless ($path =~ m#^\/#) ;
        return $1 if ($path =~ m#^\Q$project\E\/(.*)$#);

        return "/" if ($project =~ m#^\Q$path\E#);

        return undef;
}
451
# split_path($rev, $path)
# Split a repository path into (branch, path-within-branch).
# The branch part is encoded as:
#   "/<name>"  for a path below the tags directory,
#   "/"        for a path below trunk,
#   "<name>"   for a path below the branches directory.
# The remaining path is "/" when nothing is left; when a project name is
# configured it is passed through project_path() instead.
# Returns the empty list for any other path, printing a diagnostic with
# $rev unless the path is "/" or the bare tags/branches directory.
sub split_path($$) {
        my ($rev, $path) = @_;
        my $branch;

        if ($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
                $branch = "/$1";
        }
        elsif ($path =~ s#^/\Q$trunk_name\E/?##) {
                $branch = "/";
        }
        elsif ($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
                $branch = $1;
        }
        else {
                # Paths that are expected to fall through without a warning.
                my @quiet = ("/", "/$tag_name", "/$branch_name");
                print STDERR "$rev: Unrecognized path: $path\n"
                        unless grep { $_ eq $path } @quiet;
                return ();
        }

        return ($branch, "/") if $path eq "";
        $path = project_path($path, $project_name) if $project_name;
        return ($branch, $path);
}
478
# branch_rev($srcbranch, $uptorev)
# Among the revisions recorded for $srcbranch in %branches, return the
# highest one that is not greater than $uptorev, or undef if there is none.
# The special "LAST" entry is never returned.
sub branch_rev($$) {
        my ($srcbranch, $uptorev) = @_;

        my $bbranches = $branches{$srcbranch};
        # Recorded revision numbers, newest first.
        my @numbered = sort { $b <=> $a }
                       grep { $_ ne 'LAST' } keys %$bbranches;
        foreach my $candidate (@numbered) {
                return $candidate if $candidate <= $uptorev;
        }
        return undef;
}
495
# Forward declaration so the prototype is known to the recursive call.
sub expand_svndir($$$);

# expand_svndir($svnpath, $rev, $path)
# Recursively walk directory $svnpath at revision $rev and return a list of
# index entries for everything below it, placed under $path: one entry per
# file that get_file() could fetch, plus whatever get_ignore() contributes
# for each directory.  Entries that are neither file nor directory are
# skipped.
sub expand_svndir($$$)
{
        my ($svnpath, $rev, $path) = @_;
        my @list;
        get_ignore(\@list, undef, $rev, $path, $svnpath);
        my $dirents = $svn->dir_list($svnpath, $rev);
        foreach my $entry (keys %$dirents) {
                my $svn_child = $svnpath . '/' . $entry;
                my $git_child = $path . '/' . $entry;
                my $kind = node_kind($svn_child, $rev);
                if ($kind eq $SVN::Node::dir) {
                        push(@list,
                             expand_svndir($svn_child, $rev, $git_child));
                        next;
                }
                next unless $kind eq $SVN::Node::file;
                my $f = get_file($svn_child, $rev, $git_child);
                push(@list, $f) if $f;
        }
        return @list;
}
516
# copy_path($newrev, $newbranch, $path, $oldpath, $rev, $node_kind,
#           \@new, \@parents)
# Handle an SVN copy of $oldpath@$rev to $path.  The index entries of the
# source are looked up in the already-imported git commit and appended to
# @new under the destination path.
#   - If $oldpath is not inside a recognized branch, the source is fetched
#     from SVN via expand_svndir() instead.
#   - If no imported commit is known for the source, a diagnostic is
#     printed and nothing is added.
#   - If the source is on another branch, that branch's "LAST" commit is
#     appended to @parents.
sub copy_path($$$$$$$$) {
        my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;

        my($srcbranch,$srcpath) = split_path($rev,$oldpath);
        if (!defined $srcbranch || !defined $srcpath) {
                if ($opt_v) {
                        print "Path not found when copying from $oldpath @ $rev.\n".
                                "Will try to copy from original SVN location...\n";
                }
                push (@$new, expand_svndir($oldpath, $rev, $path));
                return;
        }

        # Newest imported revision of the source branch at or before $rev.
        my $therev = branch_rev($srcbranch, $rev);
        my $gitrev = $branches{$srcbranch}{$therev};
        if (!$gitrev) {
                print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
                return;
        }
        push(@$parents, $branches{$srcbranch}{'LAST'})
                unless $srcbranch eq $newbranch;
        print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;

        my $from_dir = ($node_kind eq $SVN::Node::dir);
        # A directory source is given exactly one trailing slash.
        $srcpath =~ s#/*$#/# if $from_dir;

        # Child process lists the source tree; we read its NUL-separated
        # output.
        my $pid = open my $f,'-|';
        die $! unless defined $pid;
        if (!$pid) {
                exec("git-ls-tree","-r","-z",$gitrev,$srcpath)
                        or die $!;
        }
        local $/ = "\0";
        while (defined(my $entry = <$f>)) {
                chomp $entry;
                my ($meta, $p) = split(/\t/, $entry, 2);
                my ($mode, $type, $sha1) = split(/ /, $meta);
                next if $type ne "blob";
                # For a directory copy, swap the source prefix for $path;
                # a single-file copy lands on $path itself.
                my $dest = $from_dir
                        ? $path . substr($p, length($srcpath) - 1)
                        : $path;
                push(@$new, [$mode, $sha1, $dest]);
        }
        close($f) or
                print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
}
568
569 sub commit {
570         my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
571         my($committer_name,$committer_email,$dest);
572         my($author_name,$author_email);
573         my(@old,@new,@parents);
574
575         if (not defined $author or $author eq "") {
576                 $committer_name = $committer_email = "unknown";
577         } elsif (defined $users_file) {
578                 die "User $author is not listed in $users_file\n"
579                     unless exists $users{$author};
580                 ($committer_name,$committer_email) = @{$users{$author}};
581         } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
582                 ($committer_name, $committer_email) = ($1, $2);
583         } else {
584                 $author =~ s/^<(.*)>$/$1/;
585                 $committer_name = $committer_email = $author;
586         }
587
588         if ($opt_F && $message =~ /From:\s+(.*?)\s+<(.*)>\s*\n/) {
589                 ($author_name, $author_email) = ($1, $2);
590                 print "Author from From: $1 <$2>\n" if ($opt_v);;
591         } elsif ($opt_S && $message =~ /Signed-off-by:\s+(.*?)\s+<(.*)>\s*\n/) {
592                 ($author_name, $author_email) = ($1, $2);
593                 print "Author from Signed-off-by: $1 <$2>\n" if ($opt_v);;
594         } else {
595                 $author_name = $committer_name;
596                 $author_email = $committer_email;
597         }
598
599         $date = pdate($date);
600
601         my $tag;
602         my $parent;
603         if($branch eq "/") { # trunk
604                 $parent = $opt_o;
605         } elsif($branch =~ m#^/(.+)#) { # tag
606                 $tag = 1;
607                 $parent = $1;
608         } else { # "normal" branch
609                 # nothing to do
610                 $parent = $branch;
611         }
612         $dest = $parent;
613
614         my $prev = $changed_paths->{"/"};
615         if($prev and $prev->[0] eq "A") {
616                 delete $changed_paths->{"/"};
617                 my $oldpath = $prev->[1];
618                 my $rev;
619                 if(defined $oldpath) {
620                         my $p;
621                         ($parent,$p) = split_path($revision,$oldpath);
622                         if(defined $parent) {
623                                 if($parent eq "/") {
624                                         $parent = $opt_o;
625                                 } else {
626                                         $parent =~ s#^/##; # if it's a tag
627                                 }
628                         }
629                 } else {
630                         $parent = undef;
631                 }
632         }
633
634         my $rev;
635         if($revision > $opt_s and defined $parent) {
636                 open(H,"git-rev-parse --verify $parent |");
637                 $rev = <H>;
638                 close(H) or do {
639                         print STDERR "$revision: cannot find commit '$parent'!\n";
640                         return;
641                 };
642                 chop $rev;
643                 if(length($rev) != 40) {
644                         print STDERR "$revision: cannot find commit '$parent'!\n";
645                         return;
646                 }
647                 $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
648                 if($revision != $opt_s and not $rev) {
649                         print STDERR "$revision: do not know ancestor for '$parent'!\n";
650                         return;
651                 }
652         } else {
653                 $rev = undef;
654         }
655
656 #       if($prev and $prev->[0] eq "A") {
657 #               if(not $tag) {
658 #                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
659 #                               print STDERR "$revision: Could not create branch $branch: $!\n";
660 #                               $state=11;
661 #                               next;
662 #                       }
663 #                       print H "$rev\n"
664 #                               or die "Could not write branch $branch: $!";
665 #                       close(H)
666 #                               or die "Could not write branch $branch: $!";
667 #               }
668 #       }
669         if(not defined $rev) {
670                 unlink($git_index);
671         } elsif ($rev ne $last_rev) {
672                 print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
673                 system("git-read-tree", $rev);
674                 die "read-tree failed for $rev: $?\n" if $?;
675                 $last_rev = $rev;
676         }
677
678         push (@parents, $rev) if defined $rev;
679
680         my $cid;
681         if($tag and not %$changed_paths) {
682                 $cid = $rev;
683         } else {
684                 my @paths = sort keys %$changed_paths;
685                 foreach my $path(@paths) {
686                         my $action = $changed_paths->{$path};
687
688                         if ($action->[0] eq "R") {
689                                 # refer to a file/tree in an earlier commit
690                                 push(@old,$path); # remove any old stuff
691                         }
692                         if(($action->[0] eq "A") || ($action->[0] eq "R")) {
693                                 my $node_kind = node_kind($action->[3], $revision);
694                                 if ($node_kind eq $SVN::Node::file) {
695                                         my $f = get_file($action->[3],
696                                                          $revision, $path);
697                                         if ($f) {
698                                                 push(@new,$f) if $f;
699                                         } else {
700                                                 my $opath = $action->[3];
701                                                 print STDERR "$revision: $branch: could not fetch '$opath'\n";
702                                         }
703                                 } elsif ($node_kind eq $SVN::Node::dir) {
704                                         if($action->[1]) {
705                                                 copy_path($revision, $branch,
706                                                           $path, $action->[1],
707                                                           $action->[2], $node_kind,
708                                                           \@new, \@parents);
709                                         } else {
710                                                 get_ignore(\@new, \@old, $revision,
711                                                            $path, $action->[3]);
712                                         }
713                                 }
714                         } elsif ($action->[0] eq "D") {
715                                 push(@old,$path);
716                         } elsif ($action->[0] eq "M") {
717                                 my $node_kind = node_kind($action->[3], $revision);
718                                 if ($node_kind eq $SVN::Node::file) {
719                                         my $f = get_file($action->[3],
720                                                          $revision, $path);
721                                         push(@new,$f) if $f;
722                                 } elsif ($node_kind eq $SVN::Node::dir) {
723                                         get_ignore(\@new, \@old, $revision,
724                                                    $path, $action->[3]);
725                                 }
726                         } else {
727                                 die "$revision: unknown action '".$action->[0]."' for $path\n";
728                         }
729                 }
730
731                 while(@old) {
732                         my @o1;
733                         if(@old > 55) {
734                                 @o1 = splice(@old,0,50);
735                         } else {
736                                 @o1 = @old;
737                                 @old = ();
738                         }
739                         my $pid = open my $F, "-|";
740                         die "$!" unless defined $pid;
741                         if (!$pid) {
742                                 exec("git-ls-files", "-z", @o1) or die $!;
743                         }
744                         @o1 = ();
745                         local $/ = "\0";
746                         while(<$F>) {
747                                 chomp;
748                                 push(@o1,$_);
749                         }
750                         close($F);
751
752                         while(@o1) {
753                                 my @o2;
754                                 if(@o1 > 55) {
755                                         @o2 = splice(@o1,0,50);
756                                 } else {
757                                         @o2 = @o1;
758                                         @o1 = ();
759                                 }
760                                 system("git-update-index","--force-remove","--",@o2);
761                                 die "Cannot remove files: $?\n" if $?;
762                         }
763                 }
764                 while(@new) {
765                         my @n2;
766                         if(@new > 12) {
767                                 @n2 = splice(@new,0,10);
768                         } else {
769                                 @n2 = @new;
770                                 @new = ();
771                         }
772                         system("git-update-index","--add",
773                                 (map { ('--cacheinfo', @$_) } @n2));
774                         die "Cannot add files: $?\n" if $?;
775                 }
776
777                 my $pid = open(C,"-|");
778                 die "Cannot fork: $!" unless defined $pid;
779                 unless($pid) {
780                         exec("git-write-tree");
781                         die "Cannot exec git-write-tree: $!\n";
782                 }
783                 chomp(my $tree = <C>);
784                 length($tree) == 40
785                         or die "Cannot get tree id ($tree): $!\n";
786                 close(C)
787                         or die "Error running git-write-tree: $?\n";
788                 print "Tree ID $tree\n" if $opt_v;
789
790                 my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
791                 my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
792                 $pid = fork();
793                 die "Fork: $!\n" unless defined $pid;
794                 unless($pid) {
795                         $pr->writer();
796                         $pw->reader();
797                         open(OUT,">&STDOUT");
798                         dup2($pw->fileno(),0);
799                         dup2($pr->fileno(),1);
800                         $pr->close();
801                         $pw->close();
802
803                         my @par = ();
804
805                         # loose detection of merges
806                         # based on the commit msg
807                         foreach my $rx (@mergerx) {
808                                 if ($message =~ $rx) {
809                                         my $mparent = $1;
810                                         if ($mparent eq 'HEAD') { $mparent = $opt_o };
811                                         if ( -e "$git_dir/refs/heads/$mparent") {
812                                                 $mparent = get_headref($mparent, $git_dir);
813                                                 push (@parents, $mparent);
814                                                 print OUT "Merge parent branch: $mparent\n" if $opt_v;
815                                         }
816                                 }
817                         }
818                         my %seen_parents = ();
819                         my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
820                         foreach my $bparent (@unique_parents) {
821                                 push @par, '-p', $bparent;
822                                 print OUT "Merge parent branch: $bparent\n" if $opt_v;
823                         }
824
825                         exec("env",
826                                 "GIT_AUTHOR_NAME=$author_name",
827                                 "GIT_AUTHOR_EMAIL=$author_email",
828                                 "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
829                                 "GIT_COMMITTER_NAME=$committer_name",
830                                 "GIT_COMMITTER_EMAIL=$committer_email",
831                                 "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
832                                 "git-commit-tree", $tree,@par);
833                         die "Cannot exec git-commit-tree: $!\n";
834                 }
835                 $pw->writer();
836                 $pr->reader();
837
838                 $message =~ s/[\s\n]+\z//;
839                 $message = "r$revision: $message" if $opt_r;
840
841                 print $pw "$message\n"
842                         or die "Error writing to git-commit-tree: $!\n";
843                 $pw->close();
844
845                 print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
846                 chomp($cid = <$pr>);
847                 length($cid) == 40
848                         or die "Cannot get commit id ($cid): $!\n";
849                 print "Commit ID $cid\n" if $opt_v;
850                 $pr->close();
851
852                 waitpid($pid,0);
853                 die "Error running git-commit-tree: $?\n" if $?;
854         }
855
856         if (not defined $cid) {
857                 $cid = $branches{"/"}{"LAST"};
858         }
859
860         if(not defined $dest) {
861                 print "... no known parent\n" if $opt_v;
862         } elsif(not $tag) {
863                 print "Writing to refs/heads/$dest\n" if $opt_v;
864                 open(C,">$git_dir/refs/heads/$dest") and
865                 print C ("$cid\n") and
866                 close(C)
867                         or die "Cannot write branch $dest for update: $!\n";
868         }
869
870         if($tag) {
871                 my($in, $out) = ('','');
872                 $last_rev = "-" if %$changed_paths;
873                 # the tag was 'complex', i.e. did not refer to a "real" revision
874
875                 $dest =~ tr/_/\./ if $opt_u;
876                 $branch = $dest;
877
878                 my $pid = open2($in, $out, 'git-mktag');
879                 print $out ("object $cid\n".
880                     "type commit\n".
881                     "tag $dest\n".
882                     "tagger $committer_name <$committer_email> 0 +0000\n") and
883                 close($out)
884                     or die "Cannot create tag object $dest: $!\n";
885
886                 my $tagobj = <$in>;
887                 chomp $tagobj;
888
889                 if ( !close($in) or waitpid($pid, 0) != $pid or
890                                 $? != 0 or $tagobj !~ /^[0123456789abcdef]{40}$/ ) {
891                         die "Cannot create tag object $dest: $!\n";
892                 }
893
894                 open(C,">$git_dir/refs/tags/$dest") and
895                 print C ("$tagobj\n") and
896                 close(C)
897                         or die "Cannot create tag $branch: $!\n";
898
899                 print "Created tag '$dest' on '$branch'\n" if $opt_v;
900         }
901         $branches{$branch}{"LAST"} = $cid;
902         $branches{$branch}{$revision} = $cid;
903         $last_rev = $cid;
904         print BRANCHES "$revision $branch $cid\n";
905         print "DONE: $revision $dest $cid\n" if $opt_v;
906 }
907
# Log receiver handed to get_log(): takes the changed paths of one revision,
# sorts them by branch and calls commit() once for every branch touched.
#
# Arguments: ($changed_paths, $revision, $author, $date, $message, $pool)
#   $changed_paths - hashref mapping path => change object (must provide
#                    action, copyfrom_path and copyfrom_rev methods)
#   $pool          - accepted but not used here
sub commit_all {
        my ($changed_paths, $revision, $author, $date, $message, $pool) = @_;

        # Recursive use of the SVN connection does not work, so for the
        # duration of this call $svn is dynamically rebound to $svn2.
        local $svn = $svn2;

        # Turn every change object into a plain array:
        # [ action, copyfrom_path, copyfrom_rev, path ]
        my %change_for;
        foreach my $path (keys %$changed_paths) {
                my $entry = $changed_paths->{$path};
                $change_for{$path} = [ $entry->action, $entry->copyfrom_path, $entry->copyfrom_rev, $path ];
        }

        # Group the changes by branch; paths that split_path() cannot
        # resolve to both a branch and a branch-relative path are dropped.
        my %paths_by_branch;
        foreach my $full_path (keys %change_for) {
                my ($branch, $rel_path) = split_path($revision, $full_path);
                next unless defined $branch;
                next unless defined $rel_path;
                $paths_by_branch{$branch}{$rel_path} = $change_for{$full_path};
        }

        # One commit per affected branch.
        foreach my $branch (keys %paths_by_branch) {
                commit($branch, $paths_by_branch{$branch}, $revision, $author, $date, $message);
        }
}
934
# Never ask for more revisions than the repository reports.
if (not defined $opt_l or $opt_l > $svn->{'maxrev'}) {
        $opt_l = $svn->{'maxrev'};
}

# Nothing to do when the starting revision is already past the limit.
if ($current_rev > $opt_l) {
        print "Up to date: no new revisions to fetch!\n" if $opt_v;
        unlink("$git_dir/SVN2GIT_HEAD");
        exit;
}

print "Processing from $current_rev to $opt_l ...\n" if $opt_v;

my $from_rev;
my $to_rev = $current_rev - 1;

# Walk the revision range in chunks: fetch the log for one chunk (which
# invokes commit_all for it), then run git-repack before the next chunk.
until ($to_rev >= $opt_l) {
        $from_rev = $to_rev + 1;
        $to_rev = $from_rev + $repack_after;
        if ($to_rev > $opt_l) {
                $to_rev = $opt_l;
        }
        print "Fetching from $from_rev to $to_rev ...\n" if $opt_v;

        my $pool = SVN::Pool->new;
        $svn->{'svn'}->get_log("/", $from_rev, $to_rev, 0, 1, 1, \&commit_all, $pool);
        $pool->clear;

        # Repack in a child process and wait for it to finish.
        my $repack_pid = fork();
        defined $repack_pid
                or die "Fork: $!\n";
        if ($repack_pid == 0) {
                exec("git-repack", "-d")
                        or die "Cannot repack: $!\n";
        }
        waitpid($repack_pid, 0);
}
964
965
# Remove the index file named by $git_index and put GIT_INDEX_FILE back the
# way it was: restore the saved value, or unset it if none was saved.
unlink($git_index);

if (defined $orig_git_index) {
        $ENV{GIT_INDEX_FILE} = $orig_git_index;
} else {
        delete $ENV{GIT_INDEX_FILE};
}

# Now switch back to the branch we were in before all of this happened.
# Failures of the helper commands below that used to be silently ignored
# (cp, git-update-ref, closing BRANCHES) are reported with warn; the flow
# of control and the existing fatal checks are unchanged.
if($orig_branch) {
        print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
        if ($forward_master) {
                # Point master at the same commit as $opt_o by copying the ref file.
                system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") == 0
                        or warn "Cannot copy refs/heads/$opt_o to refs/heads/master: $?\n";
        }
        unless ($opt_i) {
                system('git-read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
                die "read-tree failed: $?\n" if $?;
        }
} else {
        $orig_branch = "master";
        print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
        unless (-f "$git_dir/refs/heads/master") {
                # No master ref yet: create it as a copy of the $opt_o ref file.
                system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master") == 0
                        or warn "Cannot copy refs/heads/$opt_o to refs/heads/master: $?\n";
        }
        system('git-update-ref', 'HEAD', "$orig_branch") == 0
                or warn "git-update-ref HEAD $orig_branch failed: $?\n";
        unless ($opt_i) {
                system('git checkout');
                die "checkout failed: $?\n" if $?;
        }
}
unlink("$git_dir/SVN2GIT_HEAD");

# BRANCHES is written to by commit(); buffered write errors only surface
# when the handle is closed, so report a failed close.
close(BRANCHES)
        or warn "Error closing BRANCHES: $!\n";