Merge branch 'nd/find-pack-entry-recent-cache-invalidation' into maint
[git] / contrib / examples / git-svnimport.perl
1 #!/usr/bin/perl
2
3 # This tool is copyright (c) 2005, Matthias Urlichs.
4 # It is released under the Gnu Public License, version 2.
5 #
6 # The basic idea is to pull and analyze SVN changes.
7 #
8 # Checking out the files is done by a single long-running SVN connection.
9 #
10 # The head revision is on branch "origin" by default.
11 # You can change that with the '-o' option.
12
13 use strict;
14 use warnings;
15 use Getopt::Std;
16 use File::Copy;
17 use File::Spec;
18 use File::Temp qw(tempfile);
19 use File::Path qw(mkpath);
20 use File::Basename qw(basename dirname);
21 use Time::Local;
22 use IO::Pipe;
23 use POSIX qw(strftime dup2);
24 use IPC::Open2;
25 use SVN::Core;
26 use SVN::Ra;
27
28 die "Need SVN:Core 1.2.1 or better" if $SVN::Core::VERSION lt "1.2.1";
29
30 $SIG{'PIPE'}="IGNORE";
31 $ENV{'TZ'}="UTC";
32
33 our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,
34     $opt_b,$opt_r,$opt_I,$opt_A,$opt_s,$opt_l,$opt_d,$opt_D,$opt_S,$opt_F,
35     $opt_P,$opt_R);
36
37 sub usage() {
38         print STDERR <<END;
39 Usage: ${\basename $0}     # fetch/update GIT from SVN
40        [-o branch-for-HEAD] [-h] [-v] [-l max_rev] [-R repack_each_revs]
41        [-C GIT_repository] [-t tagname] [-T trunkname] [-b branchname]
42        [-d|-D] [-i] [-u] [-r] [-I ignorefilename] [-s start_chg]
43        [-m] [-M regex] [-A author_file] [-S] [-F] [-P project_name] [SVN_URL]
44 END
45         exit(1);
46 }
47
48 getopts("A:b:C:dDFhiI:l:mM:o:rs:t:T:SP:R:uv") or usage();
49 usage if $opt_h;
50
51 my $tag_name = $opt_t || "tags";
52 my $trunk_name = defined $opt_T ? $opt_T : "trunk";
53 my $branch_name = $opt_b || "branches";
54 my $project_name = $opt_P || "";
55 $project_name = "/" . $project_name if ($project_name);
56 my $repack_after = $opt_R || 1000;
57 my $root_pool = SVN::Pool->new_default;
58
59 @ARGV == 1 or @ARGV == 2 or usage();
60
61 $opt_o ||= "origin";
62 $opt_s ||= 1;
63 my $git_tree = $opt_C;
64 $git_tree ||= ".";
65
66 my $svn_url = $ARGV[0];
67 my $svn_dir = $ARGV[1];
68
69 our @mergerx = ();
70 if ($opt_m) {
71         my $branch_esc = quotemeta ($branch_name);
72         my $trunk_esc  = quotemeta ($trunk_name);
73         @mergerx =
74         (
75                 qr!\b(?:merg(?:ed?|ing))\b.*?\b((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
76                 qr!\b(?:from|of)\W+((?:(?<=$branch_esc/)[\w\.\-]+)|(?:$trunk_esc))\b!i,
77                 qr!\b(?:from|of)\W+(?:the )?([\w\.\-]+)[-\s]branch\b!i
78         );
79 }
80 if ($opt_M) {
81         unshift (@mergerx, qr/$opt_M/);
82 }
83
84 # Absolutize filename now, since we will have chdir'ed by the time we
85 # get around to opening it.
86 $opt_A = File::Spec->rel2abs($opt_A) if $opt_A;
87
88 our %users = ();
89 our $users_file = undef;
90 sub read_users($) {
91         $users_file = File::Spec->rel2abs(@_);
92         die "Cannot open $users_file\n" unless -f $users_file;
93         open(my $authors,$users_file);
94         while(<$authors>) {
95                 chomp;
96                 next unless /^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/;
97                 (my $user,my $name,my $email) = ($1,$2,$3);
98                 $users{$user} = [$name,$email];
99         }
100         close($authors);
101 }
102
103 select(STDERR); $|=1; select(STDOUT);
104
105
106 package SVNconn;
107 # Basic SVN connection.
108 # We're only interested in connecting and downloading, so ...
109
110 use File::Spec;
111 use File::Temp qw(tempfile);
112 use POSIX qw(strftime dup2);
113 use Fcntl qw(SEEK_SET);
114
115 sub new {
116         my($what,$repo) = @_;
117         $what=ref($what) if ref($what);
118
119         my $self = {};
120         $self->{'buffer'} = "";
121         bless($self,$what);
122
123         $repo =~ s#/+$##;
124         $self->{'fullrep'} = $repo;
125         $self->conn();
126
127         return $self;
128 }
129
130 sub conn {
131         my $self = shift;
132         my $repo = $self->{'fullrep'};
133         my $auth = SVN::Core::auth_open ([SVN::Client::get_simple_provider,
134                           SVN::Client::get_ssl_server_trust_file_provider,
135                           SVN::Client::get_username_provider]);
136         my $s = SVN::Ra->new(url => $repo, auth => $auth, pool => $root_pool);
137         die "SVN connection to $repo: $!\n" unless defined $s;
138         $self->{'svn'} = $s;
139         $self->{'repo'} = $repo;
140         $self->{'maxrev'} = $s->get_latest_revnum();
141 }
142
143 sub file {
144         my($self,$path,$rev) = @_;
145
146         my ($fh, $name) = tempfile('gitsvn.XXXXXX',
147                     DIR => File::Spec->tmpdir(), UNLINK => 1);
148
149         print "... $rev $path ...\n" if $opt_v;
150         my (undef, $properties);
151         $path =~ s#^/*##;
152         my $subpool = SVN::Pool::new_default_sub;
153         eval { (undef, $properties)
154                    = $self->{'svn'}->get_file($path,$rev,$fh); };
155         if($@) {
156                 return undef if $@ =~ /Attempted to get checksum/;
157                 die $@;
158         }
159         my $mode;
160         if (exists $properties->{'svn:executable'}) {
161                 $mode = '100755';
162         } elsif (exists $properties->{'svn:special'}) {
163                 my ($special_content, $filesize);
164                 $filesize = tell $fh;
165                 seek $fh, 0, SEEK_SET;
166                 read $fh, $special_content, $filesize;
167                 if ($special_content =~ s/^link //) {
168                         $mode = '120000';
169                         seek $fh, 0, SEEK_SET;
170                         truncate $fh, 0;
171                         print $fh $special_content;
172                 } else {
173                         die "unexpected svn:special file encountered";
174                 }
175         } else {
176                 $mode = '100644';
177         }
178         close ($fh);
179
180         return ($name, $mode);
181 }
182
183 sub ignore {
184         my($self,$path,$rev) = @_;
185
186         print "... $rev $path ...\n" if $opt_v;
187         $path =~ s#^/*##;
188         my $subpool = SVN::Pool::new_default_sub;
189         my (undef,undef,$properties)
190             = $self->{'svn'}->get_dir($path,$rev,undef);
191         if (exists $properties->{'svn:ignore'}) {
192                 my ($fh, $name) = tempfile('gitsvn.XXXXXX',
193                                            DIR => File::Spec->tmpdir(),
194                                            UNLINK => 1);
195                 print $fh $properties->{'svn:ignore'};
196                 close($fh);
197                 return $name;
198         } else {
199                 return undef;
200         }
201 }
202
203 sub dir_list {
204         my($self,$path,$rev) = @_;
205         $path =~ s#^/*##;
206         my $subpool = SVN::Pool::new_default_sub;
207         my ($dirents,undef,$properties)
208             = $self->{'svn'}->get_dir($path,$rev,undef);
209         return $dirents;
210 }
211
212 package main;
213 use URI;
214
215 our $svn = $svn_url;
216 $svn .= "/$svn_dir" if defined $svn_dir;
217 my $svn2 = SVNconn->new($svn);
218 $svn = SVNconn->new($svn);
219
220 my $lwp_ua;
221 if($opt_d or $opt_D) {
222         $svn_url = URI->new($svn_url)->canonical;
223         if($opt_D) {
224                 $svn_dir =~ s#/*$#/#;
225         } else {
226                 $svn_dir = "";
227         }
228         if ($svn_url->scheme eq "http") {
229                 use LWP::UserAgent;
230                 $lwp_ua = LWP::UserAgent->new(keep_alive => 1, requests_redirectable => []);
231         } else {
232                 print STDERR "Warning: not HTTP; turning off direct file access\n";
233                 $opt_d=0;
234         }
235 }
236
237 sub pdate($) {
238         my($d) = @_;
239         $d =~ m#(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)#
240                 or die "Unparseable date: $d\n";
241         my $y=$1; $y-=1900 if $y>1900;
242         return timegm($6||0,$5,$4,$3,$2-1,$y);
243 }
244
245 sub getwd() {
246         my $pwd = `pwd`;
247         chomp $pwd;
248         return $pwd;
249 }
250
251
252 sub get_headref($$) {
253     my $name    = shift;
254     my $git_dir = shift;
255     my $sha;
256
257     if (open(C,"$git_dir/refs/heads/$name")) {
258         chomp($sha = <C>);
259         close(C);
260         length($sha) == 40
261             or die "Cannot get head id for $name ($sha): $!\n";
262     }
263     return $sha;
264 }
265
266
267 -d $git_tree
268         or mkdir($git_tree,0777)
269         or die "Could not create $git_tree: $!";
270 chdir($git_tree);
271
272 my $orig_branch = "";
273 my $forward_master = 0;
274 my %branches;
275
276 my $git_dir = $ENV{"GIT_DIR"} || ".git";
277 $git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
278 $ENV{"GIT_DIR"} = $git_dir;
279 my $orig_git_index;
280 $orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};
281 my ($git_ih, $git_index) = tempfile('gitXXXXXX', SUFFIX => '.idx',
282                                     DIR => File::Spec->tmpdir());
283 close ($git_ih);
284 $ENV{GIT_INDEX_FILE} = $git_index;
285 my $maxnum = 0;
286 my $last_rev = "";
287 my $last_branch;
288 my $current_rev = $opt_s || 1;
289 unless(-d $git_dir) {
290         system("git init");
291         die "Cannot init the GIT db at $git_tree: $?\n" if $?;
292         system("git read-tree --empty");
293         die "Cannot init an empty tree: $?\n" if $?;
294
295         $last_branch = $opt_o;
296         $orig_branch = "";
297 } else {
298         -f "$git_dir/refs/heads/$opt_o"
299                 or die "Branch '$opt_o' does not exist.\n".
300                        "Either use the correct '-o branch' option,\n".
301                        "or import to a new repository.\n";
302
303         -f "$git_dir/svn2git"
304                 or die "'$git_dir/svn2git' does not exist.\n".
305                        "You need that file for incremental imports.\n";
306         open(F, "git symbolic-ref HEAD |") or
307                 die "Cannot run git-symbolic-ref: $!\n";
308         chomp ($last_branch = <F>);
309         $last_branch = basename($last_branch);
310         close(F);
311         unless($last_branch) {
312                 warn "Cannot read the last branch name: $! -- assuming 'master'\n";
313                 $last_branch = "master";
314         }
315         $orig_branch = $last_branch;
316         $last_rev = get_headref($orig_branch, $git_dir);
317         if (-f "$git_dir/SVN2GIT_HEAD") {
318                 die <<EOM;
319 SVN2GIT_HEAD exists.
320 Make sure your working directory corresponds to HEAD and remove SVN2GIT_HEAD.
321 You may need to run
322
323     git-read-tree -m -u SVN2GIT_HEAD HEAD
324 EOM
325         }
326         system('cp', "$git_dir/HEAD", "$git_dir/SVN2GIT_HEAD");
327
328         $forward_master =
329             $opt_o ne 'master' && -f "$git_dir/refs/heads/master" &&
330             system('cmp', '-s', "$git_dir/refs/heads/master",
331                                 "$git_dir/refs/heads/$opt_o") == 0;
332
333         # populate index
334         system('git', 'read-tree', $last_rev);
335         die "read-tree failed: $?\n" if $?;
336
337         # Get the last import timestamps
338         open my $B,"<", "$git_dir/svn2git";
339         while(<$B>) {
340                 chomp;
341                 my($num,$branch,$ref) = split;
342                 $branches{$branch}{$num} = $ref;
343                 $branches{$branch}{"LAST"} = $ref;
344                 $current_rev = $num+1 if $current_rev <= $num;
345         }
346         close($B);
347 }
348 -d $git_dir
349         or die "Could not create git subdir ($git_dir).\n";
350
351 my $default_authors = "$git_dir/svn-authors";
352 if ($opt_A) {
353         read_users($opt_A);
354         copy($opt_A,$default_authors) or die "Copy failed: $!";
355 } else {
356         read_users($default_authors) if -f $default_authors;
357 }
358
359 open BRANCHES,">>", "$git_dir/svn2git";
360
361 sub node_kind($$) {
362         my ($svnpath, $revision) = @_;
363         $svnpath =~ s#^/*##;
364         my $subpool = SVN::Pool::new_default_sub;
365         my $kind = $svn->{'svn'}->check_path($svnpath,$revision);
366         return $kind;
367 }
368
369 sub get_file($$$) {
370         my($svnpath,$rev,$path) = @_;
371
372         # now get it
373         my ($name,$mode);
374         if($opt_d) {
375                 my($req,$res);
376
377                 # /svn/!svn/bc/2/django/trunk/django-docs/build.py
378                 my $url=$svn_url->clone();
379                 $url->path($url->path."/!svn/bc/$rev/$svn_dir$svnpath");
380                 print "... $path...\n" if $opt_v;
381                 $req = HTTP::Request->new(GET => $url);
382                 $res = $lwp_ua->request($req);
383                 if ($res->is_success) {
384                         my $fh;
385                         ($fh, $name) = tempfile('gitsvn.XXXXXX',
386                         DIR => File::Spec->tmpdir(), UNLINK => 1);
387                         print $fh $res->content;
388                         close($fh) or die "Could not write $name: $!\n";
389                 } else {
390                         return undef if $res->code == 301; # directory?
391                         die $res->status_line." at $url\n";
392                 }
393                 $mode = '0644'; # can't obtain mode via direct http request?
394         } else {
395                 ($name,$mode) = $svn->file("$svnpath",$rev);
396                 return undef unless defined $name;
397         }
398
399         my $pid = open(my $F, '-|');
400         die $! unless defined $pid;
401         if (!$pid) {
402             exec("git", "hash-object", "-w", $name)
403                 or die "Cannot create object: $!\n";
404         }
405         my $sha = <$F>;
406         chomp $sha;
407         close $F;
408         unlink $name;
409         return [$mode, $sha, $path];
410 }
411
412 sub get_ignore($$$$$) {
413         my($new,$old,$rev,$path,$svnpath) = @_;
414
415         return unless $opt_I;
416         my $name = $svn->ignore("$svnpath",$rev);
417         if ($path eq '/') {
418                 $path = $opt_I;
419         } else {
420                 $path = File::Spec->catfile($path,$opt_I);
421         }
422         if (defined $name) {
423                 my $pid = open(my $F, '-|');
424                 die $! unless defined $pid;
425                 if (!$pid) {
426                         exec("git", "hash-object", "-w", $name)
427                             or die "Cannot create object: $!\n";
428                 }
429                 my $sha = <$F>;
430                 chomp $sha;
431                 close $F;
432                 unlink $name;
433                 push(@$new,['0644',$sha,$path]);
434         } elsif (defined $old) {
435                 push(@$old,$path);
436         }
437 }
438
439 sub project_path($$)
440 {
441         my ($path, $project) = @_;
442
443         $path = "/".$path unless ($path =~ m#^\/#) ;
444         return $1 if ($path =~ m#^$project\/(.*)$#);
445
446         $path =~ s#\.#\\\.#g;
447         $path =~ s#\+#\\\+#g;
448         return "/" if ($project =~ m#^$path.*$#);
449
450         return undef;
451 }
452
453 sub split_path($$) {
454         my($rev,$path) = @_;
455         my $branch;
456
457         if($path =~ s#^/\Q$tag_name\E/([^/]+)/?##) {
458                 $branch = "/$1";
459         } elsif($path =~ s#^/\Q$trunk_name\E/?##) {
460                 $branch = "/";
461         } elsif($path =~ s#^/\Q$branch_name\E/([^/]+)/?##) {
462                 $branch = $1;
463         } else {
464                 my %no_error = (
465                         "/" => 1,
466                         "/$tag_name" => 1,
467                         "/$branch_name" => 1
468                 );
469                 print STDERR "$rev: Unrecognized path: $path\n" unless (defined $no_error{$path});
470                 return ()
471         }
472         if ($path eq "") {
473                 $path = "/";
474         } elsif ($project_name) {
475                 $path = project_path($path, $project_name);
476         }
477         return ($branch,$path);
478 }
479
480 sub branch_rev($$) {
481
482         my ($srcbranch,$uptorev) = @_;
483
484         my $bbranches = $branches{$srcbranch};
485         my @revs = reverse sort { ($a eq 'LAST' ? 0 : $a) <=> ($b eq 'LAST' ? 0 : $b) } keys %$bbranches;
486         my $therev;
487         foreach my $arev(@revs) {
488                 next if  ($arev eq 'LAST');
489                 if ($arev <= $uptorev) {
490                         $therev = $arev;
491                         last;
492                 }
493         }
494         return $therev;
495 }
496
497 sub expand_svndir($$$);
498
499 sub expand_svndir($$$)
500 {
501         my ($svnpath, $rev, $path) = @_;
502         my @list;
503         get_ignore(\@list, undef, $rev, $path, $svnpath);
504         my $dirents = $svn->dir_list($svnpath, $rev);
505         foreach my $p(keys %$dirents) {
506                 my $kind = node_kind($svnpath.'/'.$p, $rev);
507                 if ($kind eq $SVN::Node::file) {
508                         my $f = get_file($svnpath.'/'.$p, $rev, $path.'/'.$p);
509                         push(@list, $f) if $f;
510                 } elsif ($kind eq $SVN::Node::dir) {
511                         push(@list,
512                              expand_svndir($svnpath.'/'.$p, $rev, $path.'/'.$p));
513                 }
514         }
515         return @list;
516 }
517
518 sub copy_path($$$$$$$$) {
519         # Somebody copied a whole subdirectory.
520         # We need to find the index entries from the old version which the
521         # SVN log entry points to, and add them to the new place.
522
523         my($newrev,$newbranch,$path,$oldpath,$rev,$node_kind,$new,$parents) = @_;
524
525         my($srcbranch,$srcpath) = split_path($rev,$oldpath);
526         unless(defined $srcbranch && defined $srcpath) {
527                 print "Path not found when copying from $oldpath @ $rev.\n".
528                         "Will try to copy from original SVN location...\n"
529                         if $opt_v;
530                 push (@$new, expand_svndir($oldpath, $rev, $path));
531                 return;
532         }
533         my $therev = branch_rev($srcbranch, $rev);
534         my $gitrev = $branches{$srcbranch}{$therev};
535         unless($gitrev) {
536                 print STDERR "$newrev:$newbranch: could not find $oldpath \@ $rev\n";
537                 return;
538         }
539         if ($srcbranch ne $newbranch) {
540                 push(@$parents, $branches{$srcbranch}{'LAST'});
541         }
542         print "$newrev:$newbranch:$path: copying from $srcbranch:$srcpath @ $rev\n" if $opt_v;
543         if ($node_kind eq $SVN::Node::dir) {
544                 $srcpath =~ s#/*$#/#;
545         }
546
547         my $pid = open my $f,'-|';
548         die $! unless defined $pid;
549         if (!$pid) {
550                 exec("git","ls-tree","-r","-z",$gitrev,$srcpath)
551                         or die $!;
552         }
553         local $/ = "\0";
554         while(<$f>) {
555                 chomp;
556                 my($m,$p) = split(/\t/,$_,2);
557                 my($mode,$type,$sha1) = split(/ /,$m);
558                 next if $type ne "blob";
559                 if ($node_kind eq $SVN::Node::dir) {
560                         $p = $path . substr($p,length($srcpath)-1);
561                 } else {
562                         $p = $path;
563                 }
564                 push(@$new,[$mode,$sha1,$p]);
565         }
566         close($f) or
567                 print STDERR "$newrev:$newbranch: could not list files in $oldpath \@ $rev\n";
568 }
569
570 sub commit {
571         my($branch, $changed_paths, $revision, $author, $date, $message) = @_;
572         my($committer_name,$committer_email,$dest);
573         my($author_name,$author_email);
574         my(@old,@new,@parents);
575
576         if (not defined $author or $author eq "") {
577                 $committer_name = $committer_email = "unknown";
578         } elsif (defined $users_file) {
579                 die "User $author is not listed in $users_file\n"
580                     unless exists $users{$author};
581                 ($committer_name,$committer_email) = @{$users{$author}};
582         } elsif ($author =~ /^(.*?)\s+<(.*)>$/) {
583                 ($committer_name, $committer_email) = ($1, $2);
584         } else {
585                 $author =~ s/^<(.*)>$/$1/;
586                 $committer_name = $committer_email = $author;
587         }
588
589         if ($opt_F && $message =~ /From:\s+(.*?)\s+<(.*)>\s*\n/) {
590                 ($author_name, $author_email) = ($1, $2);
591                 print "Author from From: $1 <$2>\n" if ($opt_v);;
592         } elsif ($opt_S && $message =~ /Signed-off-by:\s+(.*?)\s+<(.*)>\s*\n/) {
593                 ($author_name, $author_email) = ($1, $2);
594                 print "Author from Signed-off-by: $1 <$2>\n" if ($opt_v);;
595         } else {
596                 $author_name = $committer_name;
597                 $author_email = $committer_email;
598         }
599
600         $date = pdate($date);
601
602         my $tag;
603         my $parent;
604         if($branch eq "/") { # trunk
605                 $parent = $opt_o;
606         } elsif($branch =~ m#^/(.+)#) { # tag
607                 $tag = 1;
608                 $parent = $1;
609         } else { # "normal" branch
610                 # nothing to do
611                 $parent = $branch;
612         }
613         $dest = $parent;
614
615         my $prev = $changed_paths->{"/"};
616         if($prev and $prev->[0] eq "A") {
617                 delete $changed_paths->{"/"};
618                 my $oldpath = $prev->[1];
619                 my $rev;
620                 if(defined $oldpath) {
621                         my $p;
622                         ($parent,$p) = split_path($revision,$oldpath);
623                         if(defined $parent) {
624                                 if($parent eq "/") {
625                                         $parent = $opt_o;
626                                 } else {
627                                         $parent =~ s#^/##; # if it's a tag
628                                 }
629                         }
630                 } else {
631                         $parent = undef;
632                 }
633         }
634
635         my $rev;
636         if($revision > $opt_s and defined $parent) {
637                 open(H,'-|',"git","rev-parse","--verify",$parent);
638                 $rev = <H>;
639                 close(H) or do {
640                         print STDERR "$revision: cannot find commit '$parent'!\n";
641                         return;
642                 };
643                 chop $rev;
644                 if(length($rev) != 40) {
645                         print STDERR "$revision: cannot find commit '$parent'!\n";
646                         return;
647                 }
648                 $rev = $branches{($parent eq $opt_o) ? "/" : $parent}{"LAST"};
649                 if($revision != $opt_s and not $rev) {
650                         print STDERR "$revision: do not know ancestor for '$parent'!\n";
651                         return;
652                 }
653         } else {
654                 $rev = undef;
655         }
656
657 #       if($prev and $prev->[0] eq "A") {
658 #               if(not $tag) {
659 #                       unless(open(H,"> $git_dir/refs/heads/$branch")) {
660 #                               print STDERR "$revision: Could not create branch $branch: $!\n";
661 #                               $state=11;
662 #                               next;
663 #                       }
664 #                       print H "$rev\n"
665 #                               or die "Could not write branch $branch: $!";
666 #                       close(H)
667 #                               or die "Could not write branch $branch: $!";
668 #               }
669 #       }
670         if(not defined $rev) {
671                 unlink($git_index);
672         } elsif ($rev ne $last_rev) {
673                 print "Switching from $last_rev to $rev ($branch)\n" if $opt_v;
674                 system("git", "read-tree", $rev);
675                 die "read-tree failed for $rev: $?\n" if $?;
676                 $last_rev = $rev;
677         }
678
679         push (@parents, $rev) if defined $rev;
680
681         my $cid;
682         if($tag and not %$changed_paths) {
683                 $cid = $rev;
684         } else {
685                 my @paths = sort keys %$changed_paths;
686                 foreach my $path(@paths) {
687                         my $action = $changed_paths->{$path};
688
689                         if ($action->[0] eq "R") {
690                                 # refer to a file/tree in an earlier commit
691                                 push(@old,$path); # remove any old stuff
692                         }
693                         if(($action->[0] eq "A") || ($action->[0] eq "R")) {
694                                 my $node_kind = node_kind($action->[3], $revision);
695                                 if ($node_kind eq $SVN::Node::file) {
696                                         my $f = get_file($action->[3],
697                                                          $revision, $path);
698                                         if ($f) {
699                                                 push(@new,$f) if $f;
700                                         } else {
701                                                 my $opath = $action->[3];
702                                                 print STDERR "$revision: $branch: could not fetch '$opath'\n";
703                                         }
704                                 } elsif ($node_kind eq $SVN::Node::dir) {
705                                         if($action->[1]) {
706                                                 copy_path($revision, $branch,
707                                                           $path, $action->[1],
708                                                           $action->[2], $node_kind,
709                                                           \@new, \@parents);
710                                         } else {
711                                                 get_ignore(\@new, \@old, $revision,
712                                                            $path, $action->[3]);
713                                         }
714                                 }
715                         } elsif ($action->[0] eq "D") {
716                                 push(@old,$path);
717                         } elsif ($action->[0] eq "M") {
718                                 my $node_kind = node_kind($action->[3], $revision);
719                                 if ($node_kind eq $SVN::Node::file) {
720                                         my $f = get_file($action->[3],
721                                                          $revision, $path);
722                                         push(@new,$f) if $f;
723                                 } elsif ($node_kind eq $SVN::Node::dir) {
724                                         get_ignore(\@new, \@old, $revision,
725                                                    $path, $action->[3]);
726                                 }
727                         } else {
728                                 die "$revision: unknown action '".$action->[0]."' for $path\n";
729                         }
730                 }
731
732                 while(@old) {
733                         my @o1;
734                         if(@old > 55) {
735                                 @o1 = splice(@old,0,50);
736                         } else {
737                                 @o1 = @old;
738                                 @old = ();
739                         }
740                         my $pid = open my $F, "-|";
741                         die "$!" unless defined $pid;
742                         if (!$pid) {
743                                 exec("git", "ls-files", "-z", @o1) or die $!;
744                         }
745                         @o1 = ();
746                         local $/ = "\0";
747                         while(<$F>) {
748                                 chomp;
749                                 push(@o1,$_);
750                         }
751                         close($F);
752
753                         while(@o1) {
754                                 my @o2;
755                                 if(@o1 > 55) {
756                                         @o2 = splice(@o1,0,50);
757                                 } else {
758                                         @o2 = @o1;
759                                         @o1 = ();
760                                 }
761                                 system("git","update-index","--force-remove","--",@o2);
762                                 die "Cannot remove files: $?\n" if $?;
763                         }
764                 }
765                 while(@new) {
766                         my @n2;
767                         if(@new > 12) {
768                                 @n2 = splice(@new,0,10);
769                         } else {
770                                 @n2 = @new;
771                                 @new = ();
772                         }
773                         system("git","update-index","--add",
774                                 (map { ('--cacheinfo', @$_) } @n2));
775                         die "Cannot add files: $?\n" if $?;
776                 }
777
778                 my $pid = open(C,"-|");
779                 die "Cannot fork: $!" unless defined $pid;
780                 unless($pid) {
781                         exec("git","write-tree");
782                         die "Cannot exec git-write-tree: $!\n";
783                 }
784                 chomp(my $tree = <C>);
785                 length($tree) == 40
786                         or die "Cannot get tree id ($tree): $!\n";
787                 close(C)
788                         or die "Error running git-write-tree: $?\n";
789                 print "Tree ID $tree\n" if $opt_v;
790
791                 my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n";
792                 my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n";
793                 $pid = fork();
794                 die "Fork: $!\n" unless defined $pid;
795                 unless($pid) {
796                         $pr->writer();
797                         $pw->reader();
798                         open(OUT,">&STDOUT");
799                         dup2($pw->fileno(),0);
800                         dup2($pr->fileno(),1);
801                         $pr->close();
802                         $pw->close();
803
804                         my @par = ();
805
806                         # loose detection of merges
807                         # based on the commit msg
808                         foreach my $rx (@mergerx) {
809                                 if ($message =~ $rx) {
810                                         my $mparent = $1;
811                                         if ($mparent eq 'HEAD') { $mparent = $opt_o };
812                                         if ( -e "$git_dir/refs/heads/$mparent") {
813                                                 $mparent = get_headref($mparent, $git_dir);
814                                                 push (@parents, $mparent);
815                                                 print OUT "Merge parent branch: $mparent\n" if $opt_v;
816                                         }
817                                 }
818                         }
819                         my %seen_parents = ();
820                         my @unique_parents = grep { ! $seen_parents{$_} ++ } @parents;
821                         foreach my $bparent (@unique_parents) {
822                                 push @par, '-p', $bparent;
823                                 print OUT "Merge parent branch: $bparent\n" if $opt_v;
824                         }
825
826                         exec("env",
827                                 "GIT_AUTHOR_NAME=$author_name",
828                                 "GIT_AUTHOR_EMAIL=$author_email",
829                                 "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
830                                 "GIT_COMMITTER_NAME=$committer_name",
831                                 "GIT_COMMITTER_EMAIL=$committer_email",
832                                 "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)),
833                                 "git", "commit-tree", $tree,@par);
834                         die "Cannot exec git-commit-tree: $!\n";
835                 }
836                 $pw->writer();
837                 $pr->reader();
838
839                 $message =~ s/[\s\n]+\z//;
840                 $message = "r$revision: $message" if $opt_r;
841
842                 print $pw "$message\n"
843                         or die "Error writing to git-commit-tree: $!\n";
844                 $pw->close();
845
846                 print "Committed change $revision:$branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v;
847                 chomp($cid = <$pr>);
848                 length($cid) == 40
849                         or die "Cannot get commit id ($cid): $!\n";
850                 print "Commit ID $cid\n" if $opt_v;
851                 $pr->close();
852
853                 waitpid($pid,0);
854                 die "Error running git-commit-tree: $?\n" if $?;
855         }
856
857         if (not defined $cid) {
858                 $cid = $branches{"/"}{"LAST"};
859         }
860
861         if(not defined $dest) {
862                 print "... no known parent\n" if $opt_v;
863         } elsif(not $tag) {
864                 print "Writing to refs/heads/$dest\n" if $opt_v;
865                 open(C,">$git_dir/refs/heads/$dest") and
866                 print C ("$cid\n") and
867                 close(C)
868                         or die "Cannot write branch $dest for update: $!\n";
869         }
870
871         if ($tag) {
872                 $last_rev = "-" if %$changed_paths;
873                 # the tag was 'complex', i.e. did not refer to a "real" revision
874
875                 $dest =~ tr/_/\./ if $opt_u;
876
877                 system('git', 'tag', '-f', $dest, $cid) == 0
878                         or die "Cannot create tag $dest: $!\n";
879
880                 print "Created tag '$dest' on '$branch'\n" if $opt_v;
881         }
882         $branches{$branch}{"LAST"} = $cid;
883         $branches{$branch}{$revision} = $cid;
884         $last_rev = $cid;
885         print BRANCHES "$revision $branch $cid\n";
886         print "DONE: $revision $dest $cid\n" if $opt_v;
887 }
888
889 sub commit_all {
890         # Recursive use of the SVN connection does not work
891         local $svn = $svn2;
892
893         my ($changed_paths, $revision, $author, $date, $message) = @_;
894         my %p;
895         while(my($path,$action) = each %$changed_paths) {
896                 $p{$path} = [ $action->action,$action->copyfrom_path, $action->copyfrom_rev, $path ];
897         }
898         $changed_paths = \%p;
899
900         my %done;
901         my @col;
902         my $pref;
903         my $branch;
904
905         while(my($path,$action) = each %$changed_paths) {
906                 ($branch,$path) = split_path($revision,$path);
907                 next if not defined $branch;
908                 next if not defined $path;
909                 $done{$branch}{$path} = $action;
910         }
911         while(($branch,$changed_paths) = each %done) {
912                 commit($branch, $changed_paths, $revision, $author, $date, $message);
913         }
914 }
915
916 $opt_l = $svn->{'maxrev'} if not defined $opt_l or $opt_l > $svn->{'maxrev'};
917
918 if ($opt_l < $current_rev) {
919     print "Up to date: no new revisions to fetch!\n" if $opt_v;
920     unlink("$git_dir/SVN2GIT_HEAD");
921     exit;
922 }
923
924 print "Processing from $current_rev to $opt_l ...\n" if $opt_v;
925
926 my $from_rev;
927 my $to_rev = $current_rev - 1;
928
929 my $subpool = SVN::Pool::new_default_sub;
930 while ($to_rev < $opt_l) {
931         $subpool->clear;
932         $from_rev = $to_rev + 1;
933         $to_rev = $from_rev + $repack_after;
934         $to_rev = $opt_l if $opt_l < $to_rev;
935         print "Fetching from $from_rev to $to_rev ...\n" if $opt_v;
936         $svn->{'svn'}->get_log("",$from_rev,$to_rev,0,1,1,\&commit_all);
937         my $pid = fork();
938         die "Fork: $!\n" unless defined $pid;
939         unless($pid) {
940                 exec("git", "repack", "-d")
941                         or die "Cannot repack: $!\n";
942         }
943         waitpid($pid, 0);
944 }
945
946
947 unlink($git_index);
948
949 if (defined $orig_git_index) {
950         $ENV{GIT_INDEX_FILE} = $orig_git_index;
951 } else {
952         delete $ENV{GIT_INDEX_FILE};
953 }
954
955 # Now switch back to the branch we were in before all of this happened
956 if($orig_branch) {
957         print "DONE\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
958         system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
959                 if $forward_master;
960         unless ($opt_i) {
961                 system('git', 'read-tree', '-m', '-u', 'SVN2GIT_HEAD', 'HEAD');
962                 die "read-tree failed: $?\n" if $?;
963         }
964 } else {
965         $orig_branch = "master";
966         print "DONE; creating $orig_branch branch\n" if $opt_v and (not defined $opt_l or $opt_l > 0);
967         system("cp","$git_dir/refs/heads/$opt_o","$git_dir/refs/heads/master")
968                 unless -f "$git_dir/refs/heads/master";
969         system('git', 'update-ref', 'HEAD', "$orig_branch");
970         unless ($opt_i) {
971                 system('git checkout');
972                 die "checkout failed: $?\n" if $?;
973         }
974 }
975 unlink("$git_dir/SVN2GIT_HEAD");
976 close(BRANCHES);