Merge branch 'hn/reftable' into master
[git] / contrib / fast-import / import-tars.perl
1 #!/usr/bin/perl
2
3 ## tar archive frontend for git-fast-import
4 ##
5 ## For example:
6 ##
7 ##  mkdir project; cd project; git init
8 ##  perl import-tars.perl *.tar.bz2
9 ##  git whatchanged import-tars
10 ##
11 ## Use --metainfo to specify the extension for a meta data file, where
12 ## import-tars can read the commit message and optionally author and
13 ## committer information.
14 ##
15 ##  echo 'This is the commit message' > myfile.tar.bz2.msg
16 ##  perl import-tars.perl --metainfo=msg myfile.tar.bz2
17
18 use strict;
19 use Getopt::Long;
20
# Extension of the optional per-archive metadata file (--metainfo);
# the empty string means no metadata file is consulted.
my $metaext = '';

# GetOptions removes the switches it recognises from @ARGV, so what is
# left afterwards is the list of archives to import.
my $options_ok = GetOptions('metainfo=s' => \$metaext);
unless ($options_ok && @ARGV) {
        die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,lzma,xz,Z}\n";
}

# Branch that receives one commit per imported archive.
my $branch_name = 'import-tars';
my $branch_ref = "refs/heads/$branch_name";

# Author identity: taken from the environment, with fixed fallbacks.
my $author_name = $ENV{'GIT_AUTHOR_NAME'};
$author_name = 'T Ar Creator' unless $author_name;
my $author_email = $ENV{'GIT_AUTHOR_EMAIL'};
$author_email = 'tar@example.com' unless $author_email;

# Committer identity: taken from the environment; git's configuration
# is only queried when the corresponding variable is unset or empty.
my $committer_name = $ENV{'GIT_COMMITTER_NAME'};
$committer_name = `git config --get user.name` unless $committer_name;
my $committer_email = $ENV{'GIT_COMMITTER_EMAIL'};
$committer_email = `git config --get user.email` unless $committer_email;

# Drop the trailing newline left by the `git config` output.
chomp $committer_name;
chomp $committer_email;
34
# Main import: every archive named on the command line is streamed into
# a single `git fast-import` process as one commit on $branch_ref plus a
# tag named after the archive.  The code from here on is written to run
# cleanly under warnings.
use warnings;

# Tar archives are a sequence of 512-byte blocks; member data is padded
# up to the next block boundary.
my $block_size = 512;

# Field layout of a header block.  The trailing Z* reads the prefix
# field and stops at its first NUL.
my $header_format = 'Z100 Z8 Z8 Z8 Z12 Z12 Z8 Z1 Z100 Z6 Z2 Z32 Z32 Z8 Z8 Z*';

open(my $fast_import, '|-', 'git', 'fast-import', '--quiet')
        or die "Unable to start git fast-import: $!\n";
binmode $fast_import;

foreach my $tar_file (@ARGV)
{
        my $commit_time = time;
        my ($tar_name, $in) = open_archive($tar_file);

        my $author_time = 0;    # newest mtime seen in the archive
        my $next_mark = 1;
        my $have_top_dir = 1;   # cleared once members disagree on it
        my ($top_dir, %files);  # %files: path => [ mark, mode ]

        # Path announced by a pax extended header for the next member.
        my $next_path = '';

        my $block;
        while ((read($in, $block, $block_size) || 0) == $block_size) {
                my ($name, $mode, $size, $mtime, $typeflag, $linkname,
                        $prefix) = parse_header($block);

                if ($next_path ne '') {
                        # A pax "path" record overrides both the name and
                        # the prefix field of the header that follows it.
                        $name = $next_path;
                        $prefix = '';
                        $next_path = '';
                }

                last unless length($name); # end-of-archive marker

                # GNU tar extension: a pseudo member carries an over-long
                # name (type 'L') or link target (type 'K') as its data,
                # followed by the real header.  Both kinds may precede
                # the same member.
                my ($long_name, $long_link) = ('', '');
                while ($name eq '././@LongLink') {
                        my $payload = unpack 'Z*',
                                read_payload($in, $size, $tar_file);
                        $payload = '' unless defined $payload;
                        if ($typeflag eq 'K') {
                                $long_link = $payload;
                        } else {
                                $long_name = $payload;
                        }
                        if ((read($in, $block, $block_size) || 0) != $block_size) {
                                die "Short archive: $tar_file\n";
                        }
                        ($name, $mode, $size, $mtime, $typeflag, $linkname,
                                $prefix) = parse_header($block);
                }
                if (length $long_name) {
                        # The long name is already the complete path.
                        $name = $long_name;
                        $prefix = '';
                }
                $linkname = $long_link if length $long_link;

                # The type flag is a character, so compare it as a string.
                next if $typeflag eq '5'; # directory

                if ($typeflag eq 'x' || $typeflag eq 'g') {
                        # pax headers: always consume the records so the
                        # stream stays aligned.  Only the per-member
                        # header ('x') can rename the next member; the
                        # global header ('g') is ignored.
                        my $pax_header = read_payload($in, $size, $tar_file);
                        $next_path = pax_path($pax_header) if $typeflag eq 'x';
                        next;
                }

                next if $name =~ m{/\z}; # directory

                my $path = length($prefix) ? "$prefix/$name" : $name;

                if ($typeflag eq '1') { # hard link
                        # Reuse the blob of the member linked to.  The
                        # prefixed spelling is tried first, then the
                        # link name exactly as stored.
                        my @candidates = length($prefix)
                                ? ("$prefix/$linkname", $linkname)
                                : ($linkname);
                        my ($target) = grep { exists $files{$_} } @candidates;
                        unless (defined $target) {
                                warn "Skipping hard link '$path': target '$linkname' not found in $tar_file\n";
                                next;
                        }
                        $files{$path} = [ $files{$target}->[0], $mode ];
                } else {
                        print {$fast_import} "blob\n", "mark :$next_mark\n";
                        if ($typeflag eq '2') { # symbolic link
                                print {$fast_import} "data ", length($linkname), "\n",
                                        $linkname;
                                $mode = 0120000;
                        } else {
                                print {$fast_import} "data $size\n";
                                copy_payload($in, $fast_import, $size, $tar_file);
                        }
                        print {$fast_import} "\n";
                        $files{$path} = [$next_mark++, $mode];
                }

                $author_time = $mtime if $mtime > $author_time;

                # Track whether everything lives below one common top
                # directory.  The capture is taken from this match only,
                # so a path without a directory cannot reuse a stale $1.
                my ($first_dir) = $path =~ m,^([^/]+)/,;
                if (!defined $first_dir) {
                        $have_top_dir = 0;
                } elsif (!defined $top_dir) {
                        $top_dir = $first_dir;
                } elsif ($top_dir ne $first_dir) {
                        $have_top_dir = 0;
                }
        }

        my $commit_msg = "Imported from $tar_file.";
        my $this_committer_name = $committer_name;
        my $this_committer_email = $committer_email;
        my $this_author_name = $author_name;
        my $this_author_email = $author_email;
        if ($metaext ne '') {
                # Optionally read a commit message from <filename.tar>.msg
                # Add a line on the form "Committer: name <e-mail>" to override
                # the committer and "Author: name <e-mail>" to override the
                # author for this tar ball.
                if (open(my $msg, '<', "${tar_file}.${metaext}")) {
                        my $header_done = 0;
                        $commit_msg = '';
                        while (my $line = <$msg>) {
                                if (!$header_done && $line =~ /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
                                        $this_committer_name = $1;
                                        $this_committer_email = $2;
                                } elsif (!$header_done && $line =~ /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
                                        $this_author_name = $1;
                                        $this_author_email = $2;
                                } elsif (!$header_done && $line =~ /^$/) { # empty line ends header.
                                        $header_done = 1;
                                } else {
                                        $commit_msg .= $line;
                                        $header_done = 1;
                                }
                        }
                        close $msg;
                }
        }

        # Messages are sent with an exact byte count, so no line inside
        # them can be mistaken for the end of the data.  The appended
        # newline keeps the bytes the delimited form used to produce.
        my $commit_data = "$commit_msg\n";
        print {$fast_import}
                "commit $branch_ref\n",
                "author $this_author_name <$this_author_email> $author_time +0000\n",
                "committer $this_committer_name <$this_committer_email> $commit_time +0000\n",
                "data ", length($commit_data), "\n", $commit_data,
                "\n",
                "deleteall\n";

        foreach my $stored_path (keys %files)
        {
                my ($mark, $mode) = @{$files{$stored_path}};
                my $path = $stored_path;
                $path =~ s,^[^/]+/,, if $have_top_dir;
                # Git only records the executable bit of regular files.
                $mode = $mode & 0111 ? 0755 : 0644 unless $mode == 0120000;
                printf {$fast_import} "M %o :%i %s\n", $mode, $mark, $path;
        }
        print {$fast_import} "\n";

        my $tag_data = "Package $tar_name\n";
        print {$fast_import}
                "tag $tar_name\n",
                "from $branch_ref\n",
                "tagger $author_name <$author_email> $author_time +0000\n",
                "data ", length($tag_data), "\n", $tag_data,
                "\n";

        # The exit status of a decompressor is deliberately not checked:
        # reading stops at the end-of-archive marker, so the child may
        # legitimately be cut off while still writing padding.
        close $in;
}

# A failed import must not go unnoticed: close() on a pipe reports the
# exit status of the child.
unless (close $fast_import) {
        die $! ? "Unable to close pipe to git fast-import: $!\n"
               : "git fast-import failed (wait status $?)\n";
}

# Open $tar_file for reading, through the matching decompressor when the
# file name calls for one.  Returns the archive name with directory and
# archive extension removed, and the binary-mode input handle.
sub open_archive {
        my ($tar_file) = @_;

        $tar_file =~ m,([^/]+)$,
                or die "Unrecognized compression format: $tar_file\n";
        my $tar_name = $1;

        my @decompress;
        if ($tar_name =~ s/\.(tar\.gz|tgz)$//) {
                @decompress = ('gunzip', '-c');
        } elsif ($tar_name =~ s/\.(tar\.bz2|tbz2)$//) {
                @decompress = ('bunzip2', '-c');
        } elsif ($tar_name =~ s/\.tar\.Z$//) {
                @decompress = ('uncompress', '-c');
        } elsif ($tar_name =~ s/\.(tar\.(lzma|xz)|(tlz|txz))$//) {
                @decompress = ('xz', '-dc');
        } elsif ($tar_name =~ s/\.tar$//) {
                @decompress = ();
        } else {
                die "Unrecognized compression format: $tar_file\n";
        }

        my $in;
        if (@decompress) {
                open($in, '-|', @decompress, $tar_file)
                        or die "Unable to @decompress $tar_file: $!\n";
        } else {
                # Three-argument open: the file name is never
                # interpreted as a mode or a command.
                open($in, '<', $tar_file)
                        or die "Unable to open $tar_file: $!\n";
        }
        binmode $in;
        return ($tar_name, $in);
}

# Decode the header fields this script uses from one 512-byte block.
# Returns (name, mode, size, mtime, typeflag, linkname, prefix), with
# the three numeric fields converted from octal.
sub parse_header {
        my ($block) = @_;
        my ($name, $mode, $size, $mtime, $typeflag, $linkname, $prefix) =
                (unpack $header_format, $block)[0, 1, 4, 5, 7, 8, 15];
        return ($name, oct($mode), oct($size), oct($mtime),
                $typeflag, $linkname, $prefix);
}

# Read $size bytes of member data from $in, also consuming the padding
# up to the next block boundary.  Dies if the archive ends early.
sub read_payload {
        my ($in, $size, $tar_file) = @_;
        my $payload = '';
        while ($size > 0) {
                my $block;
                if ((read($in, $block, $block_size) || 0) != $block_size) {
                        die "Short archive: $tar_file\n";
                }
                $payload .= substr($block, 0, $size);
                $size -= $block_size;
        }
        return $payload;
}

# Like read_payload, but stream the data to $out block by block instead
# of collecting it, so large members are never held in memory.  Dying on
# a short read matters here: the byte count has already been announced.
sub copy_payload {
        my ($in, $out, $size, $tar_file) = @_;
        while ($size > 0) {
                my $block;
                if ((read($in, $block, $block_size) || 0) != $block_size) {
                        die "Short archive: $tar_file\n";
                }
                print {$out} substr($block, 0, $size);
                $size -= $block_size;
        }
        return;
}

# Return the value of the last "path" record in a pax extended header,
# or '' if there is none.  Each record reads "<len> <key>=<value>\n",
# where <len> is the length of the whole record; going by that length
# keeps values containing spaces, '=' or newlines intact.
sub pax_path {
        my ($pax_header) = @_;
        my $path = '';
        my $pos = 0;
        while (substr($pax_header, $pos) =~ /^(\d+) /) {
                my $len = $1;
                my $skip = length($len) + 1;
                last if $len <= $skip || $pos + $len > length($pax_header);
                my $record = substr($pax_header, $pos + $skip, $len - $skip);
                $record =~ s/\n\z//;
                my ($key, $value) = split /=/, $record, 2;
                $path = $value if defined $value && $key eq 'path';
                $pos += $len;
        }
        return $path;
}