git-svnimport: support for partial imports
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10
11 #include <sys/time.h>
12
13 static int dry_run, quiet, recover, has_errors;
14 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
15
16 /* We always read in 4kB chunks. */
17 static unsigned char buffer[4096];
18 static unsigned long offset, len, consumed_bytes;
19 static SHA_CTX ctx;
20
21 /*
22  * Make sure at least "min" bytes are available in the buffer, and
23  * return the pointer to the buffer.
24  */
25 static void *fill(int min)
26 {
27         if (min <= len)
28                 return buffer + offset;
29         if (min > sizeof(buffer))
30                 die("cannot fill %d bytes", min);
31         if (offset) {
32                 SHA1_Update(&ctx, buffer, offset);
33                 memmove(buffer, buffer + offset, len);
34                 offset = 0;
35         }
36         do {
37                 int ret = xread(0, buffer + len, sizeof(buffer) - len);
38                 if (ret <= 0) {
39                         if (!ret)
40                                 die("early EOF");
41                         die("read error on input: %s", strerror(errno));
42                 }
43                 len += ret;
44         } while (len < min);
45         return buffer;
46 }
47
48 static void use(int bytes)
49 {
50         if (bytes > len)
51                 die("used more bytes than were available");
52         len -= bytes;
53         offset += bytes;
54         consumed_bytes += bytes;
55 }
56
57 static void *get_data(unsigned long size)
58 {
59         z_stream stream;
60         void *buf = xmalloc(size);
61
62         memset(&stream, 0, sizeof(stream));
63
64         stream.next_out = buf;
65         stream.avail_out = size;
66         stream.next_in = fill(1);
67         stream.avail_in = len;
68         inflateInit(&stream);
69
70         for (;;) {
71                 int ret = inflate(&stream, 0);
72                 use(len - stream.avail_in);
73                 if (stream.total_out == size && ret == Z_STREAM_END)
74                         break;
75                 if (ret != Z_OK) {
76                         error("inflate returned %d\n", ret);
77                         free(buf);
78                         buf = NULL;
79                         if (!recover)
80                                 exit(1);
81                         has_errors = 1;
82                         break;
83                 }
84                 stream.next_in = fill(1);
85                 stream.avail_in = len;
86         }
87         inflateEnd(&stream);
88         return buf;
89 }
90
/*
 * A delta whose base was not available when the delta was read.
 * Entries are kept on delta_list until added_object() sees a
 * matching base.
 */
struct delta_info {
	unsigned char base_sha1[20];	/* name of the base object to wait for */
	unsigned long base_offset;	/* pack offset of the base to wait for */
	unsigned long size;		/* byte length of the delta data */
	void *delta;			/* inflated delta data */
	unsigned int nr;		/* obj_list index of the delta object itself */
	struct delta_info *next;	/* next pending delta */
};

/* Head of the singly linked list of pending deltas. */
static struct delta_info *delta_list;
101
102 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
103                               unsigned long base_offset,
104                               void *delta, unsigned long size)
105 {
106         struct delta_info *info = xmalloc(sizeof(*info));
107
108         hashcpy(info->base_sha1, base_sha1);
109         info->base_offset = base_offset;
110         info->size = size;
111         info->delta = delta;
112         info->nr = nr;
113         info->next = delta_list;
114         delta_list = info;
115 }
116
/* Per-object bookkeeping, indexed by the object's position in the pack. */
struct obj_info {
	unsigned long offset;		/* value of consumed_bytes when the entry started */
	unsigned char sha1[20];		/* object name; null_sha1 while a delta is pending */
};

/* One slot per pack entry, allocated in unpack_all(). */
static struct obj_info *obj_list;
123
124 static void added_object(unsigned nr, const char *type, void *data,
125                          unsigned long size);
126
127 static void write_object(unsigned nr, void *buf, unsigned long size,
128                          const char *type)
129 {
130         if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
131                 die("failed to write object");
132         added_object(nr, type, buf, size);
133 }
134
/*
 * Apply "delta" to "base" and write the result as object number
 * "nr" with the given type.  The delta buffer is freed here; the
 * base buffer stays owned by the caller.  Dies if the delta cannot
 * be applied.
 */
static void resolve_delta(unsigned nr, const char *type,
			  void *base, unsigned long base_size,
			  void *delta, unsigned long delta_size)
{
	unsigned long patched_size;
	void *patched = patch_delta(base, base_size,
				    delta, delta_size,
				    &patched_size);

	if (patched == NULL)
		die("failed to apply delta");
	free(delta);
	write_object(nr, patched, patched_size, type);
	free(patched);
}
151
152 static void added_object(unsigned nr, const char *type, void *data,
153                          unsigned long size)
154 {
155         struct delta_info **p = &delta_list;
156         struct delta_info *info;
157
158         while ((info = *p) != NULL) {
159                 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
160                     info->base_offset == obj_list[nr].offset) {
161                         *p = info->next;
162                         p = &delta_list;
163                         resolve_delta(info->nr, type, data, size,
164                                       info->delta, info->size);
165                         free(info);
166                         continue;
167                 }
168                 p = &info->next;
169         }
170 }
171
172 static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
173                                    unsigned nr)
174 {
175         void *buf = get_data(size);
176         const char *type;
177
178         switch (kind) {
179         case OBJ_COMMIT: type = commit_type; break;
180         case OBJ_TREE:   type = tree_type; break;
181         case OBJ_BLOB:   type = blob_type; break;
182         case OBJ_TAG:    type = tag_type; break;
183         default: die("bad type %d", kind);
184         }
185         if (!dry_run && buf)
186                 write_object(nr, buf, size, type);
187         free(buf);
188 }
189
190 static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
191                                unsigned nr)
192 {
193         void *delta_data, *base;
194         unsigned long base_size;
195         char type[20];
196         unsigned char base_sha1[20];
197
198         if (kind == OBJ_REF_DELTA) {
199                 hashcpy(base_sha1, fill(20));
200                 use(20);
201                 delta_data = get_data(delta_size);
202                 if (dry_run || !delta_data) {
203                         free(delta_data);
204                         return;
205                 }
206                 if (!has_sha1_file(base_sha1)) {
207                         hashcpy(obj_list[nr].sha1, null_sha1);
208                         add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
209                         return;
210                 }
211         } else {
212                 unsigned base_found = 0;
213                 unsigned char *pack, c;
214                 unsigned long base_offset;
215                 unsigned lo, mid, hi;
216
217                 pack = fill(1);
218                 c = *pack;
219                 use(1);
220                 base_offset = c & 127;
221                 while (c & 128) {
222                         base_offset += 1;
223                         if (!base_offset || base_offset & ~(~0UL >> 7))
224                                 die("offset value overflow for delta base object");
225                         pack = fill(1);
226                         c = *pack;
227                         use(1);
228                         base_offset = (base_offset << 7) + (c & 127);
229                 }
230                 base_offset = obj_list[nr].offset - base_offset;
231
232                 delta_data = get_data(delta_size);
233                 if (dry_run || !delta_data) {
234                         free(delta_data);
235                         return;
236                 }
237                 lo = 0;
238                 hi = nr;
239                 while (lo < hi) {
240                         mid = (lo + hi)/2;
241                         if (base_offset < obj_list[mid].offset) {
242                                 hi = mid;
243                         } else if (base_offset > obj_list[mid].offset) {
244                                 lo = mid + 1;
245                         } else {
246                                 hashcpy(base_sha1, obj_list[mid].sha1);
247                                 base_found = !is_null_sha1(base_sha1);
248                                 break;
249                         }
250                 }
251                 if (!base_found) {
252                         /* The delta base object is itself a delta that
253                            has not been resolved yet. */
254                         hashcpy(obj_list[nr].sha1, null_sha1);
255                         add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
256                         return;
257                 }
258         }
259
260         base = read_sha1_file(base_sha1, type, &base_size);
261         if (!base) {
262                 error("failed to read delta-pack base object %s",
263                       sha1_to_hex(base_sha1));
264                 if (!recover)
265                         exit(1);
266                 has_errors = 1;
267                 return;
268         }
269         resolve_delta(nr, type, base, base_size, delta_data, delta_size);
270         free(base);
271 }
272
273 static void unpack_one(unsigned nr, unsigned total)
274 {
275         unsigned shift;
276         unsigned char *pack, c;
277         unsigned long size;
278         enum object_type type;
279
280         obj_list[nr].offset = consumed_bytes;
281
282         pack = fill(1);
283         c = *pack;
284         use(1);
285         type = (c >> 4) & 7;
286         size = (c & 15);
287         shift = 4;
288         while (c & 0x80) {
289                 pack = fill(1);
290                 c = *pack;
291                 use(1);
292                 size += (c & 0x7f) << shift;
293                 shift += 7;
294         }
295         if (!quiet) {
296                 static unsigned long last_sec;
297                 static unsigned last_percent;
298                 struct timeval now;
299                 unsigned percentage = ((nr+1) * 100) / total;
300
301                 gettimeofday(&now, NULL);
302                 if (percentage != last_percent || now.tv_sec != last_sec) {
303                         last_sec = now.tv_sec;
304                         last_percent = percentage;
305                         fprintf(stderr, "%4u%% (%u/%u) done\r",
306                                         percentage, (nr+1), total);
307                 }
308         }
309         switch (type) {
310         case OBJ_COMMIT:
311         case OBJ_TREE:
312         case OBJ_BLOB:
313         case OBJ_TAG:
314                 unpack_non_delta_entry(type, size, nr);
315                 return;
316         case OBJ_REF_DELTA:
317         case OBJ_OFS_DELTA:
318                 unpack_delta_entry(type, size, nr);
319                 return;
320         default:
321                 error("bad object type %d", type);
322                 has_errors = 1;
323                 if (recover)
324                         return;
325                 exit(1);
326         }
327 }
328
329 static void unpack_all(void)
330 {
331         int i;
332         struct pack_header *hdr = fill(sizeof(struct pack_header));
333         unsigned nr_objects = ntohl(hdr->hdr_entries);
334
335         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
336                 die("bad pack file");
337         if (!pack_version_ok(hdr->hdr_version))
338                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
339         fprintf(stderr, "Unpacking %d objects\n", nr_objects);
340
341         obj_list = xmalloc(nr_objects * sizeof(*obj_list));
342         use(sizeof(struct pack_header));
343         for (i = 0; i < nr_objects; i++)
344                 unpack_one(i, nr_objects);
345         if (delta_list)
346                 die("unresolved deltas left after unpacking");
347 }
348
349 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
350 {
351         int i;
352         unsigned char sha1[20];
353
354         git_config(git_default_config);
355
356         quiet = !isatty(2);
357
358         for (i = 1 ; i < argc; i++) {
359                 const char *arg = argv[i];
360
361                 if (*arg == '-') {
362                         if (!strcmp(arg, "-n")) {
363                                 dry_run = 1;
364                                 continue;
365                         }
366                         if (!strcmp(arg, "-q")) {
367                                 quiet = 1;
368                                 continue;
369                         }
370                         if (!strcmp(arg, "-r")) {
371                                 recover = 1;
372                                 continue;
373                         }
374                         usage(unpack_usage);
375                 }
376
377                 /* We don't take any non-flag arguments now.. Maybe some day */
378                 usage(unpack_usage);
379         }
380         SHA1_Init(&ctx);
381         unpack_all();
382         SHA1_Update(&ctx, buffer, offset);
383         SHA1_Final(sha1, &ctx);
384         if (hashcmp(fill(20), sha1))
385                 die("final sha1 did not match");
386         use(20);
387
388         /* Write the last part of the buffer to stdout */
389         while (len) {
390                 int ret = xwrite(1, buffer + offset, len);
391                 if (ret <= 0)
392                         break;
393                 len -= ret;
394                 offset += ret;
395         }
396
397         /* All done */
398         if (!quiet)
399                 fprintf(stderr, "\n");
400         return has_errors;
401 }