Merge branch 'maint'
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10
11 static int dry_run, quiet, recover, has_errors;
12 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
13
14 /* We always read in 4kB chunks. */
15 static unsigned char buffer[4096];
16 static unsigned long offset, len, consumed_bytes;
17 static SHA_CTX ctx;
18
19 /*
20  * Make sure at least "min" bytes are available in the buffer, and
21  * return the pointer to the buffer.
22  */
23 static void *fill(int min)
24 {
25         if (min <= len)
26                 return buffer + offset;
27         if (min > sizeof(buffer))
28                 die("cannot fill %d bytes", min);
29         if (offset) {
30                 SHA1_Update(&ctx, buffer, offset);
31                 memmove(buffer, buffer + offset, len);
32                 offset = 0;
33         }
34         do {
35                 int ret = xread(0, buffer + len, sizeof(buffer) - len);
36                 if (ret <= 0) {
37                         if (!ret)
38                                 die("early EOF");
39                         die("read error on input: %s", strerror(errno));
40                 }
41                 len += ret;
42         } while (len < min);
43         return buffer;
44 }
45
46 static void use(int bytes)
47 {
48         if (bytes > len)
49                 die("used more bytes than were available");
50         len -= bytes;
51         offset += bytes;
52         consumed_bytes += bytes;
53 }
54
55 static void *get_data(unsigned long size)
56 {
57         z_stream stream;
58         void *buf = xmalloc(size);
59
60         memset(&stream, 0, sizeof(stream));
61
62         stream.next_out = buf;
63         stream.avail_out = size;
64         stream.next_in = fill(1);
65         stream.avail_in = len;
66         inflateInit(&stream);
67
68         for (;;) {
69                 int ret = inflate(&stream, 0);
70                 use(len - stream.avail_in);
71                 if (stream.total_out == size && ret == Z_STREAM_END)
72                         break;
73                 if (ret != Z_OK) {
74                         error("inflate returned %d\n", ret);
75                         free(buf);
76                         buf = NULL;
77                         if (!recover)
78                                 exit(1);
79                         has_errors = 1;
80                         break;
81                 }
82                 stream.next_in = fill(1);
83                 stream.avail_in = len;
84         }
85         inflateEnd(&stream);
86         return buf;
87 }
88
/*
 * A delta that could not be applied when it was read because its base
 * was not available yet.  The base is identified either by SHA-1 (with
 * base_offset 0) or by pack offset (with base_sha1 set to null_sha1).
 */
struct delta_info {
	unsigned char base_sha1[20];	/* SHA-1 of the base object */
	unsigned long base_offset;	/* pack offset of the base object */
	unsigned long size;		/* number of bytes in "delta" */
	void *delta;			/* inflated delta data */
	unsigned int nr;		/* obj_list index of the deltified object */
	struct delta_info *next;	/* next pending delta */
};

/* Head of the list of deltas still waiting for their base. */
static struct delta_info *delta_list;
99
100 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
101                               unsigned long base_offset,
102                               void *delta, unsigned long size)
103 {
104         struct delta_info *info = xmalloc(sizeof(*info));
105
106         hashcpy(info->base_sha1, base_sha1);
107         info->base_offset = base_offset;
108         info->size = size;
109         info->delta = delta;
110         info->nr = nr;
111         info->next = delta_list;
112         delta_list = info;
113 }
114
/*
 * Per-object bookkeeping, indexed by the object's position in the pack.
 */
struct obj_info {
	unsigned long offset;		/* consumed_bytes when the entry started */
	unsigned char sha1[20];		/* name of the object once it is written */
};

/* One slot per object announced by the pack header. */
static struct obj_info *obj_list;
121
122 static void added_object(unsigned nr, const char *type, void *data,
123                          unsigned long size);
124
125 static void write_object(unsigned nr, void *buf, unsigned long size,
126                          const char *type)
127 {
128         if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
129                 die("failed to write object");
130         added_object(nr, type, buf, size);
131 }
132
/*
 * Apply "delta" to "base" and write the outcome as object "nr" of the
 * given type.  The delta buffer is freed here; the base buffer stays
 * with the caller.  Dies if the delta cannot be applied.
 */
static void resolve_delta(unsigned nr, const char *type,
			  void *base, unsigned long base_size,
			  void *delta, unsigned long delta_size)
{
	unsigned long patched_size;
	void *patched = patch_delta(base, base_size, delta, delta_size,
				    &patched_size);

	if (patched == NULL)
		die("failed to apply delta");
	free(delta);
	write_object(nr, patched, patched_size, type);
	free(patched);
}
149
150 static void added_object(unsigned nr, const char *type, void *data,
151                          unsigned long size)
152 {
153         struct delta_info **p = &delta_list;
154         struct delta_info *info;
155
156         while ((info = *p) != NULL) {
157                 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
158                     info->base_offset == obj_list[nr].offset) {
159                         *p = info->next;
160                         p = &delta_list;
161                         resolve_delta(info->nr, type, data, size,
162                                       info->delta, info->size);
163                         free(info);
164                         continue;
165                 }
166                 p = &info->next;
167         }
168 }
169
170 static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
171                                    unsigned nr)
172 {
173         void *buf = get_data(size);
174         const char *type;
175
176         switch (kind) {
177         case OBJ_COMMIT: type = commit_type; break;
178         case OBJ_TREE:   type = tree_type; break;
179         case OBJ_BLOB:   type = blob_type; break;
180         case OBJ_TAG:    type = tag_type; break;
181         default: die("bad type %d", kind);
182         }
183         if (!dry_run && buf)
184                 write_object(nr, buf, size, type);
185         free(buf);
186 }
187
188 static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
189                                unsigned nr)
190 {
191         void *delta_data, *base;
192         unsigned long base_size;
193         char type[20];
194         unsigned char base_sha1[20];
195
196         if (kind == OBJ_REF_DELTA) {
197                 hashcpy(base_sha1, fill(20));
198                 use(20);
199                 delta_data = get_data(delta_size);
200                 if (dry_run || !delta_data) {
201                         free(delta_data);
202                         return;
203                 }
204                 if (!has_sha1_file(base_sha1)) {
205                         hashcpy(obj_list[nr].sha1, null_sha1);
206                         add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
207                         return;
208                 }
209         } else {
210                 unsigned base_found = 0;
211                 unsigned char *pack, c;
212                 unsigned long base_offset;
213                 unsigned lo, mid, hi;
214
215                 pack = fill(1);
216                 c = *pack;
217                 use(1);
218                 base_offset = c & 127;
219                 while (c & 128) {
220                         base_offset += 1;
221                         if (!base_offset || base_offset & ~(~0UL >> 7))
222                                 die("offset value overflow for delta base object");
223                         pack = fill(1);
224                         c = *pack;
225                         use(1);
226                         base_offset = (base_offset << 7) + (c & 127);
227                 }
228                 base_offset = obj_list[nr].offset - base_offset;
229
230                 delta_data = get_data(delta_size);
231                 if (dry_run || !delta_data) {
232                         free(delta_data);
233                         return;
234                 }
235                 lo = 0;
236                 hi = nr;
237                 while (lo < hi) {
238                         mid = (lo + hi)/2;
239                         if (base_offset < obj_list[mid].offset) {
240                                 hi = mid;
241                         } else if (base_offset > obj_list[mid].offset) {
242                                 lo = mid + 1;
243                         } else {
244                                 hashcpy(base_sha1, obj_list[mid].sha1);
245                                 base_found = !is_null_sha1(base_sha1);
246                                 break;
247                         }
248                 }
249                 if (!base_found) {
250                         /* The delta base object is itself a delta that
251                            has not been resolved yet. */
252                         hashcpy(obj_list[nr].sha1, null_sha1);
253                         add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
254                         return;
255                 }
256         }
257
258         base = read_sha1_file(base_sha1, type, &base_size);
259         if (!base) {
260                 error("failed to read delta-pack base object %s",
261                       sha1_to_hex(base_sha1));
262                 if (!recover)
263                         exit(1);
264                 has_errors = 1;
265                 return;
266         }
267         resolve_delta(nr, type, base, base_size, delta_data, delta_size);
268         free(base);
269 }
270
271 static void unpack_one(unsigned nr, unsigned total)
272 {
273         unsigned shift;
274         unsigned char *pack, c;
275         unsigned long size;
276         enum object_type type;
277
278         obj_list[nr].offset = consumed_bytes;
279
280         pack = fill(1);
281         c = *pack;
282         use(1);
283         type = (c >> 4) & 7;
284         size = (c & 15);
285         shift = 4;
286         while (c & 0x80) {
287                 pack = fill(1);
288                 c = *pack;
289                 use(1);
290                 size += (c & 0x7f) << shift;
291                 shift += 7;
292         }
293         if (!quiet) {
294                 static unsigned long last_sec;
295                 static unsigned last_percent;
296                 struct timeval now;
297                 unsigned percentage = ((nr+1) * 100) / total;
298
299                 gettimeofday(&now, NULL);
300                 if (percentage != last_percent || now.tv_sec != last_sec) {
301                         last_sec = now.tv_sec;
302                         last_percent = percentage;
303                         fprintf(stderr, "%4u%% (%u/%u) done\r",
304                                         percentage, (nr+1), total);
305                 }
306         }
307         switch (type) {
308         case OBJ_COMMIT:
309         case OBJ_TREE:
310         case OBJ_BLOB:
311         case OBJ_TAG:
312                 unpack_non_delta_entry(type, size, nr);
313                 return;
314         case OBJ_REF_DELTA:
315         case OBJ_OFS_DELTA:
316                 unpack_delta_entry(type, size, nr);
317                 return;
318         default:
319                 error("bad object type %d", type);
320                 has_errors = 1;
321                 if (recover)
322                         return;
323                 exit(1);
324         }
325 }
326
327 static void unpack_all(void)
328 {
329         int i;
330         struct pack_header *hdr = fill(sizeof(struct pack_header));
331         unsigned nr_objects = ntohl(hdr->hdr_entries);
332
333         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
334                 die("bad pack file");
335         if (!pack_version_ok(hdr->hdr_version))
336                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
337         fprintf(stderr, "Unpacking %d objects\n", nr_objects);
338
339         obj_list = xmalloc(nr_objects * sizeof(*obj_list));
340         use(sizeof(struct pack_header));
341         for (i = 0; i < nr_objects; i++)
342                 unpack_one(i, nr_objects);
343         if (delta_list)
344                 die("unresolved deltas left after unpacking");
345 }
346
347 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
348 {
349         int i;
350         unsigned char sha1[20];
351
352         git_config(git_default_config);
353
354         quiet = !isatty(2);
355
356         for (i = 1 ; i < argc; i++) {
357                 const char *arg = argv[i];
358
359                 if (*arg == '-') {
360                         if (!strcmp(arg, "-n")) {
361                                 dry_run = 1;
362                                 continue;
363                         }
364                         if (!strcmp(arg, "-q")) {
365                                 quiet = 1;
366                                 continue;
367                         }
368                         if (!strcmp(arg, "-r")) {
369                                 recover = 1;
370                                 continue;
371                         }
372                         if (!strncmp(arg, "--pack_header=", 14)) {
373                                 struct pack_header *hdr;
374                                 char *c;
375
376                                 hdr = (struct pack_header *)buffer;
377                                 hdr->hdr_signature = htonl(PACK_SIGNATURE);
378                                 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
379                                 if (*c != ',')
380                                         die("bad %s", arg);
381                                 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
382                                 if (*c)
383                                         die("bad %s", arg);
384                                 len = sizeof(*hdr);
385                                 continue;
386                         }
387                         usage(unpack_usage);
388                 }
389
390                 /* We don't take any non-flag arguments now.. Maybe some day */
391                 usage(unpack_usage);
392         }
393         SHA1_Init(&ctx);
394         unpack_all();
395         SHA1_Update(&ctx, buffer, offset);
396         SHA1_Final(sha1, &ctx);
397         if (hashcmp(fill(20), sha1))
398                 die("final sha1 did not match");
399         use(20);
400
401         /* Write the last part of the buffer to stdout */
402         while (len) {
403                 int ret = xwrite(1, buffer + offset, len);
404                 if (ret <= 0)
405                         break;
406                 len -= ret;
407                 offset += ret;
408         }
409
410         /* All done */
411         if (!quiet)
412                 fprintf(stderr, "\n");
413         return has_errors;
414 }