Merge branch 'jc/add'
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10
11 static int dry_run, quiet, recover, has_errors;
12 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
13
14 /* We always read in 4kB chunks. */
15 static unsigned char buffer[4096];
16 static unsigned int offset, len;
17 static off_t consumed_bytes;
18 static SHA_CTX ctx;
19
20 /*
21  * Make sure at least "min" bytes are available in the buffer, and
22  * return the pointer to the buffer.
23  */
24 static void *fill(int min)
25 {
26         if (min <= len)
27                 return buffer + offset;
28         if (min > sizeof(buffer))
29                 die("cannot fill %d bytes", min);
30         if (offset) {
31                 SHA1_Update(&ctx, buffer, offset);
32                 memmove(buffer, buffer + offset, len);
33                 offset = 0;
34         }
35         do {
36                 int ret = xread(0, buffer + len, sizeof(buffer) - len);
37                 if (ret <= 0) {
38                         if (!ret)
39                                 die("early EOF");
40                         die("read error on input: %s", strerror(errno));
41                 }
42                 len += ret;
43         } while (len < min);
44         return buffer;
45 }
46
47 static void use(int bytes)
48 {
49         if (bytes > len)
50                 die("used more bytes than were available");
51         len -= bytes;
52         offset += bytes;
53
54         /* make sure off_t is sufficiently large not to wrap */
55         if (consumed_bytes > consumed_bytes + bytes)
56                 die("pack too large for current definition of off_t");
57         consumed_bytes += bytes;
58 }
59
60 static void *get_data(unsigned long size)
61 {
62         z_stream stream;
63         void *buf = xmalloc(size);
64
65         memset(&stream, 0, sizeof(stream));
66
67         stream.next_out = buf;
68         stream.avail_out = size;
69         stream.next_in = fill(1);
70         stream.avail_in = len;
71         inflateInit(&stream);
72
73         for (;;) {
74                 int ret = inflate(&stream, 0);
75                 use(len - stream.avail_in);
76                 if (stream.total_out == size && ret == Z_STREAM_END)
77                         break;
78                 if (ret != Z_OK) {
79                         error("inflate returned %d\n", ret);
80                         free(buf);
81                         buf = NULL;
82                         if (!recover)
83                                 exit(1);
84                         has_errors = 1;
85                         break;
86                 }
87                 stream.next_in = fill(1);
88                 stream.avail_in = len;
89         }
90         inflateEnd(&stream);
91         return buf;
92 }
93
/*
 * A delta whose base object was not available when the delta was read.
 * Such entries wait on delta_list until added_object() sees the base.
 */
struct delta_info {
        unsigned char base_sha1[20];    /* base named by SHA-1, or null_sha1 */
        unsigned int nr;                /* index of this delta in obj_list */
        off_t base_offset;              /* base named by pack offset, or 0 */
        unsigned long size;             /* number of bytes in "delta" */
        void *delta;                    /* inflated delta data (heap) */
        struct delta_info *next;        /* next pending delta */
};

/* Head of the singly linked list of pending deltas. */
static struct delta_info *delta_list;
104
105 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
106                               off_t base_offset,
107                               void *delta, unsigned long size)
108 {
109         struct delta_info *info = xmalloc(sizeof(*info));
110
111         hashcpy(info->base_sha1, base_sha1);
112         info->base_offset = base_offset;
113         info->size = size;
114         info->delta = delta;
115         info->nr = nr;
116         info->next = delta_list;
117         delta_list = info;
118 }
119
/*
 * Per-object bookkeeping, indexed by the object's position in the pack.
 */
struct obj_info {
        off_t offset;                   /* consumed_bytes when the entry began */
        unsigned char sha1[20];         /* name once written; null_sha1 while
                                           the entry is a pending delta */
};

/* One slot per pack entry; allocated by unpack_all(). */
static struct obj_info *obj_list;
126
127 static void added_object(unsigned nr, enum object_type type,
128                          void *data, unsigned long size);
129
130 static void write_object(unsigned nr, enum object_type type,
131                          void *buf, unsigned long size)
132 {
133         if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
134                 die("failed to write object");
135         added_object(nr, type, buf, size);
136 }
137
138 static void resolve_delta(unsigned nr, enum object_type type,
139                           void *base, unsigned long base_size,
140                           void *delta, unsigned long delta_size)
141 {
142         void *result;
143         unsigned long result_size;
144
145         result = patch_delta(base, base_size,
146                              delta, delta_size,
147                              &result_size);
148         if (!result)
149                 die("failed to apply delta");
150         free(delta);
151         write_object(nr, type, result, result_size);
152         free(result);
153 }
154
155 static void added_object(unsigned nr, enum object_type type,
156                          void *data, unsigned long size)
157 {
158         struct delta_info **p = &delta_list;
159         struct delta_info *info;
160
161         while ((info = *p) != NULL) {
162                 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
163                     info->base_offset == obj_list[nr].offset) {
164                         *p = info->next;
165                         p = &delta_list;
166                         resolve_delta(info->nr, type, data, size,
167                                       info->delta, info->size);
168                         free(info);
169                         continue;
170                 }
171                 p = &info->next;
172         }
173 }
174
175 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
176                                    unsigned nr)
177 {
178         void *buf = get_data(size);
179
180         if (!dry_run && buf)
181                 write_object(nr, type, buf, size);
182         free(buf);
183 }
184
185 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
186                                unsigned nr)
187 {
188         void *delta_data, *base;
189         unsigned long base_size;
190         unsigned char base_sha1[20];
191
192         if (type == OBJ_REF_DELTA) {
193                 hashcpy(base_sha1, fill(20));
194                 use(20);
195                 delta_data = get_data(delta_size);
196                 if (dry_run || !delta_data) {
197                         free(delta_data);
198                         return;
199                 }
200                 if (!has_sha1_file(base_sha1)) {
201                         hashcpy(obj_list[nr].sha1, null_sha1);
202                         add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
203                         return;
204                 }
205         } else {
206                 unsigned base_found = 0;
207                 unsigned char *pack, c;
208                 off_t base_offset;
209                 unsigned lo, mid, hi;
210
211                 pack = fill(1);
212                 c = *pack;
213                 use(1);
214                 base_offset = c & 127;
215                 while (c & 128) {
216                         base_offset += 1;
217                         if (!base_offset || MSB(base_offset, 7))
218                                 die("offset value overflow for delta base object");
219                         pack = fill(1);
220                         c = *pack;
221                         use(1);
222                         base_offset = (base_offset << 7) + (c & 127);
223                 }
224                 base_offset = obj_list[nr].offset - base_offset;
225
226                 delta_data = get_data(delta_size);
227                 if (dry_run || !delta_data) {
228                         free(delta_data);
229                         return;
230                 }
231                 lo = 0;
232                 hi = nr;
233                 while (lo < hi) {
234                         mid = (lo + hi)/2;
235                         if (base_offset < obj_list[mid].offset) {
236                                 hi = mid;
237                         } else if (base_offset > obj_list[mid].offset) {
238                                 lo = mid + 1;
239                         } else {
240                                 hashcpy(base_sha1, obj_list[mid].sha1);
241                                 base_found = !is_null_sha1(base_sha1);
242                                 break;
243                         }
244                 }
245                 if (!base_found) {
246                         /* The delta base object is itself a delta that
247                            has not been resolved yet. */
248                         hashcpy(obj_list[nr].sha1, null_sha1);
249                         add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
250                         return;
251                 }
252         }
253
254         base = read_sha1_file(base_sha1, &type, &base_size);
255         if (!base) {
256                 error("failed to read delta-pack base object %s",
257                       sha1_to_hex(base_sha1));
258                 if (!recover)
259                         exit(1);
260                 has_errors = 1;
261                 return;
262         }
263         resolve_delta(nr, type, base, base_size, delta_data, delta_size);
264         free(base);
265 }
266
267 static void unpack_one(unsigned nr, unsigned total)
268 {
269         unsigned shift;
270         unsigned char *pack, c;
271         unsigned long size;
272         enum object_type type;
273
274         obj_list[nr].offset = consumed_bytes;
275
276         pack = fill(1);
277         c = *pack;
278         use(1);
279         type = (c >> 4) & 7;
280         size = (c & 15);
281         shift = 4;
282         while (c & 0x80) {
283                 pack = fill(1);
284                 c = *pack;
285                 use(1);
286                 size += (c & 0x7f) << shift;
287                 shift += 7;
288         }
289         if (!quiet) {
290                 static unsigned long last_sec;
291                 static unsigned last_percent;
292                 struct timeval now;
293                 unsigned percentage = ((nr+1) * 100) / total;
294
295                 gettimeofday(&now, NULL);
296                 if (percentage != last_percent || now.tv_sec != last_sec) {
297                         last_sec = now.tv_sec;
298                         last_percent = percentage;
299                         fprintf(stderr, "%4u%% (%u/%u) done\r",
300                                         percentage, (nr+1), total);
301                 }
302         }
303         switch (type) {
304         case OBJ_COMMIT:
305         case OBJ_TREE:
306         case OBJ_BLOB:
307         case OBJ_TAG:
308                 unpack_non_delta_entry(type, size, nr);
309                 return;
310         case OBJ_REF_DELTA:
311         case OBJ_OFS_DELTA:
312                 unpack_delta_entry(type, size, nr);
313                 return;
314         default:
315                 error("bad object type %d", type);
316                 has_errors = 1;
317                 if (recover)
318                         return;
319                 exit(1);
320         }
321 }
322
323 static void unpack_all(void)
324 {
325         int i;
326         struct pack_header *hdr = fill(sizeof(struct pack_header));
327         unsigned nr_objects = ntohl(hdr->hdr_entries);
328
329         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
330                 die("bad pack file");
331         if (!pack_version_ok(hdr->hdr_version))
332                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
333         fprintf(stderr, "Unpacking %d objects\n", nr_objects);
334
335         obj_list = xmalloc(nr_objects * sizeof(*obj_list));
336         use(sizeof(struct pack_header));
337         for (i = 0; i < nr_objects; i++)
338                 unpack_one(i, nr_objects);
339         if (delta_list)
340                 die("unresolved deltas left after unpacking");
341 }
342
343 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
344 {
345         int i;
346         unsigned char sha1[20];
347
348         git_config(git_default_config);
349
350         quiet = !isatty(2);
351
352         for (i = 1 ; i < argc; i++) {
353                 const char *arg = argv[i];
354
355                 if (*arg == '-') {
356                         if (!strcmp(arg, "-n")) {
357                                 dry_run = 1;
358                                 continue;
359                         }
360                         if (!strcmp(arg, "-q")) {
361                                 quiet = 1;
362                                 continue;
363                         }
364                         if (!strcmp(arg, "-r")) {
365                                 recover = 1;
366                                 continue;
367                         }
368                         if (!prefixcmp(arg, "--pack_header=")) {
369                                 struct pack_header *hdr;
370                                 char *c;
371
372                                 hdr = (struct pack_header *)buffer;
373                                 hdr->hdr_signature = htonl(PACK_SIGNATURE);
374                                 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
375                                 if (*c != ',')
376                                         die("bad %s", arg);
377                                 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
378                                 if (*c)
379                                         die("bad %s", arg);
380                                 len = sizeof(*hdr);
381                                 continue;
382                         }
383                         usage(unpack_usage);
384                 }
385
386                 /* We don't take any non-flag arguments now.. Maybe some day */
387                 usage(unpack_usage);
388         }
389         SHA1_Init(&ctx);
390         unpack_all();
391         SHA1_Update(&ctx, buffer, offset);
392         SHA1_Final(sha1, &ctx);
393         if (hashcmp(fill(20), sha1))
394                 die("final sha1 did not match");
395         use(20);
396
397         /* Write the last part of the buffer to stdout */
398         while (len) {
399                 int ret = xwrite(1, buffer + offset, len);
400                 if (ret <= 0)
401                         break;
402                 len -= ret;
403                 offset += ret;
404         }
405
406         /* All done */
407         if (!quiet)
408                 fprintf(stderr, "\n");
409         return has_errors;
410 }