Merge git://git.kernel.org/pub/scm/gitk/gitk
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10 #include "progress.h"
11 #include "decorate.h"
12
13 static int dry_run, quiet, recover, has_errors;
14 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
15
16 /* We always read in 4kB chunks. */
17 static unsigned char buffer[4096];
18 static unsigned int offset, len;
19 static off_t consumed_bytes;
20 static SHA_CTX ctx;
21
22 struct obj_buffer {
23         char *buffer;
24         unsigned long size;
25 };
26
27 static struct decoration obj_decorate;
28
29 static struct obj_buffer *lookup_object_buffer(struct object *base)
30 {
31         return lookup_decoration(&obj_decorate, base);
32 }
33
34 /*
35  * Make sure at least "min" bytes are available in the buffer, and
36  * return the pointer to the buffer.
37  */
38 static void *fill(int min)
39 {
40         if (min <= len)
41                 return buffer + offset;
42         if (min > sizeof(buffer))
43                 die("cannot fill %d bytes", min);
44         if (offset) {
45                 SHA1_Update(&ctx, buffer, offset);
46                 memmove(buffer, buffer + offset, len);
47                 offset = 0;
48         }
49         do {
50                 ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
51                 if (ret <= 0) {
52                         if (!ret)
53                                 die("early EOF");
54                         die("read error on input: %s", strerror(errno));
55                 }
56                 len += ret;
57         } while (len < min);
58         return buffer;
59 }
60
61 static void use(int bytes)
62 {
63         if (bytes > len)
64                 die("used more bytes than were available");
65         len -= bytes;
66         offset += bytes;
67
68         /* make sure off_t is sufficiently large not to wrap */
69         if (consumed_bytes > consumed_bytes + bytes)
70                 die("pack too large for current definition of off_t");
71         consumed_bytes += bytes;
72 }
73
74 static void *get_data(unsigned long size)
75 {
76         z_stream stream;
77         void *buf = xmalloc(size);
78
79         memset(&stream, 0, sizeof(stream));
80
81         stream.next_out = buf;
82         stream.avail_out = size;
83         stream.next_in = fill(1);
84         stream.avail_in = len;
85         inflateInit(&stream);
86
87         for (;;) {
88                 int ret = inflate(&stream, 0);
89                 use(len - stream.avail_in);
90                 if (stream.total_out == size && ret == Z_STREAM_END)
91                         break;
92                 if (ret != Z_OK) {
93                         error("inflate returned %d\n", ret);
94                         free(buf);
95                         buf = NULL;
96                         if (!recover)
97                                 exit(1);
98                         has_errors = 1;
99                         break;
100                 }
101                 stream.next_in = fill(1);
102                 stream.avail_in = len;
103         }
104         inflateEnd(&stream);
105         return buf;
106 }
107
/*
 * A delta whose base object was not available when the delta was read.
 * Entries wait on delta_list until added_object() sees their base.
 */
struct delta_info {
        unsigned char base_sha1[20];    /* base named by SHA-1, or null_sha1 for offset deltas */
        unsigned nr;                    /* obj_list index of the object this delta produces */
        off_t base_offset;              /* pack offset of the base, or 0 for SHA-1 deltas */
        unsigned long size;             /* number of bytes at "delta" */
        void *delta;                    /* inflated delta data; freed by resolve_delta() */
        struct delta_info *next;
};

/* Head of the singly linked list of deltas still waiting for a base. */
static struct delta_info *delta_list;

/*
 * Record a delta that cannot be applied yet by pushing a new entry on
 * the front of delta_list.  The entry keeps the "delta" pointer itself,
 * not a copy of the data.
 */
static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
                              off_t base_offset,
                              void *delta, unsigned long size)
{
        struct delta_info *entry = xmalloc(sizeof(struct delta_info));

        entry->nr = nr;
        entry->delta = delta;
        entry->size = size;
        entry->base_offset = base_offset;
        hashcpy(entry->base_sha1, base_sha1);

        /* link in at the head */
        entry->next = delta_list;
        delta_list = entry;
}
133
/* Bookkeeping for one pack entry, indexed by its position in the pack. */
struct obj_info {
        off_t offset;                   /* consumed_bytes when the entry's header began */
        unsigned char sha1[20];         /* name filled in by write_object(); set to
                                           null_sha1 while a delta is still pending */
};

/* Array with one slot per object in the pack; allocated by unpack_all(). */
static struct obj_info *obj_list;

/*
 * Forward declaration: write_object() calls added_object(), which in
 * turn reaches write_object() again through resolve_delta().
 */
static void added_object(unsigned nr, enum object_type type,
                         void *data, unsigned long size);
143
144 static void write_object(unsigned nr, enum object_type type,
145                          void *buf, unsigned long size)
146 {
147         if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
148                 die("failed to write object");
149         added_object(nr, type, buf, size);
150 }
151
152 static void resolve_delta(unsigned nr, enum object_type type,
153                           void *base, unsigned long base_size,
154                           void *delta, unsigned long delta_size)
155 {
156         void *result;
157         unsigned long result_size;
158
159         result = patch_delta(base, base_size,
160                              delta, delta_size,
161                              &result_size);
162         if (!result)
163                 die("failed to apply delta");
164         free(delta);
165         write_object(nr, type, result, result_size);
166         free(result);
167 }
168
169 static void added_object(unsigned nr, enum object_type type,
170                          void *data, unsigned long size)
171 {
172         struct delta_info **p = &delta_list;
173         struct delta_info *info;
174
175         while ((info = *p) != NULL) {
176                 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
177                     info->base_offset == obj_list[nr].offset) {
178                         *p = info->next;
179                         p = &delta_list;
180                         resolve_delta(info->nr, type, data, size,
181                                       info->delta, info->size);
182                         free(info);
183                         continue;
184                 }
185                 p = &info->next;
186         }
187 }
188
189 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
190                                    unsigned nr)
191 {
192         void *buf = get_data(size);
193
194         if (!dry_run && buf)
195                 write_object(nr, type, buf, size);
196         free(buf);
197 }
198
199 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
200                                unsigned nr)
201 {
202         void *delta_data, *base;
203         unsigned long base_size;
204         unsigned char base_sha1[20];
205         struct object *obj;
206
207         if (type == OBJ_REF_DELTA) {
208                 hashcpy(base_sha1, fill(20));
209                 use(20);
210                 delta_data = get_data(delta_size);
211                 if (dry_run || !delta_data) {
212                         free(delta_data);
213                         return;
214                 }
215                 if (!has_sha1_file(base_sha1)) {
216                         hashcpy(obj_list[nr].sha1, null_sha1);
217                         add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
218                         return;
219                 }
220         } else {
221                 unsigned base_found = 0;
222                 unsigned char *pack, c;
223                 off_t base_offset;
224                 unsigned lo, mid, hi;
225
226                 pack = fill(1);
227                 c = *pack;
228                 use(1);
229                 base_offset = c & 127;
230                 while (c & 128) {
231                         base_offset += 1;
232                         if (!base_offset || MSB(base_offset, 7))
233                                 die("offset value overflow for delta base object");
234                         pack = fill(1);
235                         c = *pack;
236                         use(1);
237                         base_offset = (base_offset << 7) + (c & 127);
238                 }
239                 base_offset = obj_list[nr].offset - base_offset;
240
241                 delta_data = get_data(delta_size);
242                 if (dry_run || !delta_data) {
243                         free(delta_data);
244                         return;
245                 }
246                 lo = 0;
247                 hi = nr;
248                 while (lo < hi) {
249                         mid = (lo + hi)/2;
250                         if (base_offset < obj_list[mid].offset) {
251                                 hi = mid;
252                         } else if (base_offset > obj_list[mid].offset) {
253                                 lo = mid + 1;
254                         } else {
255                                 hashcpy(base_sha1, obj_list[mid].sha1);
256                                 base_found = !is_null_sha1(base_sha1);
257                                 break;
258                         }
259                 }
260                 if (!base_found) {
261                         /* The delta base object is itself a delta that
262                            has not been resolved yet. */
263                         hashcpy(obj_list[nr].sha1, null_sha1);
264                         add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
265                         return;
266                 }
267         }
268
269         obj = lookup_object(base_sha1);
270         if (obj) {
271                 struct obj_buffer *obj_buf = lookup_object_buffer(obj);
272                 if (obj_buf) {
273                         resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size);
274                         return;
275                 }
276         }
277
278         base = read_sha1_file(base_sha1, &type, &base_size);
279         if (!base) {
280                 error("failed to read delta-pack base object %s",
281                       sha1_to_hex(base_sha1));
282                 if (!recover)
283                         exit(1);
284                 has_errors = 1;
285                 return;
286         }
287         resolve_delta(nr, type, base, base_size, delta_data, delta_size);
288         free(base);
289 }
290
291 static void unpack_one(unsigned nr)
292 {
293         unsigned shift;
294         unsigned char *pack, c;
295         unsigned long size;
296         enum object_type type;
297
298         obj_list[nr].offset = consumed_bytes;
299
300         pack = fill(1);
301         c = *pack;
302         use(1);
303         type = (c >> 4) & 7;
304         size = (c & 15);
305         shift = 4;
306         while (c & 0x80) {
307                 pack = fill(1);
308                 c = *pack;
309                 use(1);
310                 size += (c & 0x7f) << shift;
311                 shift += 7;
312         }
313
314         switch (type) {
315         case OBJ_COMMIT:
316         case OBJ_TREE:
317         case OBJ_BLOB:
318         case OBJ_TAG:
319                 unpack_non_delta_entry(type, size, nr);
320                 return;
321         case OBJ_REF_DELTA:
322         case OBJ_OFS_DELTA:
323                 unpack_delta_entry(type, size, nr);
324                 return;
325         default:
326                 error("bad object type %d", type);
327                 has_errors = 1;
328                 if (recover)
329                         return;
330                 exit(1);
331         }
332 }
333
334 static void unpack_all(void)
335 {
336         int i;
337         struct progress *progress = NULL;
338         struct pack_header *hdr = fill(sizeof(struct pack_header));
339         unsigned nr_objects = ntohl(hdr->hdr_entries);
340
341         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
342                 die("bad pack file");
343         if (!pack_version_ok(hdr->hdr_version))
344                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
345         use(sizeof(struct pack_header));
346
347         if (!quiet)
348                 progress = start_progress("Unpacking objects", nr_objects);
349         obj_list = xmalloc(nr_objects * sizeof(*obj_list));
350         for (i = 0; i < nr_objects; i++) {
351                 unpack_one(i);
352                 display_progress(progress, i + 1);
353         }
354         stop_progress(&progress);
355
356         if (delta_list)
357                 die("unresolved deltas left after unpacking");
358 }
359
360 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
361 {
362         int i;
363         unsigned char sha1[20];
364
365         git_config(git_default_config);
366
367         quiet = !isatty(2);
368
369         for (i = 1 ; i < argc; i++) {
370                 const char *arg = argv[i];
371
372                 if (*arg == '-') {
373                         if (!strcmp(arg, "-n")) {
374                                 dry_run = 1;
375                                 continue;
376                         }
377                         if (!strcmp(arg, "-q")) {
378                                 quiet = 1;
379                                 continue;
380                         }
381                         if (!strcmp(arg, "-r")) {
382                                 recover = 1;
383                                 continue;
384                         }
385                         if (!prefixcmp(arg, "--pack_header=")) {
386                                 struct pack_header *hdr;
387                                 char *c;
388
389                                 hdr = (struct pack_header *)buffer;
390                                 hdr->hdr_signature = htonl(PACK_SIGNATURE);
391                                 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
392                                 if (*c != ',')
393                                         die("bad %s", arg);
394                                 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
395                                 if (*c)
396                                         die("bad %s", arg);
397                                 len = sizeof(*hdr);
398                                 continue;
399                         }
400                         usage(unpack_usage);
401                 }
402
403                 /* We don't take any non-flag arguments now.. Maybe some day */
404                 usage(unpack_usage);
405         }
406         SHA1_Init(&ctx);
407         unpack_all();
408         SHA1_Update(&ctx, buffer, offset);
409         SHA1_Final(sha1, &ctx);
410         if (hashcmp(fill(20), sha1))
411                 die("final sha1 did not match");
412         use(20);
413
414         /* Write the last part of the buffer to stdout */
415         while (len) {
416                 int ret = xwrite(1, buffer + offset, len);
417                 if (ret <= 0)
418                         break;
419                 len -= ret;
420                 offset += ret;
421         }
422
423         /* All done */
424         return has_errors;
425 }