Merge branch 'maint'
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10 #include "progress.h"
11
/*
 * Switches set from the command line (-n, -q, -r respectively) and the
 * sticky error flag that becomes the command's return value.
 */
static int dry_run;
static int quiet;
static int recover;
static int has_errors;

static const char unpack_usage[] =
        "git-unpack-objects [-n] [-q] [-r] < pack-file";
14
15 /* We always read in 4kB chunks. */
16 static unsigned char buffer[4096];
17 static unsigned int offset, len;
18 static off_t consumed_bytes;
19 static SHA_CTX ctx;
20
21 /*
22  * Make sure at least "min" bytes are available in the buffer, and
23  * return the pointer to the buffer.
24  */
25 static void *fill(int min)
26 {
27         if (min <= len)
28                 return buffer + offset;
29         if (min > sizeof(buffer))
30                 die("cannot fill %d bytes", min);
31         if (offset) {
32                 SHA1_Update(&ctx, buffer, offset);
33                 memmove(buffer, buffer + offset, len);
34                 offset = 0;
35         }
36         do {
37                 ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
38                 if (ret <= 0) {
39                         if (!ret)
40                                 die("early EOF");
41                         die("read error on input: %s", strerror(errno));
42                 }
43                 len += ret;
44         } while (len < min);
45         return buffer;
46 }
47
48 static void use(int bytes)
49 {
50         if (bytes > len)
51                 die("used more bytes than were available");
52         len -= bytes;
53         offset += bytes;
54
55         /* make sure off_t is sufficiently large not to wrap */
56         if (consumed_bytes > consumed_bytes + bytes)
57                 die("pack too large for current definition of off_t");
58         consumed_bytes += bytes;
59 }
60
61 static void *get_data(unsigned long size)
62 {
63         z_stream stream;
64         void *buf = xmalloc(size);
65
66         memset(&stream, 0, sizeof(stream));
67
68         stream.next_out = buf;
69         stream.avail_out = size;
70         stream.next_in = fill(1);
71         stream.avail_in = len;
72         inflateInit(&stream);
73
74         for (;;) {
75                 int ret = inflate(&stream, 0);
76                 use(len - stream.avail_in);
77                 if (stream.total_out == size && ret == Z_STREAM_END)
78                         break;
79                 if (ret != Z_OK) {
80                         error("inflate returned %d\n", ret);
81                         free(buf);
82                         buf = NULL;
83                         if (!recover)
84                                 exit(1);
85                         has_errors = 1;
86                         break;
87                 }
88                 stream.next_in = fill(1);
89                 stream.avail_in = len;
90         }
91         inflateEnd(&stream);
92         return buf;
93 }
94
/*
 * A delta that could not be applied when it was read because its base
 * was not available yet.  added_object() matches pending entries against
 * a newly written object either by base_sha1 or by base_offset.
 */
struct delta_info {
        unsigned char base_sha1[20];    /* name of the base object */
        unsigned int nr;                /* index of the delta's own object */
        off_t base_offset;              /* pack offset of the base object */
        unsigned long size;             /* number of bytes in "delta" */
        void *delta;                    /* inflated delta data */
        struct delta_info *next;        /* next pending delta */
};

/* Head of the list of pending deltas; new entries go to the front. */
static struct delta_info *delta_list;
105
106 static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
107                               off_t base_offset,
108                               void *delta, unsigned long size)
109 {
110         struct delta_info *info = xmalloc(sizeof(*info));
111
112         hashcpy(info->base_sha1, base_sha1);
113         info->base_offset = base_offset;
114         info->size = size;
115         info->delta = delta;
116         info->nr = nr;
117         info->next = delta_list;
118         delta_list = info;
119 }
120
/*
 * Per-object bookkeeping, indexed by the object's position in the pack.
 */
struct obj_info {
        off_t offset;                   /* consumed_bytes when the object started */
        unsigned char sha1[20];         /* object name; null_sha1 while a delta is pending */
};

/* Array with one entry per object in the pack, allocated by unpack_all(). */
static struct obj_info *obj_list;
127
128 static void added_object(unsigned nr, enum object_type type,
129                          void *data, unsigned long size);
130
131 static void write_object(unsigned nr, enum object_type type,
132                          void *buf, unsigned long size)
133 {
134         if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
135                 die("failed to write object");
136         added_object(nr, type, buf, size);
137 }
138
139 static void resolve_delta(unsigned nr, enum object_type type,
140                           void *base, unsigned long base_size,
141                           void *delta, unsigned long delta_size)
142 {
143         void *result;
144         unsigned long result_size;
145
146         result = patch_delta(base, base_size,
147                              delta, delta_size,
148                              &result_size);
149         if (!result)
150                 die("failed to apply delta");
151         free(delta);
152         write_object(nr, type, result, result_size);
153         free(result);
154 }
155
156 static void added_object(unsigned nr, enum object_type type,
157                          void *data, unsigned long size)
158 {
159         struct delta_info **p = &delta_list;
160         struct delta_info *info;
161
162         while ((info = *p) != NULL) {
163                 if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
164                     info->base_offset == obj_list[nr].offset) {
165                         *p = info->next;
166                         p = &delta_list;
167                         resolve_delta(info->nr, type, data, size,
168                                       info->delta, info->size);
169                         free(info);
170                         continue;
171                 }
172                 p = &info->next;
173         }
174 }
175
176 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
177                                    unsigned nr)
178 {
179         void *buf = get_data(size);
180
181         if (!dry_run && buf)
182                 write_object(nr, type, buf, size);
183         free(buf);
184 }
185
186 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
187                                unsigned nr)
188 {
189         void *delta_data, *base;
190         unsigned long base_size;
191         unsigned char base_sha1[20];
192
193         if (type == OBJ_REF_DELTA) {
194                 hashcpy(base_sha1, fill(20));
195                 use(20);
196                 delta_data = get_data(delta_size);
197                 if (dry_run || !delta_data) {
198                         free(delta_data);
199                         return;
200                 }
201                 if (!has_sha1_file(base_sha1)) {
202                         hashcpy(obj_list[nr].sha1, null_sha1);
203                         add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
204                         return;
205                 }
206         } else {
207                 unsigned base_found = 0;
208                 unsigned char *pack, c;
209                 off_t base_offset;
210                 unsigned lo, mid, hi;
211
212                 pack = fill(1);
213                 c = *pack;
214                 use(1);
215                 base_offset = c & 127;
216                 while (c & 128) {
217                         base_offset += 1;
218                         if (!base_offset || MSB(base_offset, 7))
219                                 die("offset value overflow for delta base object");
220                         pack = fill(1);
221                         c = *pack;
222                         use(1);
223                         base_offset = (base_offset << 7) + (c & 127);
224                 }
225                 base_offset = obj_list[nr].offset - base_offset;
226
227                 delta_data = get_data(delta_size);
228                 if (dry_run || !delta_data) {
229                         free(delta_data);
230                         return;
231                 }
232                 lo = 0;
233                 hi = nr;
234                 while (lo < hi) {
235                         mid = (lo + hi)/2;
236                         if (base_offset < obj_list[mid].offset) {
237                                 hi = mid;
238                         } else if (base_offset > obj_list[mid].offset) {
239                                 lo = mid + 1;
240                         } else {
241                                 hashcpy(base_sha1, obj_list[mid].sha1);
242                                 base_found = !is_null_sha1(base_sha1);
243                                 break;
244                         }
245                 }
246                 if (!base_found) {
247                         /* The delta base object is itself a delta that
248                            has not been resolved yet. */
249                         hashcpy(obj_list[nr].sha1, null_sha1);
250                         add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
251                         return;
252                 }
253         }
254
255         base = read_sha1_file(base_sha1, &type, &base_size);
256         if (!base) {
257                 error("failed to read delta-pack base object %s",
258                       sha1_to_hex(base_sha1));
259                 if (!recover)
260                         exit(1);
261                 has_errors = 1;
262                 return;
263         }
264         resolve_delta(nr, type, base, base_size, delta_data, delta_size);
265         free(base);
266 }
267
268 static void unpack_one(unsigned nr)
269 {
270         unsigned shift;
271         unsigned char *pack, c;
272         unsigned long size;
273         enum object_type type;
274
275         obj_list[nr].offset = consumed_bytes;
276
277         pack = fill(1);
278         c = *pack;
279         use(1);
280         type = (c >> 4) & 7;
281         size = (c & 15);
282         shift = 4;
283         while (c & 0x80) {
284                 pack = fill(1);
285                 c = *pack;
286                 use(1);
287                 size += (c & 0x7f) << shift;
288                 shift += 7;
289         }
290
291         switch (type) {
292         case OBJ_COMMIT:
293         case OBJ_TREE:
294         case OBJ_BLOB:
295         case OBJ_TAG:
296                 unpack_non_delta_entry(type, size, nr);
297                 return;
298         case OBJ_REF_DELTA:
299         case OBJ_OFS_DELTA:
300                 unpack_delta_entry(type, size, nr);
301                 return;
302         default:
303                 error("bad object type %d", type);
304                 has_errors = 1;
305                 if (recover)
306                         return;
307                 exit(1);
308         }
309 }
310
311 static void unpack_all(void)
312 {
313         int i;
314         struct progress *progress = NULL;
315         struct pack_header *hdr = fill(sizeof(struct pack_header));
316         unsigned nr_objects = ntohl(hdr->hdr_entries);
317
318         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
319                 die("bad pack file");
320         if (!pack_version_ok(hdr->hdr_version))
321                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
322         use(sizeof(struct pack_header));
323
324         if (!quiet)
325                 progress = start_progress("Unpacking objects", nr_objects);
326         obj_list = xmalloc(nr_objects * sizeof(*obj_list));
327         for (i = 0; i < nr_objects; i++) {
328                 unpack_one(i);
329                 display_progress(progress, i + 1);
330         }
331         stop_progress(&progress);
332
333         if (delta_list)
334                 die("unresolved deltas left after unpacking");
335 }
336
337 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
338 {
339         int i;
340         unsigned char sha1[20];
341
342         git_config(git_default_config);
343
344         quiet = !isatty(2);
345
346         for (i = 1 ; i < argc; i++) {
347                 const char *arg = argv[i];
348
349                 if (*arg == '-') {
350                         if (!strcmp(arg, "-n")) {
351                                 dry_run = 1;
352                                 continue;
353                         }
354                         if (!strcmp(arg, "-q")) {
355                                 quiet = 1;
356                                 continue;
357                         }
358                         if (!strcmp(arg, "-r")) {
359                                 recover = 1;
360                                 continue;
361                         }
362                         if (!prefixcmp(arg, "--pack_header=")) {
363                                 struct pack_header *hdr;
364                                 char *c;
365
366                                 hdr = (struct pack_header *)buffer;
367                                 hdr->hdr_signature = htonl(PACK_SIGNATURE);
368                                 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
369                                 if (*c != ',')
370                                         die("bad %s", arg);
371                                 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
372                                 if (*c)
373                                         die("bad %s", arg);
374                                 len = sizeof(*hdr);
375                                 continue;
376                         }
377                         usage(unpack_usage);
378                 }
379
380                 /* We don't take any non-flag arguments now.. Maybe some day */
381                 usage(unpack_usage);
382         }
383         SHA1_Init(&ctx);
384         unpack_all();
385         SHA1_Update(&ctx, buffer, offset);
386         SHA1_Final(sha1, &ctx);
387         if (hashcmp(fill(20), sha1))
388                 die("final sha1 did not match");
389         use(20);
390
391         /* Write the last part of the buffer to stdout */
392         while (len) {
393                 int ret = xwrite(1, buffer + offset, len);
394                 if (ret <= 0)
395                         break;
396                 len -= ret;
397                 offset += ret;
398         }
399
400         /* All done */
401         return has_errors;
402 }