index-pack: start learning to emulate "verify-pack -v"
[git] / builtin / index-pack.c
1 #include "cache.h"
2 #include "delta.h"
3 #include "pack.h"
4 #include "csum-file.h"
5 #include "blob.h"
6 #include "commit.h"
7 #include "tag.h"
8 #include "tree.h"
9 #include "progress.h"
10 #include "fsck.h"
11 #include "exec_cmd.h"
12
13 static const char index_pack_usage[] =
14 "git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
15
16 struct object_entry
17 {
18         struct pack_idx_entry idx;
19         unsigned long size;
20         unsigned int hdr_size;
21         enum object_type type;
22         enum object_type real_type;
23         unsigned delta_depth;
24         int base_object_no;
25 };
26
27 union delta_base {
28         unsigned char sha1[20];
29         off_t offset;
30 };
31
32 struct base_data {
33         struct base_data *base;
34         struct base_data *child;
35         struct object_entry *obj;
36         void *data;
37         unsigned long size;
38 };
39
40 /*
41  * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
42  * to memcmp() only the first 20 bytes.
43  */
44 #define UNION_BASE_SZ   20
45
46 #define FLAG_LINK (1u<<20)
47 #define FLAG_CHECKED (1u<<21)
48
49 struct delta_entry
50 {
51         union delta_base base;
52         int obj_no;
53 };
54
55 static struct object_entry *objects;
56 static struct delta_entry *deltas;
57 static struct base_data *base_cache;
58 static size_t base_cache_used;
59 static int nr_objects;
60 static int nr_deltas;
61 static int nr_resolved_deltas;
62
63 static int from_stdin;
64 static int strict;
65 static int verbose;
66
67 static struct progress *progress;
68
69 /* We always read in 4kB chunks. */
70 static unsigned char input_buffer[4096];
71 static unsigned int input_offset, input_len;
72 static off_t consumed_bytes;
73 static git_SHA_CTX input_ctx;
74 static uint32_t input_crc32;
75 static int input_fd, output_fd, pack_fd;
76
77 static int mark_link(struct object *obj, int type, void *data)
78 {
79         if (!obj)
80                 return -1;
81
82         if (type != OBJ_ANY && obj->type != type)
83                 die("object type mismatch at %s", sha1_to_hex(obj->sha1));
84
85         obj->flags |= FLAG_LINK;
86         return 0;
87 }
88
89 /* The content of each linked object must have been checked
90    or it must be already present in the object database */
91 static void check_object(struct object *obj)
92 {
93         if (!obj)
94                 return;
95
96         if (!(obj->flags & FLAG_LINK))
97                 return;
98
99         if (!(obj->flags & FLAG_CHECKED)) {
100                 unsigned long size;
101                 int type = sha1_object_info(obj->sha1, &size);
102                 if (type != obj->type || type <= 0)
103                         die("object of unexpected type");
104                 obj->flags |= FLAG_CHECKED;
105                 return;
106         }
107 }
108
109 static void check_objects(void)
110 {
111         unsigned i, max;
112
113         max = get_max_object_index();
114         for (i = 0; i < max; i++)
115                 check_object(get_indexed_object(i));
116 }
117
118
119 /* Discard current buffer used content. */
120 static void flush(void)
121 {
122         if (input_offset) {
123                 if (output_fd >= 0)
124                         write_or_die(output_fd, input_buffer, input_offset);
125                 git_SHA1_Update(&input_ctx, input_buffer, input_offset);
126                 memmove(input_buffer, input_buffer + input_offset, input_len);
127                 input_offset = 0;
128         }
129 }
130
131 /*
132  * Make sure at least "min" bytes are available in the buffer, and
133  * return the pointer to the buffer.
134  */
135 static void *fill(int min)
136 {
137         if (min <= input_len)
138                 return input_buffer + input_offset;
139         if (min > sizeof(input_buffer))
140                 die("cannot fill %d bytes", min);
141         flush();
142         do {
143                 ssize_t ret = xread(input_fd, input_buffer + input_len,
144                                 sizeof(input_buffer) - input_len);
145                 if (ret <= 0) {
146                         if (!ret)
147                                 die("early EOF");
148                         die_errno("read error on input");
149                 }
150                 input_len += ret;
151                 if (from_stdin)
152                         display_throughput(progress, consumed_bytes + input_len);
153         } while (input_len < min);
154         return input_buffer;
155 }
156
157 static void use(int bytes)
158 {
159         if (bytes > input_len)
160                 die("used more bytes than were available");
161         input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
162         input_len -= bytes;
163         input_offset += bytes;
164
165         /* make sure off_t is sufficiently large not to wrap */
166         if (signed_add_overflows(consumed_bytes, bytes))
167                 die("pack too large for current definition of off_t");
168         consumed_bytes += bytes;
169 }
170
171 static const char *open_pack_file(const char *pack_name)
172 {
173         if (from_stdin) {
174                 input_fd = 0;
175                 if (!pack_name) {
176                         static char tmpfile[PATH_MAX];
177                         output_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
178                                                 "pack/tmp_pack_XXXXXX");
179                         pack_name = xstrdup(tmpfile);
180                 } else
181                         output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
182                 if (output_fd < 0)
183                         die_errno("unable to create '%s'", pack_name);
184                 pack_fd = output_fd;
185         } else {
186                 input_fd = open(pack_name, O_RDONLY);
187                 if (input_fd < 0)
188                         die_errno("cannot open packfile '%s'", pack_name);
189                 output_fd = -1;
190                 pack_fd = input_fd;
191         }
192         git_SHA1_Init(&input_ctx);
193         return pack_name;
194 }
195
196 static void parse_pack_header(void)
197 {
198         struct pack_header *hdr = fill(sizeof(struct pack_header));
199
200         /* Header consistency check */
201         if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
202                 die("pack signature mismatch");
203         if (!pack_version_ok(hdr->hdr_version))
204                 die("pack version %"PRIu32" unsupported",
205                         ntohl(hdr->hdr_version));
206
207         nr_objects = ntohl(hdr->hdr_entries);
208         use(sizeof(struct pack_header));
209 }
210
211 static NORETURN void bad_object(unsigned long offset, const char *format,
212                        ...) __attribute__((format (printf, 2, 3)));
213
214 static void bad_object(unsigned long offset, const char *format, ...)
215 {
216         va_list params;
217         char buf[1024];
218
219         va_start(params, format);
220         vsnprintf(buf, sizeof(buf), format, params);
221         va_end(params);
222         die("pack has bad object at offset %lu: %s", offset, buf);
223 }
224
225 static void free_base_data(struct base_data *c)
226 {
227         if (c->data) {
228                 free(c->data);
229                 c->data = NULL;
230                 base_cache_used -= c->size;
231         }
232 }
233
234 static void prune_base_data(struct base_data *retain)
235 {
236         struct base_data *b;
237         for (b = base_cache;
238              base_cache_used > delta_base_cache_limit && b;
239              b = b->child) {
240                 if (b->data && b != retain)
241                         free_base_data(b);
242         }
243 }
244
245 static void link_base_data(struct base_data *base, struct base_data *c)
246 {
247         if (base)
248                 base->child = c;
249         else
250                 base_cache = c;
251
252         c->base = base;
253         c->child = NULL;
254         if (c->data)
255                 base_cache_used += c->size;
256         prune_base_data(c);
257 }
258
259 static void unlink_base_data(struct base_data *c)
260 {
261         struct base_data *base = c->base;
262         if (base)
263                 base->child = NULL;
264         else
265                 base_cache = NULL;
266         free_base_data(c);
267 }
268
269 static void *unpack_entry_data(unsigned long offset, unsigned long size)
270 {
271         int status;
272         z_stream stream;
273         void *buf = xmalloc(size);
274
275         memset(&stream, 0, sizeof(stream));
276         git_inflate_init(&stream);
277         stream.next_out = buf;
278         stream.avail_out = size;
279
280         do {
281                 stream.next_in = fill(1);
282                 stream.avail_in = input_len;
283                 status = git_inflate(&stream, 0);
284                 use(input_len - stream.avail_in);
285         } while (status == Z_OK);
286         if (stream.total_out != size || status != Z_STREAM_END)
287                 bad_object(offset, "inflate returned %d", status);
288         git_inflate_end(&stream);
289         return buf;
290 }
291
292 static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
293 {
294         unsigned char *p;
295         unsigned long size, c;
296         off_t base_offset;
297         unsigned shift;
298         void *data;
299
300         obj->idx.offset = consumed_bytes;
301         input_crc32 = crc32(0, Z_NULL, 0);
302
303         p = fill(1);
304         c = *p;
305         use(1);
306         obj->type = (c >> 4) & 7;
307         size = (c & 15);
308         shift = 4;
309         while (c & 0x80) {
310                 p = fill(1);
311                 c = *p;
312                 use(1);
313                 size += (c & 0x7f) << shift;
314                 shift += 7;
315         }
316         obj->size = size;
317
318         switch (obj->type) {
319         case OBJ_REF_DELTA:
320                 hashcpy(delta_base->sha1, fill(20));
321                 use(20);
322                 break;
323         case OBJ_OFS_DELTA:
324                 memset(delta_base, 0, sizeof(*delta_base));
325                 p = fill(1);
326                 c = *p;
327                 use(1);
328                 base_offset = c & 127;
329                 while (c & 128) {
330                         base_offset += 1;
331                         if (!base_offset || MSB(base_offset, 7))
332                                 bad_object(obj->idx.offset, "offset value overflow for delta base object");
333                         p = fill(1);
334                         c = *p;
335                         use(1);
336                         base_offset = (base_offset << 7) + (c & 127);
337                 }
338                 delta_base->offset = obj->idx.offset - base_offset;
339                 if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset)
340                         bad_object(obj->idx.offset, "delta base offset is out of bound");
341                 break;
342         case OBJ_COMMIT:
343         case OBJ_TREE:
344         case OBJ_BLOB:
345         case OBJ_TAG:
346                 break;
347         default:
348                 bad_object(obj->idx.offset, "unknown object type %d", obj->type);
349         }
350         obj->hdr_size = consumed_bytes - obj->idx.offset;
351
352         data = unpack_entry_data(obj->idx.offset, obj->size);
353         obj->idx.crc32 = input_crc32;
354         return data;
355 }
356
357 static void *get_data_from_pack(struct object_entry *obj)
358 {
359         off_t from = obj[0].idx.offset + obj[0].hdr_size;
360         unsigned long len = obj[1].idx.offset - from;
361         unsigned char *data, *inbuf;
362         z_stream stream;
363         int status;
364
365         data = xmalloc(obj->size);
366         inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
367
368         memset(&stream, 0, sizeof(stream));
369         git_inflate_init(&stream);
370         stream.next_out = data;
371         stream.avail_out = obj->size;
372
373         do {
374                 ssize_t n = (len < 64*1024) ? len : 64*1024;
375                 n = pread(pack_fd, inbuf, n, from);
376                 if (n < 0)
377                         die_errno("cannot pread pack file");
378                 if (!n)
379                         die("premature end of pack file, %lu bytes missing", len);
380                 from += n;
381                 len -= n;
382                 stream.next_in = inbuf;
383                 stream.avail_in = n;
384                 status = git_inflate(&stream, 0);
385         } while (len && status == Z_OK && !stream.avail_in);
386
387         /* This has been inflated OK when first encountered, so... */
388         if (status != Z_STREAM_END || stream.total_out != obj->size)
389                 die("serious inflate inconsistency");
390
391         git_inflate_end(&stream);
392         free(inbuf);
393         return data;
394 }
395
396 static int compare_delta_bases(const union delta_base *base1,
397                                const union delta_base *base2,
398                                enum object_type type1,
399                                enum object_type type2)
400 {
401         int cmp = type1 - type2;
402         if (cmp)
403                 return cmp;
404         return memcmp(base1, base2, UNION_BASE_SZ);
405 }
406
407 static int find_delta(const union delta_base *base, enum object_type type)
408 {
409         int first = 0, last = nr_deltas;
410
411         while (first < last) {
412                 int next = (first + last) / 2;
413                 struct delta_entry *delta = &deltas[next];
414                 int cmp;
415
416                 cmp = compare_delta_bases(base, &delta->base,
417                                           type, objects[delta->obj_no].type);
418                 if (!cmp)
419                         return next;
420                 if (cmp < 0) {
421                         last = next;
422                         continue;
423                 }
424                 first = next+1;
425         }
426         return -first-1;
427 }
428
429 static void find_delta_children(const union delta_base *base,
430                                 int *first_index, int *last_index,
431                                 enum object_type type)
432 {
433         int first = find_delta(base, type);
434         int last = first;
435         int end = nr_deltas - 1;
436
437         if (first < 0) {
438                 *first_index = 0;
439                 *last_index = -1;
440                 return;
441         }
442         while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
443                 --first;
444         while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
445                 ++last;
446         *first_index = first;
447         *last_index = last;
448 }
449
450 static void sha1_object(const void *data, unsigned long size,
451                         enum object_type type, unsigned char *sha1)
452 {
453         hash_sha1_file(data, size, typename(type), sha1);
454         if (has_sha1_file(sha1)) {
455                 void *has_data;
456                 enum object_type has_type;
457                 unsigned long has_size;
458                 has_data = read_sha1_file(sha1, &has_type, &has_size);
459                 if (!has_data)
460                         die("cannot read existing object %s", sha1_to_hex(sha1));
461                 if (size != has_size || type != has_type ||
462                     memcmp(data, has_data, size) != 0)
463                         die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
464                 free(has_data);
465         }
466         if (strict) {
467                 if (type == OBJ_BLOB) {
468                         struct blob *blob = lookup_blob(sha1);
469                         if (blob)
470                                 blob->object.flags |= FLAG_CHECKED;
471                         else
472                                 die("invalid blob object %s", sha1_to_hex(sha1));
473                 } else {
474                         struct object *obj;
475                         int eaten;
476                         void *buf = (void *) data;
477
478                         /*
479                          * we do not need to free the memory here, as the
480                          * buf is deleted by the caller.
481                          */
482                         obj = parse_object_buffer(sha1, type, size, buf, &eaten);
483                         if (!obj)
484                                 die("invalid %s", typename(type));
485                         if (fsck_object(obj, 1, fsck_error_function))
486                                 die("Error in object");
487                         if (fsck_walk(obj, mark_link, NULL))
488                                 die("Not all child objects of %s are reachable", sha1_to_hex(obj->sha1));
489
490                         if (obj->type == OBJ_TREE) {
491                                 struct tree *item = (struct tree *) obj;
492                                 item->buffer = NULL;
493                         }
494                         if (obj->type == OBJ_COMMIT) {
495                                 struct commit *commit = (struct commit *) obj;
496                                 commit->buffer = NULL;
497                         }
498                         obj->flags |= FLAG_CHECKED;
499                 }
500         }
501 }
502
503 static int is_delta_type(enum object_type type)
504 {
505         return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
506 }
507
508 static void *get_base_data(struct base_data *c)
509 {
510         if (!c->data) {
511                 struct object_entry *obj = c->obj;
512
513                 if (is_delta_type(obj->type)) {
514                         void *base = get_base_data(c->base);
515                         void *raw = get_data_from_pack(obj);
516                         c->data = patch_delta(
517                                 base, c->base->size,
518                                 raw, obj->size,
519                                 &c->size);
520                         free(raw);
521                         if (!c->data)
522                                 bad_object(obj->idx.offset, "failed to apply delta");
523                 } else {
524                         c->data = get_data_from_pack(obj);
525                         c->size = obj->size;
526                 }
527
528                 base_cache_used += c->size;
529                 prune_base_data(c);
530         }
531         return c->data;
532 }
533
534 static void resolve_delta(struct object_entry *delta_obj,
535                           struct base_data *base, struct base_data *result)
536 {
537         void *base_data, *delta_data;
538
539         delta_obj->real_type = base->obj->real_type;
540         delta_obj->delta_depth = base->obj->delta_depth + 1;
541         delta_obj->base_object_no = base->obj - objects;
542         delta_data = get_data_from_pack(delta_obj);
543         base_data = get_base_data(base);
544         result->obj = delta_obj;
545         result->data = patch_delta(base_data, base->size,
546                                    delta_data, delta_obj->size, &result->size);
547         free(delta_data);
548         if (!result->data)
549                 bad_object(delta_obj->idx.offset, "failed to apply delta");
550         sha1_object(result->data, result->size, delta_obj->real_type,
551                     delta_obj->idx.sha1);
552         nr_resolved_deltas++;
553 }
554
555 static void find_unresolved_deltas(struct base_data *base,
556                                    struct base_data *prev_base)
557 {
558         int i, ref_first, ref_last, ofs_first, ofs_last;
559
560         /*
561          * This is a recursive function. Those brackets should help reducing
562          * stack usage by limiting the scope of the delta_base union.
563          */
564         {
565                 union delta_base base_spec;
566
567                 hashcpy(base_spec.sha1, base->obj->idx.sha1);
568                 find_delta_children(&base_spec,
569                                     &ref_first, &ref_last, OBJ_REF_DELTA);
570
571                 memset(&base_spec, 0, sizeof(base_spec));
572                 base_spec.offset = base->obj->idx.offset;
573                 find_delta_children(&base_spec,
574                                     &ofs_first, &ofs_last, OBJ_OFS_DELTA);
575         }
576
577         if (ref_last == -1 && ofs_last == -1) {
578                 free(base->data);
579                 return;
580         }
581
582         link_base_data(prev_base, base);
583
584         for (i = ref_first; i <= ref_last; i++) {
585                 struct object_entry *child = objects + deltas[i].obj_no;
586                 struct base_data result;
587
588                 assert(child->real_type == OBJ_REF_DELTA);
589                 resolve_delta(child, base, &result);
590                 if (i == ref_last && ofs_last == -1)
591                         free_base_data(base);
592                 find_unresolved_deltas(&result, base);
593         }
594
595         for (i = ofs_first; i <= ofs_last; i++) {
596                 struct object_entry *child = objects + deltas[i].obj_no;
597                 struct base_data result;
598
599                 assert(child->real_type == OBJ_OFS_DELTA);
600                 resolve_delta(child, base, &result);
601                 if (i == ofs_last)
602                         free_base_data(base);
603                 find_unresolved_deltas(&result, base);
604         }
605
606         unlink_base_data(base);
607 }
608
609 static int compare_delta_entry(const void *a, const void *b)
610 {
611         const struct delta_entry *delta_a = a;
612         const struct delta_entry *delta_b = b;
613
614         /* group by type (ref vs ofs) and then by value (sha-1 or offset) */
615         return compare_delta_bases(&delta_a->base, &delta_b->base,
616                                    objects[delta_a->obj_no].type,
617                                    objects[delta_b->obj_no].type);
618 }
619
620 /* Parse all objects and return the pack content SHA1 hash */
621 static void parse_pack_objects(unsigned char *sha1)
622 {
623         int i;
624         struct delta_entry *delta = deltas;
625         struct stat st;
626
627         /*
628          * First pass:
629          * - find locations of all objects;
630          * - calculate SHA1 of all non-delta objects;
631          * - remember base (SHA1 or offset) for all deltas.
632          */
633         if (verbose)
634                 progress = start_progress(
635                                 from_stdin ? "Receiving objects" : "Indexing objects",
636                                 nr_objects);
637         for (i = 0; i < nr_objects; i++) {
638                 struct object_entry *obj = &objects[i];
639                 void *data = unpack_raw_entry(obj, &delta->base);
640                 obj->real_type = obj->type;
641                 if (is_delta_type(obj->type)) {
642                         nr_deltas++;
643                         delta->obj_no = i;
644                         delta++;
645                 } else
646                         sha1_object(data, obj->size, obj->type, obj->idx.sha1);
647                 free(data);
648                 display_progress(progress, i+1);
649         }
650         objects[i].idx.offset = consumed_bytes;
651         stop_progress(&progress);
652
653         /* Check pack integrity */
654         flush();
655         git_SHA1_Final(sha1, &input_ctx);
656         if (hashcmp(fill(20), sha1))
657                 die("pack is corrupted (SHA1 mismatch)");
658         use(20);
659
660         /* If input_fd is a file, we should have reached its end now. */
661         if (fstat(input_fd, &st))
662                 die_errno("cannot fstat packfile");
663         if (S_ISREG(st.st_mode) &&
664                         lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
665                 die("pack has junk at the end");
666
667         if (!nr_deltas)
668                 return;
669
670         /* Sort deltas by base SHA1/offset for fast searching */
671         qsort(deltas, nr_deltas, sizeof(struct delta_entry),
672               compare_delta_entry);
673
674         /*
675          * Second pass:
676          * - for all non-delta objects, look if it is used as a base for
677          *   deltas;
678          * - if used as a base, uncompress the object and apply all deltas,
679          *   recursively checking if the resulting object is used as a base
680          *   for some more deltas.
681          */
682         if (verbose)
683                 progress = start_progress("Resolving deltas", nr_deltas);
684         for (i = 0; i < nr_objects; i++) {
685                 struct object_entry *obj = &objects[i];
686                 struct base_data base_obj;
687
688                 if (is_delta_type(obj->type))
689                         continue;
690                 base_obj.obj = obj;
691                 base_obj.data = NULL;
692                 find_unresolved_deltas(&base_obj, NULL);
693                 display_progress(progress, nr_resolved_deltas);
694         }
695 }
696
697 static int write_compressed(struct sha1file *f, void *in, unsigned int size)
698 {
699         z_stream stream;
700         int status;
701         unsigned char outbuf[4096];
702
703         memset(&stream, 0, sizeof(stream));
704         deflateInit(&stream, zlib_compression_level);
705         stream.next_in = in;
706         stream.avail_in = size;
707
708         do {
709                 stream.next_out = outbuf;
710                 stream.avail_out = sizeof(outbuf);
711                 status = deflate(&stream, Z_FINISH);
712                 sha1write(f, outbuf, sizeof(outbuf) - stream.avail_out);
713         } while (status == Z_OK);
714
715         if (status != Z_STREAM_END)
716                 die("unable to deflate appended object (%d)", status);
717         size = stream.total_out;
718         deflateEnd(&stream);
719         return size;
720 }
721
722 static struct object_entry *append_obj_to_pack(struct sha1file *f,
723                                const unsigned char *sha1, void *buf,
724                                unsigned long size, enum object_type type)
725 {
726         struct object_entry *obj = &objects[nr_objects++];
727         unsigned char header[10];
728         unsigned long s = size;
729         int n = 0;
730         unsigned char c = (type << 4) | (s & 15);
731         s >>= 4;
732         while (s) {
733                 header[n++] = c | 0x80;
734                 c = s & 0x7f;
735                 s >>= 7;
736         }
737         header[n++] = c;
738         crc32_begin(f);
739         sha1write(f, header, n);
740         obj[0].size = size;
741         obj[0].hdr_size = n;
742         obj[0].type = type;
743         obj[0].real_type = type;
744         obj[1].idx.offset = obj[0].idx.offset + n;
745         obj[1].idx.offset += write_compressed(f, buf, size);
746         obj[0].idx.crc32 = crc32_end(f);
747         sha1flush(f);
748         hashcpy(obj->idx.sha1, sha1);
749         return obj;
750 }
751
752 static int delta_pos_compare(const void *_a, const void *_b)
753 {
754         struct delta_entry *a = *(struct delta_entry **)_a;
755         struct delta_entry *b = *(struct delta_entry **)_b;
756         return a->obj_no - b->obj_no;
757 }
758
759 static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved)
760 {
761         struct delta_entry **sorted_by_pos;
762         int i, n = 0;
763
764         /*
765          * Since many unresolved deltas may well be themselves base objects
766          * for more unresolved deltas, we really want to include the
767          * smallest number of base objects that would cover as much delta
768          * as possible by picking the
769          * trunc deltas first, allowing for other deltas to resolve without
770          * additional base objects.  Since most base objects are to be found
771          * before deltas depending on them, a good heuristic is to start
772          * resolving deltas in the same order as their position in the pack.
773          */
774         sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
775         for (i = 0; i < nr_deltas; i++) {
776                 if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
777                         continue;
778                 sorted_by_pos[n++] = &deltas[i];
779         }
780         qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);
781
782         for (i = 0; i < n; i++) {
783                 struct delta_entry *d = sorted_by_pos[i];
784                 enum object_type type;
785                 struct base_data base_obj;
786
787                 if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
788                         continue;
789                 base_obj.data = read_sha1_file(d->base.sha1, &type, &base_obj.size);
790                 if (!base_obj.data)
791                         continue;
792
793                 if (check_sha1_signature(d->base.sha1, base_obj.data,
794                                 base_obj.size, typename(type)))
795                         die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
796                 base_obj.obj = append_obj_to_pack(f, d->base.sha1,
797                                         base_obj.data, base_obj.size, type);
798                 find_unresolved_deltas(&base_obj, NULL);
799                 display_progress(progress, nr_resolved_deltas);
800         }
801         free(sorted_by_pos);
802 }
803
804 static void final(const char *final_pack_name, const char *curr_pack_name,
805                   const char *final_index_name, const char *curr_index_name,
806                   const char *keep_name, const char *keep_msg,
807                   unsigned char *sha1)
808 {
809         const char *report = "pack";
810         char name[PATH_MAX];
811         int err;
812
813         if (!from_stdin) {
814                 close(input_fd);
815         } else {
816                 fsync_or_die(output_fd, curr_pack_name);
817                 err = close(output_fd);
818                 if (err)
819                         die_errno("error while closing pack file");
820         }
821
822         if (keep_msg) {
823                 int keep_fd, keep_msg_len = strlen(keep_msg);
824
825                 if (!keep_name)
826                         keep_fd = odb_pack_keep(name, sizeof(name), sha1);
827                 else
828                         keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
829
830                 if (keep_fd < 0) {
831                         if (errno != EEXIST)
832                                 die_errno("cannot write keep file '%s'",
833                                           keep_name);
834                 } else {
835                         if (keep_msg_len > 0) {
836                                 write_or_die(keep_fd, keep_msg, keep_msg_len);
837                                 write_or_die(keep_fd, "\n", 1);
838                         }
839                         if (close(keep_fd) != 0)
840                                 die_errno("cannot close written keep file '%s'",
841                                     keep_name);
842                         report = "keep";
843                 }
844         }
845
846         if (final_pack_name != curr_pack_name) {
847                 if (!final_pack_name) {
848                         snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
849                                  get_object_directory(), sha1_to_hex(sha1));
850                         final_pack_name = name;
851                 }
852                 if (move_temp_to_file(curr_pack_name, final_pack_name))
853                         die("cannot store pack file");
854         } else if (from_stdin)
855                 chmod(final_pack_name, 0444);
856
857         if (final_index_name != curr_index_name) {
858                 if (!final_index_name) {
859                         snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
860                                  get_object_directory(), sha1_to_hex(sha1));
861                         final_index_name = name;
862                 }
863                 if (move_temp_to_file(curr_index_name, final_index_name))
864                         die("cannot store index file");
865         } else
866                 chmod(final_index_name, 0444);
867
868         if (!from_stdin) {
869                 printf("%s\n", sha1_to_hex(sha1));
870         } else {
871                 char buf[48];
872                 int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
873                                    report, sha1_to_hex(sha1));
874                 write_or_die(1, buf, len);
875
876                 /*
877                  * Let's just mimic git-unpack-objects here and write
878                  * the last part of the input buffer to stdout.
879                  */
880                 while (input_len) {
881                         err = xwrite(1, input_buffer + input_offset, input_len);
882                         if (err <= 0)
883                                 break;
884                         input_len -= err;
885                         input_offset += err;
886                 }
887         }
888 }
889
890 static int git_index_pack_config(const char *k, const char *v, void *cb)
891 {
892         struct pack_idx_option *opts = cb;
893
894         if (!strcmp(k, "pack.indexversion")) {
895                 opts->version = git_config_int(k, v);
896                 if (opts->version > 2)
897                         die("bad pack.indexversion=%"PRIu32, opts->version);
898                 return 0;
899         }
900         return git_default_config(k, v, cb);
901 }
902
903 static int cmp_uint32(const void *a_, const void *b_)
904 {
905         uint32_t a = *((uint32_t *)a_);
906         uint32_t b = *((uint32_t *)b_);
907
908         return (a < b) ? -1 : (a != b);
909 }
910
911 static void read_v2_anomalous_offsets(struct packed_git *p,
912                                       struct pack_idx_option *opts)
913 {
914         const uint32_t *idx1, *idx2;
915         uint32_t i;
916
917         /* The address of the 4-byte offset table */
918         idx1 = (((const uint32_t *)p->index_data)
919                 + 2 /* 8-byte header */
920                 + 256 /* fan out */
921                 + 5 * p->num_objects /* 20-byte SHA-1 table */
922                 + p->num_objects /* CRC32 table */
923                 );
924
925         /* The address of the 8-byte offset table */
926         idx2 = idx1 + p->num_objects;
927
928         for (i = 0; i < p->num_objects; i++) {
929                 uint32_t off = ntohl(idx1[i]);
930                 if (!(off & 0x80000000))
931                         continue;
932                 off = off & 0x7fffffff;
933                 if (idx2[off * 2])
934                         continue;
935                 /*
936                  * The real offset is ntohl(idx2[off * 2]) in high 4
937                  * octets, and ntohl(idx2[off * 2 + 1]) in low 4
938                  * octets.  But idx2[off * 2] is Zero!!!
939                  */
940                 ALLOC_GROW(opts->anomaly, opts->anomaly_nr + 1, opts->anomaly_alloc);
941                 opts->anomaly[opts->anomaly_nr++] = ntohl(idx2[off * 2 + 1]);
942         }
943
944         if (1 < opts->anomaly_nr)
945                 qsort(opts->anomaly, opts->anomaly_nr, sizeof(uint32_t), cmp_uint32);
946 }
947
948 static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
949 {
950         struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
951
952         if (!p)
953                 die("Cannot open existing pack file '%s'", pack_name);
954         if (open_pack_index(p))
955                 die("Cannot open existing pack idx file for '%s'", pack_name);
956
957         /* Read the attributes from the existing idx file */
958         opts->version = p->index_version;
959
960         if (opts->version == 2)
961                 read_v2_anomalous_offsets(p, opts);
962
963         /*
964          * Get rid of the idx file as we do not need it anymore.
965          * NEEDSWORK: extract this bit from free_pack_by_name() in
966          * sha1_file.c, perhaps?  It shouldn't matter very much as we
967          * know we haven't installed this pack (hence we never have
968          * read anything from it).
969          */
970         close_pack_index(p);
971         free(p);
972 }
973
974 static void show_pack_info(int stat_only)
975 {
976         int i;
977         for (i = 0; i < nr_objects; i++) {
978                 struct object_entry *obj = &objects[i];
979
980                 /* NEEDSWORK: Compute data necessary for the "histogram" here */
981
982                 if (stat_only)
983                         continue;
984                 printf("%s %-6s %lu %lu %"PRIuMAX,
985                        sha1_to_hex(obj->idx.sha1),
986                        typename(obj->real_type), obj->size,
987                        (unsigned long)(obj[1].idx.offset - obj->idx.offset),
988                        (uintmax_t)obj->idx.offset);
989                 if (is_delta_type(obj->type)) {
990                         struct object_entry *bobj = &objects[obj->base_object_no];
991                         printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1));
992                 }
993                 putchar('\n');
994         }
995 }
996
997 int cmd_index_pack(int argc, const char **argv, const char *prefix)
998 {
999         int i, fix_thin_pack = 0, verify = 0, stat_only = 0, stat = 0;
1000         const char *curr_pack, *curr_index;
1001         const char *index_name = NULL, *pack_name = NULL;
1002         const char *keep_name = NULL, *keep_msg = NULL;
1003         char *index_name_buf = NULL, *keep_name_buf = NULL;
1004         struct pack_idx_entry **idx_objects;
1005         struct pack_idx_option opts;
1006         unsigned char pack_sha1[20];
1007
1008         if (argc == 2 && !strcmp(argv[1], "-h"))
1009                 usage(index_pack_usage);
1010
1011         read_replace_refs = 0;
1012
1013         reset_pack_idx_option(&opts);
1014         git_config(git_index_pack_config, &opts);
1015         if (prefix && chdir(prefix))
1016                 die("Cannot come back to cwd");
1017
1018         for (i = 1; i < argc; i++) {
1019                 const char *arg = argv[i];
1020
1021                 if (*arg == '-') {
1022                         if (!strcmp(arg, "--stdin")) {
1023                                 from_stdin = 1;
1024                         } else if (!strcmp(arg, "--fix-thin")) {
1025                                 fix_thin_pack = 1;
1026                         } else if (!strcmp(arg, "--strict")) {
1027                                 strict = 1;
1028                         } else if (!strcmp(arg, "--verify")) {
1029                                 verify = 1;
1030                         } else if (!strcmp(arg, "--verify-stat")) {
1031                                 verify = 1;
1032                                 stat = 1;
1033                         } else if (!strcmp(arg, "--verify-stat-only")) {
1034                                 verify = 1;
1035                                 stat = 1;
1036                                 stat_only = 1;
1037                         } else if (!strcmp(arg, "--keep")) {
1038                                 keep_msg = "";
1039                         } else if (!prefixcmp(arg, "--keep=")) {
1040                                 keep_msg = arg + 7;
1041                         } else if (!prefixcmp(arg, "--pack_header=")) {
1042                                 struct pack_header *hdr;
1043                                 char *c;
1044
1045                                 hdr = (struct pack_header *)input_buffer;
1046                                 hdr->hdr_signature = htonl(PACK_SIGNATURE);
1047                                 hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
1048                                 if (*c != ',')
1049                                         die("bad %s", arg);
1050                                 hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
1051                                 if (*c)
1052                                         die("bad %s", arg);
1053                                 input_len = sizeof(*hdr);
1054                         } else if (!strcmp(arg, "-v")) {
1055                                 verbose = 1;
1056                         } else if (!strcmp(arg, "-o")) {
1057                                 if (index_name || (i+1) >= argc)
1058                                         usage(index_pack_usage);
1059                                 index_name = argv[++i];
1060                         } else if (!prefixcmp(arg, "--index-version=")) {
1061                                 char *c;
1062                                 opts.version = strtoul(arg + 16, &c, 10);
1063                                 if (opts.version > 2)
1064                                         die("bad %s", arg);
1065                                 if (*c == ',')
1066                                         opts.off32_limit = strtoul(c+1, &c, 0);
1067                                 if (*c || opts.off32_limit & 0x80000000)
1068                                         die("bad %s", arg);
1069                         } else
1070                                 usage(index_pack_usage);
1071                         continue;
1072                 }
1073
1074                 if (pack_name)
1075                         usage(index_pack_usage);
1076                 pack_name = arg;
1077         }
1078
1079         if (!pack_name && !from_stdin)
1080                 usage(index_pack_usage);
1081         if (fix_thin_pack && !from_stdin)
1082                 die("--fix-thin cannot be used without --stdin");
1083         if (!index_name && pack_name) {
1084                 int len = strlen(pack_name);
1085                 if (!has_extension(pack_name, ".pack"))
1086                         die("packfile name '%s' does not end with '.pack'",
1087                             pack_name);
1088                 index_name_buf = xmalloc(len);
1089                 memcpy(index_name_buf, pack_name, len - 5);
1090                 strcpy(index_name_buf + len - 5, ".idx");
1091                 index_name = index_name_buf;
1092         }
1093         if (keep_msg && !keep_name && pack_name) {
1094                 int len = strlen(pack_name);
1095                 if (!has_extension(pack_name, ".pack"))
1096                         die("packfile name '%s' does not end with '.pack'",
1097                             pack_name);
1098                 keep_name_buf = xmalloc(len);
1099                 memcpy(keep_name_buf, pack_name, len - 5);
1100                 strcpy(keep_name_buf + len - 5, ".keep");
1101                 keep_name = keep_name_buf;
1102         }
1103         if (verify) {
1104                 if (!index_name)
1105                         die("--verify with no packfile name given");
1106                 read_idx_option(&opts, index_name);
1107                 opts.flags |= WRITE_IDX_VERIFY;
1108         }
1109
1110         curr_pack = open_pack_file(pack_name);
1111         parse_pack_header();
1112         objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
1113         deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
1114         parse_pack_objects(pack_sha1);
1115         if (nr_deltas == nr_resolved_deltas) {
1116                 stop_progress(&progress);
1117                 /* Flush remaining pack final 20-byte SHA1. */
1118                 flush();
1119         } else {
1120                 if (fix_thin_pack) {
1121                         struct sha1file *f;
1122                         unsigned char read_sha1[20], tail_sha1[20];
1123                         char msg[48];
1124                         int nr_unresolved = nr_deltas - nr_resolved_deltas;
1125                         int nr_objects_initial = nr_objects;
1126                         if (nr_unresolved <= 0)
1127                                 die("confusion beyond insanity");
1128                         objects = xrealloc(objects,
1129                                            (nr_objects + nr_unresolved + 1)
1130                                            * sizeof(*objects));
1131                         f = sha1fd(output_fd, curr_pack);
1132                         fix_unresolved_deltas(f, nr_unresolved);
1133                         sprintf(msg, "completed with %d local objects",
1134                                 nr_objects - nr_objects_initial);
1135                         stop_progress_msg(&progress, msg);
1136                         sha1close(f, tail_sha1, 0);
1137                         hashcpy(read_sha1, pack_sha1);
1138                         fixup_pack_header_footer(output_fd, pack_sha1,
1139                                                  curr_pack, nr_objects,
1140                                                  read_sha1, consumed_bytes-20);
1141                         if (hashcmp(read_sha1, tail_sha1) != 0)
1142                                 die("Unexpected tail checksum for %s "
1143                                     "(disk corruption?)", curr_pack);
1144                 }
1145                 if (nr_deltas != nr_resolved_deltas)
1146                         die("pack has %d unresolved deltas",
1147                             nr_deltas - nr_resolved_deltas);
1148         }
1149         free(deltas);
1150         if (strict)
1151                 check_objects();
1152
1153         if (stat)
1154                 show_pack_info(stat_only);
1155
1156         idx_objects = xmalloc((nr_objects) * sizeof(struct pack_idx_entry *));
1157         for (i = 0; i < nr_objects; i++)
1158                 idx_objects[i] = &objects[i].idx;
1159         curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1);
1160         free(idx_objects);
1161
1162         if (!verify)
1163                 final(pack_name, curr_pack,
1164                       index_name, curr_index,
1165                       keep_name, keep_msg,
1166                       pack_sha1);
1167         else
1168                 close(input_fd);
1169         free(objects);
1170         free(index_name_buf);
1171         free(keep_name_buf);
1172         if (pack_name == NULL)
1173                 free((void *) curr_pack);
1174         if (index_name == NULL)
1175                 free((void *) curr_index);
1176
1177         return 0;
1178 }