Merge branch 'jc/diff-test' into th/diff
[git] / index-pack.c
1 #include "cache.h"
2 #include "delta.h"
3 #include "pack.h"
4 #include "csum-file.h"
5 #include "blob.h"
6 #include "commit.h"
7 #include "tag.h"
8 #include "tree.h"
9
10 static const char index_pack_usage[] =
11 "git-index-pack [-o index-file] pack-file";
12
13 struct object_entry
14 {
15         unsigned long offset;
16         enum object_type type;
17         enum object_type real_type;
18         unsigned char sha1[20];
19 };
20
21 struct delta_entry
22 {
23         struct object_entry *obj;
24         unsigned char base_sha1[20];
25 };
26
27 static const char *pack_name;
28 static unsigned char *pack_base;
29 static unsigned long pack_size;
30 static struct object_entry *objects;
31 static struct delta_entry *deltas;
32 static int nr_objects;
33 static int nr_deltas;
34
35 static void open_pack_file(void)
36 {
37         int fd;
38         struct stat st;
39
40         fd = open(pack_name, O_RDONLY);
41         if (fd < 0)
42                 die("cannot open packfile '%s': %s", pack_name,
43                     strerror(errno));
44         if (fstat(fd, &st)) {
45                 int err = errno;
46                 close(fd);
47                 die("cannot fstat packfile '%s': %s", pack_name,
48                     strerror(err));
49         }
50         pack_size = st.st_size;
51         pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
52         if (pack_base == MAP_FAILED) {
53                 int err = errno;
54                 close(fd);
55                 die("cannot mmap packfile '%s': %s", pack_name,
56                     strerror(err));
57         }
58         close(fd);
59 }
60
61 static void parse_pack_header(void)
62 {
63         const struct pack_header *hdr;
64         unsigned char sha1[20];
65         SHA_CTX ctx;
66
67         /* Ensure there are enough bytes for the header and final SHA1 */
68         if (pack_size < sizeof(struct pack_header) + 20)
69                 die("packfile '%s' is too small", pack_name);
70
71         /* Header consistency check */
72         hdr = (void *)pack_base;
73         if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
74                 die("packfile '%s' signature mismatch", pack_name);
75         if (!pack_version_ok(hdr->hdr_version))
76                 die("packfile '%s' version %d unsupported",
77                     pack_name, ntohl(hdr->hdr_version));
78
79         nr_objects = ntohl(hdr->hdr_entries);
80
81         /* Check packfile integrity */
82         SHA1_Init(&ctx);
83         SHA1_Update(&ctx, pack_base, pack_size - 20);
84         SHA1_Final(sha1, &ctx);
85         if (memcmp(sha1, pack_base + pack_size - 20, 20))
86                 die("packfile '%s' SHA1 mismatch", pack_name);
87 }
88
89 static void bad_object(unsigned long offset, const char *format,
90                        ...) NORETURN __attribute__((format (printf, 2, 3)));
91
92 static void bad_object(unsigned long offset, const char *format, ...)
93 {
94         va_list params;
95         char buf[1024];
96
97         va_start(params, format);
98         vsnprintf(buf, sizeof(buf), format, params);
99         va_end(params);
100         die("packfile '%s': bad object at offset %lu: %s",
101             pack_name, offset, buf);
102 }
103
104 static void *unpack_entry_data(unsigned long offset,
105                                unsigned long *current_pos, unsigned long size)
106 {
107         unsigned long pack_limit = pack_size - 20;
108         unsigned long pos = *current_pos;
109         z_stream stream;
110         void *buf = xmalloc(size);
111
112         memset(&stream, 0, sizeof(stream));
113         stream.next_out = buf;
114         stream.avail_out = size;
115         stream.next_in = pack_base + pos;
116         stream.avail_in = pack_limit - pos;
117         inflateInit(&stream);
118
119         for (;;) {
120                 int ret = inflate(&stream, 0);
121                 if (ret == Z_STREAM_END)
122                         break;
123                 if (ret != Z_OK)
124                         bad_object(offset, "inflate returned %d", ret);
125         }
126         inflateEnd(&stream);
127         if (stream.total_out != size)
128                 bad_object(offset, "size mismatch (expected %lu, got %lu)",
129                            size, stream.total_out);
130         *current_pos = pack_limit - stream.avail_in;
131         return buf;
132 }
133
134 static void *unpack_raw_entry(unsigned long offset,
135                               enum object_type *obj_type,
136                               unsigned long *obj_size,
137                               unsigned char *delta_base,
138                               unsigned long *next_obj_offset)
139 {
140         unsigned long pack_limit = pack_size - 20;
141         unsigned long pos = offset;
142         unsigned char c;
143         unsigned long size;
144         unsigned shift;
145         enum object_type type;
146         void *data;
147
148         c = pack_base[pos++];
149         type = (c >> 4) & 7;
150         size = (c & 15);
151         shift = 4;
152         while (c & 0x80) {
153                 if (pos >= pack_limit)
154                         bad_object(offset, "object extends past end of pack");
155                 c = pack_base[pos++];
156                 size += (c & 0x7fUL) << shift;
157                 shift += 7;
158         }
159
160         switch (type) {
161         case OBJ_DELTA:
162                 if (pos + 20 >= pack_limit)
163                         bad_object(offset, "object extends past end of pack");
164                 memcpy(delta_base, pack_base + pos, 20);
165                 pos += 20;
166                 /* fallthru */
167         case OBJ_COMMIT:
168         case OBJ_TREE:
169         case OBJ_BLOB:
170         case OBJ_TAG:
171                 data = unpack_entry_data(offset, &pos, size);
172                 break;
173         default:
174                 bad_object(offset, "bad object type %d", type);
175         }
176
177         *obj_type = type;
178         *obj_size = size;
179         *next_obj_offset = pos;
180         return data;
181 }
182
183 static int find_delta(const unsigned char *base_sha1)
184 {
185         int first = 0, last = nr_deltas;
186
187         while (first < last) {
188                 int next = (first + last) / 2;
189                 struct delta_entry *delta = &deltas[next];
190                 int cmp;
191
192                 cmp = memcmp(base_sha1, delta->base_sha1, 20);
193                 if (!cmp)
194                         return next;
195                 if (cmp < 0) {
196                         last = next;
197                         continue;
198                 }
199                 first = next+1;
200         }
201         return -first-1;
202 }
203
204 static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
205                                      int *first_index, int *last_index)
206 {
207         int first = find_delta(base_sha1);
208         int last = first;
209         int end = nr_deltas - 1;
210
211         if (first < 0)
212                 return -1;
213         while (first > 0 && !memcmp(deltas[first-1].base_sha1, base_sha1, 20))
214                 --first;
215         while (last < end && !memcmp(deltas[last+1].base_sha1, base_sha1, 20))
216                 ++last;
217         *first_index = first;
218         *last_index = last;
219         return 0;
220 }
221
222 static void sha1_object(const void *data, unsigned long size,
223                         enum object_type type, unsigned char *sha1)
224 {
225         SHA_CTX ctx;
226         char header[50];
227         int header_size;
228         const char *type_str;
229
230         switch (type) {
231         case OBJ_COMMIT: type_str = commit_type; break;
232         case OBJ_TREE:   type_str = tree_type; break;
233         case OBJ_BLOB:   type_str = blob_type; break;
234         case OBJ_TAG:    type_str = tag_type; break;
235         default:
236                 die("bad type %d", type);
237         }
238
239         header_size = sprintf(header, "%s %lu", type_str, size) + 1;
240
241         SHA1_Init(&ctx);
242         SHA1_Update(&ctx, header, header_size);
243         SHA1_Update(&ctx, data, size);
244         SHA1_Final(sha1, &ctx);
245 }
246
247 static void resolve_delta(struct delta_entry *delta, void *base_data,
248                           unsigned long base_size, enum object_type type)
249 {
250         struct object_entry *obj = delta->obj;
251         void *delta_data;
252         unsigned long delta_size;
253         void *result;
254         unsigned long result_size;
255         enum object_type delta_type;
256         unsigned char base_sha1[20];
257         unsigned long next_obj_offset;
258         int j, first, last;
259
260         obj->real_type = type;
261         delta_data = unpack_raw_entry(obj->offset, &delta_type,
262                                       &delta_size, base_sha1,
263                                       &next_obj_offset);
264         result = patch_delta(base_data, base_size, delta_data, delta_size,
265                              &result_size);
266         free(delta_data);
267         if (!result)
268                 bad_object(obj->offset, "failed to apply delta");
269         sha1_object(result, result_size, type, obj->sha1);
270         if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
271                 for (j = first; j <= last; j++)
272                         resolve_delta(&deltas[j], result, result_size, type);
273         }
274         free(result);
275 }
276
277 static int compare_delta_entry(const void *a, const void *b)
278 {
279         const struct delta_entry *delta_a = a;
280         const struct delta_entry *delta_b = b;
281         return memcmp(delta_a->base_sha1, delta_b->base_sha1, 20);
282 }
283
284 static void parse_pack_objects(void)
285 {
286         int i;
287         unsigned long offset = sizeof(struct pack_header);
288         unsigned char base_sha1[20];
289         void *data;
290         unsigned long data_size;
291
292         /*
293          * First pass:
294          * - find locations of all objects;
295          * - calculate SHA1 of all non-delta objects;
296          * - remember base SHA1 for all deltas.
297          */
298         for (i = 0; i < nr_objects; i++) {
299                 struct object_entry *obj = &objects[i];
300                 obj->offset = offset;
301                 data = unpack_raw_entry(offset, &obj->type, &data_size,
302                                         base_sha1, &offset);
303                 obj->real_type = obj->type;
304                 if (obj->type == OBJ_DELTA) {
305                         struct delta_entry *delta = &deltas[nr_deltas++];
306                         delta->obj = obj;
307                         memcpy(delta->base_sha1, base_sha1, 20);
308                 } else
309                         sha1_object(data, data_size, obj->type, obj->sha1);
310                 free(data);
311         }
312         if (offset != pack_size - 20)
313                 die("packfile '%s' has junk at the end", pack_name);
314
315         /* Sort deltas by base SHA1 for fast searching */
316         qsort(deltas, nr_deltas, sizeof(struct delta_entry),
317               compare_delta_entry);
318
319         /*
320          * Second pass:
321          * - for all non-delta objects, look if it is used as a base for
322          *   deltas;
323          * - if used as a base, uncompress the object and apply all deltas,
324          *   recursively checking if the resulting object is used as a base
325          *   for some more deltas.
326          */
327         for (i = 0; i < nr_objects; i++) {
328                 struct object_entry *obj = &objects[i];
329                 int j, first, last;
330
331                 if (obj->type == OBJ_DELTA)
332                         continue;
333                 if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
334                         continue;
335                 data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
336                                         base_sha1, &offset);
337                 for (j = first; j <= last; j++)
338                         resolve_delta(&deltas[j], data, data_size, obj->type);
339                 free(data);
340         }
341
342         /* Check for unresolved deltas */
343         for (i = 0; i < nr_deltas; i++) {
344                 if (deltas[i].obj->real_type == OBJ_DELTA)
345                         die("packfile '%s' has unresolved deltas",  pack_name);
346         }
347 }
348
349 static int sha1_compare(const void *_a, const void *_b)
350 {
351         struct object_entry *a = *(struct object_entry **)_a;
352         struct object_entry *b = *(struct object_entry **)_b;
353         return memcmp(a->sha1, b->sha1, 20);
354 }
355
356 static void write_index_file(const char *index_name, unsigned char *sha1)
357 {
358         struct sha1file *f;
359         struct object_entry **sorted_by_sha, **list, **last;
360         unsigned int array[256];
361         int i;
362         SHA_CTX ctx;
363
364         if (nr_objects) {
365                 sorted_by_sha =
366                         xcalloc(nr_objects, sizeof(struct object_entry *));
367                 list = sorted_by_sha;
368                 last = sorted_by_sha + nr_objects;
369                 for (i = 0; i < nr_objects; ++i)
370                         sorted_by_sha[i] = &objects[i];
371                 qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
372                       sha1_compare);
373
374         }
375         else
376                 sorted_by_sha = list = last = NULL;
377
378         unlink(index_name);
379         f = sha1create("%s", index_name);
380
381         /*
382          * Write the first-level table (the list is sorted,
383          * but we use a 256-entry lookup to be able to avoid
384          * having to do eight extra binary search iterations).
385          */
386         for (i = 0; i < 256; i++) {
387                 struct object_entry **next = list;
388                 while (next < last) {
389                         struct object_entry *obj = *next;
390                         if (obj->sha1[0] != i)
391                                 break;
392                         next++;
393                 }
394                 array[i] = htonl(next - sorted_by_sha);
395                 list = next;
396         }
397         sha1write(f, array, 256 * sizeof(int));
398
399         /* recompute the SHA1 hash of sorted object names.
400          * currently pack-objects does not do this, but that
401          * can be fixed.
402          */
403         SHA1_Init(&ctx);
404         /*
405          * Write the actual SHA1 entries..
406          */
407         list = sorted_by_sha;
408         for (i = 0; i < nr_objects; i++) {
409                 struct object_entry *obj = *list++;
410                 unsigned int offset = htonl(obj->offset);
411                 sha1write(f, &offset, 4);
412                 sha1write(f, obj->sha1, 20);
413                 SHA1_Update(&ctx, obj->sha1, 20);
414         }
415         sha1write(f, pack_base + pack_size - 20, 20);
416         sha1close(f, NULL, 1);
417         free(sorted_by_sha);
418         SHA1_Final(sha1, &ctx);
419 }
420
421 int main(int argc, char **argv)
422 {
423         int i;
424         char *index_name = NULL;
425         char *index_name_buf = NULL;
426         unsigned char sha1[20];
427
428         for (i = 1; i < argc; i++) {
429                 const char *arg = argv[i];
430
431                 if (*arg == '-') {
432                         if (!strcmp(arg, "-o")) {
433                                 if (index_name || (i+1) >= argc)
434                                         usage(index_pack_usage);
435                                 index_name = argv[++i];
436                         } else
437                                 usage(index_pack_usage);
438                         continue;
439                 }
440
441                 if (pack_name)
442                         usage(index_pack_usage);
443                 pack_name = arg;
444         }
445
446         if (!pack_name)
447                 usage(index_pack_usage);
448         if (!index_name) {
449                 int len = strlen(pack_name);
450                 if (len < 5 || strcmp(pack_name + len - 5, ".pack"))
451                         die("packfile name '%s' does not end with '.pack'",
452                             pack_name);
453                 index_name_buf = xmalloc(len);
454                 memcpy(index_name_buf, pack_name, len - 5);
455                 strcpy(index_name_buf + len - 5, ".idx");
456                 index_name = index_name_buf;
457         }
458
459         open_pack_file();
460         parse_pack_header();
461         objects = xcalloc(nr_objects, sizeof(struct object_entry));
462         deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
463         parse_pack_objects();
464         free(deltas);
465         write_index_file(index_name, sha1);
466         free(objects);
467         free(index_name_buf);
468
469         printf("%s\n", sha1_to_hex(sha1));
470
471         return 0;
472 }