commit-graph: not compatible with grafts
[git] / commit-graph.c
1 #include "cache.h"
2 #include "config.h"
3 #include "dir.h"
4 #include "git-compat-util.h"
5 #include "lockfile.h"
6 #include "pack.h"
7 #include "packfile.h"
8 #include "commit.h"
9 #include "object.h"
10 #include "refs.h"
11 #include "revision.h"
12 #include "sha1-lookup.h"
13 #include "commit-graph.h"
14 #include "object-store.h"
15 #include "alloc.h"
16 #include "hashmap.h"
17 #include "replace-object.h"
18
19 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
20 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
21 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
22 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
23 #define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
24
25 #define GRAPH_DATA_WIDTH 36
26
27 #define GRAPH_VERSION_1 0x1
28 #define GRAPH_VERSION GRAPH_VERSION_1
29
30 #define GRAPH_OID_VERSION_SHA1 1
31 #define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
32 #define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
33 #define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
34
35 #define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
36 #define GRAPH_PARENT_MISSING 0x7fffffff
37 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
38 #define GRAPH_PARENT_NONE 0x70000000
39
40 #define GRAPH_LAST_EDGE 0x80000000
41
42 #define GRAPH_HEADER_SIZE 8
43 #define GRAPH_FANOUT_SIZE (4 * 256)
44 #define GRAPH_CHUNKLOOKUP_WIDTH 12
45 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
46                         + GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
47
48 char *get_commit_graph_filename(const char *obj_dir)
49 {
50         return xstrfmt("%s/info/commit-graph", obj_dir);
51 }
52
53 static struct commit_graph *alloc_commit_graph(void)
54 {
55         struct commit_graph *g = xcalloc(1, sizeof(*g));
56         g->graph_fd = -1;
57
58         return g;
59 }
60
61 extern int read_replace_refs;
62
63 static int commit_graph_compatible(struct repository *r)
64 {
65         if (read_replace_refs) {
66                 prepare_replace_object(r);
67                 if (hashmap_get_size(&r->objects->replace_map->map))
68                         return 0;
69         }
70
71         prepare_commit_graft(r);
72         if (r->parsed_objects && r->parsed_objects->grafts_nr)
73                 return 0;
74         if (is_repository_shallow(r))
75                 return 0;
76
77         return 1;
78 }
79
80 struct commit_graph *load_commit_graph_one(const char *graph_file)
81 {
82         void *graph_map;
83         const unsigned char *data, *chunk_lookup;
84         size_t graph_size;
85         struct stat st;
86         uint32_t i;
87         struct commit_graph *graph;
88         int fd = git_open(graph_file);
89         uint64_t last_chunk_offset;
90         uint32_t last_chunk_id;
91         uint32_t graph_signature;
92         unsigned char graph_version, hash_version;
93
94         if (fd < 0)
95                 return NULL;
96         if (fstat(fd, &st)) {
97                 close(fd);
98                 return NULL;
99         }
100         graph_size = xsize_t(st.st_size);
101
102         if (graph_size < GRAPH_MIN_SIZE) {
103                 close(fd);
104                 die("graph file %s is too small", graph_file);
105         }
106         graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
107         data = (const unsigned char *)graph_map;
108
109         graph_signature = get_be32(data);
110         if (graph_signature != GRAPH_SIGNATURE) {
111                 error("graph signature %X does not match signature %X",
112                       graph_signature, GRAPH_SIGNATURE);
113                 goto cleanup_fail;
114         }
115
116         graph_version = *(unsigned char*)(data + 4);
117         if (graph_version != GRAPH_VERSION) {
118                 error("graph version %X does not match version %X",
119                       graph_version, GRAPH_VERSION);
120                 goto cleanup_fail;
121         }
122
123         hash_version = *(unsigned char*)(data + 5);
124         if (hash_version != GRAPH_OID_VERSION) {
125                 error("hash version %X does not match version %X",
126                       hash_version, GRAPH_OID_VERSION);
127                 goto cleanup_fail;
128         }
129
130         graph = alloc_commit_graph();
131
132         graph->hash_len = GRAPH_OID_LEN;
133         graph->num_chunks = *(unsigned char*)(data + 6);
134         graph->graph_fd = fd;
135         graph->data = graph_map;
136         graph->data_len = graph_size;
137
138         last_chunk_id = 0;
139         last_chunk_offset = 8;
140         chunk_lookup = data + 8;
141         for (i = 0; i < graph->num_chunks; i++) {
142                 uint32_t chunk_id = get_be32(chunk_lookup + 0);
143                 uint64_t chunk_offset = get_be64(chunk_lookup + 4);
144                 int chunk_repeated = 0;
145
146                 chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
147
148                 if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
149                         error("improper chunk offset %08x%08x", (uint32_t)(chunk_offset >> 32),
150                               (uint32_t)chunk_offset);
151                         goto cleanup_fail;
152                 }
153
154                 switch (chunk_id) {
155                 case GRAPH_CHUNKID_OIDFANOUT:
156                         if (graph->chunk_oid_fanout)
157                                 chunk_repeated = 1;
158                         else
159                                 graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
160                         break;
161
162                 case GRAPH_CHUNKID_OIDLOOKUP:
163                         if (graph->chunk_oid_lookup)
164                                 chunk_repeated = 1;
165                         else
166                                 graph->chunk_oid_lookup = data + chunk_offset;
167                         break;
168
169                 case GRAPH_CHUNKID_DATA:
170                         if (graph->chunk_commit_data)
171                                 chunk_repeated = 1;
172                         else
173                                 graph->chunk_commit_data = data + chunk_offset;
174                         break;
175
176                 case GRAPH_CHUNKID_LARGEEDGES:
177                         if (graph->chunk_large_edges)
178                                 chunk_repeated = 1;
179                         else
180                                 graph->chunk_large_edges = data + chunk_offset;
181                         break;
182                 }
183
184                 if (chunk_repeated) {
185                         error("chunk id %08x appears multiple times", chunk_id);
186                         goto cleanup_fail;
187                 }
188
189                 if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
190                 {
191                         graph->num_commits = (chunk_offset - last_chunk_offset)
192                                              / graph->hash_len;
193                 }
194
195                 last_chunk_id = chunk_id;
196                 last_chunk_offset = chunk_offset;
197         }
198
199         return graph;
200
201 cleanup_fail:
202         munmap(graph_map, graph_size);
203         close(fd);
204         exit(1);
205 }
206
207 static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
208 {
209         char *graph_name;
210
211         if (r->objects->commit_graph)
212                 return;
213
214         graph_name = get_commit_graph_filename(obj_dir);
215         r->objects->commit_graph =
216                 load_commit_graph_one(graph_name);
217
218         FREE_AND_NULL(graph_name);
219 }
220
221 /*
222  * Return 1 if commit_graph is non-NULL, and 0 otherwise.
223  *
224  * On the first invocation, this function attemps to load the commit
225  * graph if the_repository is configured to have one.
226  */
227 static int prepare_commit_graph(struct repository *r)
228 {
229         struct alternate_object_database *alt;
230         char *obj_dir;
231         int config_value;
232
233         if (r->objects->commit_graph_attempted)
234                 return !!r->objects->commit_graph;
235         r->objects->commit_graph_attempted = 1;
236
237         if (repo_config_get_bool(r, "core.commitgraph", &config_value) ||
238             !config_value)
239                 /*
240                  * This repository is not configured to use commit graphs, so
241                  * do not load one. (But report commit_graph_attempted anyway
242                  * so that commit graph loading is not attempted again for this
243                  * repository.)
244                  */
245                 return 0;
246
247         if (!commit_graph_compatible(r))
248                 return 0;
249
250         obj_dir = r->objects->objectdir;
251         prepare_commit_graph_one(r, obj_dir);
252         prepare_alt_odb(r);
253         for (alt = r->objects->alt_odb_list;
254              !r->objects->commit_graph && alt;
255              alt = alt->next)
256                 prepare_commit_graph_one(r, alt->path);
257         return !!r->objects->commit_graph;
258 }
259
260 static void close_commit_graph(void)
261 {
262         free_commit_graph(the_repository->objects->commit_graph);
263         the_repository->objects->commit_graph = NULL;
264 }
265
266 static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
267 {
268         return bsearch_hash(oid->hash, g->chunk_oid_fanout,
269                             g->chunk_oid_lookup, g->hash_len, pos);
270 }
271
272 static struct commit_list **insert_parent_or_die(struct commit_graph *g,
273                                                  uint64_t pos,
274                                                  struct commit_list **pptr)
275 {
276         struct commit *c;
277         struct object_id oid;
278
279         if (pos >= g->num_commits)
280                 die("invalid parent position %"PRIu64, pos);
281
282         hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
283         c = lookup_commit(the_repository, &oid);
284         if (!c)
285                 die("could not find commit %s", oid_to_hex(&oid));
286         c->graph_pos = pos;
287         return &commit_list_insert(c, pptr)->next;
288 }
289
290 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
291 {
292         const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos;
293         item->graph_pos = pos;
294         item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
295 }
296
297 static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
298 {
299         uint32_t edge_value;
300         uint32_t *parent_data_ptr;
301         uint64_t date_low, date_high;
302         struct commit_list **pptr;
303         const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos;
304
305         item->object.parsed = 1;
306         item->graph_pos = pos;
307
308         item->maybe_tree = NULL;
309
310         date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
311         date_low = get_be32(commit_data + g->hash_len + 12);
312         item->date = (timestamp_t)((date_high << 32) | date_low);
313
314         item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
315
316         pptr = &item->parents;
317
318         edge_value = get_be32(commit_data + g->hash_len);
319         if (edge_value == GRAPH_PARENT_NONE)
320                 return 1;
321         pptr = insert_parent_or_die(g, edge_value, pptr);
322
323         edge_value = get_be32(commit_data + g->hash_len + 4);
324         if (edge_value == GRAPH_PARENT_NONE)
325                 return 1;
326         if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) {
327                 pptr = insert_parent_or_die(g, edge_value, pptr);
328                 return 1;
329         }
330
331         parent_data_ptr = (uint32_t*)(g->chunk_large_edges +
332                           4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
333         do {
334                 edge_value = get_be32(parent_data_ptr);
335                 pptr = insert_parent_or_die(g,
336                                             edge_value & GRAPH_EDGE_LAST_MASK,
337                                             pptr);
338                 parent_data_ptr++;
339         } while (!(edge_value & GRAPH_LAST_EDGE));
340
341         return 1;
342 }
343
344 static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
345 {
346         if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
347                 *pos = item->graph_pos;
348                 return 1;
349         } else {
350                 return bsearch_graph(g, &(item->object.oid), pos);
351         }
352 }
353
354 static int parse_commit_in_graph_one(struct commit_graph *g, struct commit *item)
355 {
356         uint32_t pos;
357
358         if (item->object.parsed)
359                 return 1;
360
361         if (find_commit_in_graph(item, g, &pos))
362                 return fill_commit_in_graph(item, g, pos);
363
364         return 0;
365 }
366
367 int parse_commit_in_graph(struct repository *r, struct commit *item)
368 {
369         if (!prepare_commit_graph(r))
370                 return 0;
371         return parse_commit_in_graph_one(r->objects->commit_graph, item);
372 }
373
374 void load_commit_graph_info(struct repository *r, struct commit *item)
375 {
376         uint32_t pos;
377         if (!prepare_commit_graph(r))
378                 return;
379         if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
380                 fill_commit_graph_info(item, r->objects->commit_graph, pos);
381 }
382
383 static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
384 {
385         struct object_id oid;
386         const unsigned char *commit_data = g->chunk_commit_data +
387                                            GRAPH_DATA_WIDTH * (c->graph_pos);
388
389         hashcpy(oid.hash, commit_data);
390         c->maybe_tree = lookup_tree(the_repository, &oid);
391
392         return c->maybe_tree;
393 }
394
395 static struct tree *get_commit_tree_in_graph_one(struct commit_graph *g,
396                                                  const struct commit *c)
397 {
398         if (c->maybe_tree)
399                 return c->maybe_tree;
400         if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
401                 BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
402
403         return load_tree_for_commit(g, (struct commit *)c);
404 }
405
406 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
407 {
408         return get_commit_tree_in_graph_one(r->objects->commit_graph, c);
409 }
410
411 static void write_graph_chunk_fanout(struct hashfile *f,
412                                      struct commit **commits,
413                                      int nr_commits)
414 {
415         int i, count = 0;
416         struct commit **list = commits;
417
418         /*
419          * Write the first-level table (the list is sorted,
420          * but we use a 256-entry lookup to be able to avoid
421          * having to do eight extra binary search iterations).
422          */
423         for (i = 0; i < 256; i++) {
424                 while (count < nr_commits) {
425                         if ((*list)->object.oid.hash[0] != i)
426                                 break;
427                         count++;
428                         list++;
429                 }
430
431                 hashwrite_be32(f, count);
432         }
433 }
434
435 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
436                                    struct commit **commits, int nr_commits)
437 {
438         struct commit **list = commits;
439         int count;
440         for (count = 0; count < nr_commits; count++, list++)
441                 hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
442 }
443
444 static const unsigned char *commit_to_sha1(size_t index, void *table)
445 {
446         struct commit **commits = table;
447         return commits[index]->object.oid.hash;
448 }
449
450 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
451                                    struct commit **commits, int nr_commits)
452 {
453         struct commit **list = commits;
454         struct commit **last = commits + nr_commits;
455         uint32_t num_extra_edges = 0;
456
457         while (list < last) {
458                 struct commit_list *parent;
459                 int edge_value;
460                 uint32_t packedDate[2];
461
462                 parse_commit(*list);
463                 hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
464
465                 parent = (*list)->parents;
466
467                 if (!parent)
468                         edge_value = GRAPH_PARENT_NONE;
469                 else {
470                         edge_value = sha1_pos(parent->item->object.oid.hash,
471                                               commits,
472                                               nr_commits,
473                                               commit_to_sha1);
474
475                         if (edge_value < 0)
476                                 edge_value = GRAPH_PARENT_MISSING;
477                 }
478
479                 hashwrite_be32(f, edge_value);
480
481                 if (parent)
482                         parent = parent->next;
483
484                 if (!parent)
485                         edge_value = GRAPH_PARENT_NONE;
486                 else if (parent->next)
487                         edge_value = GRAPH_OCTOPUS_EDGES_NEEDED | num_extra_edges;
488                 else {
489                         edge_value = sha1_pos(parent->item->object.oid.hash,
490                                               commits,
491                                               nr_commits,
492                                               commit_to_sha1);
493                         if (edge_value < 0)
494                                 edge_value = GRAPH_PARENT_MISSING;
495                 }
496
497                 hashwrite_be32(f, edge_value);
498
499                 if (edge_value & GRAPH_OCTOPUS_EDGES_NEEDED) {
500                         do {
501                                 num_extra_edges++;
502                                 parent = parent->next;
503                         } while (parent);
504                 }
505
506                 if (sizeof((*list)->date) > 4)
507                         packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
508                 else
509                         packedDate[0] = 0;
510
511                 packedDate[0] |= htonl((*list)->generation << 2);
512
513                 packedDate[1] = htonl((*list)->date);
514                 hashwrite(f, packedDate, 8);
515
516                 list++;
517         }
518 }
519
520 static void write_graph_chunk_large_edges(struct hashfile *f,
521                                           struct commit **commits,
522                                           int nr_commits)
523 {
524         struct commit **list = commits;
525         struct commit **last = commits + nr_commits;
526         struct commit_list *parent;
527
528         while (list < last) {
529                 int num_parents = 0;
530                 for (parent = (*list)->parents; num_parents < 3 && parent;
531                      parent = parent->next)
532                         num_parents++;
533
534                 if (num_parents <= 2) {
535                         list++;
536                         continue;
537                 }
538
539                 /* Since num_parents > 2, this initializer is safe. */
540                 for (parent = (*list)->parents->next; parent; parent = parent->next) {
541                         int edge_value = sha1_pos(parent->item->object.oid.hash,
542                                                   commits,
543                                                   nr_commits,
544                                                   commit_to_sha1);
545
546                         if (edge_value < 0)
547                                 edge_value = GRAPH_PARENT_MISSING;
548                         else if (!parent->next)
549                                 edge_value |= GRAPH_LAST_EDGE;
550
551                         hashwrite_be32(f, edge_value);
552                 }
553
554                 list++;
555         }
556 }
557
558 static int commit_compare(const void *_a, const void *_b)
559 {
560         const struct object_id *a = (const struct object_id *)_a;
561         const struct object_id *b = (const struct object_id *)_b;
562         return oidcmp(a, b);
563 }
564
565 struct packed_commit_list {
566         struct commit **list;
567         int nr;
568         int alloc;
569 };
570
571 struct packed_oid_list {
572         struct object_id *list;
573         int nr;
574         int alloc;
575 };
576
577 static int add_packed_commits(const struct object_id *oid,
578                               struct packed_git *pack,
579                               uint32_t pos,
580                               void *data)
581 {
582         struct packed_oid_list *list = (struct packed_oid_list*)data;
583         enum object_type type;
584         off_t offset = nth_packed_object_offset(pack, pos);
585         struct object_info oi = OBJECT_INFO_INIT;
586
587         oi.typep = &type;
588         if (packed_object_info(the_repository, pack, offset, &oi) < 0)
589                 die("unable to get type of object %s", oid_to_hex(oid));
590
591         if (type != OBJ_COMMIT)
592                 return 0;
593
594         ALLOC_GROW(list->list, list->nr + 1, list->alloc);
595         oidcpy(&(list->list[list->nr]), oid);
596         list->nr++;
597
598         return 0;
599 }
600
601 static void add_missing_parents(struct packed_oid_list *oids, struct commit *commit)
602 {
603         struct commit_list *parent;
604         for (parent = commit->parents; parent; parent = parent->next) {
605                 if (!(parent->item->object.flags & UNINTERESTING)) {
606                         ALLOC_GROW(oids->list, oids->nr + 1, oids->alloc);
607                         oidcpy(&oids->list[oids->nr], &(parent->item->object.oid));
608                         oids->nr++;
609                         parent->item->object.flags |= UNINTERESTING;
610                 }
611         }
612 }
613
614 static void close_reachable(struct packed_oid_list *oids)
615 {
616         int i;
617         struct commit *commit;
618
619         for (i = 0; i < oids->nr; i++) {
620                 commit = lookup_commit(the_repository, &oids->list[i]);
621                 if (commit)
622                         commit->object.flags |= UNINTERESTING;
623         }
624
625         /*
626          * As this loop runs, oids->nr may grow, but not more
627          * than the number of missing commits in the reachable
628          * closure.
629          */
630         for (i = 0; i < oids->nr; i++) {
631                 commit = lookup_commit(the_repository, &oids->list[i]);
632
633                 if (commit && !parse_commit(commit))
634                         add_missing_parents(oids, commit);
635         }
636
637         for (i = 0; i < oids->nr; i++) {
638                 commit = lookup_commit(the_repository, &oids->list[i]);
639
640                 if (commit)
641                         commit->object.flags &= ~UNINTERESTING;
642         }
643 }
644
645 static void compute_generation_numbers(struct packed_commit_list* commits)
646 {
647         int i;
648         struct commit_list *list = NULL;
649
650         for (i = 0; i < commits->nr; i++) {
651                 if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY &&
652                     commits->list[i]->generation != GENERATION_NUMBER_ZERO)
653                         continue;
654
655                 commit_list_insert(commits->list[i], &list);
656                 while (list) {
657                         struct commit *current = list->item;
658                         struct commit_list *parent;
659                         int all_parents_computed = 1;
660                         uint32_t max_generation = 0;
661
662                         for (parent = current->parents; parent; parent = parent->next) {
663                                 if (parent->item->generation == GENERATION_NUMBER_INFINITY ||
664                                     parent->item->generation == GENERATION_NUMBER_ZERO) {
665                                         all_parents_computed = 0;
666                                         commit_list_insert(parent->item, &list);
667                                         break;
668                                 } else if (parent->item->generation > max_generation) {
669                                         max_generation = parent->item->generation;
670                                 }
671                         }
672
673                         if (all_parents_computed) {
674                                 current->generation = max_generation + 1;
675                                 pop_commit(&list);
676
677                                 if (current->generation > GENERATION_NUMBER_MAX)
678                                         current->generation = GENERATION_NUMBER_MAX;
679                         }
680                 }
681         }
682 }
683
684 static int add_ref_to_list(const char *refname,
685                            const struct object_id *oid,
686                            int flags, void *cb_data)
687 {
688         struct string_list *list = (struct string_list *)cb_data;
689
690         string_list_append(list, oid_to_hex(oid));
691         return 0;
692 }
693
694 void write_commit_graph_reachable(const char *obj_dir, int append)
695 {
696         struct string_list list;
697
698         string_list_init(&list, 1);
699         for_each_ref(add_ref_to_list, &list);
700         write_commit_graph(obj_dir, NULL, &list, append);
701 }
702
703 void write_commit_graph(const char *obj_dir,
704                         struct string_list *pack_indexes,
705                         struct string_list *commit_hex,
706                         int append)
707 {
708         struct packed_oid_list oids;
709         struct packed_commit_list commits;
710         struct hashfile *f;
711         uint32_t i, count_distinct = 0;
712         char *graph_name;
713         struct lock_file lk = LOCK_INIT;
714         uint32_t chunk_ids[5];
715         uint64_t chunk_offsets[5];
716         int num_chunks;
717         int num_extra_edges;
718         struct commit_list *parent;
719
720         if (!commit_graph_compatible(the_repository))
721                 return;
722
723         oids.nr = 0;
724         oids.alloc = approximate_object_count() / 4;
725
726         if (append) {
727                 prepare_commit_graph_one(the_repository, obj_dir);
728                 if (the_repository->objects->commit_graph)
729                         oids.alloc += the_repository->objects->commit_graph->num_commits;
730         }
731
732         if (oids.alloc < 1024)
733                 oids.alloc = 1024;
734         ALLOC_ARRAY(oids.list, oids.alloc);
735
736         if (append && the_repository->objects->commit_graph) {
737                 struct commit_graph *commit_graph =
738                         the_repository->objects->commit_graph;
739                 for (i = 0; i < commit_graph->num_commits; i++) {
740                         const unsigned char *hash = commit_graph->chunk_oid_lookup +
741                                 commit_graph->hash_len * i;
742                         hashcpy(oids.list[oids.nr++].hash, hash);
743                 }
744         }
745
746         if (pack_indexes) {
747                 struct strbuf packname = STRBUF_INIT;
748                 int dirlen;
749                 strbuf_addf(&packname, "%s/pack/", obj_dir);
750                 dirlen = packname.len;
751                 for (i = 0; i < pack_indexes->nr; i++) {
752                         struct packed_git *p;
753                         strbuf_setlen(&packname, dirlen);
754                         strbuf_addstr(&packname, pack_indexes->items[i].string);
755                         p = add_packed_git(packname.buf, packname.len, 1);
756                         if (!p)
757                                 die("error adding pack %s", packname.buf);
758                         if (open_pack_index(p))
759                                 die("error opening index for %s", packname.buf);
760                         for_each_object_in_pack(p, add_packed_commits, &oids);
761                         close_pack(p);
762                 }
763                 strbuf_release(&packname);
764         }
765
766         if (commit_hex) {
767                 for (i = 0; i < commit_hex->nr; i++) {
768                         const char *end;
769                         struct object_id oid;
770                         struct commit *result;
771
772                         if (commit_hex->items[i].string &&
773                             parse_oid_hex(commit_hex->items[i].string, &oid, &end))
774                                 continue;
775
776                         result = lookup_commit_reference_gently(the_repository, &oid, 1);
777
778                         if (result) {
779                                 ALLOC_GROW(oids.list, oids.nr + 1, oids.alloc);
780                                 oidcpy(&oids.list[oids.nr], &(result->object.oid));
781                                 oids.nr++;
782                         }
783                 }
784         }
785
786         if (!pack_indexes && !commit_hex)
787                 for_each_packed_object(add_packed_commits, &oids, 0);
788
789         close_reachable(&oids);
790
791         QSORT(oids.list, oids.nr, commit_compare);
792
793         count_distinct = 1;
794         for (i = 1; i < oids.nr; i++) {
795                 if (oidcmp(&oids.list[i-1], &oids.list[i]))
796                         count_distinct++;
797         }
798
799         if (count_distinct >= GRAPH_PARENT_MISSING)
800                 die(_("the commit graph format cannot write %d commits"), count_distinct);
801
802         commits.nr = 0;
803         commits.alloc = count_distinct;
804         ALLOC_ARRAY(commits.list, commits.alloc);
805
806         num_extra_edges = 0;
807         for (i = 0; i < oids.nr; i++) {
808                 int num_parents = 0;
809                 if (i > 0 && !oidcmp(&oids.list[i-1], &oids.list[i]))
810                         continue;
811
812                 commits.list[commits.nr] = lookup_commit(the_repository, &oids.list[i]);
813                 parse_commit(commits.list[commits.nr]);
814
815                 for (parent = commits.list[commits.nr]->parents;
816                      parent; parent = parent->next)
817                         num_parents++;
818
819                 if (num_parents > 2)
820                         num_extra_edges += num_parents - 1;
821
822                 commits.nr++;
823         }
824         num_chunks = num_extra_edges ? 4 : 3;
825
826         if (commits.nr >= GRAPH_PARENT_MISSING)
827                 die(_("too many commits to write graph"));
828
829         compute_generation_numbers(&commits);
830
831         graph_name = get_commit_graph_filename(obj_dir);
832         if (safe_create_leading_directories(graph_name))
833                 die_errno(_("unable to create leading directories of %s"),
834                           graph_name);
835
836         hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
837         f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
838
839         hashwrite_be32(f, GRAPH_SIGNATURE);
840
841         hashwrite_u8(f, GRAPH_VERSION);
842         hashwrite_u8(f, GRAPH_OID_VERSION);
843         hashwrite_u8(f, num_chunks);
844         hashwrite_u8(f, 0); /* unused padding byte */
845
846         chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
847         chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
848         chunk_ids[2] = GRAPH_CHUNKID_DATA;
849         if (num_extra_edges)
850                 chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
851         else
852                 chunk_ids[3] = 0;
853         chunk_ids[4] = 0;
854
855         chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
856         chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
857         chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.nr;
858         chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.nr;
859         chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
860
861         for (i = 0; i <= num_chunks; i++) {
862                 uint32_t chunk_write[3];
863
864                 chunk_write[0] = htonl(chunk_ids[i]);
865                 chunk_write[1] = htonl(chunk_offsets[i] >> 32);
866                 chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
867                 hashwrite(f, chunk_write, 12);
868         }
869
870         write_graph_chunk_fanout(f, commits.list, commits.nr);
871         write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
872         write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
873         write_graph_chunk_large_edges(f, commits.list, commits.nr);
874
875         close_commit_graph();
876         finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
877         commit_lock_file(&lk);
878
879         free(oids.list);
880         oids.alloc = 0;
881         oids.nr = 0;
882 }
883
884 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
885 static int verify_commit_graph_error;
886
887 static void graph_report(const char *fmt, ...)
888 {
889         va_list ap;
890
891         verify_commit_graph_error = 1;
892         va_start(ap, fmt);
893         vfprintf(stderr, fmt, ap);
894         fprintf(stderr, "\n");
895         va_end(ap);
896 }
897
898 #define GENERATION_ZERO_EXISTS 1
899 #define GENERATION_NUMBER_EXISTS 2
900
901 int verify_commit_graph(struct repository *r, struct commit_graph *g)
902 {
903         uint32_t i, cur_fanout_pos = 0;
904         struct object_id prev_oid, cur_oid, checksum;
905         int generation_zero = 0;
906         struct hashfile *f;
907         int devnull;
908
909         if (!g) {
910                 graph_report("no commit-graph file loaded");
911                 return 1;
912         }
913
914         verify_commit_graph_error = 0;
915
916         if (!g->chunk_oid_fanout)
917                 graph_report("commit-graph is missing the OID Fanout chunk");
918         if (!g->chunk_oid_lookup)
919                 graph_report("commit-graph is missing the OID Lookup chunk");
920         if (!g->chunk_commit_data)
921                 graph_report("commit-graph is missing the Commit Data chunk");
922
923         if (verify_commit_graph_error)
924                 return verify_commit_graph_error;
925
926         devnull = open("/dev/null", O_WRONLY);
927         f = hashfd(devnull, NULL);
928         hashwrite(f, g->data, g->data_len - g->hash_len);
929         finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
930         if (hashcmp(checksum.hash, g->data + g->data_len - g->hash_len)) {
931                 graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
932                 verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
933         }
934
935         for (i = 0; i < g->num_commits; i++) {
936                 struct commit *graph_commit;
937
938                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
939
940                 if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
941                         graph_report("commit-graph has incorrect OID order: %s then %s",
942                                      oid_to_hex(&prev_oid),
943                                      oid_to_hex(&cur_oid));
944
945                 oidcpy(&prev_oid, &cur_oid);
946
947                 while (cur_oid.hash[0] > cur_fanout_pos) {
948                         uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
949
950                         if (i != fanout_value)
951                                 graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u",
952                                              cur_fanout_pos, fanout_value, i);
953                         cur_fanout_pos++;
954                 }
955
956                 graph_commit = lookup_commit(r, &cur_oid);
957                 if (!parse_commit_in_graph_one(g, graph_commit))
958                         graph_report("failed to parse %s from commit-graph",
959                                      oid_to_hex(&cur_oid));
960         }
961
962         while (cur_fanout_pos < 256) {
963                 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
964
965                 if (g->num_commits != fanout_value)
966                         graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u",
967                                      cur_fanout_pos, fanout_value, i);
968
969                 cur_fanout_pos++;
970         }
971
972         if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
973                 return verify_commit_graph_error;
974
975         for (i = 0; i < g->num_commits; i++) {
976                 struct commit *graph_commit, *odb_commit;
977                 struct commit_list *graph_parents, *odb_parents;
978                 uint32_t max_generation = 0;
979
980                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
981
982                 graph_commit = lookup_commit(r, &cur_oid);
983                 odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r));
984                 if (parse_commit_internal(odb_commit, 0, 0)) {
985                         graph_report("failed to parse %s from object database",
986                                      oid_to_hex(&cur_oid));
987                         continue;
988                 }
989
990                 if (oidcmp(&get_commit_tree_in_graph_one(g, graph_commit)->object.oid,
991                            get_commit_tree_oid(odb_commit)))
992                         graph_report("root tree OID for commit %s in commit-graph is %s != %s",
993                                      oid_to_hex(&cur_oid),
994                                      oid_to_hex(get_commit_tree_oid(graph_commit)),
995                                      oid_to_hex(get_commit_tree_oid(odb_commit)));
996
997                 graph_parents = graph_commit->parents;
998                 odb_parents = odb_commit->parents;
999
1000                 while (graph_parents) {
1001                         if (odb_parents == NULL) {
1002                                 graph_report("commit-graph parent list for commit %s is too long",
1003                                              oid_to_hex(&cur_oid));
1004                                 break;
1005                         }
1006
1007                         if (oidcmp(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
1008                                 graph_report("commit-graph parent for %s is %s != %s",
1009                                              oid_to_hex(&cur_oid),
1010                                              oid_to_hex(&graph_parents->item->object.oid),
1011                                              oid_to_hex(&odb_parents->item->object.oid));
1012
1013                         if (graph_parents->item->generation > max_generation)
1014                                 max_generation = graph_parents->item->generation;
1015
1016                         graph_parents = graph_parents->next;
1017                         odb_parents = odb_parents->next;
1018                 }
1019
1020                 if (odb_parents != NULL)
1021                         graph_report("commit-graph parent list for commit %s terminates early",
1022                                      oid_to_hex(&cur_oid));
1023
1024                 if (!graph_commit->generation) {
1025                         if (generation_zero == GENERATION_NUMBER_EXISTS)
1026                                 graph_report("commit-graph has generation number zero for commit %s, but non-zero elsewhere",
1027                                              oid_to_hex(&cur_oid));
1028                         generation_zero = GENERATION_ZERO_EXISTS;
1029                 } else if (generation_zero == GENERATION_ZERO_EXISTS)
1030                         graph_report("commit-graph has non-zero generation number for commit %s, but zero elsewhere",
1031                                      oid_to_hex(&cur_oid));
1032
1033                 if (generation_zero == GENERATION_ZERO_EXISTS)
1034                         continue;
1035
1036                 /*
1037                  * If one of our parents has generation GENERATION_NUMBER_MAX, then
1038                  * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
1039                  * extra logic in the following condition.
1040                  */
1041                 if (max_generation == GENERATION_NUMBER_MAX)
1042                         max_generation--;
1043
1044                 if (graph_commit->generation != max_generation + 1)
1045                         graph_report("commit-graph generation for commit %s is %u != %u",
1046                                      oid_to_hex(&cur_oid),
1047                                      graph_commit->generation,
1048                                      max_generation + 1);
1049
1050                 if (graph_commit->date != odb_commit->date)
1051                         graph_report("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime,
1052                                      oid_to_hex(&cur_oid),
1053                                      graph_commit->date,
1054                                      odb_commit->date);
1055         }
1056
1057         return verify_commit_graph_error;
1058 }
1059
1060 void free_commit_graph(struct commit_graph *g)
1061 {
1062         if (!g)
1063                 return;
1064         if (g->graph_fd >= 0) {
1065                 munmap((void *)g->data, g->data_len);
1066                 g->data = NULL;
1067                 close(g->graph_fd);
1068         }
1069         free(g);
1070 }