sparse-index: add guard to ensure full index
[git] / commit-graph.c
1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "lockfile.h"
4 #include "pack.h"
5 #include "packfile.h"
6 #include "commit.h"
7 #include "object.h"
8 #include "refs.h"
9 #include "revision.h"
10 #include "hash-lookup.h"
11 #include "commit-graph.h"
12 #include "object-store.h"
13 #include "alloc.h"
14 #include "hashmap.h"
15 #include "replace-object.h"
16 #include "progress.h"
17 #include "bloom.h"
18 #include "commit-slab.h"
19 #include "shallow.h"
20 #include "json-writer.h"
21 #include "trace2.h"
22 #include "chunk-format.h"
23
24 void git_test_write_commit_graph_or_die(void)
25 {
26         int flags = 0;
27         if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
28                 return;
29
30         if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
31                 flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
32
33         if (write_commit_graph_reachable(the_repository->objects->odb,
34                                          flags, NULL))
35                 die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
36 }
37
38 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
39 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
40 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
41 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
42 #define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
43 #define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
44 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
45 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
46 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
47 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
48
49 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
50
51 #define GRAPH_VERSION_1 0x1
52 #define GRAPH_VERSION GRAPH_VERSION_1
53
54 #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
55 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
56 #define GRAPH_PARENT_NONE 0x70000000
57
58 #define GRAPH_LAST_EDGE 0x80000000
59
60 #define GRAPH_HEADER_SIZE 8
61 #define GRAPH_FANOUT_SIZE (4 * 256)
62 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * CHUNK_TOC_ENTRY_SIZE \
63                         + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
64
65 #define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
66
67 /* Remember to update object flag allocation in object.h */
68 #define REACHABLE       (1u<<15)
69
70 define_commit_slab(topo_level_slab, uint32_t);
71
72 /* Keep track of the order in which commits are added to our list. */
73 define_commit_slab(commit_pos, int);
74 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
75
76 static void set_commit_pos(struct repository *r, const struct object_id *oid)
77 {
78         static int32_t max_pos;
79         struct commit *commit = lookup_commit(r, oid);
80
81         if (!commit)
82                 return; /* should never happen, but be lenient */
83
84         *commit_pos_at(&commit_pos, commit) = max_pos++;
85 }
86
87 static int commit_pos_cmp(const void *va, const void *vb)
88 {
89         const struct commit *a = *(const struct commit **)va;
90         const struct commit *b = *(const struct commit **)vb;
91         return commit_pos_at(&commit_pos, a) -
92                commit_pos_at(&commit_pos, b);
93 }
94
95 define_commit_slab(commit_graph_data_slab, struct commit_graph_data);
96 static struct commit_graph_data_slab commit_graph_data_slab =
97         COMMIT_SLAB_INIT(1, commit_graph_data_slab);
98
99 uint32_t commit_graph_position(const struct commit *c)
100 {
101         struct commit_graph_data *data =
102                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
103
104         return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
105 }
106
107 timestamp_t commit_graph_generation(const struct commit *c)
108 {
109         struct commit_graph_data *data =
110                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
111
112         if (!data)
113                 return GENERATION_NUMBER_INFINITY;
114         else if (data->graph_pos == COMMIT_NOT_FROM_GRAPH)
115                 return GENERATION_NUMBER_INFINITY;
116
117         return data->generation;
118 }
119
120 static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
121 {
122         unsigned int i, nth_slab;
123         struct commit_graph_data *data =
124                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
125
126         if (data)
127                 return data;
128
129         nth_slab = c->index / commit_graph_data_slab.slab_size;
130         data = commit_graph_data_slab_at(&commit_graph_data_slab, c);
131
132         /*
133          * commit-slab initializes elements with zero, overwrite this with
134          * COMMIT_NOT_FROM_GRAPH for graph_pos.
135          *
136          * We avoid initializing generation with checking if graph position
137          * is not COMMIT_NOT_FROM_GRAPH.
138          */
139         for (i = 0; i < commit_graph_data_slab.slab_size; i++) {
140                 commit_graph_data_slab.slab[nth_slab][i].graph_pos =
141                         COMMIT_NOT_FROM_GRAPH;
142         }
143
144         return data;
145 }
146
147 /*
148  * Should be used only while writing commit-graph as it compares
149  * generation value of commits by directly accessing commit-slab.
150  */
151 static int commit_gen_cmp(const void *va, const void *vb)
152 {
153         const struct commit *a = *(const struct commit **)va;
154         const struct commit *b = *(const struct commit **)vb;
155
156         const timestamp_t generation_a = commit_graph_data_at(a)->generation;
157         const timestamp_t generation_b = commit_graph_data_at(b)->generation;
158         /* lower generation commits first */
159         if (generation_a < generation_b)
160                 return -1;
161         else if (generation_a > generation_b)
162                 return 1;
163
164         /* use date as a heuristic when generations are equal */
165         if (a->date < b->date)
166                 return -1;
167         else if (a->date > b->date)
168                 return 1;
169         return 0;
170 }
171
172 char *get_commit_graph_filename(struct object_directory *obj_dir)
173 {
174         return xstrfmt("%s/info/commit-graph", obj_dir->path);
175 }
176
177 static char *get_split_graph_filename(struct object_directory *odb,
178                                       const char *oid_hex)
179 {
180         return xstrfmt("%s/info/commit-graphs/graph-%s.graph", odb->path,
181                        oid_hex);
182 }
183
184 char *get_commit_graph_chain_filename(struct object_directory *odb)
185 {
186         return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
187 }
188
189 static uint8_t oid_version(void)
190 {
191         switch (hash_algo_by_ptr(the_hash_algo)) {
192         case GIT_HASH_SHA1:
193                 return 1;
194         case GIT_HASH_SHA256:
195                 return 2;
196         default:
197                 die(_("invalid hash version"));
198         }
199 }
200
201 static struct commit_graph *alloc_commit_graph(void)
202 {
203         struct commit_graph *g = xcalloc(1, sizeof(*g));
204
205         return g;
206 }
207
208 extern int read_replace_refs;
209
210 static int commit_graph_compatible(struct repository *r)
211 {
212         if (!r->gitdir)
213                 return 0;
214
215         if (read_replace_refs) {
216                 prepare_replace_object(r);
217                 if (hashmap_get_size(&r->objects->replace_map->map))
218                         return 0;
219         }
220
221         prepare_commit_graft(r);
222         if (r->parsed_objects &&
223             (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent))
224                 return 0;
225         if (is_repository_shallow(r))
226                 return 0;
227
228         return 1;
229 }
230
231 int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
232 {
233         *fd = git_open(graph_file);
234         if (*fd < 0)
235                 return 0;
236         if (fstat(*fd, st)) {
237                 close(*fd);
238                 return 0;
239         }
240         return 1;
241 }
242
243 struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
244                                                  int fd, struct stat *st,
245                                                  struct object_directory *odb)
246 {
247         void *graph_map;
248         size_t graph_size;
249         struct commit_graph *ret;
250
251         graph_size = xsize_t(st->st_size);
252
253         if (graph_size < GRAPH_MIN_SIZE) {
254                 close(fd);
255                 error(_("commit-graph file is too small"));
256                 return NULL;
257         }
258         graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
259         close(fd);
260         ret = parse_commit_graph(r, graph_map, graph_size);
261
262         if (ret)
263                 ret->odb = odb;
264         else
265                 munmap(graph_map, graph_size);
266
267         return ret;
268 }
269
270 static int verify_commit_graph_lite(struct commit_graph *g)
271 {
272         /*
273          * Basic validation shared between parse_commit_graph()
274          * which'll be called every time the graph is used, and the
275          * much more expensive verify_commit_graph() used by
276          * "commit-graph verify".
277          *
278          * There should only be very basic checks here to ensure that
279          * we don't e.g. segfault in fill_commit_in_graph(), but
280          * because this is a very hot codepath nothing that e.g. loops
281          * over g->num_commits, or runs a checksum on the commit-graph
282          * itself.
283          */
284         if (!g->chunk_oid_fanout) {
285                 error("commit-graph is missing the OID Fanout chunk");
286                 return 1;
287         }
288         if (!g->chunk_oid_lookup) {
289                 error("commit-graph is missing the OID Lookup chunk");
290                 return 1;
291         }
292         if (!g->chunk_commit_data) {
293                 error("commit-graph is missing the Commit Data chunk");
294                 return 1;
295         }
296
297         return 0;
298 }
299
300 static int graph_read_oid_lookup(const unsigned char *chunk_start,
301                                  size_t chunk_size, void *data)
302 {
303         struct commit_graph *g = data;
304         g->chunk_oid_lookup = chunk_start;
305         g->num_commits = chunk_size / g->hash_len;
306         return 0;
307 }
308
309 static int graph_read_bloom_data(const unsigned char *chunk_start,
310                                   size_t chunk_size, void *data)
311 {
312         struct commit_graph *g = data;
313         uint32_t hash_version;
314         g->chunk_bloom_data = chunk_start;
315         hash_version = get_be32(chunk_start);
316
317         if (hash_version != 1)
318                 return 0;
319
320         g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
321         g->bloom_filter_settings->hash_version = hash_version;
322         g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
323         g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
324         g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
325
326         return 0;
327 }
328
329 struct commit_graph *parse_commit_graph(struct repository *r,
330                                         void *graph_map, size_t graph_size)
331 {
332         const unsigned char *data;
333         struct commit_graph *graph;
334         uint32_t graph_signature;
335         unsigned char graph_version, hash_version;
336         struct chunkfile *cf = NULL;
337
338         if (!graph_map)
339                 return NULL;
340
341         if (graph_size < GRAPH_MIN_SIZE)
342                 return NULL;
343
344         data = (const unsigned char *)graph_map;
345
346         graph_signature = get_be32(data);
347         if (graph_signature != GRAPH_SIGNATURE) {
348                 error(_("commit-graph signature %X does not match signature %X"),
349                       graph_signature, GRAPH_SIGNATURE);
350                 return NULL;
351         }
352
353         graph_version = *(unsigned char*)(data + 4);
354         if (graph_version != GRAPH_VERSION) {
355                 error(_("commit-graph version %X does not match version %X"),
356                       graph_version, GRAPH_VERSION);
357                 return NULL;
358         }
359
360         hash_version = *(unsigned char*)(data + 5);
361         if (hash_version != oid_version()) {
362                 error(_("commit-graph hash version %X does not match version %X"),
363                       hash_version, oid_version());
364                 return NULL;
365         }
366
367         prepare_repo_settings(r);
368
369         graph = alloc_commit_graph();
370
371         graph->hash_len = the_hash_algo->rawsz;
372         graph->num_chunks = *(unsigned char*)(data + 6);
373         graph->data = graph_map;
374         graph->data_len = graph_size;
375
376         if (graph_size < GRAPH_HEADER_SIZE +
377                          (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE +
378                          GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
379                 error(_("commit-graph file is too small to hold %u chunks"),
380                       graph->num_chunks);
381                 free(graph);
382                 return NULL;
383         }
384
385         cf = init_chunkfile(NULL);
386
387         if (read_table_of_contents(cf, graph->data, graph_size,
388                                    GRAPH_HEADER_SIZE, graph->num_chunks))
389                 goto free_and_return;
390
391         pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
392                    (const unsigned char **)&graph->chunk_oid_fanout);
393         read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
394         pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
395         pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
396         pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
397         pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
398                    &graph->chunk_generation_data);
399         pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
400                    &graph->chunk_generation_data_overflow);
401
402         if (r->settings.commit_graph_read_changed_paths) {
403                 pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
404                            &graph->chunk_bloom_indexes);
405                 read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
406                            graph_read_bloom_data, graph);
407         }
408
409         if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
410                 init_bloom_filters();
411         } else {
412                 /* We need both the bloom chunks to exist together. Else ignore the data */
413                 graph->chunk_bloom_indexes = NULL;
414                 graph->chunk_bloom_data = NULL;
415                 FREE_AND_NULL(graph->bloom_filter_settings);
416         }
417
418         hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len);
419
420         if (verify_commit_graph_lite(graph))
421                 goto free_and_return;
422
423         free_chunkfile(cf);
424         return graph;
425
426 free_and_return:
427         free_chunkfile(cf);
428         free(graph->bloom_filter_settings);
429         free(graph);
430         return NULL;
431 }
432
433 static struct commit_graph *load_commit_graph_one(struct repository *r,
434                                                   const char *graph_file,
435                                                   struct object_directory *odb)
436 {
437
438         struct stat st;
439         int fd;
440         struct commit_graph *g;
441         int open_ok = open_commit_graph(graph_file, &fd, &st);
442
443         if (!open_ok)
444                 return NULL;
445
446         g = load_commit_graph_one_fd_st(r, fd, &st, odb);
447
448         if (g)
449                 g->filename = xstrdup(graph_file);
450
451         return g;
452 }
453
454 static struct commit_graph *load_commit_graph_v1(struct repository *r,
455                                                  struct object_directory *odb)
456 {
457         char *graph_name = get_commit_graph_filename(odb);
458         struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
459         free(graph_name);
460
461         return g;
462 }
463
464 static int add_graph_to_chain(struct commit_graph *g,
465                               struct commit_graph *chain,
466                               struct object_id *oids,
467                               int n)
468 {
469         struct commit_graph *cur_g = chain;
470
471         if (n && !g->chunk_base_graphs) {
472                 warning(_("commit-graph has no base graphs chunk"));
473                 return 0;
474         }
475
476         while (n) {
477                 n--;
478
479                 if (!cur_g ||
480                     !oideq(&oids[n], &cur_g->oid) ||
481                     !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) {
482                         warning(_("commit-graph chain does not match"));
483                         return 0;
484                 }
485
486                 cur_g = cur_g->base_graph;
487         }
488
489         g->base_graph = chain;
490
491         if (chain)
492                 g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
493
494         return 1;
495 }
496
497 static struct commit_graph *load_commit_graph_chain(struct repository *r,
498                                                     struct object_directory *odb)
499 {
500         struct commit_graph *graph_chain = NULL;
501         struct strbuf line = STRBUF_INIT;
502         struct stat st;
503         struct object_id *oids;
504         int i = 0, valid = 1, count;
505         char *chain_name = get_commit_graph_chain_filename(odb);
506         FILE *fp;
507         int stat_res;
508
509         fp = fopen(chain_name, "r");
510         stat_res = stat(chain_name, &st);
511         free(chain_name);
512
513         if (!fp ||
514             stat_res ||
515             st.st_size <= the_hash_algo->hexsz)
516                 return NULL;
517
518         count = st.st_size / (the_hash_algo->hexsz + 1);
519         oids = xcalloc(count, sizeof(struct object_id));
520
521         prepare_alt_odb(r);
522
523         for (i = 0; i < count; i++) {
524                 struct object_directory *odb;
525
526                 if (strbuf_getline_lf(&line, fp) == EOF)
527                         break;
528
529                 if (get_oid_hex(line.buf, &oids[i])) {
530                         warning(_("invalid commit-graph chain: line '%s' not a hash"),
531                                 line.buf);
532                         valid = 0;
533                         break;
534                 }
535
536                 valid = 0;
537                 for (odb = r->objects->odb; odb; odb = odb->next) {
538                         char *graph_name = get_split_graph_filename(odb, line.buf);
539                         struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
540
541                         free(graph_name);
542
543                         if (g) {
544                                 if (add_graph_to_chain(g, graph_chain, oids, i)) {
545                                         graph_chain = g;
546                                         valid = 1;
547                                 }
548
549                                 break;
550                         }
551                 }
552
553                 if (!valid) {
554                         warning(_("unable to find all commit-graph files"));
555                         break;
556                 }
557         }
558
559         free(oids);
560         fclose(fp);
561         strbuf_release(&line);
562
563         return graph_chain;
564 }
565
566 /*
567  * returns 1 if and only if all graphs in the chain have
568  * corrected commit dates stored in the generation_data chunk.
569  */
570 static int validate_mixed_generation_chain(struct commit_graph *g)
571 {
572         int read_generation_data = 1;
573         struct commit_graph *p = g;
574
575         while (read_generation_data && p) {
576                 read_generation_data = p->read_generation_data;
577                 p = p->base_graph;
578         }
579
580         if (read_generation_data)
581                 return 1;
582
583         while (g) {
584                 g->read_generation_data = 0;
585                 g = g->base_graph;
586         }
587
588         return 0;
589 }
590
591 struct commit_graph *read_commit_graph_one(struct repository *r,
592                                            struct object_directory *odb)
593 {
594         struct commit_graph *g = load_commit_graph_v1(r, odb);
595
596         if (!g)
597                 g = load_commit_graph_chain(r, odb);
598
599         validate_mixed_generation_chain(g);
600
601         return g;
602 }
603
604 static void prepare_commit_graph_one(struct repository *r,
605                                      struct object_directory *odb)
606 {
607
608         if (r->objects->commit_graph)
609                 return;
610
611         r->objects->commit_graph = read_commit_graph_one(r, odb);
612 }
613
614 /*
615  * Return 1 if commit_graph is non-NULL, and 0 otherwise.
616  *
617  * On the first invocation, this function attempts to load the commit
618  * graph if the_repository is configured to have one.
619  */
620 static int prepare_commit_graph(struct repository *r)
621 {
622         struct object_directory *odb;
623
624         /*
625          * This must come before the "already attempted?" check below, because
626          * we want to disable even an already-loaded graph file.
627          */
628         if (r->commit_graph_disabled)
629                 return 0;
630
631         if (r->objects->commit_graph_attempted)
632                 return !!r->objects->commit_graph;
633         r->objects->commit_graph_attempted = 1;
634
635         prepare_repo_settings(r);
636
637         if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
638             r->settings.core_commit_graph != 1)
639                 /*
640                  * This repository is not configured to use commit graphs, so
641                  * do not load one. (But report commit_graph_attempted anyway
642                  * so that commit graph loading is not attempted again for this
643                  * repository.)
644                  */
645                 return 0;
646
647         if (!commit_graph_compatible(r))
648                 return 0;
649
650         prepare_alt_odb(r);
651         for (odb = r->objects->odb;
652              !r->objects->commit_graph && odb;
653              odb = odb->next)
654                 prepare_commit_graph_one(r, odb);
655         return !!r->objects->commit_graph;
656 }
657
658 int generation_numbers_enabled(struct repository *r)
659 {
660         uint32_t first_generation;
661         struct commit_graph *g;
662         if (!prepare_commit_graph(r))
663                return 0;
664
665         g = r->objects->commit_graph;
666
667         if (!g->num_commits)
668                 return 0;
669
670         first_generation = get_be32(g->chunk_commit_data +
671                                     g->hash_len + 8) >> 2;
672
673         return !!first_generation;
674 }
675
676 int corrected_commit_dates_enabled(struct repository *r)
677 {
678         struct commit_graph *g;
679         if (!prepare_commit_graph(r))
680                 return 0;
681
682         g = r->objects->commit_graph;
683
684         if (!g->num_commits)
685                 return 0;
686
687         return g->read_generation_data;
688 }
689
690 struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
691 {
692         struct commit_graph *g = r->objects->commit_graph;
693         while (g) {
694                 if (g->bloom_filter_settings)
695                         return g->bloom_filter_settings;
696                 g = g->base_graph;
697         }
698         return NULL;
699 }
700
701 static void close_commit_graph_one(struct commit_graph *g)
702 {
703         if (!g)
704                 return;
705
706         close_commit_graph_one(g->base_graph);
707         free_commit_graph(g);
708 }
709
710 void close_commit_graph(struct raw_object_store *o)
711 {
712         close_commit_graph_one(o->commit_graph);
713         o->commit_graph = NULL;
714 }
715
716 static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
717 {
718         return bsearch_hash(oid->hash, g->chunk_oid_fanout,
719                             g->chunk_oid_lookup, g->hash_len, pos);
720 }
721
722 static void load_oid_from_graph(struct commit_graph *g,
723                                 uint32_t pos,
724                                 struct object_id *oid)
725 {
726         uint32_t lex_index;
727
728         while (g && pos < g->num_commits_in_base)
729                 g = g->base_graph;
730
731         if (!g)
732                 BUG("NULL commit-graph");
733
734         if (pos >= g->num_commits + g->num_commits_in_base)
735                 die(_("invalid commit position. commit-graph is likely corrupt"));
736
737         lex_index = pos - g->num_commits_in_base;
738
739         hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index);
740 }
741
742 static struct commit_list **insert_parent_or_die(struct repository *r,
743                                                  struct commit_graph *g,
744                                                  uint32_t pos,
745                                                  struct commit_list **pptr)
746 {
747         struct commit *c;
748         struct object_id oid;
749
750         if (pos >= g->num_commits + g->num_commits_in_base)
751                 die("invalid parent position %"PRIu32, pos);
752
753         load_oid_from_graph(g, pos, &oid);
754         c = lookup_commit(r, &oid);
755         if (!c)
756                 die(_("could not find commit %s"), oid_to_hex(&oid));
757         commit_graph_data_at(c)->graph_pos = pos;
758         return &commit_list_insert(c, pptr)->next;
759 }
760
761 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
762 {
763         const unsigned char *commit_data;
764         struct commit_graph_data *graph_data;
765         uint32_t lex_index, offset_pos;
766         uint64_t date_high, date_low, offset;
767
768         while (pos < g->num_commits_in_base)
769                 g = g->base_graph;
770
771         if (pos >= g->num_commits + g->num_commits_in_base)
772                 die(_("invalid commit position. commit-graph is likely corrupt"));
773
774         lex_index = pos - g->num_commits_in_base;
775         commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
776
777         graph_data = commit_graph_data_at(item);
778         graph_data->graph_pos = pos;
779
780         date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
781         date_low = get_be32(commit_data + g->hash_len + 12);
782         item->date = (timestamp_t)((date_high << 32) | date_low);
783
784         if (g->read_generation_data) {
785                 offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
786
787                 if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
788                         if (!g->chunk_generation_data_overflow)
789                                 die(_("commit-graph requires overflow generation data but has none"));
790
791                         offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
792                         graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
793                 } else
794                         graph_data->generation = item->date + offset;
795         } else
796                 graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
797
798         if (g->topo_levels)
799                 *topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
800 }
801
802 static inline void set_commit_tree(struct commit *c, struct tree *t)
803 {
804         c->maybe_tree = t;
805 }
806
807 static int fill_commit_in_graph(struct repository *r,
808                                 struct commit *item,
809                                 struct commit_graph *g, uint32_t pos)
810 {
811         uint32_t edge_value;
812         uint32_t *parent_data_ptr;
813         struct commit_list **pptr;
814         const unsigned char *commit_data;
815         uint32_t lex_index;
816
817         while (pos < g->num_commits_in_base)
818                 g = g->base_graph;
819
820         fill_commit_graph_info(item, g, pos);
821
822         lex_index = pos - g->num_commits_in_base;
823         commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
824
825         item->object.parsed = 1;
826
827         set_commit_tree(item, NULL);
828
829         pptr = &item->parents;
830
831         edge_value = get_be32(commit_data + g->hash_len);
832         if (edge_value == GRAPH_PARENT_NONE)
833                 return 1;
834         pptr = insert_parent_or_die(r, g, edge_value, pptr);
835
836         edge_value = get_be32(commit_data + g->hash_len + 4);
837         if (edge_value == GRAPH_PARENT_NONE)
838                 return 1;
839         if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
840                 pptr = insert_parent_or_die(r, g, edge_value, pptr);
841                 return 1;
842         }
843
844         parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
845                           4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
846         do {
847                 edge_value = get_be32(parent_data_ptr);
848                 pptr = insert_parent_or_die(r, g,
849                                             edge_value & GRAPH_EDGE_LAST_MASK,
850                                             pptr);
851                 parent_data_ptr++;
852         } while (!(edge_value & GRAPH_LAST_EDGE));
853
854         return 1;
855 }
856
857 static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
858 {
859         uint32_t graph_pos = commit_graph_position(item);
860         if (graph_pos != COMMIT_NOT_FROM_GRAPH) {
861                 *pos = graph_pos;
862                 return 1;
863         } else {
864                 struct commit_graph *cur_g = g;
865                 uint32_t lex_index;
866
867                 while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index))
868                         cur_g = cur_g->base_graph;
869
870                 if (cur_g) {
871                         *pos = lex_index + cur_g->num_commits_in_base;
872                         return 1;
873                 }
874
875                 return 0;
876         }
877 }
878
879 static int parse_commit_in_graph_one(struct repository *r,
880                                      struct commit_graph *g,
881                                      struct commit *item)
882 {
883         uint32_t pos;
884
885         if (item->object.parsed)
886                 return 1;
887
888         if (find_commit_in_graph(item, g, &pos))
889                 return fill_commit_in_graph(r, item, g, pos);
890
891         return 0;
892 }
893
894 int parse_commit_in_graph(struct repository *r, struct commit *item)
895 {
896         static int checked_env = 0;
897
898         if (!checked_env &&
899             git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
900                 die("dying as requested by the '%s' variable on commit-graph parse!",
901                     GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
902         checked_env = 1;
903
904         if (!prepare_commit_graph(r))
905                 return 0;
906         return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
907 }
908
909 void load_commit_graph_info(struct repository *r, struct commit *item)
910 {
911         uint32_t pos;
912         if (!prepare_commit_graph(r))
913                 return;
914         if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
915                 fill_commit_graph_info(item, r->objects->commit_graph, pos);
916 }
917
918 static struct tree *load_tree_for_commit(struct repository *r,
919                                          struct commit_graph *g,
920                                          struct commit *c)
921 {
922         struct object_id oid;
923         const unsigned char *commit_data;
924         uint32_t graph_pos = commit_graph_position(c);
925
926         while (graph_pos < g->num_commits_in_base)
927                 g = g->base_graph;
928
929         commit_data = g->chunk_commit_data +
930                         GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base);
931
932         hashcpy(oid.hash, commit_data);
933         set_commit_tree(c, lookup_tree(r, &oid));
934
935         return c->maybe_tree;
936 }
937
938 static struct tree *get_commit_tree_in_graph_one(struct repository *r,
939                                                  struct commit_graph *g,
940                                                  const struct commit *c)
941 {
942         if (c->maybe_tree)
943                 return c->maybe_tree;
944         if (commit_graph_position(c) == COMMIT_NOT_FROM_GRAPH)
945                 BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
946
947         return load_tree_for_commit(r, g, (struct commit *)c);
948 }
949
950 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
951 {
952         return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
953 }
954
955 struct packed_commit_list {
956         struct commit **list;
957         size_t nr;
958         size_t alloc;
959 };
960
961 struct write_commit_graph_context {
962         struct repository *r;
963         struct object_directory *odb;
964         char *graph_name;
965         struct oid_array oids;
966         struct packed_commit_list commits;
967         int num_extra_edges;
968         int num_generation_data_overflows;
969         unsigned long approx_nr_objects;
970         struct progress *progress;
971         int progress_done;
972         uint64_t progress_cnt;
973
974         char *base_graph_name;
975         int num_commit_graphs_before;
976         int num_commit_graphs_after;
977         char **commit_graph_filenames_before;
978         char **commit_graph_filenames_after;
979         char **commit_graph_hash_after;
980         uint32_t new_num_commits_in_base;
981         struct commit_graph *new_base_graph;
982
983         unsigned append:1,
984                  report_progress:1,
985                  split:1,
986                  changed_paths:1,
987                  order_by_pack:1,
988                  write_generation_data:1,
989                  trust_generation_numbers:1;
990
991         struct topo_level_slab *topo_levels;
992         const struct commit_graph_opts *opts;
993         size_t total_bloom_filter_data_size;
994         const struct bloom_filter_settings *bloom_settings;
995
996         int count_bloom_filter_computed;
997         int count_bloom_filter_not_computed;
998         int count_bloom_filter_trunc_empty;
999         int count_bloom_filter_trunc_large;
1000 };
1001
1002 static int write_graph_chunk_fanout(struct hashfile *f,
1003                                     void *data)
1004 {
1005         struct write_commit_graph_context *ctx = data;
1006         int i, count = 0;
1007         struct commit **list = ctx->commits.list;
1008
1009         /*
1010          * Write the first-level table (the list is sorted,
1011          * but we use a 256-entry lookup to be able to avoid
1012          * having to do eight extra binary search iterations).
1013          */
1014         for (i = 0; i < 256; i++) {
1015                 while (count < ctx->commits.nr) {
1016                         if ((*list)->object.oid.hash[0] != i)
1017                                 break;
1018                         display_progress(ctx->progress, ++ctx->progress_cnt);
1019                         count++;
1020                         list++;
1021                 }
1022
1023                 hashwrite_be32(f, count);
1024         }
1025
1026         return 0;
1027 }
1028
1029 static int write_graph_chunk_oids(struct hashfile *f,
1030                                   void *data)
1031 {
1032         struct write_commit_graph_context *ctx = data;
1033         struct commit **list = ctx->commits.list;
1034         int count;
1035         for (count = 0; count < ctx->commits.nr; count++, list++) {
1036                 display_progress(ctx->progress, ++ctx->progress_cnt);
1037                 hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
1038         }
1039
1040         return 0;
1041 }
1042
1043 static const struct object_id *commit_to_oid(size_t index, const void *table)
1044 {
1045         const struct commit * const *commits = table;
1046         return &commits[index]->object.oid;
1047 }
1048
1049 static int write_graph_chunk_data(struct hashfile *f,
1050                                   void *data)
1051 {
1052         struct write_commit_graph_context *ctx = data;
1053         struct commit **list = ctx->commits.list;
1054         struct commit **last = ctx->commits.list + ctx->commits.nr;
1055         uint32_t num_extra_edges = 0;
1056
1057         while (list < last) {
1058                 struct commit_list *parent;
1059                 struct object_id *tree;
1060                 int edge_value;
1061                 uint32_t packedDate[2];
1062                 display_progress(ctx->progress, ++ctx->progress_cnt);
1063
1064                 if (repo_parse_commit_no_graph(ctx->r, *list))
1065                         die(_("unable to parse commit %s"),
1066                                 oid_to_hex(&(*list)->object.oid));
1067                 tree = get_commit_tree_oid(*list);
1068                 hashwrite(f, tree->hash, the_hash_algo->rawsz);
1069
1070                 parent = (*list)->parents;
1071
1072                 if (!parent)
1073                         edge_value = GRAPH_PARENT_NONE;
1074                 else {
1075                         edge_value = oid_pos(&parent->item->object.oid,
1076                                              ctx->commits.list,
1077                                              ctx->commits.nr,
1078                                              commit_to_oid);
1079
1080                         if (edge_value >= 0)
1081                                 edge_value += ctx->new_num_commits_in_base;
1082                         else if (ctx->new_base_graph) {
1083                                 uint32_t pos;
1084                                 if (find_commit_in_graph(parent->item,
1085                                                          ctx->new_base_graph,
1086                                                          &pos))
1087                                         edge_value = pos;
1088                         }
1089
1090                         if (edge_value < 0)
1091                                 BUG("missing parent %s for commit %s",
1092                                     oid_to_hex(&parent->item->object.oid),
1093                                     oid_to_hex(&(*list)->object.oid));
1094                 }
1095
1096                 hashwrite_be32(f, edge_value);
1097
1098                 if (parent)
1099                         parent = parent->next;
1100
1101                 if (!parent)
1102                         edge_value = GRAPH_PARENT_NONE;
1103                 else if (parent->next)
1104                         edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
1105                 else {
1106                         edge_value = oid_pos(&parent->item->object.oid,
1107                                              ctx->commits.list,
1108                                              ctx->commits.nr,
1109                                              commit_to_oid);
1110
1111                         if (edge_value >= 0)
1112                                 edge_value += ctx->new_num_commits_in_base;
1113                         else if (ctx->new_base_graph) {
1114                                 uint32_t pos;
1115                                 if (find_commit_in_graph(parent->item,
1116                                                          ctx->new_base_graph,
1117                                                          &pos))
1118                                         edge_value = pos;
1119                         }
1120
1121                         if (edge_value < 0)
1122                                 BUG("missing parent %s for commit %s",
1123                                     oid_to_hex(&parent->item->object.oid),
1124                                     oid_to_hex(&(*list)->object.oid));
1125                 }
1126
1127                 hashwrite_be32(f, edge_value);
1128
1129                 if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
1130                         do {
1131                                 num_extra_edges++;
1132                                 parent = parent->next;
1133                         } while (parent);
1134                 }
1135
1136                 if (sizeof((*list)->date) > 4)
1137                         packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
1138                 else
1139                         packedDate[0] = 0;
1140
1141                 packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
1142
1143                 packedDate[1] = htonl((*list)->date);
1144                 hashwrite(f, packedDate, 8);
1145
1146                 list++;
1147         }
1148
1149         return 0;
1150 }
1151
1152 static int write_graph_chunk_generation_data(struct hashfile *f,
1153                                              void *data)
1154 {
1155         struct write_commit_graph_context *ctx = data;
1156         int i, num_generation_data_overflows = 0;
1157
1158         for (i = 0; i < ctx->commits.nr; i++) {
1159                 struct commit *c = ctx->commits.list[i];
1160                 timestamp_t offset;
1161                 repo_parse_commit(ctx->r, c);
1162                 offset = commit_graph_data_at(c)->generation - c->date;
1163                 display_progress(ctx->progress, ++ctx->progress_cnt);
1164
1165                 if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1166                         offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows;
1167                         num_generation_data_overflows++;
1168                 }
1169
1170                 hashwrite_be32(f, offset);
1171         }
1172
1173         return 0;
1174 }
1175
1176 static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
1177                                                       void *data)
1178 {
1179         struct write_commit_graph_context *ctx = data;
1180         int i;
1181         for (i = 0; i < ctx->commits.nr; i++) {
1182                 struct commit *c = ctx->commits.list[i];
1183                 timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1184                 display_progress(ctx->progress, ++ctx->progress_cnt);
1185
1186                 if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1187                         hashwrite_be32(f, offset >> 32);
1188                         hashwrite_be32(f, (uint32_t) offset);
1189                 }
1190         }
1191
1192         return 0;
1193 }
1194
1195 static int write_graph_chunk_extra_edges(struct hashfile *f,
1196                                          void *data)
1197 {
1198         struct write_commit_graph_context *ctx = data;
1199         struct commit **list = ctx->commits.list;
1200         struct commit **last = ctx->commits.list + ctx->commits.nr;
1201         struct commit_list *parent;
1202
1203         while (list < last) {
1204                 int num_parents = 0;
1205
1206                 display_progress(ctx->progress, ++ctx->progress_cnt);
1207
1208                 for (parent = (*list)->parents; num_parents < 3 && parent;
1209                      parent = parent->next)
1210                         num_parents++;
1211
1212                 if (num_parents <= 2) {
1213                         list++;
1214                         continue;
1215                 }
1216
1217                 /* Since num_parents > 2, this initializer is safe. */
1218                 for (parent = (*list)->parents->next; parent; parent = parent->next) {
1219                         int edge_value = oid_pos(&parent->item->object.oid,
1220                                                  ctx->commits.list,
1221                                                  ctx->commits.nr,
1222                                                  commit_to_oid);
1223
1224                         if (edge_value >= 0)
1225                                 edge_value += ctx->new_num_commits_in_base;
1226                         else if (ctx->new_base_graph) {
1227                                 uint32_t pos;
1228                                 if (find_commit_in_graph(parent->item,
1229                                                          ctx->new_base_graph,
1230                                                          &pos))
1231                                         edge_value = pos;
1232                         }
1233
1234                         if (edge_value < 0)
1235                                 BUG("missing parent %s for commit %s",
1236                                     oid_to_hex(&parent->item->object.oid),
1237                                     oid_to_hex(&(*list)->object.oid));
1238                         else if (!parent->next)
1239                                 edge_value |= GRAPH_LAST_EDGE;
1240
1241                         hashwrite_be32(f, edge_value);
1242                 }
1243
1244                 list++;
1245         }
1246
1247         return 0;
1248 }
1249
1250 static int write_graph_chunk_bloom_indexes(struct hashfile *f,
1251                                            void *data)
1252 {
1253         struct write_commit_graph_context *ctx = data;
1254         struct commit **list = ctx->commits.list;
1255         struct commit **last = ctx->commits.list + ctx->commits.nr;
1256         uint32_t cur_pos = 0;
1257
1258         while (list < last) {
1259                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1260                 size_t len = filter ? filter->len : 0;
1261                 cur_pos += len;
1262                 display_progress(ctx->progress, ++ctx->progress_cnt);
1263                 hashwrite_be32(f, cur_pos);
1264                 list++;
1265         }
1266
1267         return 0;
1268 }
1269
1270 static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1271 {
1272         struct json_writer jw = JSON_WRITER_INIT;
1273
1274         jw_object_begin(&jw, 0);
1275         jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1276         jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1277         jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1278         jw_object_intmax(&jw, "max_changed_paths", ctx->bloom_settings->max_changed_paths);
1279         jw_end(&jw);
1280
1281         trace2_data_json("bloom", ctx->r, "settings", &jw);
1282
1283         jw_release(&jw);
1284 }
1285
1286 static int write_graph_chunk_bloom_data(struct hashfile *f,
1287                                         void *data)
1288 {
1289         struct write_commit_graph_context *ctx = data;
1290         struct commit **list = ctx->commits.list;
1291         struct commit **last = ctx->commits.list + ctx->commits.nr;
1292
1293         trace2_bloom_filter_settings(ctx);
1294
1295         hashwrite_be32(f, ctx->bloom_settings->hash_version);
1296         hashwrite_be32(f, ctx->bloom_settings->num_hashes);
1297         hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
1298
1299         while (list < last) {
1300                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1301                 size_t len = filter ? filter->len : 0;
1302
1303                 display_progress(ctx->progress, ++ctx->progress_cnt);
1304                 if (len)
1305                         hashwrite(f, filter->data, len * sizeof(unsigned char));
1306                 list++;
1307         }
1308
1309         return 0;
1310 }
1311
1312 static int add_packed_commits(const struct object_id *oid,
1313                               struct packed_git *pack,
1314                               uint32_t pos,
1315                               void *data)
1316 {
1317         struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
1318         enum object_type type;
1319         off_t offset = nth_packed_object_offset(pack, pos);
1320         struct object_info oi = OBJECT_INFO_INIT;
1321
1322         if (ctx->progress)
1323                 display_progress(ctx->progress, ++ctx->progress_done);
1324
1325         oi.typep = &type;
1326         if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
1327                 die(_("unable to get type of object %s"), oid_to_hex(oid));
1328
1329         if (type != OBJ_COMMIT)
1330                 return 0;
1331
1332         oid_array_append(&ctx->oids, oid);
1333         set_commit_pos(ctx->r, oid);
1334
1335         return 0;
1336 }
1337
1338 static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
1339 {
1340         struct commit_list *parent;
1341         for (parent = commit->parents; parent; parent = parent->next) {
1342                 if (!(parent->item->object.flags & REACHABLE)) {
1343                         oid_array_append(&ctx->oids, &parent->item->object.oid);
1344                         parent->item->object.flags |= REACHABLE;
1345                 }
1346         }
1347 }
1348
1349 static void close_reachable(struct write_commit_graph_context *ctx)
1350 {
1351         int i;
1352         struct commit *commit;
1353         enum commit_graph_split_flags flags = ctx->opts ?
1354                 ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1355
1356         if (ctx->report_progress)
1357                 ctx->progress = start_delayed_progress(
1358                                         _("Loading known commits in commit graph"),
1359                                         ctx->oids.nr);
1360         for (i = 0; i < ctx->oids.nr; i++) {
1361                 display_progress(ctx->progress, i + 1);
1362                 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1363                 if (commit)
1364                         commit->object.flags |= REACHABLE;
1365         }
1366         stop_progress(&ctx->progress);
1367
1368         /*
1369          * As this loop runs, ctx->oids.nr may grow, but not more
1370          * than the number of missing commits in the reachable
1371          * closure.
1372          */
1373         if (ctx->report_progress)
1374                 ctx->progress = start_delayed_progress(
1375                                         _("Expanding reachable commits in commit graph"),
1376                                         0);
1377         for (i = 0; i < ctx->oids.nr; i++) {
1378                 display_progress(ctx->progress, i + 1);
1379                 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1380
1381                 if (!commit)
1382                         continue;
1383                 if (ctx->split) {
1384                         if ((!repo_parse_commit(ctx->r, commit) &&
1385                              commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) ||
1386                             flags == COMMIT_GRAPH_SPLIT_REPLACE)
1387                                 add_missing_parents(ctx, commit);
1388                 } else if (!repo_parse_commit_no_graph(ctx->r, commit))
1389                         add_missing_parents(ctx, commit);
1390         }
1391         stop_progress(&ctx->progress);
1392
1393         if (ctx->report_progress)
1394                 ctx->progress = start_delayed_progress(
1395                                         _("Clearing commit marks in commit graph"),
1396                                         ctx->oids.nr);
1397         for (i = 0; i < ctx->oids.nr; i++) {
1398                 display_progress(ctx->progress, i + 1);
1399                 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1400
1401                 if (commit)
1402                         commit->object.flags &= ~REACHABLE;
1403         }
1404         stop_progress(&ctx->progress);
1405 }
1406
1407 static void compute_topological_levels(struct write_commit_graph_context *ctx)
1408 {
1409         int i;
1410         struct commit_list *list = NULL;
1411
1412         if (ctx->report_progress)
1413                 ctx->progress = start_delayed_progress(
1414                                         _("Computing commit graph topological levels"),
1415                                         ctx->commits.nr);
1416         for (i = 0; i < ctx->commits.nr; i++) {
1417                 struct commit *c = ctx->commits.list[i];
1418                 uint32_t level;
1419
1420                 repo_parse_commit(ctx->r, c);
1421                 level = *topo_level_slab_at(ctx->topo_levels, c);
1422
1423                 display_progress(ctx->progress, i + 1);
1424                 if (level != GENERATION_NUMBER_ZERO)
1425                         continue;
1426
1427                 commit_list_insert(c, &list);
1428                 while (list) {
1429                         struct commit *current = list->item;
1430                         struct commit_list *parent;
1431                         int all_parents_computed = 1;
1432                         uint32_t max_level = 0;
1433
1434                         for (parent = current->parents; parent; parent = parent->next) {
1435                                 repo_parse_commit(ctx->r, parent->item);
1436                                 level = *topo_level_slab_at(ctx->topo_levels, parent->item);
1437
1438                                 if (level == GENERATION_NUMBER_ZERO) {
1439                                         all_parents_computed = 0;
1440                                         commit_list_insert(parent->item, &list);
1441                                         break;
1442                                 }
1443
1444                                 if (level > max_level)
1445                                         max_level = level;
1446                         }
1447
1448                         if (all_parents_computed) {
1449                                 pop_commit(&list);
1450
1451                                 if (max_level > GENERATION_NUMBER_V1_MAX - 1)
1452                                         max_level = GENERATION_NUMBER_V1_MAX - 1;
1453                                 *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
1454                         }
1455                 }
1456         }
1457         stop_progress(&ctx->progress);
1458 }
1459
1460 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
1461 {
1462         int i;
1463         struct commit_list *list = NULL;
1464
1465         if (ctx->report_progress)
1466                 ctx->progress = start_delayed_progress(
1467                                         _("Computing commit graph generation numbers"),
1468                                         ctx->commits.nr);
1469
1470         if (!ctx->trust_generation_numbers) {
1471                 for (i = 0; i < ctx->commits.nr; i++) {
1472                         struct commit *c = ctx->commits.list[i];
1473                         repo_parse_commit(ctx->r, c);
1474                         commit_graph_data_at(c)->generation = GENERATION_NUMBER_ZERO;
1475                 }
1476         }
1477
1478         for (i = 0; i < ctx->commits.nr; i++) {
1479                 struct commit *c = ctx->commits.list[i];
1480                 timestamp_t corrected_commit_date;
1481
1482                 repo_parse_commit(ctx->r, c);
1483                 corrected_commit_date = commit_graph_data_at(c)->generation;
1484
1485                 display_progress(ctx->progress, i + 1);
1486                 if (corrected_commit_date != GENERATION_NUMBER_ZERO)
1487                         continue;
1488
1489                 commit_list_insert(c, &list);
1490                 while (list) {
1491                         struct commit *current = list->item;
1492                         struct commit_list *parent;
1493                         int all_parents_computed = 1;
1494                         timestamp_t max_corrected_commit_date = 0;
1495
1496                         for (parent = current->parents; parent; parent = parent->next) {
1497                                 repo_parse_commit(ctx->r, parent->item);
1498                                 corrected_commit_date = commit_graph_data_at(parent->item)->generation;
1499
1500                                 if (corrected_commit_date == GENERATION_NUMBER_ZERO) {
1501                                         all_parents_computed = 0;
1502                                         commit_list_insert(parent->item, &list);
1503                                         break;
1504                                 }
1505
1506                                 if (corrected_commit_date > max_corrected_commit_date)
1507                                         max_corrected_commit_date = corrected_commit_date;
1508                         }
1509
1510                         if (all_parents_computed) {
1511                                 pop_commit(&list);
1512
1513                                 if (current->date && current->date > max_corrected_commit_date)
1514                                         max_corrected_commit_date = current->date - 1;
1515                                 commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
1516
1517                                 if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
1518                                         ctx->num_generation_data_overflows++;
1519                         }
1520                 }
1521         }
1522         stop_progress(&ctx->progress);
1523 }
1524
1525 static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
1526 {
1527         trace2_data_intmax("commit-graph", ctx->r, "filter-computed",
1528                            ctx->count_bloom_filter_computed);
1529         trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
1530                            ctx->count_bloom_filter_not_computed);
1531         trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-empty",
1532                            ctx->count_bloom_filter_trunc_empty);
1533         trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
1534                            ctx->count_bloom_filter_trunc_large);
1535 }
1536
1537 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
1538 {
1539         int i;
1540         struct progress *progress = NULL;
1541         struct commit **sorted_commits;
1542         int max_new_filters;
1543
1544         init_bloom_filters();
1545
1546         if (ctx->report_progress)
1547                 progress = start_delayed_progress(
1548                         _("Computing commit changed paths Bloom filters"),
1549                         ctx->commits.nr);
1550
1551         ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
1552         COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
1553
1554         if (ctx->order_by_pack)
1555                 QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
1556         else
1557                 QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
1558
1559         max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
1560                 ctx->opts->max_new_filters : ctx->commits.nr;
1561
1562         for (i = 0; i < ctx->commits.nr; i++) {
1563                 enum bloom_filter_computed computed = 0;
1564                 struct commit *c = sorted_commits[i];
1565                 struct bloom_filter *filter = get_or_compute_bloom_filter(
1566                         ctx->r,
1567                         c,
1568                         ctx->count_bloom_filter_computed < max_new_filters,
1569                         ctx->bloom_settings,
1570                         &computed);
1571                 if (computed & BLOOM_COMPUTED) {
1572                         ctx->count_bloom_filter_computed++;
1573                         if (computed & BLOOM_TRUNC_EMPTY)
1574                                 ctx->count_bloom_filter_trunc_empty++;
1575                         if (computed & BLOOM_TRUNC_LARGE)
1576                                 ctx->count_bloom_filter_trunc_large++;
1577                 } else if (computed & BLOOM_NOT_COMPUTED)
1578                         ctx->count_bloom_filter_not_computed++;
1579                 ctx->total_bloom_filter_data_size += filter
1580                         ? sizeof(unsigned char) * filter->len : 0;
1581                 display_progress(progress, i + 1);
1582         }
1583
1584         if (trace2_is_enabled())
1585                 trace2_bloom_filter_write_statistics(ctx);
1586
1587         free(sorted_commits);
1588         stop_progress(&progress);
1589 }
1590
1591 struct refs_cb_data {
1592         struct oidset *commits;
1593         struct progress *progress;
1594 };
1595
1596 static int add_ref_to_set(const char *refname,
1597                           const struct object_id *oid,
1598                           int flags, void *cb_data)
1599 {
1600         struct object_id peeled;
1601         struct refs_cb_data *data = (struct refs_cb_data *)cb_data;
1602
1603         if (!peel_iterated_oid(oid, &peeled))
1604                 oid = &peeled;
1605         if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT)
1606                 oidset_insert(data->commits, oid);
1607
1608         display_progress(data->progress, oidset_size(data->commits));
1609
1610         return 0;
1611 }
1612
1613 int write_commit_graph_reachable(struct object_directory *odb,
1614                                  enum commit_graph_write_flags flags,
1615                                  const struct commit_graph_opts *opts)
1616 {
1617         struct oidset commits = OIDSET_INIT;
1618         struct refs_cb_data data;
1619         int result;
1620
1621         memset(&data, 0, sizeof(data));
1622         data.commits = &commits;
1623         if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
1624                 data.progress = start_delayed_progress(
1625                         _("Collecting referenced commits"), 0);
1626
1627         for_each_ref(add_ref_to_set, &data);
1628
1629         stop_progress(&data.progress);
1630
1631         result = write_commit_graph(odb, NULL, &commits,
1632                                     flags, opts);
1633
1634         oidset_clear(&commits);
1635         return result;
1636 }
1637
1638 static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
1639                                 struct string_list *pack_indexes)
1640 {
1641         uint32_t i;
1642         struct strbuf progress_title = STRBUF_INIT;
1643         struct strbuf packname = STRBUF_INIT;
1644         int dirlen;
1645
1646         strbuf_addf(&packname, "%s/pack/", ctx->odb->path);
1647         dirlen = packname.len;
1648         if (ctx->report_progress) {
1649                 strbuf_addf(&progress_title,
1650                             Q_("Finding commits for commit graph in %d pack",
1651                                "Finding commits for commit graph in %d packs",
1652                                pack_indexes->nr),
1653                             pack_indexes->nr);
1654                 ctx->progress = start_delayed_progress(progress_title.buf, 0);
1655                 ctx->progress_done = 0;
1656         }
1657         for (i = 0; i < pack_indexes->nr; i++) {
1658                 struct packed_git *p;
1659                 strbuf_setlen(&packname, dirlen);
1660                 strbuf_addstr(&packname, pack_indexes->items[i].string);
1661                 p = add_packed_git(packname.buf, packname.len, 1);
1662                 if (!p) {
1663                         error(_("error adding pack %s"), packname.buf);
1664                         return -1;
1665                 }
1666                 if (open_pack_index(p)) {
1667                         error(_("error opening index for %s"), packname.buf);
1668                         return -1;
1669                 }
1670                 for_each_object_in_pack(p, add_packed_commits, ctx,
1671                                         FOR_EACH_OBJECT_PACK_ORDER);
1672                 close_pack(p);
1673                 free(p);
1674         }
1675
1676         stop_progress(&ctx->progress);
1677         strbuf_release(&progress_title);
1678         strbuf_release(&packname);
1679
1680         return 0;
1681 }
1682
1683 static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
1684                                   struct oidset *commits)
1685 {
1686         struct oidset_iter iter;
1687         struct object_id *oid;
1688
1689         if (!oidset_size(commits))
1690                 return 0;
1691
1692         oidset_iter_init(commits, &iter);
1693         while ((oid = oidset_iter_next(&iter))) {
1694                 oid_array_append(&ctx->oids, oid);
1695         }
1696
1697         return 0;
1698 }
1699
1700 static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
1701 {
1702         if (ctx->report_progress)
1703                 ctx->progress = start_delayed_progress(
1704                         _("Finding commits for commit graph among packed objects"),
1705                         ctx->approx_nr_objects);
1706         for_each_packed_object(add_packed_commits, ctx,
1707                                FOR_EACH_OBJECT_PACK_ORDER);
1708         if (ctx->progress_done < ctx->approx_nr_objects)
1709                 display_progress(ctx->progress, ctx->approx_nr_objects);
1710         stop_progress(&ctx->progress);
1711 }
1712
1713 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
1714 {
1715         uint32_t i;
1716         enum commit_graph_split_flags flags = ctx->opts ?
1717                 ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1718
1719         ctx->num_extra_edges = 0;
1720         if (ctx->report_progress)
1721                 ctx->progress = start_delayed_progress(
1722                         _("Finding extra edges in commit graph"),
1723                         ctx->oids.nr);
1724         oid_array_sort(&ctx->oids);
1725         for (i = 0; i < ctx->oids.nr; i = oid_array_next_unique(&ctx->oids, i)) {
1726                 unsigned int num_parents;
1727
1728                 display_progress(ctx->progress, i + 1);
1729
1730                 ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
1731                 ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1732
1733                 if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
1734                     commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
1735                         continue;
1736
1737                 if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
1738                         repo_parse_commit(ctx->r, ctx->commits.list[ctx->commits.nr]);
1739                 else
1740                         repo_parse_commit_no_graph(ctx->r, ctx->commits.list[ctx->commits.nr]);
1741
1742                 num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
1743                 if (num_parents > 2)
1744                         ctx->num_extra_edges += num_parents - 1;
1745
1746                 ctx->commits.nr++;
1747         }
1748         stop_progress(&ctx->progress);
1749 }
1750
1751 static int write_graph_chunk_base_1(struct hashfile *f,
1752                                     struct commit_graph *g)
1753 {
1754         int num = 0;
1755
1756         if (!g)
1757                 return 0;
1758
1759         num = write_graph_chunk_base_1(f, g->base_graph);
1760         hashwrite(f, g->oid.hash, the_hash_algo->rawsz);
1761         return num + 1;
1762 }
1763
1764 static int write_graph_chunk_base(struct hashfile *f,
1765                                     void *data)
1766 {
1767         struct write_commit_graph_context *ctx = data;
1768         int num = write_graph_chunk_base_1(f, ctx->new_base_graph);
1769
1770         if (num != ctx->num_commit_graphs_after - 1) {
1771                 error(_("failed to write correct number of base graph ids"));
1772                 return -1;
1773         }
1774
1775         return 0;
1776 }
1777
1778 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
1779 {
1780         uint32_t i;
1781         int fd;
1782         struct hashfile *f;
1783         struct lock_file lk = LOCK_INIT;
1784         const unsigned hashsz = the_hash_algo->rawsz;
1785         struct strbuf progress_title = STRBUF_INIT;
1786         struct object_id file_hash;
1787         struct chunkfile *cf;
1788
1789         if (ctx->split) {
1790                 struct strbuf tmp_file = STRBUF_INIT;
1791
1792                 strbuf_addf(&tmp_file,
1793                             "%s/info/commit-graphs/tmp_graph_XXXXXX",
1794                             ctx->odb->path);
1795                 ctx->graph_name = strbuf_detach(&tmp_file, NULL);
1796         } else {
1797                 ctx->graph_name = get_commit_graph_filename(ctx->odb);
1798         }
1799
1800         if (safe_create_leading_directories(ctx->graph_name)) {
1801                 UNLEAK(ctx->graph_name);
1802                 error(_("unable to create leading directories of %s"),
1803                         ctx->graph_name);
1804                 return -1;
1805         }
1806
1807         if (ctx->split) {
1808                 char *lock_name = get_commit_graph_chain_filename(ctx->odb);
1809
1810                 hold_lock_file_for_update_mode(&lk, lock_name,
1811                                                LOCK_DIE_ON_ERROR, 0444);
1812
1813                 fd = git_mkstemp_mode(ctx->graph_name, 0444);
1814                 if (fd < 0) {
1815                         error(_("unable to create temporary graph layer"));
1816                         return -1;
1817                 }
1818
1819                 if (adjust_shared_perm(ctx->graph_name)) {
1820                         error(_("unable to adjust shared permissions for '%s'"),
1821                               ctx->graph_name);
1822                         return -1;
1823                 }
1824
1825                 f = hashfd(fd, ctx->graph_name);
1826         } else {
1827                 hold_lock_file_for_update_mode(&lk, ctx->graph_name,
1828                                                LOCK_DIE_ON_ERROR, 0444);
1829                 fd = get_lock_file_fd(&lk);
1830                 f = hashfd(fd, get_lock_file_path(&lk));
1831         }
1832
1833         cf = init_chunkfile(f);
1834
1835         add_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, GRAPH_FANOUT_SIZE,
1836                   write_graph_chunk_fanout);
1837         add_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, hashsz * ctx->commits.nr,
1838                   write_graph_chunk_oids);
1839         add_chunk(cf, GRAPH_CHUNKID_DATA, (hashsz + 16) * ctx->commits.nr,
1840                   write_graph_chunk_data);
1841
1842         if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0))
1843                 ctx->write_generation_data = 0;
1844         if (ctx->write_generation_data)
1845                 add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
1846                           sizeof(uint32_t) * ctx->commits.nr,
1847                           write_graph_chunk_generation_data);
1848         if (ctx->num_generation_data_overflows)
1849                 add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
1850                           sizeof(timestamp_t) * ctx->num_generation_data_overflows,
1851                           write_graph_chunk_generation_data_overflow);
1852         if (ctx->num_extra_edges)
1853                 add_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES,
1854                           4 * ctx->num_extra_edges,
1855                           write_graph_chunk_extra_edges);
1856         if (ctx->changed_paths) {
1857                 add_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
1858                           sizeof(uint32_t) * ctx->commits.nr,
1859                           write_graph_chunk_bloom_indexes);
1860                 add_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
1861                           sizeof(uint32_t) * 3
1862                                 + ctx->total_bloom_filter_data_size,
1863                           write_graph_chunk_bloom_data);
1864         }
1865         if (ctx->num_commit_graphs_after > 1)
1866                 add_chunk(cf, GRAPH_CHUNKID_BASE,
1867                           hashsz * (ctx->num_commit_graphs_after - 1),
1868                           write_graph_chunk_base);
1869
1870         hashwrite_be32(f, GRAPH_SIGNATURE);
1871
1872         hashwrite_u8(f, GRAPH_VERSION);
1873         hashwrite_u8(f, oid_version());
1874         hashwrite_u8(f, get_num_chunks(cf));
1875         hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
1876
1877         if (ctx->report_progress) {
1878                 strbuf_addf(&progress_title,
1879                             Q_("Writing out commit graph in %d pass",
1880                                "Writing out commit graph in %d passes",
1881                                get_num_chunks(cf)),
1882                             get_num_chunks(cf));
1883                 ctx->progress = start_delayed_progress(
1884                         progress_title.buf,
1885                         get_num_chunks(cf) * ctx->commits.nr);
1886         }
1887
1888         write_chunkfile(cf, ctx);
1889
1890         stop_progress(&ctx->progress);
1891         strbuf_release(&progress_title);
1892
1893         if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) {
1894                 char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid));
1895                 char *new_base_name = get_split_graph_filename(ctx->new_base_graph->odb, new_base_hash);
1896
1897                 free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
1898                 free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]);
1899                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name;
1900                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash;
1901         }
1902
1903         close_commit_graph(ctx->r->objects);
1904         finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1905         free_chunkfile(cf);
1906
1907         if (ctx->split) {
1908                 FILE *chainf = fdopen_lock_file(&lk, "w");
1909                 char *final_graph_name;
1910                 int result;
1911
1912                 close(fd);
1913
1914                 if (!chainf) {
1915                         error(_("unable to open commit-graph chain file"));
1916                         return -1;
1917                 }
1918
1919                 if (ctx->base_graph_name) {
1920                         const char *dest;
1921                         int idx = ctx->num_commit_graphs_after - 1;
1922                         if (ctx->num_commit_graphs_after > 1)
1923                                 idx--;
1924
1925                         dest = ctx->commit_graph_filenames_after[idx];
1926
1927                         if (strcmp(ctx->base_graph_name, dest)) {
1928                                 result = rename(ctx->base_graph_name, dest);
1929
1930                                 if (result) {
1931                                         error(_("failed to rename base commit-graph file"));
1932                                         return -1;
1933                                 }
1934                         }
1935                 } else {
1936                         char *graph_name = get_commit_graph_filename(ctx->odb);
1937                         unlink(graph_name);
1938                 }
1939
1940                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash));
1941                 final_graph_name = get_split_graph_filename(ctx->odb,
1942                                         ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
1943                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
1944
1945                 result = rename(ctx->graph_name, final_graph_name);
1946
1947                 for (i = 0; i < ctx->num_commit_graphs_after; i++)
1948                         fprintf(get_lock_file_fp(&lk), "%s\n", ctx->commit_graph_hash_after[i]);
1949
1950                 if (result) {
1951                         error(_("failed to rename temporary commit-graph file"));
1952                         return -1;
1953                 }
1954         }
1955
1956         commit_lock_file(&lk);
1957
1958         return 0;
1959 }
1960
1961 static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
1962 {
1963         struct commit_graph *g;
1964         uint32_t num_commits;
1965         enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1966         uint32_t i;
1967
1968         int max_commits = 0;
1969         int size_mult = 2;
1970
1971         if (ctx->opts) {
1972                 max_commits = ctx->opts->max_commits;
1973
1974                 if (ctx->opts->size_multiple)
1975                         size_mult = ctx->opts->size_multiple;
1976
1977                 flags = ctx->opts->split_flags;
1978         }
1979
1980         g = ctx->r->objects->commit_graph;
1981         num_commits = ctx->commits.nr;
1982         if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
1983                 ctx->num_commit_graphs_after = 1;
1984         else
1985                 ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
1986
1987         if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
1988             flags != COMMIT_GRAPH_SPLIT_REPLACE) {
1989                 while (g && (g->num_commits <= size_mult * num_commits ||
1990                             (max_commits && num_commits > max_commits))) {
1991                         if (g->odb != ctx->odb)
1992                                 break;
1993
1994                         num_commits += g->num_commits;
1995                         g = g->base_graph;
1996
1997                         ctx->num_commit_graphs_after--;
1998                 }
1999         }
2000
2001         if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
2002                 ctx->new_base_graph = g;
2003         else if (ctx->num_commit_graphs_after != 1)
2004                 BUG("split_graph_merge_strategy: num_commit_graphs_after "
2005                     "should be 1 with --split=replace");
2006
2007         if (ctx->num_commit_graphs_after == 2) {
2008                 char *old_graph_name = get_commit_graph_filename(g->odb);
2009
2010                 if (!strcmp(g->filename, old_graph_name) &&
2011                     g->odb != ctx->odb) {
2012                         ctx->num_commit_graphs_after = 1;
2013                         ctx->new_base_graph = NULL;
2014                 }
2015
2016                 free(old_graph_name);
2017         }
2018
2019         CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
2020         CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
2021
2022         for (i = 0; i < ctx->num_commit_graphs_after &&
2023                     i < ctx->num_commit_graphs_before; i++)
2024                 ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
2025
2026         i = ctx->num_commit_graphs_before - 1;
2027         g = ctx->r->objects->commit_graph;
2028
2029         while (g) {
2030                 if (i < ctx->num_commit_graphs_after)
2031                         ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
2032
2033                 /*
2034                  * If the topmost remaining layer has generation data chunk, the
2035                  * resultant layer also has generation data chunk.
2036                  */
2037                 if (i == ctx->num_commit_graphs_after - 2)
2038                         ctx->write_generation_data = !!g->chunk_generation_data;
2039
2040                 i--;
2041                 g = g->base_graph;
2042         }
2043 }
2044
2045 static void merge_commit_graph(struct write_commit_graph_context *ctx,
2046                                struct commit_graph *g)
2047 {
2048         uint32_t i;
2049         uint32_t offset = g->num_commits_in_base;
2050
2051         ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
2052
2053         for (i = 0; i < g->num_commits; i++) {
2054                 struct object_id oid;
2055                 struct commit *result;
2056
2057                 display_progress(ctx->progress, i + 1);
2058
2059                 load_oid_from_graph(g, i + offset, &oid);
2060
2061                 /* only add commits if they still exist in the repo */
2062                 result = lookup_commit_reference_gently(ctx->r, &oid, 1);
2063
2064                 if (result) {
2065                         ctx->commits.list[ctx->commits.nr] = result;
2066                         ctx->commits.nr++;
2067                 }
2068         }
2069 }
2070
2071 static int commit_compare(const void *_a, const void *_b)
2072 {
2073         const struct commit *a = *(const struct commit **)_a;
2074         const struct commit *b = *(const struct commit **)_b;
2075         return oidcmp(&a->object.oid, &b->object.oid);
2076 }
2077
2078 static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
2079 {
2080         uint32_t i, dedup_i = 0;
2081
2082         if (ctx->report_progress)
2083                 ctx->progress = start_delayed_progress(
2084                                         _("Scanning merged commits"),
2085                                         ctx->commits.nr);
2086
2087         QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
2088
2089         ctx->num_extra_edges = 0;
2090         for (i = 0; i < ctx->commits.nr; i++) {
2091                 display_progress(ctx->progress, i);
2092
2093                 if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
2094                           &ctx->commits.list[i]->object.oid)) {
2095                         /*
2096                          * Silently ignore duplicates. These were likely
2097                          * created due to a commit appearing in multiple
2098                          * layers of the chain, which is unexpected but
2099                          * not invalid. We should make sure there is a
2100                          * unique copy in the new layer.
2101                          */
2102                 } else {
2103                         unsigned int num_parents;
2104
2105                         ctx->commits.list[dedup_i] = ctx->commits.list[i];
2106                         dedup_i++;
2107
2108                         num_parents = commit_list_count(ctx->commits.list[i]->parents);
2109                         if (num_parents > 2)
2110                                 ctx->num_extra_edges += num_parents - 1;
2111                 }
2112         }
2113
2114         ctx->commits.nr = dedup_i;
2115
2116         stop_progress(&ctx->progress);
2117 }
2118
2119 static void merge_commit_graphs(struct write_commit_graph_context *ctx)
2120 {
2121         struct commit_graph *g = ctx->r->objects->commit_graph;
2122         uint32_t current_graph_number = ctx->num_commit_graphs_before;
2123
2124         while (g && current_graph_number >= ctx->num_commit_graphs_after) {
2125                 current_graph_number--;
2126
2127                 if (ctx->report_progress)
2128                         ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0);
2129
2130                 merge_commit_graph(ctx, g);
2131                 stop_progress(&ctx->progress);
2132
2133                 g = g->base_graph;
2134         }
2135
2136         if (g) {
2137                 ctx->new_base_graph = g;
2138                 ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
2139         }
2140
2141         if (ctx->new_base_graph)
2142                 ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
2143
2144         sort_and_scan_merged_commits(ctx);
2145 }
2146
2147 static void mark_commit_graphs(struct write_commit_graph_context *ctx)
2148 {
2149         uint32_t i;
2150         time_t now = time(NULL);
2151
2152         for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
2153                 struct stat st;
2154                 struct utimbuf updated_time;
2155
2156                 stat(ctx->commit_graph_filenames_before[i], &st);
2157
2158                 updated_time.actime = st.st_atime;
2159                 updated_time.modtime = now;
2160                 utime(ctx->commit_graph_filenames_before[i], &updated_time);
2161         }
2162 }
2163
2164 static void expire_commit_graphs(struct write_commit_graph_context *ctx)
2165 {
2166         struct strbuf path = STRBUF_INIT;
2167         DIR *dir;
2168         struct dirent *de;
2169         size_t dirnamelen;
2170         timestamp_t expire_time = time(NULL);
2171
2172         if (ctx->opts && ctx->opts->expire_time)
2173                 expire_time = ctx->opts->expire_time;
2174         if (!ctx->split) {
2175                 char *chain_file_name = get_commit_graph_chain_filename(ctx->odb);
2176                 unlink(chain_file_name);
2177                 free(chain_file_name);
2178                 ctx->num_commit_graphs_after = 0;
2179         }
2180
2181         strbuf_addstr(&path, ctx->odb->path);
2182         strbuf_addstr(&path, "/info/commit-graphs");
2183         dir = opendir(path.buf);
2184
2185         if (!dir)
2186                 goto out;
2187
2188         strbuf_addch(&path, '/');
2189         dirnamelen = path.len;
2190         while ((de = readdir(dir)) != NULL) {
2191                 struct stat st;
2192                 uint32_t i, found = 0;
2193
2194                 strbuf_setlen(&path, dirnamelen);
2195                 strbuf_addstr(&path, de->d_name);
2196
2197                 stat(path.buf, &st);
2198
2199                 if (st.st_mtime > expire_time)
2200                         continue;
2201                 if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
2202                         continue;
2203
2204                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2205                         if (!strcmp(ctx->commit_graph_filenames_after[i],
2206                                     path.buf)) {
2207                                 found = 1;
2208                                 break;
2209                         }
2210                 }
2211
2212                 if (!found)
2213                         unlink(path.buf);
2214         }
2215
2216 out:
2217         strbuf_release(&path);
2218 }
2219
2220 int write_commit_graph(struct object_directory *odb,
2221                        struct string_list *pack_indexes,
2222                        struct oidset *commits,
2223                        enum commit_graph_write_flags flags,
2224                        const struct commit_graph_opts *opts)
2225 {
2226         struct write_commit_graph_context *ctx;
2227         uint32_t i;
2228         int res = 0;
2229         int replace = 0;
2230         struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
2231         struct topo_level_slab topo_levels;
2232
2233         prepare_repo_settings(the_repository);
2234         if (!the_repository->settings.core_commit_graph) {
2235                 warning(_("attempting to write a commit-graph, but 'core.commitGraph' is disabled"));
2236                 return 0;
2237         }
2238         if (!commit_graph_compatible(the_repository))
2239                 return 0;
2240
2241         ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
2242         ctx->r = the_repository;
2243         ctx->odb = odb;
2244         ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
2245         ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
2246         ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
2247         ctx->opts = opts;
2248         ctx->total_bloom_filter_data_size = 0;
2249         ctx->write_generation_data = 1;
2250         ctx->num_generation_data_overflows = 0;
2251
2252         bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
2253                                                       bloom_settings.bits_per_entry);
2254         bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
2255                                                   bloom_settings.num_hashes);
2256         bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
2257                                                          bloom_settings.max_changed_paths);
2258         ctx->bloom_settings = &bloom_settings;
2259
2260         init_topo_level_slab(&topo_levels);
2261         ctx->topo_levels = &topo_levels;
2262
2263         prepare_commit_graph(ctx->r);
2264         if (ctx->r->objects->commit_graph) {
2265                 struct commit_graph *g = ctx->r->objects->commit_graph;
2266
2267                 while (g) {
2268                         g->topo_levels = &topo_levels;
2269                         g = g->base_graph;
2270                 }
2271         }
2272
2273         if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2274                 ctx->changed_paths = 1;
2275         if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2276                 struct commit_graph *g;
2277
2278                 g = ctx->r->objects->commit_graph;
2279
2280                 /* We have changed-paths already. Keep them in the next graph */
2281                 if (g && g->chunk_bloom_data) {
2282                         ctx->changed_paths = 1;
2283                         ctx->bloom_settings = g->bloom_filter_settings;
2284                 }
2285         }
2286
2287         if (ctx->split) {
2288                 struct commit_graph *g = ctx->r->objects->commit_graph;
2289
2290                 while (g) {
2291                         ctx->num_commit_graphs_before++;
2292                         g = g->base_graph;
2293                 }
2294
2295                 if (ctx->num_commit_graphs_before) {
2296                         ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before);
2297                         i = ctx->num_commit_graphs_before;
2298                         g = ctx->r->objects->commit_graph;
2299
2300                         while (g) {
2301                                 ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename);
2302                                 g = g->base_graph;
2303                         }
2304                 }
2305
2306                 if (ctx->opts)
2307                         replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
2308         }
2309
2310         ctx->approx_nr_objects = approximate_object_count();
2311
2312         if (ctx->append && ctx->r->objects->commit_graph) {
2313                 struct commit_graph *g = ctx->r->objects->commit_graph;
2314                 for (i = 0; i < g->num_commits; i++) {
2315                         struct object_id oid;
2316                         hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2317                         oid_array_append(&ctx->oids, &oid);
2318                 }
2319         }
2320
2321         if (pack_indexes) {
2322                 ctx->order_by_pack = 1;
2323                 if ((res = fill_oids_from_packs(ctx, pack_indexes)))
2324                         goto cleanup;
2325         }
2326
2327         if (commits) {
2328                 if ((res = fill_oids_from_commits(ctx, commits)))
2329                         goto cleanup;
2330         }
2331
2332         if (!pack_indexes && !commits) {
2333                 ctx->order_by_pack = 1;
2334                 fill_oids_from_all_packs(ctx);
2335         }
2336
2337         close_reachable(ctx);
2338
2339         copy_oids_to_commits(ctx);
2340
2341         if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
2342                 error(_("too many commits to write graph"));
2343                 res = -1;
2344                 goto cleanup;
2345         }
2346
2347         if (!ctx->commits.nr && !replace)
2348                 goto cleanup;
2349
2350         if (ctx->split) {
2351                 split_graph_merge_strategy(ctx);
2352
2353                 if (!replace)
2354                         merge_commit_graphs(ctx);
2355         } else
2356                 ctx->num_commit_graphs_after = 1;
2357
2358         ctx->trust_generation_numbers = validate_mixed_generation_chain(ctx->r->objects->commit_graph);
2359
2360         compute_topological_levels(ctx);
2361         if (ctx->write_generation_data)
2362                 compute_generation_numbers(ctx);
2363
2364         if (ctx->changed_paths)
2365                 compute_bloom_filters(ctx);
2366
2367         res = write_commit_graph_file(ctx);
2368
2369         if (ctx->split)
2370                 mark_commit_graphs(ctx);
2371
2372         expire_commit_graphs(ctx);
2373
2374 cleanup:
2375         free(ctx->graph_name);
2376         free(ctx->commits.list);
2377         oid_array_clear(&ctx->oids);
2378         clear_topo_level_slab(&topo_levels);
2379
2380         if (ctx->commit_graph_filenames_after) {
2381                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2382                         free(ctx->commit_graph_filenames_after[i]);
2383                         free(ctx->commit_graph_hash_after[i]);
2384                 }
2385
2386                 for (i = 0; i < ctx->num_commit_graphs_before; i++)
2387                         free(ctx->commit_graph_filenames_before[i]);
2388
2389                 free(ctx->commit_graph_filenames_after);
2390                 free(ctx->commit_graph_filenames_before);
2391                 free(ctx->commit_graph_hash_after);
2392         }
2393
2394         free(ctx);
2395
2396         return res;
2397 }
2398
2399 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
2400 static int verify_commit_graph_error;
2401
2402 static void graph_report(const char *fmt, ...)
2403 {
2404         va_list ap;
2405
2406         verify_commit_graph_error = 1;
2407         va_start(ap, fmt);
2408         vfprintf(stderr, fmt, ap);
2409         fprintf(stderr, "\n");
2410         va_end(ap);
2411 }
2412
2413 #define GENERATION_ZERO_EXISTS 1
2414 #define GENERATION_NUMBER_EXISTS 2
2415
2416 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
2417 {
2418         uint32_t i, cur_fanout_pos = 0;
2419         struct object_id prev_oid, cur_oid, checksum;
2420         int generation_zero = 0;
2421         struct hashfile *f;
2422         int devnull;
2423         struct progress *progress = NULL;
2424         int local_error = 0;
2425
2426         if (!g) {
2427                 graph_report("no commit-graph file loaded");
2428                 return 1;
2429         }
2430
2431         verify_commit_graph_error = verify_commit_graph_lite(g);
2432         if (verify_commit_graph_error)
2433                 return verify_commit_graph_error;
2434
2435         devnull = open("/dev/null", O_WRONLY);
2436         f = hashfd(devnull, NULL);
2437         hashwrite(f, g->data, g->data_len - g->hash_len);
2438         finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
2439         if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) {
2440                 graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
2441                 verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
2442         }
2443
2444         for (i = 0; i < g->num_commits; i++) {
2445                 struct commit *graph_commit;
2446
2447                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2448
2449                 if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
2450                         graph_report(_("commit-graph has incorrect OID order: %s then %s"),
2451                                      oid_to_hex(&prev_oid),
2452                                      oid_to_hex(&cur_oid));
2453
2454                 oidcpy(&prev_oid, &cur_oid);
2455
2456                 while (cur_oid.hash[0] > cur_fanout_pos) {
2457                         uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2458
2459                         if (i != fanout_value)
2460                                 graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2461                                              cur_fanout_pos, fanout_value, i);
2462                         cur_fanout_pos++;
2463                 }
2464
2465                 graph_commit = lookup_commit(r, &cur_oid);
2466                 if (!parse_commit_in_graph_one(r, g, graph_commit))
2467                         graph_report(_("failed to parse commit %s from commit-graph"),
2468                                      oid_to_hex(&cur_oid));
2469         }
2470
2471         while (cur_fanout_pos < 256) {
2472                 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2473
2474                 if (g->num_commits != fanout_value)
2475                         graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2476                                      cur_fanout_pos, fanout_value, i);
2477
2478                 cur_fanout_pos++;
2479         }
2480
2481         if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
2482                 return verify_commit_graph_error;
2483
2484         if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
2485                 progress = start_progress(_("Verifying commits in commit graph"),
2486                                         g->num_commits);
2487
2488         for (i = 0; i < g->num_commits; i++) {
2489                 struct commit *graph_commit, *odb_commit;
2490                 struct commit_list *graph_parents, *odb_parents;
2491                 timestamp_t max_generation = 0;
2492                 timestamp_t generation;
2493
2494                 display_progress(progress, i + 1);
2495                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2496
2497                 graph_commit = lookup_commit(r, &cur_oid);
2498                 odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));
2499                 if (parse_commit_internal(odb_commit, 0, 0)) {
2500                         graph_report(_("failed to parse commit %s from object database for commit-graph"),
2501                                      oid_to_hex(&cur_oid));
2502                         continue;
2503                 }
2504
2505                 if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
2506                            get_commit_tree_oid(odb_commit)))
2507                         graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"),
2508                                      oid_to_hex(&cur_oid),
2509                                      oid_to_hex(get_commit_tree_oid(graph_commit)),
2510                                      oid_to_hex(get_commit_tree_oid(odb_commit)));
2511
2512                 graph_parents = graph_commit->parents;
2513                 odb_parents = odb_commit->parents;
2514
2515                 while (graph_parents) {
2516                         if (odb_parents == NULL) {
2517                                 graph_report(_("commit-graph parent list for commit %s is too long"),
2518                                              oid_to_hex(&cur_oid));
2519                                 break;
2520                         }
2521
2522                         /* parse parent in case it is in a base graph */
2523                         parse_commit_in_graph_one(r, g, graph_parents->item);
2524
2525                         if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
2526                                 graph_report(_("commit-graph parent for %s is %s != %s"),
2527                                              oid_to_hex(&cur_oid),
2528                                              oid_to_hex(&graph_parents->item->object.oid),
2529                                              oid_to_hex(&odb_parents->item->object.oid));
2530
2531                         generation = commit_graph_generation(graph_parents->item);
2532                         if (generation > max_generation)
2533                                 max_generation = generation;
2534
2535                         graph_parents = graph_parents->next;
2536                         odb_parents = odb_parents->next;
2537                 }
2538
2539                 if (odb_parents != NULL)
2540                         graph_report(_("commit-graph parent list for commit %s terminates early"),
2541                                      oid_to_hex(&cur_oid));
2542
2543                 if (!commit_graph_generation(graph_commit)) {
2544                         if (generation_zero == GENERATION_NUMBER_EXISTS)
2545                                 graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
2546                                              oid_to_hex(&cur_oid));
2547                         generation_zero = GENERATION_ZERO_EXISTS;
2548                 } else if (generation_zero == GENERATION_ZERO_EXISTS)
2549                         graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
2550                                      oid_to_hex(&cur_oid));
2551
2552                 if (generation_zero == GENERATION_ZERO_EXISTS)
2553                         continue;
2554
2555                 /*
2556                  * If we are using topological level and one of our parents has
2557                  * generation GENERATION_NUMBER_V1_MAX, then our generation is
2558                  * also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
2559                  * in the following condition.
2560                  */
2561                 if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
2562                         max_generation--;
2563
2564                 generation = commit_graph_generation(graph_commit);
2565                 if (generation < max_generation + 1)
2566                         graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
2567                                      oid_to_hex(&cur_oid),
2568                                      generation,
2569                                      max_generation + 1);
2570
2571                 if (graph_commit->date != odb_commit->date)
2572                         graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
2573                                      oid_to_hex(&cur_oid),
2574                                      graph_commit->date,
2575                                      odb_commit->date);
2576         }
2577         stop_progress(&progress);
2578
2579         local_error = verify_commit_graph_error;
2580
2581         if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph)
2582                 local_error |= verify_commit_graph(r, g->base_graph, flags);
2583
2584         return local_error;
2585 }
2586
2587 void free_commit_graph(struct commit_graph *g)
2588 {
2589         if (!g)
2590                 return;
2591         if (g->data) {
2592                 munmap((void *)g->data, g->data_len);
2593                 g->data = NULL;
2594         }
2595         free(g->filename);
2596         free(g->bloom_filter_settings);
2597         free(g);
2598 }
2599
2600 void disable_commit_graph(struct repository *r)
2601 {
2602         r->commit_graph_disabled = 1;
2603 }