commit-graph: introduce 'get_bloom_filter_settings()'
[git] / commit-graph.c
1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "lockfile.h"
4 #include "pack.h"
5 #include "packfile.h"
6 #include "commit.h"
7 #include "object.h"
8 #include "refs.h"
9 #include "revision.h"
10 #include "sha1-lookup.h"
11 #include "commit-graph.h"
12 #include "object-store.h"
13 #include "alloc.h"
14 #include "hashmap.h"
15 #include "replace-object.h"
16 #include "progress.h"
17 #include "bloom.h"
18 #include "commit-slab.h"
19 #include "shallow.h"
20 #include "json-writer.h"
21 #include "trace2.h"
22
23 void git_test_write_commit_graph_or_die(void)
24 {
25         int flags = 0;
26         if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
27                 return;
28
29         if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
30                 flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
31
32         if (write_commit_graph_reachable(the_repository->objects->odb,
33                                          flags, NULL))
34                 die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
35 }
36
37 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
38 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
39 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
40 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
41 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
42 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
43 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
44 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
45 #define MAX_NUM_CHUNKS 7
46
47 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
48
49 #define GRAPH_VERSION_1 0x1
50 #define GRAPH_VERSION GRAPH_VERSION_1
51
52 #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
53 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
54 #define GRAPH_PARENT_NONE 0x70000000
55
56 #define GRAPH_LAST_EDGE 0x80000000
57
58 #define GRAPH_HEADER_SIZE 8
59 #define GRAPH_FANOUT_SIZE (4 * 256)
60 #define GRAPH_CHUNKLOOKUP_WIDTH 12
61 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
62                         + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
63
64 /* Remember to update object flag allocation in object.h */
65 #define REACHABLE       (1u<<15)
66
67 /* Keep track of the order in which commits are added to our list. */
68 define_commit_slab(commit_pos, int);
69 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
70
71 static void set_commit_pos(struct repository *r, const struct object_id *oid)
72 {
73         static int32_t max_pos;
74         struct commit *commit = lookup_commit(r, oid);
75
76         if (!commit)
77                 return; /* should never happen, but be lenient */
78
79         *commit_pos_at(&commit_pos, commit) = max_pos++;
80 }
81
82 static int commit_pos_cmp(const void *va, const void *vb)
83 {
84         const struct commit *a = *(const struct commit **)va;
85         const struct commit *b = *(const struct commit **)vb;
86         return commit_pos_at(&commit_pos, a) -
87                commit_pos_at(&commit_pos, b);
88 }
89
90 define_commit_slab(commit_graph_data_slab, struct commit_graph_data);
91 static struct commit_graph_data_slab commit_graph_data_slab =
92         COMMIT_SLAB_INIT(1, commit_graph_data_slab);
93
94 uint32_t commit_graph_position(const struct commit *c)
95 {
96         struct commit_graph_data *data =
97                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
98
99         return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
100 }
101
102 uint32_t commit_graph_generation(const struct commit *c)
103 {
104         struct commit_graph_data *data =
105                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
106
107         if (!data)
108                 return GENERATION_NUMBER_INFINITY;
109         else if (data->graph_pos == COMMIT_NOT_FROM_GRAPH)
110                 return GENERATION_NUMBER_INFINITY;
111
112         return data->generation;
113 }
114
115 static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
116 {
117         unsigned int i, nth_slab;
118         struct commit_graph_data *data =
119                 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
120
121         if (data)
122                 return data;
123
124         nth_slab = c->index / commit_graph_data_slab.slab_size;
125         data = commit_graph_data_slab_at(&commit_graph_data_slab, c);
126
127         /*
128          * commit-slab initializes elements with zero, overwrite this with
129          * COMMIT_NOT_FROM_GRAPH for graph_pos.
130          *
131          * We avoid initializing generation with checking if graph position
132          * is not COMMIT_NOT_FROM_GRAPH.
133          */
134         for (i = 0; i < commit_graph_data_slab.slab_size; i++) {
135                 commit_graph_data_slab.slab[nth_slab][i].graph_pos =
136                         COMMIT_NOT_FROM_GRAPH;
137         }
138
139         return data;
140 }
141
142 static int commit_gen_cmp(const void *va, const void *vb)
143 {
144         const struct commit *a = *(const struct commit **)va;
145         const struct commit *b = *(const struct commit **)vb;
146
147         uint32_t generation_a = commit_graph_generation(a);
148         uint32_t generation_b = commit_graph_generation(b);
149         /* lower generation commits first */
150         if (generation_a < generation_b)
151                 return -1;
152         else if (generation_a > generation_b)
153                 return 1;
154
155         /* use date as a heuristic when generations are equal */
156         if (a->date < b->date)
157                 return -1;
158         else if (a->date > b->date)
159                 return 1;
160         return 0;
161 }
162
163 char *get_commit_graph_filename(struct object_directory *obj_dir)
164 {
165         return xstrfmt("%s/info/commit-graph", obj_dir->path);
166 }
167
168 static char *get_split_graph_filename(struct object_directory *odb,
169                                       const char *oid_hex)
170 {
171         return xstrfmt("%s/info/commit-graphs/graph-%s.graph", odb->path,
172                        oid_hex);
173 }
174
175 static char *get_chain_filename(struct object_directory *odb)
176 {
177         return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
178 }
179
180 static uint8_t oid_version(void)
181 {
182         return 1;
183 }
184
185 static struct commit_graph *alloc_commit_graph(void)
186 {
187         struct commit_graph *g = xcalloc(1, sizeof(*g));
188
189         return g;
190 }
191
192 extern int read_replace_refs;
193
194 static int commit_graph_compatible(struct repository *r)
195 {
196         if (!r->gitdir)
197                 return 0;
198
199         if (read_replace_refs) {
200                 prepare_replace_object(r);
201                 if (hashmap_get_size(&r->objects->replace_map->map))
202                         return 0;
203         }
204
205         prepare_commit_graft(r);
206         if (r->parsed_objects &&
207             (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent))
208                 return 0;
209         if (is_repository_shallow(r))
210                 return 0;
211
212         return 1;
213 }
214
215 int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
216 {
217         *fd = git_open(graph_file);
218         if (*fd < 0)
219                 return 0;
220         if (fstat(*fd, st)) {
221                 close(*fd);
222                 return 0;
223         }
224         return 1;
225 }
226
227 struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
228                                                  struct object_directory *odb)
229 {
230         void *graph_map;
231         size_t graph_size;
232         struct commit_graph *ret;
233
234         graph_size = xsize_t(st->st_size);
235
236         if (graph_size < GRAPH_MIN_SIZE) {
237                 close(fd);
238                 error(_("commit-graph file is too small"));
239                 return NULL;
240         }
241         graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
242         close(fd);
243         ret = parse_commit_graph(graph_map, graph_size);
244
245         if (ret)
246                 ret->odb = odb;
247         else
248                 munmap(graph_map, graph_size);
249
250         return ret;
251 }
252
253 static int verify_commit_graph_lite(struct commit_graph *g)
254 {
255         /*
256          * Basic validation shared between parse_commit_graph()
257          * which'll be called every time the graph is used, and the
258          * much more expensive verify_commit_graph() used by
259          * "commit-graph verify".
260          *
261          * There should only be very basic checks here to ensure that
262          * we don't e.g. segfault in fill_commit_in_graph(), but
263          * because this is a very hot codepath nothing that e.g. loops
264          * over g->num_commits, or runs a checksum on the commit-graph
265          * itself.
266          */
267         if (!g->chunk_oid_fanout) {
268                 error("commit-graph is missing the OID Fanout chunk");
269                 return 1;
270         }
271         if (!g->chunk_oid_lookup) {
272                 error("commit-graph is missing the OID Lookup chunk");
273                 return 1;
274         }
275         if (!g->chunk_commit_data) {
276                 error("commit-graph is missing the Commit Data chunk");
277                 return 1;
278         }
279
280         return 0;
281 }
282
283 struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
284 {
285         const unsigned char *data, *chunk_lookup;
286         uint32_t i;
287         struct commit_graph *graph;
288         uint64_t next_chunk_offset;
289         uint32_t graph_signature;
290         unsigned char graph_version, hash_version;
291
292         if (!graph_map)
293                 return NULL;
294
295         if (graph_size < GRAPH_MIN_SIZE)
296                 return NULL;
297
298         data = (const unsigned char *)graph_map;
299
300         graph_signature = get_be32(data);
301         if (graph_signature != GRAPH_SIGNATURE) {
302                 error(_("commit-graph signature %X does not match signature %X"),
303                       graph_signature, GRAPH_SIGNATURE);
304                 return NULL;
305         }
306
307         graph_version = *(unsigned char*)(data + 4);
308         if (graph_version != GRAPH_VERSION) {
309                 error(_("commit-graph version %X does not match version %X"),
310                       graph_version, GRAPH_VERSION);
311                 return NULL;
312         }
313
314         hash_version = *(unsigned char*)(data + 5);
315         if (hash_version != oid_version()) {
316                 error(_("commit-graph hash version %X does not match version %X"),
317                       hash_version, oid_version());
318                 return NULL;
319         }
320
321         graph = alloc_commit_graph();
322
323         graph->hash_len = the_hash_algo->rawsz;
324         graph->num_chunks = *(unsigned char*)(data + 6);
325         graph->data = graph_map;
326         graph->data_len = graph_size;
327
328         if (graph_size < GRAPH_HEADER_SIZE +
329                          (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
330                          GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
331                 error(_("commit-graph file is too small to hold %u chunks"),
332                       graph->num_chunks);
333                 free(graph);
334                 return NULL;
335         }
336
337         chunk_lookup = data + 8;
338         next_chunk_offset = get_be64(chunk_lookup + 4);
339         for (i = 0; i < graph->num_chunks; i++) {
340                 uint32_t chunk_id;
341                 uint64_t chunk_offset = next_chunk_offset;
342                 int chunk_repeated = 0;
343
344                 chunk_id = get_be32(chunk_lookup + 0);
345
346                 chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
347                 next_chunk_offset = get_be64(chunk_lookup + 4);
348
349                 if (chunk_offset > graph_size - the_hash_algo->rawsz) {
350                         error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
351                               (uint32_t)chunk_offset);
352                         goto free_and_return;
353                 }
354
355                 switch (chunk_id) {
356                 case GRAPH_CHUNKID_OIDFANOUT:
357                         if (graph->chunk_oid_fanout)
358                                 chunk_repeated = 1;
359                         else
360                                 graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
361                         break;
362
363                 case GRAPH_CHUNKID_OIDLOOKUP:
364                         if (graph->chunk_oid_lookup)
365                                 chunk_repeated = 1;
366                         else {
367                                 graph->chunk_oid_lookup = data + chunk_offset;
368                                 graph->num_commits = (next_chunk_offset - chunk_offset)
369                                                      / graph->hash_len;
370                         }
371                         break;
372
373                 case GRAPH_CHUNKID_DATA:
374                         if (graph->chunk_commit_data)
375                                 chunk_repeated = 1;
376                         else
377                                 graph->chunk_commit_data = data + chunk_offset;
378                         break;
379
380                 case GRAPH_CHUNKID_EXTRAEDGES:
381                         if (graph->chunk_extra_edges)
382                                 chunk_repeated = 1;
383                         else
384                                 graph->chunk_extra_edges = data + chunk_offset;
385                         break;
386
387                 case GRAPH_CHUNKID_BASE:
388                         if (graph->chunk_base_graphs)
389                                 chunk_repeated = 1;
390                         else
391                                 graph->chunk_base_graphs = data + chunk_offset;
392                         break;
393
394                 case GRAPH_CHUNKID_BLOOMINDEXES:
395                         if (graph->chunk_bloom_indexes)
396                                 chunk_repeated = 1;
397                         else
398                                 graph->chunk_bloom_indexes = data + chunk_offset;
399                         break;
400
401                 case GRAPH_CHUNKID_BLOOMDATA:
402                         if (graph->chunk_bloom_data)
403                                 chunk_repeated = 1;
404                         else {
405                                 uint32_t hash_version;
406                                 graph->chunk_bloom_data = data + chunk_offset;
407                                 hash_version = get_be32(data + chunk_offset);
408
409                                 if (hash_version != 1)
410                                         break;
411
412                                 graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
413                                 graph->bloom_filter_settings->hash_version = hash_version;
414                                 graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
415                                 graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
416                         }
417                         break;
418                 }
419
420                 if (chunk_repeated) {
421                         error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
422                         goto free_and_return;
423                 }
424         }
425
426         if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
427                 init_bloom_filters();
428         } else {
429                 /* We need both the bloom chunks to exist together. Else ignore the data */
430                 graph->chunk_bloom_indexes = NULL;
431                 graph->chunk_bloom_data = NULL;
432                 FREE_AND_NULL(graph->bloom_filter_settings);
433         }
434
435         hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len);
436
437         if (verify_commit_graph_lite(graph))
438                 goto free_and_return;
439
440         return graph;
441
442 free_and_return:
443         free(graph->bloom_filter_settings);
444         free(graph);
445         return NULL;
446 }
447
448 static struct commit_graph *load_commit_graph_one(const char *graph_file,
449                                                   struct object_directory *odb)
450 {
451
452         struct stat st;
453         int fd;
454         struct commit_graph *g;
455         int open_ok = open_commit_graph(graph_file, &fd, &st);
456
457         if (!open_ok)
458                 return NULL;
459
460         g = load_commit_graph_one_fd_st(fd, &st, odb);
461
462         if (g)
463                 g->filename = xstrdup(graph_file);
464
465         return g;
466 }
467
468 static struct commit_graph *load_commit_graph_v1(struct repository *r,
469                                                  struct object_directory *odb)
470 {
471         char *graph_name = get_commit_graph_filename(odb);
472         struct commit_graph *g = load_commit_graph_one(graph_name, odb);
473         free(graph_name);
474
475         return g;
476 }
477
478 static int add_graph_to_chain(struct commit_graph *g,
479                               struct commit_graph *chain,
480                               struct object_id *oids,
481                               int n)
482 {
483         struct commit_graph *cur_g = chain;
484
485         if (n && !g->chunk_base_graphs) {
486                 warning(_("commit-graph has no base graphs chunk"));
487                 return 0;
488         }
489
490         while (n) {
491                 n--;
492
493                 if (!cur_g ||
494                     !oideq(&oids[n], &cur_g->oid) ||
495                     !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) {
496                         warning(_("commit-graph chain does not match"));
497                         return 0;
498                 }
499
500                 cur_g = cur_g->base_graph;
501         }
502
503         g->base_graph = chain;
504
505         if (chain)
506                 g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
507
508         return 1;
509 }
510
511 static struct commit_graph *load_commit_graph_chain(struct repository *r,
512                                                     struct object_directory *odb)
513 {
514         struct commit_graph *graph_chain = NULL;
515         struct strbuf line = STRBUF_INIT;
516         struct stat st;
517         struct object_id *oids;
518         int i = 0, valid = 1, count;
519         char *chain_name = get_chain_filename(odb);
520         FILE *fp;
521         int stat_res;
522
523         fp = fopen(chain_name, "r");
524         stat_res = stat(chain_name, &st);
525         free(chain_name);
526
527         if (!fp ||
528             stat_res ||
529             st.st_size <= the_hash_algo->hexsz)
530                 return NULL;
531
532         count = st.st_size / (the_hash_algo->hexsz + 1);
533         oids = xcalloc(count, sizeof(struct object_id));
534
535         prepare_alt_odb(r);
536
537         for (i = 0; i < count; i++) {
538                 struct object_directory *odb;
539
540                 if (strbuf_getline_lf(&line, fp) == EOF)
541                         break;
542
543                 if (get_oid_hex(line.buf, &oids[i])) {
544                         warning(_("invalid commit-graph chain: line '%s' not a hash"),
545                                 line.buf);
546                         valid = 0;
547                         break;
548                 }
549
550                 valid = 0;
551                 for (odb = r->objects->odb; odb; odb = odb->next) {
552                         char *graph_name = get_split_graph_filename(odb, line.buf);
553                         struct commit_graph *g = load_commit_graph_one(graph_name, odb);
554
555                         free(graph_name);
556
557                         if (g) {
558                                 if (add_graph_to_chain(g, graph_chain, oids, i)) {
559                                         graph_chain = g;
560                                         valid = 1;
561                                 }
562
563                                 break;
564                         }
565                 }
566
567                 if (!valid) {
568                         warning(_("unable to find all commit-graph files"));
569                         break;
570                 }
571         }
572
573         free(oids);
574         fclose(fp);
575         strbuf_release(&line);
576
577         return graph_chain;
578 }
579
580 struct commit_graph *read_commit_graph_one(struct repository *r,
581                                            struct object_directory *odb)
582 {
583         struct commit_graph *g = load_commit_graph_v1(r, odb);
584
585         if (!g)
586                 g = load_commit_graph_chain(r, odb);
587
588         return g;
589 }
590
591 static void prepare_commit_graph_one(struct repository *r,
592                                      struct object_directory *odb)
593 {
594
595         if (r->objects->commit_graph)
596                 return;
597
598         r->objects->commit_graph = read_commit_graph_one(r, odb);
599 }
600
601 /*
602  * Return 1 if commit_graph is non-NULL, and 0 otherwise.
603  *
604  * On the first invocation, this function attempts to load the commit
605  * graph if the_repository is configured to have one.
606  */
607 static int prepare_commit_graph(struct repository *r)
608 {
609         struct object_directory *odb;
610
611         /*
612          * This must come before the "already attempted?" check below, because
613          * we want to disable even an already-loaded graph file.
614          */
615         if (r->commit_graph_disabled)
616                 return 0;
617
618         if (r->objects->commit_graph_attempted)
619                 return !!r->objects->commit_graph;
620         r->objects->commit_graph_attempted = 1;
621
622         prepare_repo_settings(r);
623
624         if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
625             r->settings.core_commit_graph != 1)
626                 /*
627                  * This repository is not configured to use commit graphs, so
628                  * do not load one. (But report commit_graph_attempted anyway
629                  * so that commit graph loading is not attempted again for this
630                  * repository.)
631                  */
632                 return 0;
633
634         if (!commit_graph_compatible(r))
635                 return 0;
636
637         prepare_alt_odb(r);
638         for (odb = r->objects->odb;
639              !r->objects->commit_graph && odb;
640              odb = odb->next)
641                 prepare_commit_graph_one(r, odb);
642         return !!r->objects->commit_graph;
643 }
644
645 int generation_numbers_enabled(struct repository *r)
646 {
647         uint32_t first_generation;
648         struct commit_graph *g;
649         if (!prepare_commit_graph(r))
650                return 0;
651
652         g = r->objects->commit_graph;
653
654         if (!g->num_commits)
655                 return 0;
656
657         first_generation = get_be32(g->chunk_commit_data +
658                                     g->hash_len + 8) >> 2;
659
660         return !!first_generation;
661 }
662
663 struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
664 {
665         struct commit_graph *g = r->objects->commit_graph;
666         while (g) {
667                 if (g->bloom_filter_settings)
668                         return g->bloom_filter_settings;
669                 g = g->base_graph;
670         }
671         return NULL;
672 }
673
674 static void close_commit_graph_one(struct commit_graph *g)
675 {
676         if (!g)
677                 return;
678
679         close_commit_graph_one(g->base_graph);
680         free_commit_graph(g);
681 }
682
683 void close_commit_graph(struct raw_object_store *o)
684 {
685         close_commit_graph_one(o->commit_graph);
686         o->commit_graph = NULL;
687 }
688
689 static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
690 {
691         return bsearch_hash(oid->hash, g->chunk_oid_fanout,
692                             g->chunk_oid_lookup, g->hash_len, pos);
693 }
694
695 static void load_oid_from_graph(struct commit_graph *g,
696                                 uint32_t pos,
697                                 struct object_id *oid)
698 {
699         uint32_t lex_index;
700
701         while (g && pos < g->num_commits_in_base)
702                 g = g->base_graph;
703
704         if (!g)
705                 BUG("NULL commit-graph");
706
707         if (pos >= g->num_commits + g->num_commits_in_base)
708                 die(_("invalid commit position. commit-graph is likely corrupt"));
709
710         lex_index = pos - g->num_commits_in_base;
711
712         hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index);
713 }
714
715 static struct commit_list **insert_parent_or_die(struct repository *r,
716                                                  struct commit_graph *g,
717                                                  uint32_t pos,
718                                                  struct commit_list **pptr)
719 {
720         struct commit *c;
721         struct object_id oid;
722
723         if (pos >= g->num_commits + g->num_commits_in_base)
724                 die("invalid parent position %"PRIu32, pos);
725
726         load_oid_from_graph(g, pos, &oid);
727         c = lookup_commit(r, &oid);
728         if (!c)
729                 die(_("could not find commit %s"), oid_to_hex(&oid));
730         commit_graph_data_at(c)->graph_pos = pos;
731         return &commit_list_insert(c, pptr)->next;
732 }
733
734 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
735 {
736         const unsigned char *commit_data;
737         struct commit_graph_data *graph_data;
738         uint32_t lex_index;
739
740         while (pos < g->num_commits_in_base)
741                 g = g->base_graph;
742
743         lex_index = pos - g->num_commits_in_base;
744         commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
745
746         graph_data = commit_graph_data_at(item);
747         graph_data->graph_pos = pos;
748         graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
749 }
750
751 static inline void set_commit_tree(struct commit *c, struct tree *t)
752 {
753         c->maybe_tree = t;
754 }
755
756 static int fill_commit_in_graph(struct repository *r,
757                                 struct commit *item,
758                                 struct commit_graph *g, uint32_t pos)
759 {
760         uint32_t edge_value;
761         uint32_t *parent_data_ptr;
762         uint64_t date_low, date_high;
763         struct commit_list **pptr;
764         struct commit_graph_data *graph_data;
765         const unsigned char *commit_data;
766         uint32_t lex_index;
767
768         while (pos < g->num_commits_in_base)
769                 g = g->base_graph;
770
771         if (pos >= g->num_commits + g->num_commits_in_base)
772                 die(_("invalid commit position. commit-graph is likely corrupt"));
773
774         /*
775          * Store the "full" position, but then use the
776          * "local" position for the rest of the calculation.
777          */
778         graph_data = commit_graph_data_at(item);
779         graph_data->graph_pos = pos;
780         lex_index = pos - g->num_commits_in_base;
781
782         commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
783
784         item->object.parsed = 1;
785
786         set_commit_tree(item, NULL);
787
788         date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
789         date_low = get_be32(commit_data + g->hash_len + 12);
790         item->date = (timestamp_t)((date_high << 32) | date_low);
791
792         graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
793
794         pptr = &item->parents;
795
796         edge_value = get_be32(commit_data + g->hash_len);
797         if (edge_value == GRAPH_PARENT_NONE)
798                 return 1;
799         pptr = insert_parent_or_die(r, g, edge_value, pptr);
800
801         edge_value = get_be32(commit_data + g->hash_len + 4);
802         if (edge_value == GRAPH_PARENT_NONE)
803                 return 1;
804         if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
805                 pptr = insert_parent_or_die(r, g, edge_value, pptr);
806                 return 1;
807         }
808
809         parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
810                           4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
811         do {
812                 edge_value = get_be32(parent_data_ptr);
813                 pptr = insert_parent_or_die(r, g,
814                                             edge_value & GRAPH_EDGE_LAST_MASK,
815                                             pptr);
816                 parent_data_ptr++;
817         } while (!(edge_value & GRAPH_LAST_EDGE));
818
819         return 1;
820 }
821
822 static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
823 {
824         uint32_t graph_pos = commit_graph_position(item);
825         if (graph_pos != COMMIT_NOT_FROM_GRAPH) {
826                 *pos = graph_pos;
827                 return 1;
828         } else {
829                 struct commit_graph *cur_g = g;
830                 uint32_t lex_index;
831
832                 while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index))
833                         cur_g = cur_g->base_graph;
834
835                 if (cur_g) {
836                         *pos = lex_index + cur_g->num_commits_in_base;
837                         return 1;
838                 }
839
840                 return 0;
841         }
842 }
843
844 static int parse_commit_in_graph_one(struct repository *r,
845                                      struct commit_graph *g,
846                                      struct commit *item)
847 {
848         uint32_t pos;
849
850         if (item->object.parsed)
851                 return 1;
852
853         if (find_commit_in_graph(item, g, &pos))
854                 return fill_commit_in_graph(r, item, g, pos);
855
856         return 0;
857 }
858
859 int parse_commit_in_graph(struct repository *r, struct commit *item)
860 {
861         static int checked_env = 0;
862
863         if (!checked_env &&
864             git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
865                 die("dying as requested by the '%s' variable on commit-graph parse!",
866                     GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
867         checked_env = 1;
868
869         if (!prepare_commit_graph(r))
870                 return 0;
871         return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
872 }
873
874 void load_commit_graph_info(struct repository *r, struct commit *item)
875 {
876         uint32_t pos;
877         if (!prepare_commit_graph(r))
878                 return;
879         if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
880                 fill_commit_graph_info(item, r->objects->commit_graph, pos);
881 }
882
883 static struct tree *load_tree_for_commit(struct repository *r,
884                                          struct commit_graph *g,
885                                          struct commit *c)
886 {
887         struct object_id oid;
888         const unsigned char *commit_data;
889         uint32_t graph_pos = commit_graph_position(c);
890
891         while (graph_pos < g->num_commits_in_base)
892                 g = g->base_graph;
893
894         commit_data = g->chunk_commit_data +
895                         GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base);
896
897         hashcpy(oid.hash, commit_data);
898         set_commit_tree(c, lookup_tree(r, &oid));
899
900         return c->maybe_tree;
901 }
902
903 static struct tree *get_commit_tree_in_graph_one(struct repository *r,
904                                                  struct commit_graph *g,
905                                                  const struct commit *c)
906 {
907         if (c->maybe_tree)
908                 return c->maybe_tree;
909         if (commit_graph_position(c) == COMMIT_NOT_FROM_GRAPH)
910                 BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
911
912         return load_tree_for_commit(r, g, (struct commit *)c);
913 }
914
915 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
916 {
917         return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
918 }
919
920 struct packed_commit_list {
921         struct commit **list;
922         int nr;
923         int alloc;
924 };
925
926 struct packed_oid_list {
927         struct object_id *list;
928         int nr;
929         int alloc;
930 };
931
932 struct write_commit_graph_context {
933         struct repository *r;
934         struct object_directory *odb;
935         char *graph_name;
936         struct packed_oid_list oids;
937         struct packed_commit_list commits;
938         int num_extra_edges;
939         unsigned long approx_nr_objects;
940         struct progress *progress;
941         int progress_done;
942         uint64_t progress_cnt;
943
944         char *base_graph_name;
945         int num_commit_graphs_before;
946         int num_commit_graphs_after;
947         char **commit_graph_filenames_before;
948         char **commit_graph_filenames_after;
949         char **commit_graph_hash_after;
950         uint32_t new_num_commits_in_base;
951         struct commit_graph *new_base_graph;
952
953         unsigned append:1,
954                  report_progress:1,
955                  split:1,
956                  changed_paths:1,
957                  order_by_pack:1;
958
959         const struct split_commit_graph_opts *split_opts;
960         size_t total_bloom_filter_data_size;
961         const struct bloom_filter_settings *bloom_settings;
962 };
963
964 static int write_graph_chunk_fanout(struct hashfile *f,
965                                     struct write_commit_graph_context *ctx)
966 {
967         int i, count = 0;
968         struct commit **list = ctx->commits.list;
969
970         /*
971          * Write the first-level table (the list is sorted,
972          * but we use a 256-entry lookup to be able to avoid
973          * having to do eight extra binary search iterations).
974          */
975         for (i = 0; i < 256; i++) {
976                 while (count < ctx->commits.nr) {
977                         if ((*list)->object.oid.hash[0] != i)
978                                 break;
979                         display_progress(ctx->progress, ++ctx->progress_cnt);
980                         count++;
981                         list++;
982                 }
983
984                 hashwrite_be32(f, count);
985         }
986
987         return 0;
988 }
989
990 static int write_graph_chunk_oids(struct hashfile *f,
991                                   struct write_commit_graph_context *ctx)
992 {
993         struct commit **list = ctx->commits.list;
994         int count;
995         for (count = 0; count < ctx->commits.nr; count++, list++) {
996                 display_progress(ctx->progress, ++ctx->progress_cnt);
997                 hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
998         }
999
1000         return 0;
1001 }
1002
1003 static const unsigned char *commit_to_sha1(size_t index, void *table)
1004 {
1005         struct commit **commits = table;
1006         return commits[index]->object.oid.hash;
1007 }
1008
1009 static int write_graph_chunk_data(struct hashfile *f,
1010                                   struct write_commit_graph_context *ctx)
1011 {
1012         struct commit **list = ctx->commits.list;
1013         struct commit **last = ctx->commits.list + ctx->commits.nr;
1014         uint32_t num_extra_edges = 0;
1015
1016         while (list < last) {
1017                 struct commit_list *parent;
1018                 struct object_id *tree;
1019                 int edge_value;
1020                 uint32_t packedDate[2];
1021                 display_progress(ctx->progress, ++ctx->progress_cnt);
1022
1023                 if (parse_commit_no_graph(*list))
1024                         die(_("unable to parse commit %s"),
1025                                 oid_to_hex(&(*list)->object.oid));
1026                 tree = get_commit_tree_oid(*list);
1027                 hashwrite(f, tree->hash, the_hash_algo->rawsz);
1028
1029                 parent = (*list)->parents;
1030
1031                 if (!parent)
1032                         edge_value = GRAPH_PARENT_NONE;
1033                 else {
1034                         edge_value = sha1_pos(parent->item->object.oid.hash,
1035                                               ctx->commits.list,
1036                                               ctx->commits.nr,
1037                                               commit_to_sha1);
1038
1039                         if (edge_value >= 0)
1040                                 edge_value += ctx->new_num_commits_in_base;
1041                         else if (ctx->new_base_graph) {
1042                                 uint32_t pos;
1043                                 if (find_commit_in_graph(parent->item,
1044                                                          ctx->new_base_graph,
1045                                                          &pos))
1046                                         edge_value = pos;
1047                         }
1048
1049                         if (edge_value < 0)
1050                                 BUG("missing parent %s for commit %s",
1051                                     oid_to_hex(&parent->item->object.oid),
1052                                     oid_to_hex(&(*list)->object.oid));
1053                 }
1054
1055                 hashwrite_be32(f, edge_value);
1056
1057                 if (parent)
1058                         parent = parent->next;
1059
1060                 if (!parent)
1061                         edge_value = GRAPH_PARENT_NONE;
1062                 else if (parent->next)
1063                         edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
1064                 else {
1065                         edge_value = sha1_pos(parent->item->object.oid.hash,
1066                                               ctx->commits.list,
1067                                               ctx->commits.nr,
1068                                               commit_to_sha1);
1069
1070                         if (edge_value >= 0)
1071                                 edge_value += ctx->new_num_commits_in_base;
1072                         else if (ctx->new_base_graph) {
1073                                 uint32_t pos;
1074                                 if (find_commit_in_graph(parent->item,
1075                                                          ctx->new_base_graph,
1076                                                          &pos))
1077                                         edge_value = pos;
1078                         }
1079
1080                         if (edge_value < 0)
1081                                 BUG("missing parent %s for commit %s",
1082                                     oid_to_hex(&parent->item->object.oid),
1083                                     oid_to_hex(&(*list)->object.oid));
1084                 }
1085
1086                 hashwrite_be32(f, edge_value);
1087
1088                 if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
1089                         do {
1090                                 num_extra_edges++;
1091                                 parent = parent->next;
1092                         } while (parent);
1093                 }
1094
1095                 if (sizeof((*list)->date) > 4)
1096                         packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
1097                 else
1098                         packedDate[0] = 0;
1099
1100                 packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2);
1101
1102                 packedDate[1] = htonl((*list)->date);
1103                 hashwrite(f, packedDate, 8);
1104
1105                 list++;
1106         }
1107
1108         return 0;
1109 }
1110
1111 static int write_graph_chunk_extra_edges(struct hashfile *f,
1112                                          struct write_commit_graph_context *ctx)
1113 {
1114         struct commit **list = ctx->commits.list;
1115         struct commit **last = ctx->commits.list + ctx->commits.nr;
1116         struct commit_list *parent;
1117
1118         while (list < last) {
1119                 int num_parents = 0;
1120
1121                 display_progress(ctx->progress, ++ctx->progress_cnt);
1122
1123                 for (parent = (*list)->parents; num_parents < 3 && parent;
1124                      parent = parent->next)
1125                         num_parents++;
1126
1127                 if (num_parents <= 2) {
1128                         list++;
1129                         continue;
1130                 }
1131
1132                 /* Since num_parents > 2, this initializer is safe. */
1133                 for (parent = (*list)->parents->next; parent; parent = parent->next) {
1134                         int edge_value = sha1_pos(parent->item->object.oid.hash,
1135                                                   ctx->commits.list,
1136                                                   ctx->commits.nr,
1137                                                   commit_to_sha1);
1138
1139                         if (edge_value >= 0)
1140                                 edge_value += ctx->new_num_commits_in_base;
1141                         else if (ctx->new_base_graph) {
1142                                 uint32_t pos;
1143                                 if (find_commit_in_graph(parent->item,
1144                                                          ctx->new_base_graph,
1145                                                          &pos))
1146                                         edge_value = pos;
1147                         }
1148
1149                         if (edge_value < 0)
1150                                 BUG("missing parent %s for commit %s",
1151                                     oid_to_hex(&parent->item->object.oid),
1152                                     oid_to_hex(&(*list)->object.oid));
1153                         else if (!parent->next)
1154                                 edge_value |= GRAPH_LAST_EDGE;
1155
1156                         hashwrite_be32(f, edge_value);
1157                 }
1158
1159                 list++;
1160         }
1161
1162         return 0;
1163 }
1164
1165 static int write_graph_chunk_bloom_indexes(struct hashfile *f,
1166                                            struct write_commit_graph_context *ctx)
1167 {
1168         struct commit **list = ctx->commits.list;
1169         struct commit **last = ctx->commits.list + ctx->commits.nr;
1170         uint32_t cur_pos = 0;
1171
1172         while (list < last) {
1173                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1174                 size_t len = filter ? filter->len : 0;
1175                 cur_pos += len;
1176                 display_progress(ctx->progress, ++ctx->progress_cnt);
1177                 hashwrite_be32(f, cur_pos);
1178                 list++;
1179         }
1180
1181         return 0;
1182 }
1183
1184 static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1185 {
1186         struct json_writer jw = JSON_WRITER_INIT;
1187
1188         jw_object_begin(&jw, 0);
1189         jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1190         jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1191         jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1192         jw_end(&jw);
1193
1194         trace2_data_json("bloom", ctx->r, "settings", &jw);
1195
1196         jw_release(&jw);
1197 }
1198
1199 static int write_graph_chunk_bloom_data(struct hashfile *f,
1200                                         struct write_commit_graph_context *ctx)
1201 {
1202         struct commit **list = ctx->commits.list;
1203         struct commit **last = ctx->commits.list + ctx->commits.nr;
1204
1205         trace2_bloom_filter_settings(ctx);
1206
1207         hashwrite_be32(f, ctx->bloom_settings->hash_version);
1208         hashwrite_be32(f, ctx->bloom_settings->num_hashes);
1209         hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
1210
1211         while (list < last) {
1212                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1213                 size_t len = filter ? filter->len : 0;
1214
1215                 display_progress(ctx->progress, ++ctx->progress_cnt);
1216                 if (len)
1217                         hashwrite(f, filter->data, len * sizeof(unsigned char));
1218                 list++;
1219         }
1220
1221         return 0;
1222 }
1223
1224 static int oid_compare(const void *_a, const void *_b)
1225 {
1226         const struct object_id *a = (const struct object_id *)_a;
1227         const struct object_id *b = (const struct object_id *)_b;
1228         return oidcmp(a, b);
1229 }
1230
1231 static int add_packed_commits(const struct object_id *oid,
1232                               struct packed_git *pack,
1233                               uint32_t pos,
1234                               void *data)
1235 {
1236         struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
1237         enum object_type type;
1238         off_t offset = nth_packed_object_offset(pack, pos);
1239         struct object_info oi = OBJECT_INFO_INIT;
1240
1241         if (ctx->progress)
1242                 display_progress(ctx->progress, ++ctx->progress_done);
1243
1244         oi.typep = &type;
1245         if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
1246                 die(_("unable to get type of object %s"), oid_to_hex(oid));
1247
1248         if (type != OBJ_COMMIT)
1249                 return 0;
1250
1251         ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1252         oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid);
1253         ctx->oids.nr++;
1254
1255         set_commit_pos(ctx->r, oid);
1256
1257         return 0;
1258 }
1259
1260 static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
1261 {
1262         struct commit_list *parent;
1263         for (parent = commit->parents; parent; parent = parent->next) {
1264                 if (!(parent->item->object.flags & REACHABLE)) {
1265                         ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1266                         oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid));
1267                         ctx->oids.nr++;
1268                         parent->item->object.flags |= REACHABLE;
1269                 }
1270         }
1271 }
1272
1273 static void close_reachable(struct write_commit_graph_context *ctx)
1274 {
1275         int i;
1276         struct commit *commit;
1277         enum commit_graph_split_flags flags = ctx->split_opts ?
1278                 ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1279
1280         if (ctx->report_progress)
1281                 ctx->progress = start_delayed_progress(
1282                                         _("Loading known commits in commit graph"),
1283                                         ctx->oids.nr);
1284         for (i = 0; i < ctx->oids.nr; i++) {
1285                 display_progress(ctx->progress, i + 1);
1286                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1287                 if (commit)
1288                         commit->object.flags |= REACHABLE;
1289         }
1290         stop_progress(&ctx->progress);
1291
1292         /*
1293          * As this loop runs, ctx->oids.nr may grow, but not more
1294          * than the number of missing commits in the reachable
1295          * closure.
1296          */
1297         if (ctx->report_progress)
1298                 ctx->progress = start_delayed_progress(
1299                                         _("Expanding reachable commits in commit graph"),
1300                                         0);
1301         for (i = 0; i < ctx->oids.nr; i++) {
1302                 display_progress(ctx->progress, i + 1);
1303                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1304
1305                 if (!commit)
1306                         continue;
1307                 if (ctx->split) {
1308                         if ((!parse_commit(commit) &&
1309                              commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) ||
1310                             flags == COMMIT_GRAPH_SPLIT_REPLACE)
1311                                 add_missing_parents(ctx, commit);
1312                 } else if (!parse_commit_no_graph(commit))
1313                         add_missing_parents(ctx, commit);
1314         }
1315         stop_progress(&ctx->progress);
1316
1317         if (ctx->report_progress)
1318                 ctx->progress = start_delayed_progress(
1319                                         _("Clearing commit marks in commit graph"),
1320                                         ctx->oids.nr);
1321         for (i = 0; i < ctx->oids.nr; i++) {
1322                 display_progress(ctx->progress, i + 1);
1323                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1324
1325                 if (commit)
1326                         commit->object.flags &= ~REACHABLE;
1327         }
1328         stop_progress(&ctx->progress);
1329 }
1330
1331 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
1332 {
1333         int i;
1334         struct commit_list *list = NULL;
1335
1336         if (ctx->report_progress)
1337                 ctx->progress = start_delayed_progress(
1338                                         _("Computing commit graph generation numbers"),
1339                                         ctx->commits.nr);
1340         for (i = 0; i < ctx->commits.nr; i++) {
1341                 uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation;
1342
1343                 display_progress(ctx->progress, i + 1);
1344                 if (generation != GENERATION_NUMBER_INFINITY &&
1345                     generation != GENERATION_NUMBER_ZERO)
1346                         continue;
1347
1348                 commit_list_insert(ctx->commits.list[i], &list);
1349                 while (list) {
1350                         struct commit *current = list->item;
1351                         struct commit_list *parent;
1352                         int all_parents_computed = 1;
1353                         uint32_t max_generation = 0;
1354
1355                         for (parent = current->parents; parent; parent = parent->next) {
1356                                 generation = commit_graph_data_at(parent->item)->generation;
1357
1358                                 if (generation == GENERATION_NUMBER_INFINITY ||
1359                                     generation == GENERATION_NUMBER_ZERO) {
1360                                         all_parents_computed = 0;
1361                                         commit_list_insert(parent->item, &list);
1362                                         break;
1363                                 } else if (generation > max_generation) {
1364                                         max_generation = generation;
1365                                 }
1366                         }
1367
1368                         if (all_parents_computed) {
1369                                 struct commit_graph_data *data = commit_graph_data_at(current);
1370
1371                                 data->generation = max_generation + 1;
1372                                 pop_commit(&list);
1373
1374                                 if (data->generation > GENERATION_NUMBER_MAX)
1375                                         data->generation = GENERATION_NUMBER_MAX;
1376                         }
1377                 }
1378         }
1379         stop_progress(&ctx->progress);
1380 }
1381
1382 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
1383 {
1384         int i;
1385         struct progress *progress = NULL;
1386         struct commit **sorted_commits;
1387
1388         init_bloom_filters();
1389
1390         if (ctx->report_progress)
1391                 progress = start_delayed_progress(
1392                         _("Computing commit changed paths Bloom filters"),
1393                         ctx->commits.nr);
1394
1395         ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
1396         COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
1397
1398         if (ctx->order_by_pack)
1399                 QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
1400         else
1401                 QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
1402
1403         for (i = 0; i < ctx->commits.nr; i++) {
1404                 struct commit *c = sorted_commits[i];
1405                 struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
1406                 ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
1407                 display_progress(progress, i + 1);
1408         }
1409
1410         free(sorted_commits);
1411         stop_progress(&progress);
1412 }
1413
1414 struct refs_cb_data {
1415         struct oidset *commits;
1416         struct progress *progress;
1417 };
1418
1419 static int add_ref_to_set(const char *refname,
1420                           const struct object_id *oid,
1421                           int flags, void *cb_data)
1422 {
1423         struct object_id peeled;
1424         struct refs_cb_data *data = (struct refs_cb_data *)cb_data;
1425
1426         if (!peel_ref(refname, &peeled))
1427                 oid = &peeled;
1428         if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT)
1429                 oidset_insert(data->commits, oid);
1430
1431         display_progress(data->progress, oidset_size(data->commits));
1432
1433         return 0;
1434 }
1435
1436 int write_commit_graph_reachable(struct object_directory *odb,
1437                                  enum commit_graph_write_flags flags,
1438                                  const struct split_commit_graph_opts *split_opts)
1439 {
1440         struct oidset commits = OIDSET_INIT;
1441         struct refs_cb_data data;
1442         int result;
1443
1444         memset(&data, 0, sizeof(data));
1445         data.commits = &commits;
1446         if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
1447                 data.progress = start_delayed_progress(
1448                         _("Collecting referenced commits"), 0);
1449
1450         for_each_ref(add_ref_to_set, &data);
1451
1452         stop_progress(&data.progress);
1453
1454         result = write_commit_graph(odb, NULL, &commits,
1455                                     flags, split_opts);
1456
1457         oidset_clear(&commits);
1458         return result;
1459 }
1460
1461 static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
1462                                 struct string_list *pack_indexes)
1463 {
1464         uint32_t i;
1465         struct strbuf progress_title = STRBUF_INIT;
1466         struct strbuf packname = STRBUF_INIT;
1467         int dirlen;
1468
1469         strbuf_addf(&packname, "%s/pack/", ctx->odb->path);
1470         dirlen = packname.len;
1471         if (ctx->report_progress) {
1472                 strbuf_addf(&progress_title,
1473                             Q_("Finding commits for commit graph in %d pack",
1474                                "Finding commits for commit graph in %d packs",
1475                                pack_indexes->nr),
1476                             pack_indexes->nr);
1477                 ctx->progress = start_delayed_progress(progress_title.buf, 0);
1478                 ctx->progress_done = 0;
1479         }
1480         for (i = 0; i < pack_indexes->nr; i++) {
1481                 struct packed_git *p;
1482                 strbuf_setlen(&packname, dirlen);
1483                 strbuf_addstr(&packname, pack_indexes->items[i].string);
1484                 p = add_packed_git(packname.buf, packname.len, 1);
1485                 if (!p) {
1486                         error(_("error adding pack %s"), packname.buf);
1487                         return -1;
1488                 }
1489                 if (open_pack_index(p)) {
1490                         error(_("error opening index for %s"), packname.buf);
1491                         return -1;
1492                 }
1493                 for_each_object_in_pack(p, add_packed_commits, ctx,
1494                                         FOR_EACH_OBJECT_PACK_ORDER);
1495                 close_pack(p);
1496                 free(p);
1497         }
1498
1499         stop_progress(&ctx->progress);
1500         strbuf_release(&progress_title);
1501         strbuf_release(&packname);
1502
1503         return 0;
1504 }
1505
1506 static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
1507                                   struct oidset *commits)
1508 {
1509         struct oidset_iter iter;
1510         struct object_id *oid;
1511
1512         if (!oidset_size(commits))
1513                 return 0;
1514
1515         oidset_iter_init(commits, &iter);
1516         while ((oid = oidset_iter_next(&iter))) {
1517                 ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1518                 oidcpy(&ctx->oids.list[ctx->oids.nr], oid);
1519                 ctx->oids.nr++;
1520         }
1521
1522         return 0;
1523 }
1524
1525 static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
1526 {
1527         if (ctx->report_progress)
1528                 ctx->progress = start_delayed_progress(
1529                         _("Finding commits for commit graph among packed objects"),
1530                         ctx->approx_nr_objects);
1531         for_each_packed_object(add_packed_commits, ctx,
1532                                FOR_EACH_OBJECT_PACK_ORDER);
1533         if (ctx->progress_done < ctx->approx_nr_objects)
1534                 display_progress(ctx->progress, ctx->approx_nr_objects);
1535         stop_progress(&ctx->progress);
1536 }
1537
1538 static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
1539 {
1540         uint32_t i, count_distinct = 1;
1541
1542         if (ctx->report_progress)
1543                 ctx->progress = start_delayed_progress(
1544                         _("Counting distinct commits in commit graph"),
1545                         ctx->oids.nr);
1546         display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
1547         QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
1548
1549         for (i = 1; i < ctx->oids.nr; i++) {
1550                 display_progress(ctx->progress, i + 1);
1551                 if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
1552                         if (ctx->split) {
1553                                 struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
1554
1555                                 if (!c || commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
1556                                         continue;
1557                         }
1558
1559                         count_distinct++;
1560                 }
1561         }
1562         stop_progress(&ctx->progress);
1563
1564         return count_distinct;
1565 }
1566
1567 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
1568 {
1569         uint32_t i;
1570         enum commit_graph_split_flags flags = ctx->split_opts ?
1571                 ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1572
1573         ctx->num_extra_edges = 0;
1574         if (ctx->report_progress)
1575                 ctx->progress = start_delayed_progress(
1576                         _("Finding extra edges in commit graph"),
1577                         ctx->oids.nr);
1578         for (i = 0; i < ctx->oids.nr; i++) {
1579                 unsigned int num_parents;
1580
1581                 display_progress(ctx->progress, i + 1);
1582                 if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
1583                         continue;
1584
1585                 ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
1586                 ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
1587
1588                 if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
1589                     commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
1590                         continue;
1591
1592                 if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
1593                         parse_commit(ctx->commits.list[ctx->commits.nr]);
1594                 else
1595                         parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
1596
1597                 num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
1598                 if (num_parents > 2)
1599                         ctx->num_extra_edges += num_parents - 1;
1600
1601                 ctx->commits.nr++;
1602         }
1603         stop_progress(&ctx->progress);
1604 }
1605
1606 static int write_graph_chunk_base_1(struct hashfile *f,
1607                                     struct commit_graph *g)
1608 {
1609         int num = 0;
1610
1611         if (!g)
1612                 return 0;
1613
1614         num = write_graph_chunk_base_1(f, g->base_graph);
1615         hashwrite(f, g->oid.hash, the_hash_algo->rawsz);
1616         return num + 1;
1617 }
1618
1619 static int write_graph_chunk_base(struct hashfile *f,
1620                                   struct write_commit_graph_context *ctx)
1621 {
1622         int num = write_graph_chunk_base_1(f, ctx->new_base_graph);
1623
1624         if (num != ctx->num_commit_graphs_after - 1) {
1625                 error(_("failed to write correct number of base graph ids"));
1626                 return -1;
1627         }
1628
1629         return 0;
1630 }
1631
1632 typedef int (*chunk_write_fn)(struct hashfile *f,
1633                               struct write_commit_graph_context *ctx);
1634
1635 struct chunk_info {
1636         uint32_t id;
1637         uint64_t size;
1638         chunk_write_fn write_fn;
1639 };
1640
1641 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
1642 {
1643         uint32_t i;
1644         int fd;
1645         struct hashfile *f;
1646         struct lock_file lk = LOCK_INIT;
1647         struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
1648         const unsigned hashsz = the_hash_algo->rawsz;
1649         struct strbuf progress_title = STRBUF_INIT;
1650         int num_chunks = 3;
1651         uint64_t chunk_offset;
1652         struct object_id file_hash;
1653         struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
1654
1655         if (!ctx->bloom_settings) {
1656                 bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
1657                                                               bloom_settings.bits_per_entry);
1658                 bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
1659                                                           bloom_settings.num_hashes);
1660                 ctx->bloom_settings = &bloom_settings;
1661         }
1662
1663         if (ctx->split) {
1664                 struct strbuf tmp_file = STRBUF_INIT;
1665
1666                 strbuf_addf(&tmp_file,
1667                             "%s/info/commit-graphs/tmp_graph_XXXXXX",
1668                             ctx->odb->path);
1669                 ctx->graph_name = strbuf_detach(&tmp_file, NULL);
1670         } else {
1671                 ctx->graph_name = get_commit_graph_filename(ctx->odb);
1672         }
1673
1674         if (safe_create_leading_directories(ctx->graph_name)) {
1675                 UNLEAK(ctx->graph_name);
1676                 error(_("unable to create leading directories of %s"),
1677                         ctx->graph_name);
1678                 return -1;
1679         }
1680
1681         if (ctx->split) {
1682                 char *lock_name = get_chain_filename(ctx->odb);
1683
1684                 hold_lock_file_for_update_mode(&lk, lock_name,
1685                                                LOCK_DIE_ON_ERROR, 0444);
1686
1687                 fd = git_mkstemp_mode(ctx->graph_name, 0444);
1688                 if (fd < 0) {
1689                         error(_("unable to create temporary graph layer"));
1690                         return -1;
1691                 }
1692
1693                 if (adjust_shared_perm(ctx->graph_name)) {
1694                         error(_("unable to adjust shared permissions for '%s'"),
1695                               ctx->graph_name);
1696                         return -1;
1697                 }
1698
1699                 f = hashfd(fd, ctx->graph_name);
1700         } else {
1701                 hold_lock_file_for_update_mode(&lk, ctx->graph_name,
1702                                                LOCK_DIE_ON_ERROR, 0444);
1703                 fd = lk.tempfile->fd;
1704                 f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
1705         }
1706
1707         chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
1708         chunks[0].size = GRAPH_FANOUT_SIZE;
1709         chunks[0].write_fn = write_graph_chunk_fanout;
1710         chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
1711         chunks[1].size = hashsz * ctx->commits.nr;
1712         chunks[1].write_fn = write_graph_chunk_oids;
1713         chunks[2].id = GRAPH_CHUNKID_DATA;
1714         chunks[2].size = (hashsz + 16) * ctx->commits.nr;
1715         chunks[2].write_fn = write_graph_chunk_data;
1716         if (ctx->num_extra_edges) {
1717                 chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
1718                 chunks[num_chunks].size = 4 * ctx->num_extra_edges;
1719                 chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
1720                 num_chunks++;
1721         }
1722         if (ctx->changed_paths) {
1723                 chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
1724                 chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
1725                 chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
1726                 num_chunks++;
1727                 chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
1728                 chunks[num_chunks].size = sizeof(uint32_t) * 3
1729                                           + ctx->total_bloom_filter_data_size;
1730                 chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
1731                 num_chunks++;
1732         }
1733         if (ctx->num_commit_graphs_after > 1) {
1734                 chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
1735                 chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
1736                 chunks[num_chunks].write_fn = write_graph_chunk_base;
1737                 num_chunks++;
1738         }
1739
1740         chunks[num_chunks].id = 0;
1741         chunks[num_chunks].size = 0;
1742
1743         hashwrite_be32(f, GRAPH_SIGNATURE);
1744
1745         hashwrite_u8(f, GRAPH_VERSION);
1746         hashwrite_u8(f, oid_version());
1747         hashwrite_u8(f, num_chunks);
1748         hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
1749
1750         chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
1751         for (i = 0; i <= num_chunks; i++) {
1752                 uint32_t chunk_write[3];
1753
1754                 chunk_write[0] = htonl(chunks[i].id);
1755                 chunk_write[1] = htonl(chunk_offset >> 32);
1756                 chunk_write[2] = htonl(chunk_offset & 0xffffffff);
1757                 hashwrite(f, chunk_write, 12);
1758
1759                 chunk_offset += chunks[i].size;
1760         }
1761
1762         if (ctx->report_progress) {
1763                 strbuf_addf(&progress_title,
1764                             Q_("Writing out commit graph in %d pass",
1765                                "Writing out commit graph in %d passes",
1766                                num_chunks),
1767                             num_chunks);
1768                 ctx->progress = start_delayed_progress(
1769                         progress_title.buf,
1770                         num_chunks * ctx->commits.nr);
1771         }
1772
1773         for (i = 0; i < num_chunks; i++) {
1774                 uint64_t start_offset = f->total + f->offset;
1775
1776                 if (chunks[i].write_fn(f, ctx))
1777                         return -1;
1778
1779                 if (f->total + f->offset != start_offset + chunks[i].size)
1780                         BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
1781                             chunks[i].size, chunks[i].id,
1782                             f->total + f->offset - start_offset);
1783         }
1784
1785         stop_progress(&ctx->progress);
1786         strbuf_release(&progress_title);
1787
1788         if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) {
1789                 char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid));
1790                 char *new_base_name = get_split_graph_filename(ctx->new_base_graph->odb, new_base_hash);
1791
1792                 free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
1793                 free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]);
1794                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name;
1795                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash;
1796         }
1797
1798         close_commit_graph(ctx->r->objects);
1799         finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1800
1801         if (ctx->split) {
1802                 FILE *chainf = fdopen_lock_file(&lk, "w");
1803                 char *final_graph_name;
1804                 int result;
1805
1806                 close(fd);
1807
1808                 if (!chainf) {
1809                         error(_("unable to open commit-graph chain file"));
1810                         return -1;
1811                 }
1812
1813                 if (ctx->base_graph_name) {
1814                         const char *dest;
1815                         int idx = ctx->num_commit_graphs_after - 1;
1816                         if (ctx->num_commit_graphs_after > 1)
1817                                 idx--;
1818
1819                         dest = ctx->commit_graph_filenames_after[idx];
1820
1821                         if (strcmp(ctx->base_graph_name, dest)) {
1822                                 result = rename(ctx->base_graph_name, dest);
1823
1824                                 if (result) {
1825                                         error(_("failed to rename base commit-graph file"));
1826                                         return -1;
1827                                 }
1828                         }
1829                 } else {
1830                         char *graph_name = get_commit_graph_filename(ctx->odb);
1831                         unlink(graph_name);
1832                 }
1833
1834                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash));
1835                 final_graph_name = get_split_graph_filename(ctx->odb,
1836                                         ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
1837                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
1838
1839                 result = rename(ctx->graph_name, final_graph_name);
1840
1841                 for (i = 0; i < ctx->num_commit_graphs_after; i++)
1842                         fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]);
1843
1844                 if (result) {
1845                         error(_("failed to rename temporary commit-graph file"));
1846                         return -1;
1847                 }
1848         }
1849
1850         commit_lock_file(&lk);
1851
1852         return 0;
1853 }
1854
1855 static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
1856 {
1857         struct commit_graph *g;
1858         uint32_t num_commits;
1859         enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1860         uint32_t i;
1861
1862         int max_commits = 0;
1863         int size_mult = 2;
1864
1865         if (ctx->split_opts) {
1866                 max_commits = ctx->split_opts->max_commits;
1867
1868                 if (ctx->split_opts->size_multiple)
1869                         size_mult = ctx->split_opts->size_multiple;
1870
1871                 flags = ctx->split_opts->flags;
1872         }
1873
1874         g = ctx->r->objects->commit_graph;
1875         num_commits = ctx->commits.nr;
1876         if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
1877                 ctx->num_commit_graphs_after = 1;
1878         else
1879                 ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
1880
1881         if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
1882             flags != COMMIT_GRAPH_SPLIT_REPLACE) {
1883                 while (g && (g->num_commits <= size_mult * num_commits ||
1884                             (max_commits && num_commits > max_commits))) {
1885                         if (g->odb != ctx->odb)
1886                                 break;
1887
1888                         num_commits += g->num_commits;
1889                         g = g->base_graph;
1890
1891                         ctx->num_commit_graphs_after--;
1892                 }
1893         }
1894
1895         if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
1896                 ctx->new_base_graph = g;
1897         else if (ctx->num_commit_graphs_after != 1)
1898                 BUG("split_graph_merge_strategy: num_commit_graphs_after "
1899                     "should be 1 with --split=replace");
1900
1901         if (ctx->num_commit_graphs_after == 2) {
1902                 char *old_graph_name = get_commit_graph_filename(g->odb);
1903
1904                 if (!strcmp(g->filename, old_graph_name) &&
1905                     g->odb != ctx->odb) {
1906                         ctx->num_commit_graphs_after = 1;
1907                         ctx->new_base_graph = NULL;
1908                 }
1909
1910                 free(old_graph_name);
1911         }
1912
1913         CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
1914         CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
1915
1916         for (i = 0; i < ctx->num_commit_graphs_after &&
1917                     i < ctx->num_commit_graphs_before; i++)
1918                 ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
1919
1920         i = ctx->num_commit_graphs_before - 1;
1921         g = ctx->r->objects->commit_graph;
1922
1923         while (g) {
1924                 if (i < ctx->num_commit_graphs_after)
1925                         ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
1926
1927                 i--;
1928                 g = g->base_graph;
1929         }
1930 }
1931
1932 static void merge_commit_graph(struct write_commit_graph_context *ctx,
1933                                struct commit_graph *g)
1934 {
1935         uint32_t i;
1936         uint32_t offset = g->num_commits_in_base;
1937
1938         ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
1939
1940         for (i = 0; i < g->num_commits; i++) {
1941                 struct object_id oid;
1942                 struct commit *result;
1943
1944                 display_progress(ctx->progress, i + 1);
1945
1946                 load_oid_from_graph(g, i + offset, &oid);
1947
1948                 /* only add commits if they still exist in the repo */
1949                 result = lookup_commit_reference_gently(ctx->r, &oid, 1);
1950
1951                 if (result) {
1952                         ctx->commits.list[ctx->commits.nr] = result;
1953                         ctx->commits.nr++;
1954                 }
1955         }
1956 }
1957
1958 static int commit_compare(const void *_a, const void *_b)
1959 {
1960         const struct commit *a = *(const struct commit **)_a;
1961         const struct commit *b = *(const struct commit **)_b;
1962         return oidcmp(&a->object.oid, &b->object.oid);
1963 }
1964
1965 static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
1966 {
1967         uint32_t i;
1968
1969         if (ctx->report_progress)
1970                 ctx->progress = start_delayed_progress(
1971                                         _("Scanning merged commits"),
1972                                         ctx->commits.nr);
1973
1974         QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
1975
1976         ctx->num_extra_edges = 0;
1977         for (i = 0; i < ctx->commits.nr; i++) {
1978                 display_progress(ctx->progress, i);
1979
1980                 if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
1981                           &ctx->commits.list[i]->object.oid)) {
1982                         die(_("unexpected duplicate commit id %s"),
1983                             oid_to_hex(&ctx->commits.list[i]->object.oid));
1984                 } else {
1985                         unsigned int num_parents;
1986
1987                         num_parents = commit_list_count(ctx->commits.list[i]->parents);
1988                         if (num_parents > 2)
1989                                 ctx->num_extra_edges += num_parents - 1;
1990                 }
1991         }
1992
1993         stop_progress(&ctx->progress);
1994 }
1995
1996 static void merge_commit_graphs(struct write_commit_graph_context *ctx)
1997 {
1998         struct commit_graph *g = ctx->r->objects->commit_graph;
1999         uint32_t current_graph_number = ctx->num_commit_graphs_before;
2000
2001         while (g && current_graph_number >= ctx->num_commit_graphs_after) {
2002                 current_graph_number--;
2003
2004                 if (ctx->report_progress)
2005                         ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0);
2006
2007                 merge_commit_graph(ctx, g);
2008                 stop_progress(&ctx->progress);
2009
2010                 g = g->base_graph;
2011         }
2012
2013         if (g) {
2014                 ctx->new_base_graph = g;
2015                 ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
2016         }
2017
2018         if (ctx->new_base_graph)
2019                 ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
2020
2021         sort_and_scan_merged_commits(ctx);
2022 }
2023
2024 static void mark_commit_graphs(struct write_commit_graph_context *ctx)
2025 {
2026         uint32_t i;
2027         time_t now = time(NULL);
2028
2029         for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
2030                 struct stat st;
2031                 struct utimbuf updated_time;
2032
2033                 stat(ctx->commit_graph_filenames_before[i], &st);
2034
2035                 updated_time.actime = st.st_atime;
2036                 updated_time.modtime = now;
2037                 utime(ctx->commit_graph_filenames_before[i], &updated_time);
2038         }
2039 }
2040
2041 static void expire_commit_graphs(struct write_commit_graph_context *ctx)
2042 {
2043         struct strbuf path = STRBUF_INIT;
2044         DIR *dir;
2045         struct dirent *de;
2046         size_t dirnamelen;
2047         timestamp_t expire_time = time(NULL);
2048
2049         if (ctx->split_opts && ctx->split_opts->expire_time)
2050                 expire_time = ctx->split_opts->expire_time;
2051         if (!ctx->split) {
2052                 char *chain_file_name = get_chain_filename(ctx->odb);
2053                 unlink(chain_file_name);
2054                 free(chain_file_name);
2055                 ctx->num_commit_graphs_after = 0;
2056         }
2057
2058         strbuf_addstr(&path, ctx->odb->path);
2059         strbuf_addstr(&path, "/info/commit-graphs");
2060         dir = opendir(path.buf);
2061
2062         if (!dir)
2063                 goto out;
2064
2065         strbuf_addch(&path, '/');
2066         dirnamelen = path.len;
2067         while ((de = readdir(dir)) != NULL) {
2068                 struct stat st;
2069                 uint32_t i, found = 0;
2070
2071                 strbuf_setlen(&path, dirnamelen);
2072                 strbuf_addstr(&path, de->d_name);
2073
2074                 stat(path.buf, &st);
2075
2076                 if (st.st_mtime > expire_time)
2077                         continue;
2078                 if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
2079                         continue;
2080
2081                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2082                         if (!strcmp(ctx->commit_graph_filenames_after[i],
2083                                     path.buf)) {
2084                                 found = 1;
2085                                 break;
2086                         }
2087                 }
2088
2089                 if (!found)
2090                         unlink(path.buf);
2091         }
2092
2093 out:
2094         strbuf_release(&path);
2095 }
2096
2097 int write_commit_graph(struct object_directory *odb,
2098                        struct string_list *pack_indexes,
2099                        struct oidset *commits,
2100                        enum commit_graph_write_flags flags,
2101                        const struct split_commit_graph_opts *split_opts)
2102 {
2103         struct write_commit_graph_context *ctx;
2104         uint32_t i, count_distinct = 0;
2105         int res = 0;
2106         int replace = 0;
2107
2108         if (!commit_graph_compatible(the_repository))
2109                 return 0;
2110
2111         ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
2112         ctx->r = the_repository;
2113         ctx->odb = odb;
2114         ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
2115         ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
2116         ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
2117         ctx->split_opts = split_opts;
2118         ctx->total_bloom_filter_data_size = 0;
2119
2120         if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2121                 ctx->changed_paths = 1;
2122         if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2123                 struct commit_graph *g;
2124                 prepare_commit_graph_one(ctx->r, ctx->odb);
2125
2126                 g = ctx->r->objects->commit_graph;
2127
2128                 /* We have changed-paths already. Keep them in the next graph */
2129                 if (g && g->chunk_bloom_data) {
2130                         ctx->changed_paths = 1;
2131                         ctx->bloom_settings = g->bloom_filter_settings;
2132                 }
2133         }
2134
2135         if (ctx->split) {
2136                 struct commit_graph *g;
2137                 prepare_commit_graph(ctx->r);
2138
2139                 g = ctx->r->objects->commit_graph;
2140
2141                 while (g) {
2142                         ctx->num_commit_graphs_before++;
2143                         g = g->base_graph;
2144                 }
2145
2146                 if (ctx->num_commit_graphs_before) {
2147                         ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before);
2148                         i = ctx->num_commit_graphs_before;
2149                         g = ctx->r->objects->commit_graph;
2150
2151                         while (g) {
2152                                 ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename);
2153                                 g = g->base_graph;
2154                         }
2155                 }
2156
2157                 if (ctx->split_opts)
2158                         replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE;
2159         }
2160
2161         ctx->approx_nr_objects = approximate_object_count();
2162         ctx->oids.alloc = ctx->approx_nr_objects / 32;
2163
2164         if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits)
2165                 ctx->oids.alloc = split_opts->max_commits;
2166
2167         if (ctx->append) {
2168                 prepare_commit_graph_one(ctx->r, ctx->odb);
2169                 if (ctx->r->objects->commit_graph)
2170                         ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits;
2171         }
2172
2173         if (ctx->oids.alloc < 1024)
2174                 ctx->oids.alloc = 1024;
2175         ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc);
2176
2177         if (ctx->append && ctx->r->objects->commit_graph) {
2178                 struct commit_graph *g = ctx->r->objects->commit_graph;
2179                 for (i = 0; i < g->num_commits; i++) {
2180                         const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i;
2181                         hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash);
2182                 }
2183         }
2184
2185         if (pack_indexes) {
2186                 ctx->order_by_pack = 1;
2187                 if ((res = fill_oids_from_packs(ctx, pack_indexes)))
2188                         goto cleanup;
2189         }
2190
2191         if (commits) {
2192                 if ((res = fill_oids_from_commits(ctx, commits)))
2193                         goto cleanup;
2194         }
2195
2196         if (!pack_indexes && !commits) {
2197                 ctx->order_by_pack = 1;
2198                 fill_oids_from_all_packs(ctx);
2199         }
2200
2201         close_reachable(ctx);
2202
2203         count_distinct = count_distinct_commits(ctx);
2204
2205         if (count_distinct >= GRAPH_EDGE_LAST_MASK) {
2206                 error(_("the commit graph format cannot write %d commits"), count_distinct);
2207                 res = -1;
2208                 goto cleanup;
2209         }
2210
2211         ctx->commits.alloc = count_distinct;
2212         ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc);
2213
2214         copy_oids_to_commits(ctx);
2215
2216         if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
2217                 error(_("too many commits to write graph"));
2218                 res = -1;
2219                 goto cleanup;
2220         }
2221
2222         if (!ctx->commits.nr && !replace)
2223                 goto cleanup;
2224
2225         if (ctx->split) {
2226                 split_graph_merge_strategy(ctx);
2227
2228                 if (!replace)
2229                         merge_commit_graphs(ctx);
2230         } else
2231                 ctx->num_commit_graphs_after = 1;
2232
2233         compute_generation_numbers(ctx);
2234
2235         if (ctx->changed_paths)
2236                 compute_bloom_filters(ctx);
2237
2238         res = write_commit_graph_file(ctx);
2239
2240         if (ctx->split)
2241                 mark_commit_graphs(ctx);
2242
2243         expire_commit_graphs(ctx);
2244
2245 cleanup:
2246         free(ctx->graph_name);
2247         free(ctx->commits.list);
2248         free(ctx->oids.list);
2249
2250         if (ctx->commit_graph_filenames_after) {
2251                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2252                         free(ctx->commit_graph_filenames_after[i]);
2253                         free(ctx->commit_graph_hash_after[i]);
2254                 }
2255
2256                 for (i = 0; i < ctx->num_commit_graphs_before; i++)
2257                         free(ctx->commit_graph_filenames_before[i]);
2258
2259                 free(ctx->commit_graph_filenames_after);
2260                 free(ctx->commit_graph_filenames_before);
2261                 free(ctx->commit_graph_hash_after);
2262         }
2263
2264         free(ctx);
2265
2266         return res;
2267 }
2268
2269 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
2270 static int verify_commit_graph_error;
2271
2272 static void graph_report(const char *fmt, ...)
2273 {
2274         va_list ap;
2275
2276         verify_commit_graph_error = 1;
2277         va_start(ap, fmt);
2278         vfprintf(stderr, fmt, ap);
2279         fprintf(stderr, "\n");
2280         va_end(ap);
2281 }
2282
2283 #define GENERATION_ZERO_EXISTS 1
2284 #define GENERATION_NUMBER_EXISTS 2
2285
2286 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
2287 {
2288         uint32_t i, cur_fanout_pos = 0;
2289         struct object_id prev_oid, cur_oid, checksum;
2290         int generation_zero = 0;
2291         struct hashfile *f;
2292         int devnull;
2293         struct progress *progress = NULL;
2294         int local_error = 0;
2295
2296         if (!g) {
2297                 graph_report("no commit-graph file loaded");
2298                 return 1;
2299         }
2300
2301         verify_commit_graph_error = verify_commit_graph_lite(g);
2302         if (verify_commit_graph_error)
2303                 return verify_commit_graph_error;
2304
2305         devnull = open("/dev/null", O_WRONLY);
2306         f = hashfd(devnull, NULL);
2307         hashwrite(f, g->data, g->data_len - g->hash_len);
2308         finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
2309         if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) {
2310                 graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
2311                 verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
2312         }
2313
2314         for (i = 0; i < g->num_commits; i++) {
2315                 struct commit *graph_commit;
2316
2317                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2318
2319                 if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
2320                         graph_report(_("commit-graph has incorrect OID order: %s then %s"),
2321                                      oid_to_hex(&prev_oid),
2322                                      oid_to_hex(&cur_oid));
2323
2324                 oidcpy(&prev_oid, &cur_oid);
2325
2326                 while (cur_oid.hash[0] > cur_fanout_pos) {
2327                         uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2328
2329                         if (i != fanout_value)
2330                                 graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2331                                              cur_fanout_pos, fanout_value, i);
2332                         cur_fanout_pos++;
2333                 }
2334
2335                 graph_commit = lookup_commit(r, &cur_oid);
2336                 if (!parse_commit_in_graph_one(r, g, graph_commit))
2337                         graph_report(_("failed to parse commit %s from commit-graph"),
2338                                      oid_to_hex(&cur_oid));
2339         }
2340
2341         while (cur_fanout_pos < 256) {
2342                 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2343
2344                 if (g->num_commits != fanout_value)
2345                         graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2346                                      cur_fanout_pos, fanout_value, i);
2347
2348                 cur_fanout_pos++;
2349         }
2350
2351         if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
2352                 return verify_commit_graph_error;
2353
2354         if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
2355                 progress = start_progress(_("Verifying commits in commit graph"),
2356                                         g->num_commits);
2357
2358         for (i = 0; i < g->num_commits; i++) {
2359                 struct commit *graph_commit, *odb_commit;
2360                 struct commit_list *graph_parents, *odb_parents;
2361                 uint32_t max_generation = 0;
2362                 uint32_t generation;
2363
2364                 display_progress(progress, i + 1);
2365                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2366
2367                 graph_commit = lookup_commit(r, &cur_oid);
2368                 odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));
2369                 if (parse_commit_internal(odb_commit, 0, 0)) {
2370                         graph_report(_("failed to parse commit %s from object database for commit-graph"),
2371                                      oid_to_hex(&cur_oid));
2372                         continue;
2373                 }
2374
2375                 if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
2376                            get_commit_tree_oid(odb_commit)))
2377                         graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"),
2378                                      oid_to_hex(&cur_oid),
2379                                      oid_to_hex(get_commit_tree_oid(graph_commit)),
2380                                      oid_to_hex(get_commit_tree_oid(odb_commit)));
2381
2382                 graph_parents = graph_commit->parents;
2383                 odb_parents = odb_commit->parents;
2384
2385                 while (graph_parents) {
2386                         if (odb_parents == NULL) {
2387                                 graph_report(_("commit-graph parent list for commit %s is too long"),
2388                                              oid_to_hex(&cur_oid));
2389                                 break;
2390                         }
2391
2392                         /* parse parent in case it is in a base graph */
2393                         parse_commit_in_graph_one(r, g, graph_parents->item);
2394
2395                         if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
2396                                 graph_report(_("commit-graph parent for %s is %s != %s"),
2397                                              oid_to_hex(&cur_oid),
2398                                              oid_to_hex(&graph_parents->item->object.oid),
2399                                              oid_to_hex(&odb_parents->item->object.oid));
2400
2401                         generation = commit_graph_generation(graph_parents->item);
2402                         if (generation > max_generation)
2403                                 max_generation = generation;
2404
2405                         graph_parents = graph_parents->next;
2406                         odb_parents = odb_parents->next;
2407                 }
2408
2409                 if (odb_parents != NULL)
2410                         graph_report(_("commit-graph parent list for commit %s terminates early"),
2411                                      oid_to_hex(&cur_oid));
2412
2413                 if (!commit_graph_generation(graph_commit)) {
2414                         if (generation_zero == GENERATION_NUMBER_EXISTS)
2415                                 graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
2416                                              oid_to_hex(&cur_oid));
2417                         generation_zero = GENERATION_ZERO_EXISTS;
2418                 } else if (generation_zero == GENERATION_ZERO_EXISTS)
2419                         graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
2420                                      oid_to_hex(&cur_oid));
2421
2422                 if (generation_zero == GENERATION_ZERO_EXISTS)
2423                         continue;
2424
2425                 /*
2426                  * If one of our parents has generation GENERATION_NUMBER_MAX, then
2427                  * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
2428                  * extra logic in the following condition.
2429                  */
2430                 if (max_generation == GENERATION_NUMBER_MAX)
2431                         max_generation--;
2432
2433                 generation = commit_graph_generation(graph_commit);
2434                 if (generation != max_generation + 1)
2435                         graph_report(_("commit-graph generation for commit %s is %u != %u"),
2436                                      oid_to_hex(&cur_oid),
2437                                      generation,
2438                                      max_generation + 1);
2439
2440                 if (graph_commit->date != odb_commit->date)
2441                         graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
2442                                      oid_to_hex(&cur_oid),
2443                                      graph_commit->date,
2444                                      odb_commit->date);
2445         }
2446         stop_progress(&progress);
2447
2448         local_error = verify_commit_graph_error;
2449
2450         if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph)
2451                 local_error |= verify_commit_graph(r, g->base_graph, flags);
2452
2453         return local_error;
2454 }
2455
2456 void free_commit_graph(struct commit_graph *g)
2457 {
2458         if (!g)
2459                 return;
2460         if (g->data) {
2461                 munmap((void *)g->data, g->data_len);
2462                 g->data = NULL;
2463         }
2464         free(g->filename);
2465         free(g->bloom_filter_settings);
2466         free(g);
2467 }
2468
2469 void disable_commit_graph(struct repository *r)
2470 {
2471         r->commit_graph_disabled = 1;
2472 }