Merge branch 'gs/commit-graph-path-filter'
[git] / commit-graph.c
1 #include "cache.h"
2 #include "config.h"
3 #include "dir.h"
4 #include "git-compat-util.h"
5 #include "lockfile.h"
6 #include "pack.h"
7 #include "packfile.h"
8 #include "commit.h"
9 #include "object.h"
10 #include "refs.h"
11 #include "revision.h"
12 #include "sha1-lookup.h"
13 #include "commit-graph.h"
14 #include "object-store.h"
15 #include "alloc.h"
16 #include "hashmap.h"
17 #include "replace-object.h"
18 #include "progress.h"
19 #include "bloom.h"
20 #include "commit-slab.h"
21
22 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
23 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
24 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
25 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
26 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
27 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
28 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
29 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
30 #define MAX_NUM_CHUNKS 7
31
32 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
33
34 #define GRAPH_VERSION_1 0x1
35 #define GRAPH_VERSION GRAPH_VERSION_1
36
37 #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
38 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
39 #define GRAPH_PARENT_NONE 0x70000000
40
41 #define GRAPH_LAST_EDGE 0x80000000
42
43 #define GRAPH_HEADER_SIZE 8
44 #define GRAPH_FANOUT_SIZE (4 * 256)
45 #define GRAPH_CHUNKLOOKUP_WIDTH 12
46 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
47                         + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
48
49 /* Remember to update object flag allocation in object.h */
50 #define REACHABLE       (1u<<15)
51
52 /* Keep track of the order in which commits are added to our list. */
53 define_commit_slab(commit_pos, int);
54 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
55
56 static void set_commit_pos(struct repository *r, const struct object_id *oid)
57 {
58         static int32_t max_pos;
59         struct commit *commit = lookup_commit(r, oid);
60
61         if (!commit)
62                 return; /* should never happen, but be lenient */
63
64         *commit_pos_at(&commit_pos, commit) = max_pos++;
65 }
66
67 static int commit_pos_cmp(const void *va, const void *vb)
68 {
69         const struct commit *a = *(const struct commit **)va;
70         const struct commit *b = *(const struct commit **)vb;
71         return commit_pos_at(&commit_pos, a) -
72                commit_pos_at(&commit_pos, b);
73 }
74
75 static int commit_gen_cmp(const void *va, const void *vb)
76 {
77         const struct commit *a = *(const struct commit **)va;
78         const struct commit *b = *(const struct commit **)vb;
79
80         /* lower generation commits first */
81         if (a->generation < b->generation)
82                 return -1;
83         else if (a->generation > b->generation)
84                 return 1;
85
86         /* use date as a heuristic when generations are equal */
87         if (a->date < b->date)
88                 return -1;
89         else if (a->date > b->date)
90                 return 1;
91         return 0;
92 }
93
94 char *get_commit_graph_filename(struct object_directory *obj_dir)
95 {
96         return xstrfmt("%s/info/commit-graph", obj_dir->path);
97 }
98
99 static char *get_split_graph_filename(struct object_directory *odb,
100                                       const char *oid_hex)
101 {
102         return xstrfmt("%s/info/commit-graphs/graph-%s.graph", odb->path,
103                        oid_hex);
104 }
105
106 static char *get_chain_filename(struct object_directory *odb)
107 {
108         return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
109 }
110
111 static uint8_t oid_version(void)
112 {
113         return 1;
114 }
115
116 static struct commit_graph *alloc_commit_graph(void)
117 {
118         struct commit_graph *g = xcalloc(1, sizeof(*g));
119
120         return g;
121 }
122
123 extern int read_replace_refs;
124
125 static int commit_graph_compatible(struct repository *r)
126 {
127         if (!r->gitdir)
128                 return 0;
129
130         if (read_replace_refs) {
131                 prepare_replace_object(r);
132                 if (hashmap_get_size(&r->objects->replace_map->map))
133                         return 0;
134         }
135
136         prepare_commit_graft(r);
137         if (r->parsed_objects && r->parsed_objects->grafts_nr)
138                 return 0;
139         if (is_repository_shallow(r))
140                 return 0;
141
142         return 1;
143 }
144
145 int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
146 {
147         *fd = git_open(graph_file);
148         if (*fd < 0)
149                 return 0;
150         if (fstat(*fd, st)) {
151                 close(*fd);
152                 return 0;
153         }
154         return 1;
155 }
156
157 struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
158                                                  struct object_directory *odb)
159 {
160         void *graph_map;
161         size_t graph_size;
162         struct commit_graph *ret;
163
164         graph_size = xsize_t(st->st_size);
165
166         if (graph_size < GRAPH_MIN_SIZE) {
167                 close(fd);
168                 error(_("commit-graph file is too small"));
169                 return NULL;
170         }
171         graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
172         close(fd);
173         ret = parse_commit_graph(graph_map, graph_size);
174
175         if (ret)
176                 ret->odb = odb;
177         else
178                 munmap(graph_map, graph_size);
179
180         return ret;
181 }
182
183 static int verify_commit_graph_lite(struct commit_graph *g)
184 {
185         /*
186          * Basic validation shared between parse_commit_graph()
187          * which'll be called every time the graph is used, and the
188          * much more expensive verify_commit_graph() used by
189          * "commit-graph verify".
190          *
191          * There should only be very basic checks here to ensure that
192          * we don't e.g. segfault in fill_commit_in_graph(), but
193          * because this is a very hot codepath nothing that e.g. loops
194          * over g->num_commits, or runs a checksum on the commit-graph
195          * itself.
196          */
197         if (!g->chunk_oid_fanout) {
198                 error("commit-graph is missing the OID Fanout chunk");
199                 return 1;
200         }
201         if (!g->chunk_oid_lookup) {
202                 error("commit-graph is missing the OID Lookup chunk");
203                 return 1;
204         }
205         if (!g->chunk_commit_data) {
206                 error("commit-graph is missing the Commit Data chunk");
207                 return 1;
208         }
209
210         return 0;
211 }
212
213 struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
214 {
215         const unsigned char *data, *chunk_lookup;
216         uint32_t i;
217         struct commit_graph *graph;
218         uint64_t last_chunk_offset;
219         uint32_t last_chunk_id;
220         uint32_t graph_signature;
221         unsigned char graph_version, hash_version;
222
223         if (!graph_map)
224                 return NULL;
225
226         if (graph_size < GRAPH_MIN_SIZE)
227                 return NULL;
228
229         data = (const unsigned char *)graph_map;
230
231         graph_signature = get_be32(data);
232         if (graph_signature != GRAPH_SIGNATURE) {
233                 error(_("commit-graph signature %X does not match signature %X"),
234                       graph_signature, GRAPH_SIGNATURE);
235                 return NULL;
236         }
237
238         graph_version = *(unsigned char*)(data + 4);
239         if (graph_version != GRAPH_VERSION) {
240                 error(_("commit-graph version %X does not match version %X"),
241                       graph_version, GRAPH_VERSION);
242                 return NULL;
243         }
244
245         hash_version = *(unsigned char*)(data + 5);
246         if (hash_version != oid_version()) {
247                 error(_("commit-graph hash version %X does not match version %X"),
248                       hash_version, oid_version());
249                 return NULL;
250         }
251
252         graph = alloc_commit_graph();
253
254         graph->hash_len = the_hash_algo->rawsz;
255         graph->num_chunks = *(unsigned char*)(data + 6);
256         graph->data = graph_map;
257         graph->data_len = graph_size;
258
259         last_chunk_id = 0;
260         last_chunk_offset = 8;
261         chunk_lookup = data + 8;
262         for (i = 0; i < graph->num_chunks; i++) {
263                 uint32_t chunk_id;
264                 uint64_t chunk_offset;
265                 int chunk_repeated = 0;
266
267                 if (data + graph_size - chunk_lookup <
268                     GRAPH_CHUNKLOOKUP_WIDTH) {
269                         error(_("commit-graph chunk lookup table entry missing; file may be incomplete"));
270                         free(graph);
271                         return NULL;
272                 }
273
274                 chunk_id = get_be32(chunk_lookup + 0);
275                 chunk_offset = get_be64(chunk_lookup + 4);
276
277                 chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
278
279                 if (chunk_offset > graph_size - the_hash_algo->rawsz) {
280                         error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
281                               (uint32_t)chunk_offset);
282                         free(graph);
283                         return NULL;
284                 }
285
286                 switch (chunk_id) {
287                 case GRAPH_CHUNKID_OIDFANOUT:
288                         if (graph->chunk_oid_fanout)
289                                 chunk_repeated = 1;
290                         else
291                                 graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
292                         break;
293
294                 case GRAPH_CHUNKID_OIDLOOKUP:
295                         if (graph->chunk_oid_lookup)
296                                 chunk_repeated = 1;
297                         else
298                                 graph->chunk_oid_lookup = data + chunk_offset;
299                         break;
300
301                 case GRAPH_CHUNKID_DATA:
302                         if (graph->chunk_commit_data)
303                                 chunk_repeated = 1;
304                         else
305                                 graph->chunk_commit_data = data + chunk_offset;
306                         break;
307
308                 case GRAPH_CHUNKID_EXTRAEDGES:
309                         if (graph->chunk_extra_edges)
310                                 chunk_repeated = 1;
311                         else
312                                 graph->chunk_extra_edges = data + chunk_offset;
313                         break;
314
315                 case GRAPH_CHUNKID_BASE:
316                         if (graph->chunk_base_graphs)
317                                 chunk_repeated = 1;
318                         else
319                                 graph->chunk_base_graphs = data + chunk_offset;
320                         break;
321
322                 case GRAPH_CHUNKID_BLOOMINDEXES:
323                         if (graph->chunk_bloom_indexes)
324                                 chunk_repeated = 1;
325                         else
326                                 graph->chunk_bloom_indexes = data + chunk_offset;
327                         break;
328
329                 case GRAPH_CHUNKID_BLOOMDATA:
330                         if (graph->chunk_bloom_data)
331                                 chunk_repeated = 1;
332                         else {
333                                 uint32_t hash_version;
334                                 graph->chunk_bloom_data = data + chunk_offset;
335                                 hash_version = get_be32(data + chunk_offset);
336
337                                 if (hash_version != 1)
338                                         break;
339
340                                 graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
341                                 graph->bloom_filter_settings->hash_version = hash_version;
342                                 graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
343                                 graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
344                         }
345                         break;
346                 }
347
348                 if (chunk_repeated) {
349                         error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
350                         free(graph);
351                         return NULL;
352                 }
353
354                 if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
355                 {
356                         graph->num_commits = (chunk_offset - last_chunk_offset)
357                                              / graph->hash_len;
358                 }
359
360                 last_chunk_id = chunk_id;
361                 last_chunk_offset = chunk_offset;
362         }
363
364         if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
365                 init_bloom_filters();
366         } else {
367                 /* We need both the bloom chunks to exist together. Else ignore the data */
368                 graph->chunk_bloom_indexes = NULL;
369                 graph->chunk_bloom_data = NULL;
370                 graph->bloom_filter_settings = NULL;
371         }
372
373         hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len);
374
375         if (verify_commit_graph_lite(graph)) {
376                 free(graph);
377                 return NULL;
378         }
379
380         return graph;
381 }
382
383 static struct commit_graph *load_commit_graph_one(const char *graph_file,
384                                                   struct object_directory *odb)
385 {
386
387         struct stat st;
388         int fd;
389         struct commit_graph *g;
390         int open_ok = open_commit_graph(graph_file, &fd, &st);
391
392         if (!open_ok)
393                 return NULL;
394
395         g = load_commit_graph_one_fd_st(fd, &st, odb);
396
397         if (g)
398                 g->filename = xstrdup(graph_file);
399
400         return g;
401 }
402
403 static struct commit_graph *load_commit_graph_v1(struct repository *r,
404                                                  struct object_directory *odb)
405 {
406         char *graph_name = get_commit_graph_filename(odb);
407         struct commit_graph *g = load_commit_graph_one(graph_name, odb);
408         free(graph_name);
409
410         return g;
411 }
412
413 static int add_graph_to_chain(struct commit_graph *g,
414                               struct commit_graph *chain,
415                               struct object_id *oids,
416                               int n)
417 {
418         struct commit_graph *cur_g = chain;
419
420         if (n && !g->chunk_base_graphs) {
421                 warning(_("commit-graph has no base graphs chunk"));
422                 return 0;
423         }
424
425         while (n) {
426                 n--;
427
428                 if (!cur_g ||
429                     !oideq(&oids[n], &cur_g->oid) ||
430                     !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) {
431                         warning(_("commit-graph chain does not match"));
432                         return 0;
433                 }
434
435                 cur_g = cur_g->base_graph;
436         }
437
438         g->base_graph = chain;
439
440         if (chain)
441                 g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
442
443         return 1;
444 }
445
446 static struct commit_graph *load_commit_graph_chain(struct repository *r,
447                                                     struct object_directory *odb)
448 {
449         struct commit_graph *graph_chain = NULL;
450         struct strbuf line = STRBUF_INIT;
451         struct stat st;
452         struct object_id *oids;
453         int i = 0, valid = 1, count;
454         char *chain_name = get_chain_filename(odb);
455         FILE *fp;
456         int stat_res;
457
458         fp = fopen(chain_name, "r");
459         stat_res = stat(chain_name, &st);
460         free(chain_name);
461
462         if (!fp ||
463             stat_res ||
464             st.st_size <= the_hash_algo->hexsz)
465                 return NULL;
466
467         count = st.st_size / (the_hash_algo->hexsz + 1);
468         oids = xcalloc(count, sizeof(struct object_id));
469
470         prepare_alt_odb(r);
471
472         for (i = 0; i < count; i++) {
473                 struct object_directory *odb;
474
475                 if (strbuf_getline_lf(&line, fp) == EOF)
476                         break;
477
478                 if (get_oid_hex(line.buf, &oids[i])) {
479                         warning(_("invalid commit-graph chain: line '%s' not a hash"),
480                                 line.buf);
481                         valid = 0;
482                         break;
483                 }
484
485                 valid = 0;
486                 for (odb = r->objects->odb; odb; odb = odb->next) {
487                         char *graph_name = get_split_graph_filename(odb, line.buf);
488                         struct commit_graph *g = load_commit_graph_one(graph_name, odb);
489
490                         free(graph_name);
491
492                         if (g) {
493                                 if (add_graph_to_chain(g, graph_chain, oids, i)) {
494                                         graph_chain = g;
495                                         valid = 1;
496                                 }
497
498                                 break;
499                         }
500                 }
501
502                 if (!valid) {
503                         warning(_("unable to find all commit-graph files"));
504                         break;
505                 }
506         }
507
508         free(oids);
509         fclose(fp);
510         strbuf_release(&line);
511
512         return graph_chain;
513 }
514
515 struct commit_graph *read_commit_graph_one(struct repository *r,
516                                            struct object_directory *odb)
517 {
518         struct commit_graph *g = load_commit_graph_v1(r, odb);
519
520         if (!g)
521                 g = load_commit_graph_chain(r, odb);
522
523         return g;
524 }
525
526 static void prepare_commit_graph_one(struct repository *r,
527                                      struct object_directory *odb)
528 {
529
530         if (r->objects->commit_graph)
531                 return;
532
533         r->objects->commit_graph = read_commit_graph_one(r, odb);
534 }
535
536 /*
537  * Return 1 if commit_graph is non-NULL, and 0 otherwise.
538  *
539  * On the first invocation, this function attempts to load the commit
540  * graph if the_repository is configured to have one.
541  */
542 static int prepare_commit_graph(struct repository *r)
543 {
544         struct object_directory *odb;
545
546         /*
547          * This must come before the "already attempted?" check below, because
548          * we want to disable even an already-loaded graph file.
549          */
550         if (r->commit_graph_disabled)
551                 return 0;
552
553         if (r->objects->commit_graph_attempted)
554                 return !!r->objects->commit_graph;
555         r->objects->commit_graph_attempted = 1;
556
557         if (git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD, 0))
558                 die("dying as requested by the '%s' variable on commit-graph load!",
559                     GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD);
560
561         prepare_repo_settings(r);
562
563         if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
564             r->settings.core_commit_graph != 1)
565                 /*
566                  * This repository is not configured to use commit graphs, so
567                  * do not load one. (But report commit_graph_attempted anyway
568                  * so that commit graph loading is not attempted again for this
569                  * repository.)
570                  */
571                 return 0;
572
573         if (!commit_graph_compatible(r))
574                 return 0;
575
576         prepare_alt_odb(r);
577         for (odb = r->objects->odb;
578              !r->objects->commit_graph && odb;
579              odb = odb->next)
580                 prepare_commit_graph_one(r, odb);
581         return !!r->objects->commit_graph;
582 }
583
584 int generation_numbers_enabled(struct repository *r)
585 {
586         uint32_t first_generation;
587         struct commit_graph *g;
588         if (!prepare_commit_graph(r))
589                return 0;
590
591         g = r->objects->commit_graph;
592
593         if (!g->num_commits)
594                 return 0;
595
596         first_generation = get_be32(g->chunk_commit_data +
597                                     g->hash_len + 8) >> 2;
598
599         return !!first_generation;
600 }
601
602 static void close_commit_graph_one(struct commit_graph *g)
603 {
604         if (!g)
605                 return;
606
607         close_commit_graph_one(g->base_graph);
608         free_commit_graph(g);
609 }
610
611 void close_commit_graph(struct raw_object_store *o)
612 {
613         close_commit_graph_one(o->commit_graph);
614         o->commit_graph = NULL;
615 }
616
617 static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
618 {
619         return bsearch_hash(oid->hash, g->chunk_oid_fanout,
620                             g->chunk_oid_lookup, g->hash_len, pos);
621 }
622
623 static void load_oid_from_graph(struct commit_graph *g,
624                                 uint32_t pos,
625                                 struct object_id *oid)
626 {
627         uint32_t lex_index;
628
629         while (g && pos < g->num_commits_in_base)
630                 g = g->base_graph;
631
632         if (!g)
633                 BUG("NULL commit-graph");
634
635         if (pos >= g->num_commits + g->num_commits_in_base)
636                 die(_("invalid commit position. commit-graph is likely corrupt"));
637
638         lex_index = pos - g->num_commits_in_base;
639
640         hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index);
641 }
642
643 static struct commit_list **insert_parent_or_die(struct repository *r,
644                                                  struct commit_graph *g,
645                                                  uint32_t pos,
646                                                  struct commit_list **pptr)
647 {
648         struct commit *c;
649         struct object_id oid;
650
651         if (pos >= g->num_commits + g->num_commits_in_base)
652                 die("invalid parent position %"PRIu32, pos);
653
654         load_oid_from_graph(g, pos, &oid);
655         c = lookup_commit(r, &oid);
656         if (!c)
657                 die(_("could not find commit %s"), oid_to_hex(&oid));
658         c->graph_pos = pos;
659         return &commit_list_insert(c, pptr)->next;
660 }
661
662 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
663 {
664         const unsigned char *commit_data;
665         uint32_t lex_index;
666
667         while (pos < g->num_commits_in_base)
668                 g = g->base_graph;
669
670         lex_index = pos - g->num_commits_in_base;
671         commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
672         item->graph_pos = pos;
673         item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
674 }
675
676 static inline void set_commit_tree(struct commit *c, struct tree *t)
677 {
678         c->maybe_tree = t;
679 }
680
681 static int fill_commit_in_graph(struct repository *r,
682                                 struct commit *item,
683                                 struct commit_graph *g, uint32_t pos)
684 {
685         uint32_t edge_value;
686         uint32_t *parent_data_ptr;
687         uint64_t date_low, date_high;
688         struct commit_list **pptr;
689         const unsigned char *commit_data;
690         uint32_t lex_index;
691
692         while (pos < g->num_commits_in_base)
693                 g = g->base_graph;
694
695         if (pos >= g->num_commits + g->num_commits_in_base)
696                 die(_("invalid commit position. commit-graph is likely corrupt"));
697
698         /*
699          * Store the "full" position, but then use the
700          * "local" position for the rest of the calculation.
701          */
702         item->graph_pos = pos;
703         lex_index = pos - g->num_commits_in_base;
704
705         commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
706
707         item->object.parsed = 1;
708
709         set_commit_tree(item, NULL);
710
711         date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
712         date_low = get_be32(commit_data + g->hash_len + 12);
713         item->date = (timestamp_t)((date_high << 32) | date_low);
714
715         item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
716
717         pptr = &item->parents;
718
719         edge_value = get_be32(commit_data + g->hash_len);
720         if (edge_value == GRAPH_PARENT_NONE)
721                 return 1;
722         pptr = insert_parent_or_die(r, g, edge_value, pptr);
723
724         edge_value = get_be32(commit_data + g->hash_len + 4);
725         if (edge_value == GRAPH_PARENT_NONE)
726                 return 1;
727         if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
728                 pptr = insert_parent_or_die(r, g, edge_value, pptr);
729                 return 1;
730         }
731
732         parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
733                           4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
734         do {
735                 edge_value = get_be32(parent_data_ptr);
736                 pptr = insert_parent_or_die(r, g,
737                                             edge_value & GRAPH_EDGE_LAST_MASK,
738                                             pptr);
739                 parent_data_ptr++;
740         } while (!(edge_value & GRAPH_LAST_EDGE));
741
742         return 1;
743 }
744
745 static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
746 {
747         if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
748                 *pos = item->graph_pos;
749                 return 1;
750         } else {
751                 struct commit_graph *cur_g = g;
752                 uint32_t lex_index;
753
754                 while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index))
755                         cur_g = cur_g->base_graph;
756
757                 if (cur_g) {
758                         *pos = lex_index + cur_g->num_commits_in_base;
759                         return 1;
760                 }
761
762                 return 0;
763         }
764 }
765
766 static int parse_commit_in_graph_one(struct repository *r,
767                                      struct commit_graph *g,
768                                      struct commit *item)
769 {
770         uint32_t pos;
771
772         if (item->object.parsed)
773                 return 1;
774
775         if (find_commit_in_graph(item, g, &pos))
776                 return fill_commit_in_graph(r, item, g, pos);
777
778         return 0;
779 }
780
781 int parse_commit_in_graph(struct repository *r, struct commit *item)
782 {
783         if (!prepare_commit_graph(r))
784                 return 0;
785         return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
786 }
787
788 void load_commit_graph_info(struct repository *r, struct commit *item)
789 {
790         uint32_t pos;
791         if (!prepare_commit_graph(r))
792                 return;
793         if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
794                 fill_commit_graph_info(item, r->objects->commit_graph, pos);
795 }
796
797 static struct tree *load_tree_for_commit(struct repository *r,
798                                          struct commit_graph *g,
799                                          struct commit *c)
800 {
801         struct object_id oid;
802         const unsigned char *commit_data;
803
804         while (c->graph_pos < g->num_commits_in_base)
805                 g = g->base_graph;
806
807         commit_data = g->chunk_commit_data +
808                         GRAPH_DATA_WIDTH * (c->graph_pos - g->num_commits_in_base);
809
810         hashcpy(oid.hash, commit_data);
811         set_commit_tree(c, lookup_tree(r, &oid));
812
813         return c->maybe_tree;
814 }
815
816 static struct tree *get_commit_tree_in_graph_one(struct repository *r,
817                                                  struct commit_graph *g,
818                                                  const struct commit *c)
819 {
820         if (c->maybe_tree)
821                 return c->maybe_tree;
822         if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
823                 BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
824
825         return load_tree_for_commit(r, g, (struct commit *)c);
826 }
827
828 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
829 {
830         return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
831 }
832
833 struct packed_commit_list {
834         struct commit **list;
835         int nr;
836         int alloc;
837 };
838
839 struct packed_oid_list {
840         struct object_id *list;
841         int nr;
842         int alloc;
843 };
844
845 struct write_commit_graph_context {
846         struct repository *r;
847         struct object_directory *odb;
848         char *graph_name;
849         struct packed_oid_list oids;
850         struct packed_commit_list commits;
851         int num_extra_edges;
852         unsigned long approx_nr_objects;
853         struct progress *progress;
854         int progress_done;
855         uint64_t progress_cnt;
856
857         char *base_graph_name;
858         int num_commit_graphs_before;
859         int num_commit_graphs_after;
860         char **commit_graph_filenames_before;
861         char **commit_graph_filenames_after;
862         char **commit_graph_hash_after;
863         uint32_t new_num_commits_in_base;
864         struct commit_graph *new_base_graph;
865
866         unsigned append:1,
867                  report_progress:1,
868                  split:1,
869                  check_oids:1,
870                  changed_paths:1,
871                  order_by_pack:1;
872
873         const struct split_commit_graph_opts *split_opts;
874         size_t total_bloom_filter_data_size;
875 };
876
877 static void write_graph_chunk_fanout(struct hashfile *f,
878                                      struct write_commit_graph_context *ctx)
879 {
880         int i, count = 0;
881         struct commit **list = ctx->commits.list;
882
883         /*
884          * Write the first-level table (the list is sorted,
885          * but we use a 256-entry lookup to be able to avoid
886          * having to do eight extra binary search iterations).
887          */
888         for (i = 0; i < 256; i++) {
889                 while (count < ctx->commits.nr) {
890                         if ((*list)->object.oid.hash[0] != i)
891                                 break;
892                         display_progress(ctx->progress, ++ctx->progress_cnt);
893                         count++;
894                         list++;
895                 }
896
897                 hashwrite_be32(f, count);
898         }
899 }
900
901 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
902                                    struct write_commit_graph_context *ctx)
903 {
904         struct commit **list = ctx->commits.list;
905         int count;
906         for (count = 0; count < ctx->commits.nr; count++, list++) {
907                 display_progress(ctx->progress, ++ctx->progress_cnt);
908                 hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
909         }
910 }
911
912 static const unsigned char *commit_to_sha1(size_t index, void *table)
913 {
914         struct commit **commits = table;
915         return commits[index]->object.oid.hash;
916 }
917
918 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
919                                    struct write_commit_graph_context *ctx)
920 {
921         struct commit **list = ctx->commits.list;
922         struct commit **last = ctx->commits.list + ctx->commits.nr;
923         uint32_t num_extra_edges = 0;
924
925         while (list < last) {
926                 struct commit_list *parent;
927                 struct object_id *tree;
928                 int edge_value;
929                 uint32_t packedDate[2];
930                 display_progress(ctx->progress, ++ctx->progress_cnt);
931
932                 if (parse_commit_no_graph(*list))
933                         die(_("unable to parse commit %s"),
934                                 oid_to_hex(&(*list)->object.oid));
935                 tree = get_commit_tree_oid(*list);
936                 hashwrite(f, tree->hash, hash_len);
937
938                 parent = (*list)->parents;
939
940                 if (!parent)
941                         edge_value = GRAPH_PARENT_NONE;
942                 else {
943                         edge_value = sha1_pos(parent->item->object.oid.hash,
944                                               ctx->commits.list,
945                                               ctx->commits.nr,
946                                               commit_to_sha1);
947
948                         if (edge_value >= 0)
949                                 edge_value += ctx->new_num_commits_in_base;
950                         else if (ctx->new_base_graph) {
951                                 uint32_t pos;
952                                 if (find_commit_in_graph(parent->item,
953                                                          ctx->new_base_graph,
954                                                          &pos))
955                                         edge_value = pos;
956                         }
957
958                         if (edge_value < 0)
959                                 BUG("missing parent %s for commit %s",
960                                     oid_to_hex(&parent->item->object.oid),
961                                     oid_to_hex(&(*list)->object.oid));
962                 }
963
964                 hashwrite_be32(f, edge_value);
965
966                 if (parent)
967                         parent = parent->next;
968
969                 if (!parent)
970                         edge_value = GRAPH_PARENT_NONE;
971                 else if (parent->next)
972                         edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
973                 else {
974                         edge_value = sha1_pos(parent->item->object.oid.hash,
975                                               ctx->commits.list,
976                                               ctx->commits.nr,
977                                               commit_to_sha1);
978
979                         if (edge_value >= 0)
980                                 edge_value += ctx->new_num_commits_in_base;
981                         else if (ctx->new_base_graph) {
982                                 uint32_t pos;
983                                 if (find_commit_in_graph(parent->item,
984                                                          ctx->new_base_graph,
985                                                          &pos))
986                                         edge_value = pos;
987                         }
988
989                         if (edge_value < 0)
990                                 BUG("missing parent %s for commit %s",
991                                     oid_to_hex(&parent->item->object.oid),
992                                     oid_to_hex(&(*list)->object.oid));
993                 }
994
995                 hashwrite_be32(f, edge_value);
996
997                 if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
998                         do {
999                                 num_extra_edges++;
1000                                 parent = parent->next;
1001                         } while (parent);
1002                 }
1003
1004                 if (sizeof((*list)->date) > 4)
1005                         packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
1006                 else
1007                         packedDate[0] = 0;
1008
1009                 packedDate[0] |= htonl((*list)->generation << 2);
1010
1011                 packedDate[1] = htonl((*list)->date);
1012                 hashwrite(f, packedDate, 8);
1013
1014                 list++;
1015         }
1016 }
1017
1018 static void write_graph_chunk_extra_edges(struct hashfile *f,
1019                                           struct write_commit_graph_context *ctx)
1020 {
1021         struct commit **list = ctx->commits.list;
1022         struct commit **last = ctx->commits.list + ctx->commits.nr;
1023         struct commit_list *parent;
1024
1025         while (list < last) {
1026                 int num_parents = 0;
1027
1028                 display_progress(ctx->progress, ++ctx->progress_cnt);
1029
1030                 for (parent = (*list)->parents; num_parents < 3 && parent;
1031                      parent = parent->next)
1032                         num_parents++;
1033
1034                 if (num_parents <= 2) {
1035                         list++;
1036                         continue;
1037                 }
1038
1039                 /* Since num_parents > 2, this initializer is safe. */
1040                 for (parent = (*list)->parents->next; parent; parent = parent->next) {
1041                         int edge_value = sha1_pos(parent->item->object.oid.hash,
1042                                                   ctx->commits.list,
1043                                                   ctx->commits.nr,
1044                                                   commit_to_sha1);
1045
1046                         if (edge_value >= 0)
1047                                 edge_value += ctx->new_num_commits_in_base;
1048                         else if (ctx->new_base_graph) {
1049                                 uint32_t pos;
1050                                 if (find_commit_in_graph(parent->item,
1051                                                          ctx->new_base_graph,
1052                                                          &pos))
1053                                         edge_value = pos;
1054                         }
1055
1056                         if (edge_value < 0)
1057                                 BUG("missing parent %s for commit %s",
1058                                     oid_to_hex(&parent->item->object.oid),
1059                                     oid_to_hex(&(*list)->object.oid));
1060                         else if (!parent->next)
1061                                 edge_value |= GRAPH_LAST_EDGE;
1062
1063                         hashwrite_be32(f, edge_value);
1064                 }
1065
1066                 list++;
1067         }
1068 }
1069
1070 static void write_graph_chunk_bloom_indexes(struct hashfile *f,
1071                                             struct write_commit_graph_context *ctx)
1072 {
1073         struct commit **list = ctx->commits.list;
1074         struct commit **last = ctx->commits.list + ctx->commits.nr;
1075         uint32_t cur_pos = 0;
1076         struct progress *progress = NULL;
1077         int i = 0;
1078
1079         if (ctx->report_progress)
1080                 progress = start_delayed_progress(
1081                         _("Writing changed paths Bloom filters index"),
1082                         ctx->commits.nr);
1083
1084         while (list < last) {
1085                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1086                 cur_pos += filter->len;
1087                 display_progress(progress, ++i);
1088                 hashwrite_be32(f, cur_pos);
1089                 list++;
1090         }
1091
1092         stop_progress(&progress);
1093 }
1094
1095 static void write_graph_chunk_bloom_data(struct hashfile *f,
1096                                          struct write_commit_graph_context *ctx,
1097                                          const struct bloom_filter_settings *settings)
1098 {
1099         struct commit **list = ctx->commits.list;
1100         struct commit **last = ctx->commits.list + ctx->commits.nr;
1101         struct progress *progress = NULL;
1102         int i = 0;
1103
1104         if (ctx->report_progress)
1105                 progress = start_delayed_progress(
1106                         _("Writing changed paths Bloom filters data"),
1107                         ctx->commits.nr);
1108
1109         hashwrite_be32(f, settings->hash_version);
1110         hashwrite_be32(f, settings->num_hashes);
1111         hashwrite_be32(f, settings->bits_per_entry);
1112
1113         while (list < last) {
1114                 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1115                 display_progress(progress, ++i);
1116                 hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
1117                 list++;
1118         }
1119
1120         stop_progress(&progress);
1121 }
1122
1123 static int oid_compare(const void *_a, const void *_b)
1124 {
1125         const struct object_id *a = (const struct object_id *)_a;
1126         const struct object_id *b = (const struct object_id *)_b;
1127         return oidcmp(a, b);
1128 }
1129
1130 static int add_packed_commits(const struct object_id *oid,
1131                               struct packed_git *pack,
1132                               uint32_t pos,
1133                               void *data)
1134 {
1135         struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
1136         enum object_type type;
1137         off_t offset = nth_packed_object_offset(pack, pos);
1138         struct object_info oi = OBJECT_INFO_INIT;
1139
1140         if (ctx->progress)
1141                 display_progress(ctx->progress, ++ctx->progress_done);
1142
1143         oi.typep = &type;
1144         if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
1145                 die(_("unable to get type of object %s"), oid_to_hex(oid));
1146
1147         if (type != OBJ_COMMIT)
1148                 return 0;
1149
1150         ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1151         oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid);
1152         ctx->oids.nr++;
1153
1154         set_commit_pos(ctx->r, oid);
1155
1156         return 0;
1157 }
1158
1159 static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
1160 {
1161         struct commit_list *parent;
1162         for (parent = commit->parents; parent; parent = parent->next) {
1163                 if (!(parent->item->object.flags & REACHABLE)) {
1164                         ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1165                         oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid));
1166                         ctx->oids.nr++;
1167                         parent->item->object.flags |= REACHABLE;
1168                 }
1169         }
1170 }
1171
1172 static void close_reachable(struct write_commit_graph_context *ctx)
1173 {
1174         int i;
1175         struct commit *commit;
1176         enum commit_graph_split_flags flags = ctx->split_opts ?
1177                 ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1178
1179         if (ctx->report_progress)
1180                 ctx->progress = start_delayed_progress(
1181                                         _("Loading known commits in commit graph"),
1182                                         ctx->oids.nr);
1183         for (i = 0; i < ctx->oids.nr; i++) {
1184                 display_progress(ctx->progress, i + 1);
1185                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1186                 if (commit)
1187                         commit->object.flags |= REACHABLE;
1188         }
1189         stop_progress(&ctx->progress);
1190
1191         /*
1192          * As this loop runs, ctx->oids.nr may grow, but not more
1193          * than the number of missing commits in the reachable
1194          * closure.
1195          */
1196         if (ctx->report_progress)
1197                 ctx->progress = start_delayed_progress(
1198                                         _("Expanding reachable commits in commit graph"),
1199                                         0);
1200         for (i = 0; i < ctx->oids.nr; i++) {
1201                 display_progress(ctx->progress, i + 1);
1202                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1203
1204                 if (!commit)
1205                         continue;
1206                 if (ctx->split) {
1207                         if ((!parse_commit(commit) &&
1208                              commit->graph_pos == COMMIT_NOT_FROM_GRAPH) ||
1209                             flags == COMMIT_GRAPH_SPLIT_REPLACE)
1210                                 add_missing_parents(ctx, commit);
1211                 } else if (!parse_commit_no_graph(commit))
1212                         add_missing_parents(ctx, commit);
1213         }
1214         stop_progress(&ctx->progress);
1215
1216         if (ctx->report_progress)
1217                 ctx->progress = start_delayed_progress(
1218                                         _("Clearing commit marks in commit graph"),
1219                                         ctx->oids.nr);
1220         for (i = 0; i < ctx->oids.nr; i++) {
1221                 display_progress(ctx->progress, i + 1);
1222                 commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
1223
1224                 if (commit)
1225                         commit->object.flags &= ~REACHABLE;
1226         }
1227         stop_progress(&ctx->progress);
1228 }
1229
1230 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
1231 {
1232         int i;
1233         struct commit_list *list = NULL;
1234
1235         if (ctx->report_progress)
1236                 ctx->progress = start_delayed_progress(
1237                                         _("Computing commit graph generation numbers"),
1238                                         ctx->commits.nr);
1239         for (i = 0; i < ctx->commits.nr; i++) {
1240                 display_progress(ctx->progress, i + 1);
1241                 if (ctx->commits.list[i]->generation != GENERATION_NUMBER_INFINITY &&
1242                     ctx->commits.list[i]->generation != GENERATION_NUMBER_ZERO)
1243                         continue;
1244
1245                 commit_list_insert(ctx->commits.list[i], &list);
1246                 while (list) {
1247                         struct commit *current = list->item;
1248                         struct commit_list *parent;
1249                         int all_parents_computed = 1;
1250                         uint32_t max_generation = 0;
1251
1252                         for (parent = current->parents; parent; parent = parent->next) {
1253                                 if (parent->item->generation == GENERATION_NUMBER_INFINITY ||
1254                                     parent->item->generation == GENERATION_NUMBER_ZERO) {
1255                                         all_parents_computed = 0;
1256                                         commit_list_insert(parent->item, &list);
1257                                         break;
1258                                 } else if (parent->item->generation > max_generation) {
1259                                         max_generation = parent->item->generation;
1260                                 }
1261                         }
1262
1263                         if (all_parents_computed) {
1264                                 current->generation = max_generation + 1;
1265                                 pop_commit(&list);
1266
1267                                 if (current->generation > GENERATION_NUMBER_MAX)
1268                                         current->generation = GENERATION_NUMBER_MAX;
1269                         }
1270                 }
1271         }
1272         stop_progress(&ctx->progress);
1273 }
1274
1275 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
1276 {
1277         int i;
1278         struct progress *progress = NULL;
1279         struct commit **sorted_commits;
1280
1281         init_bloom_filters();
1282
1283         if (ctx->report_progress)
1284                 progress = start_delayed_progress(
1285                         _("Computing commit changed paths Bloom filters"),
1286                         ctx->commits.nr);
1287
1288         ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
1289         COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
1290
1291         if (ctx->order_by_pack)
1292                 QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
1293         else
1294                 QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
1295
1296         for (i = 0; i < ctx->commits.nr; i++) {
1297                 struct commit *c = sorted_commits[i];
1298                 struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
1299                 ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
1300                 display_progress(progress, i + 1);
1301         }
1302
1303         free(sorted_commits);
1304         stop_progress(&progress);
1305 }
1306
1307 static int add_ref_to_set(const char *refname,
1308                           const struct object_id *oid,
1309                           int flags, void *cb_data)
1310 {
1311         struct oidset *commits = (struct oidset *)cb_data;
1312
1313         oidset_insert(commits, oid);
1314         return 0;
1315 }
1316
1317 int write_commit_graph_reachable(struct object_directory *odb,
1318                                  enum commit_graph_write_flags flags,
1319                                  const struct split_commit_graph_opts *split_opts)
1320 {
1321         struct oidset commits = OIDSET_INIT;
1322         int result;
1323
1324         for_each_ref(add_ref_to_set, &commits);
1325         result = write_commit_graph(odb, NULL, &commits,
1326                                     flags, split_opts);
1327
1328         oidset_clear(&commits);
1329         return result;
1330 }
1331
1332 static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
1333                                 struct string_list *pack_indexes)
1334 {
1335         uint32_t i;
1336         struct strbuf progress_title = STRBUF_INIT;
1337         struct strbuf packname = STRBUF_INIT;
1338         int dirlen;
1339
1340         strbuf_addf(&packname, "%s/pack/", ctx->odb->path);
1341         dirlen = packname.len;
1342         if (ctx->report_progress) {
1343                 strbuf_addf(&progress_title,
1344                             Q_("Finding commits for commit graph in %d pack",
1345                                "Finding commits for commit graph in %d packs",
1346                                pack_indexes->nr),
1347                             pack_indexes->nr);
1348                 ctx->progress = start_delayed_progress(progress_title.buf, 0);
1349                 ctx->progress_done = 0;
1350         }
1351         for (i = 0; i < pack_indexes->nr; i++) {
1352                 struct packed_git *p;
1353                 strbuf_setlen(&packname, dirlen);
1354                 strbuf_addstr(&packname, pack_indexes->items[i].string);
1355                 p = add_packed_git(packname.buf, packname.len, 1);
1356                 if (!p) {
1357                         error(_("error adding pack %s"), packname.buf);
1358                         return -1;
1359                 }
1360                 if (open_pack_index(p)) {
1361                         error(_("error opening index for %s"), packname.buf);
1362                         return -1;
1363                 }
1364                 for_each_object_in_pack(p, add_packed_commits, ctx,
1365                                         FOR_EACH_OBJECT_PACK_ORDER);
1366                 close_pack(p);
1367                 free(p);
1368         }
1369
1370         stop_progress(&ctx->progress);
1371         strbuf_release(&progress_title);
1372         strbuf_release(&packname);
1373
1374         return 0;
1375 }
1376
1377 static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
1378                                   struct oidset *commits)
1379 {
1380         uint32_t i = 0;
1381         struct strbuf progress_title = STRBUF_INIT;
1382         struct oidset_iter iter;
1383         struct object_id *oid;
1384
1385         if (!oidset_size(commits))
1386                 return 0;
1387
1388         if (ctx->report_progress) {
1389                 strbuf_addf(&progress_title,
1390                             Q_("Finding commits for commit graph from %d ref",
1391                                "Finding commits for commit graph from %d refs",
1392                                oidset_size(commits)),
1393                             oidset_size(commits));
1394                 ctx->progress = start_delayed_progress(
1395                                         progress_title.buf,
1396                                         oidset_size(commits));
1397         }
1398
1399         oidset_iter_init(commits, &iter);
1400         while ((oid = oidset_iter_next(&iter))) {
1401                 struct commit *result;
1402
1403                 display_progress(ctx->progress, ++i);
1404
1405                 result = lookup_commit_reference_gently(ctx->r, oid, 1);
1406                 if (result) {
1407                         ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
1408                         oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid));
1409                         ctx->oids.nr++;
1410                 } else if (ctx->check_oids) {
1411                         error(_("invalid commit object id: %s"),
1412                               oid_to_hex(oid));
1413                         return -1;
1414                 }
1415         }
1416
1417         stop_progress(&ctx->progress);
1418         strbuf_release(&progress_title);
1419
1420         return 0;
1421 }
1422
1423 static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
1424 {
1425         if (ctx->report_progress)
1426                 ctx->progress = start_delayed_progress(
1427                         _("Finding commits for commit graph among packed objects"),
1428                         ctx->approx_nr_objects);
1429         for_each_packed_object(add_packed_commits, ctx,
1430                                FOR_EACH_OBJECT_PACK_ORDER);
1431         if (ctx->progress_done < ctx->approx_nr_objects)
1432                 display_progress(ctx->progress, ctx->approx_nr_objects);
1433         stop_progress(&ctx->progress);
1434 }
1435
1436 static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
1437 {
1438         uint32_t i, count_distinct = 1;
1439
1440         if (ctx->report_progress)
1441                 ctx->progress = start_delayed_progress(
1442                         _("Counting distinct commits in commit graph"),
1443                         ctx->oids.nr);
1444         display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
1445         QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
1446
1447         for (i = 1; i < ctx->oids.nr; i++) {
1448                 display_progress(ctx->progress, i + 1);
1449                 if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
1450                         if (ctx->split) {
1451                                 struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
1452
1453                                 if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH)
1454                                         continue;
1455                         }
1456
1457                         count_distinct++;
1458                 }
1459         }
1460         stop_progress(&ctx->progress);
1461
1462         return count_distinct;
1463 }
1464
1465 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
1466 {
1467         uint32_t i;
1468         enum commit_graph_split_flags flags = ctx->split_opts ?
1469                 ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1470
1471         ctx->num_extra_edges = 0;
1472         if (ctx->report_progress)
1473                 ctx->progress = start_delayed_progress(
1474                         _("Finding extra edges in commit graph"),
1475                         ctx->oids.nr);
1476         for (i = 0; i < ctx->oids.nr; i++) {
1477                 unsigned int num_parents;
1478
1479                 display_progress(ctx->progress, i + 1);
1480                 if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
1481                         continue;
1482
1483                 ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
1484                 ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
1485
1486                 if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
1487                     ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH)
1488                         continue;
1489
1490                 if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
1491                         parse_commit(ctx->commits.list[ctx->commits.nr]);
1492                 else
1493                         parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
1494
1495                 num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
1496                 if (num_parents > 2)
1497                         ctx->num_extra_edges += num_parents - 1;
1498
1499                 ctx->commits.nr++;
1500         }
1501         stop_progress(&ctx->progress);
1502 }
1503
1504 static int write_graph_chunk_base_1(struct hashfile *f,
1505                                     struct commit_graph *g)
1506 {
1507         int num = 0;
1508
1509         if (!g)
1510                 return 0;
1511
1512         num = write_graph_chunk_base_1(f, g->base_graph);
1513         hashwrite(f, g->oid.hash, the_hash_algo->rawsz);
1514         return num + 1;
1515 }
1516
1517 static int write_graph_chunk_base(struct hashfile *f,
1518                                   struct write_commit_graph_context *ctx)
1519 {
1520         int num = write_graph_chunk_base_1(f, ctx->new_base_graph);
1521
1522         if (num != ctx->num_commit_graphs_after - 1) {
1523                 error(_("failed to write correct number of base graph ids"));
1524                 return -1;
1525         }
1526
1527         return 0;
1528 }
1529
1530 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
1531 {
1532         uint32_t i;
1533         int fd;
1534         struct hashfile *f;
1535         struct lock_file lk = LOCK_INIT;
1536         uint32_t chunk_ids[MAX_NUM_CHUNKS + 1];
1537         uint64_t chunk_offsets[MAX_NUM_CHUNKS + 1];
1538         const unsigned hashsz = the_hash_algo->rawsz;
1539         struct strbuf progress_title = STRBUF_INIT;
1540         int num_chunks = 3;
1541         struct object_id file_hash;
1542         const struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
1543
1544         if (ctx->split) {
1545                 struct strbuf tmp_file = STRBUF_INIT;
1546
1547                 strbuf_addf(&tmp_file,
1548                             "%s/info/commit-graphs/tmp_graph_XXXXXX",
1549                             ctx->odb->path);
1550                 ctx->graph_name = strbuf_detach(&tmp_file, NULL);
1551         } else {
1552                 ctx->graph_name = get_commit_graph_filename(ctx->odb);
1553         }
1554
1555         if (safe_create_leading_directories(ctx->graph_name)) {
1556                 UNLEAK(ctx->graph_name);
1557                 error(_("unable to create leading directories of %s"),
1558                         ctx->graph_name);
1559                 return -1;
1560         }
1561
1562         if (ctx->split) {
1563                 char *lock_name = get_chain_filename(ctx->odb);
1564
1565                 hold_lock_file_for_update(&lk, lock_name, LOCK_DIE_ON_ERROR);
1566
1567                 fd = git_mkstemp_mode(ctx->graph_name, 0444);
1568                 if (fd < 0) {
1569                         error(_("unable to create temporary graph layer"));
1570                         return -1;
1571                 }
1572
1573                 f = hashfd(fd, ctx->graph_name);
1574         } else {
1575                 hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR);
1576                 fd = lk.tempfile->fd;
1577                 f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
1578         }
1579
1580         chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
1581         chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
1582         chunk_ids[2] = GRAPH_CHUNKID_DATA;
1583         if (ctx->num_extra_edges) {
1584                 chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES;
1585                 num_chunks++;
1586         }
1587         if (ctx->changed_paths) {
1588                 chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMINDEXES;
1589                 num_chunks++;
1590                 chunk_ids[num_chunks] = GRAPH_CHUNKID_BLOOMDATA;
1591                 num_chunks++;
1592         }
1593         if (ctx->num_commit_graphs_after > 1) {
1594                 chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE;
1595                 num_chunks++;
1596         }
1597
1598         chunk_ids[num_chunks] = 0;
1599
1600         chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
1601         chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
1602         chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr;
1603         chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr;
1604
1605         num_chunks = 3;
1606         if (ctx->num_extra_edges) {
1607                 chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1608                                                 4 * ctx->num_extra_edges;
1609                 num_chunks++;
1610         }
1611         if (ctx->changed_paths) {
1612                 chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1613                                                 sizeof(uint32_t) * ctx->commits.nr;
1614                 num_chunks++;
1615
1616                 chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1617                                                 sizeof(uint32_t) * 3 + ctx->total_bloom_filter_data_size;
1618                 num_chunks++;
1619         }
1620         if (ctx->num_commit_graphs_after > 1) {
1621                 chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] +
1622                                                 hashsz * (ctx->num_commit_graphs_after - 1);
1623                 num_chunks++;
1624         }
1625
1626         hashwrite_be32(f, GRAPH_SIGNATURE);
1627
1628         hashwrite_u8(f, GRAPH_VERSION);
1629         hashwrite_u8(f, oid_version());
1630         hashwrite_u8(f, num_chunks);
1631         hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
1632
1633         for (i = 0; i <= num_chunks; i++) {
1634                 uint32_t chunk_write[3];
1635
1636                 chunk_write[0] = htonl(chunk_ids[i]);
1637                 chunk_write[1] = htonl(chunk_offsets[i] >> 32);
1638                 chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
1639                 hashwrite(f, chunk_write, 12);
1640         }
1641
1642         if (ctx->report_progress) {
1643                 strbuf_addf(&progress_title,
1644                             Q_("Writing out commit graph in %d pass",
1645                                "Writing out commit graph in %d passes",
1646                                num_chunks),
1647                             num_chunks);
1648                 ctx->progress = start_delayed_progress(
1649                         progress_title.buf,
1650                         num_chunks * ctx->commits.nr);
1651         }
1652         write_graph_chunk_fanout(f, ctx);
1653         write_graph_chunk_oids(f, hashsz, ctx);
1654         write_graph_chunk_data(f, hashsz, ctx);
1655         if (ctx->num_extra_edges)
1656                 write_graph_chunk_extra_edges(f, ctx);
1657         if (ctx->changed_paths) {
1658                 write_graph_chunk_bloom_indexes(f, ctx);
1659                 write_graph_chunk_bloom_data(f, ctx, &bloom_settings);
1660         }
1661         if (ctx->num_commit_graphs_after > 1 &&
1662             write_graph_chunk_base(f, ctx)) {
1663                 return -1;
1664         }
1665         stop_progress(&ctx->progress);
1666         strbuf_release(&progress_title);
1667
1668         if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) {
1669                 char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid));
1670                 char *new_base_name = get_split_graph_filename(ctx->new_base_graph->odb, new_base_hash);
1671
1672                 free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
1673                 free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]);
1674                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name;
1675                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash;
1676         }
1677
1678         close_commit_graph(ctx->r->objects);
1679         finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1680
1681         if (ctx->split) {
1682                 FILE *chainf = fdopen_lock_file(&lk, "w");
1683                 char *final_graph_name;
1684                 int result;
1685
1686                 close(fd);
1687
1688                 if (!chainf) {
1689                         error(_("unable to open commit-graph chain file"));
1690                         return -1;
1691                 }
1692
1693                 if (ctx->base_graph_name) {
1694                         const char *dest;
1695                         int idx = ctx->num_commit_graphs_after - 1;
1696                         if (ctx->num_commit_graphs_after > 1)
1697                                 idx--;
1698
1699                         dest = ctx->commit_graph_filenames_after[idx];
1700
1701                         if (strcmp(ctx->base_graph_name, dest)) {
1702                                 result = rename(ctx->base_graph_name, dest);
1703
1704                                 if (result) {
1705                                         error(_("failed to rename base commit-graph file"));
1706                                         return -1;
1707                                 }
1708                         }
1709                 } else {
1710                         char *graph_name = get_commit_graph_filename(ctx->odb);
1711                         unlink(graph_name);
1712                 }
1713
1714                 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash));
1715                 final_graph_name = get_split_graph_filename(ctx->odb,
1716                                         ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
1717                 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
1718
1719                 result = rename(ctx->graph_name, final_graph_name);
1720
1721                 for (i = 0; i < ctx->num_commit_graphs_after; i++)
1722                         fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]);
1723
1724                 if (result) {
1725                         error(_("failed to rename temporary commit-graph file"));
1726                         return -1;
1727                 }
1728         }
1729
1730         commit_lock_file(&lk);
1731
1732         return 0;
1733 }
1734
1735 static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
1736 {
1737         struct commit_graph *g;
1738         uint32_t num_commits;
1739         enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1740         uint32_t i;
1741
1742         int max_commits = 0;
1743         int size_mult = 2;
1744
1745         if (ctx->split_opts) {
1746                 max_commits = ctx->split_opts->max_commits;
1747
1748                 if (ctx->split_opts->size_multiple)
1749                         size_mult = ctx->split_opts->size_multiple;
1750
1751                 flags = ctx->split_opts->flags;
1752         }
1753
1754         g = ctx->r->objects->commit_graph;
1755         num_commits = ctx->commits.nr;
1756         if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
1757                 ctx->num_commit_graphs_after = 1;
1758         else
1759                 ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
1760
1761         if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
1762             flags != COMMIT_GRAPH_SPLIT_REPLACE) {
1763                 while (g && (g->num_commits <= size_mult * num_commits ||
1764                             (max_commits && num_commits > max_commits))) {
1765                         if (g->odb != ctx->odb)
1766                                 break;
1767
1768                         num_commits += g->num_commits;
1769                         g = g->base_graph;
1770
1771                         ctx->num_commit_graphs_after--;
1772                 }
1773         }
1774
1775         if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
1776                 ctx->new_base_graph = g;
1777         else if (ctx->num_commit_graphs_after != 1)
1778                 BUG("split_graph_merge_strategy: num_commit_graphs_after "
1779                     "should be 1 with --split=replace");
1780
1781         if (ctx->num_commit_graphs_after == 2) {
1782                 char *old_graph_name = get_commit_graph_filename(g->odb);
1783
1784                 if (!strcmp(g->filename, old_graph_name) &&
1785                     g->odb != ctx->odb) {
1786                         ctx->num_commit_graphs_after = 1;
1787                         ctx->new_base_graph = NULL;
1788                 }
1789
1790                 free(old_graph_name);
1791         }
1792
1793         CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
1794         CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
1795
1796         for (i = 0; i < ctx->num_commit_graphs_after &&
1797                     i < ctx->num_commit_graphs_before; i++)
1798                 ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
1799
1800         i = ctx->num_commit_graphs_before - 1;
1801         g = ctx->r->objects->commit_graph;
1802
1803         while (g) {
1804                 if (i < ctx->num_commit_graphs_after)
1805                         ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
1806
1807                 i--;
1808                 g = g->base_graph;
1809         }
1810 }
1811
1812 static void merge_commit_graph(struct write_commit_graph_context *ctx,
1813                                struct commit_graph *g)
1814 {
1815         uint32_t i;
1816         uint32_t offset = g->num_commits_in_base;
1817
1818         ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
1819
1820         for (i = 0; i < g->num_commits; i++) {
1821                 struct object_id oid;
1822                 struct commit *result;
1823
1824                 display_progress(ctx->progress, i + 1);
1825
1826                 load_oid_from_graph(g, i + offset, &oid);
1827
1828                 /* only add commits if they still exist in the repo */
1829                 result = lookup_commit_reference_gently(ctx->r, &oid, 1);
1830
1831                 if (result) {
1832                         ctx->commits.list[ctx->commits.nr] = result;
1833                         ctx->commits.nr++;
1834                 }
1835         }
1836 }
1837
1838 static int commit_compare(const void *_a, const void *_b)
1839 {
1840         const struct commit *a = *(const struct commit **)_a;
1841         const struct commit *b = *(const struct commit **)_b;
1842         return oidcmp(&a->object.oid, &b->object.oid);
1843 }
1844
1845 static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
1846 {
1847         uint32_t i;
1848
1849         if (ctx->report_progress)
1850                 ctx->progress = start_delayed_progress(
1851                                         _("Scanning merged commits"),
1852                                         ctx->commits.nr);
1853
1854         QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
1855
1856         ctx->num_extra_edges = 0;
1857         for (i = 0; i < ctx->commits.nr; i++) {
1858                 display_progress(ctx->progress, i);
1859
1860                 if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
1861                           &ctx->commits.list[i]->object.oid)) {
1862                         die(_("unexpected duplicate commit id %s"),
1863                             oid_to_hex(&ctx->commits.list[i]->object.oid));
1864                 } else {
1865                         unsigned int num_parents;
1866
1867                         num_parents = commit_list_count(ctx->commits.list[i]->parents);
1868                         if (num_parents > 2)
1869                                 ctx->num_extra_edges += num_parents - 1;
1870                 }
1871         }
1872
1873         stop_progress(&ctx->progress);
1874 }
1875
1876 static void merge_commit_graphs(struct write_commit_graph_context *ctx)
1877 {
1878         struct commit_graph *g = ctx->r->objects->commit_graph;
1879         uint32_t current_graph_number = ctx->num_commit_graphs_before;
1880
1881         while (g && current_graph_number >= ctx->num_commit_graphs_after) {
1882                 current_graph_number--;
1883
1884                 if (ctx->report_progress)
1885                         ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0);
1886
1887                 merge_commit_graph(ctx, g);
1888                 stop_progress(&ctx->progress);
1889
1890                 g = g->base_graph;
1891         }
1892
1893         if (g) {
1894                 ctx->new_base_graph = g;
1895                 ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
1896         }
1897
1898         if (ctx->new_base_graph)
1899                 ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
1900
1901         sort_and_scan_merged_commits(ctx);
1902 }
1903
1904 static void mark_commit_graphs(struct write_commit_graph_context *ctx)
1905 {
1906         uint32_t i;
1907         time_t now = time(NULL);
1908
1909         for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
1910                 struct stat st;
1911                 struct utimbuf updated_time;
1912
1913                 stat(ctx->commit_graph_filenames_before[i], &st);
1914
1915                 updated_time.actime = st.st_atime;
1916                 updated_time.modtime = now;
1917                 utime(ctx->commit_graph_filenames_before[i], &updated_time);
1918         }
1919 }
1920
1921 static void expire_commit_graphs(struct write_commit_graph_context *ctx)
1922 {
1923         struct strbuf path = STRBUF_INIT;
1924         DIR *dir;
1925         struct dirent *de;
1926         size_t dirnamelen;
1927         timestamp_t expire_time = time(NULL);
1928
1929         if (ctx->split_opts && ctx->split_opts->expire_time)
1930                 expire_time = ctx->split_opts->expire_time;
1931         if (!ctx->split) {
1932                 char *chain_file_name = get_chain_filename(ctx->odb);
1933                 unlink(chain_file_name);
1934                 free(chain_file_name);
1935                 ctx->num_commit_graphs_after = 0;
1936         }
1937
1938         strbuf_addstr(&path, ctx->odb->path);
1939         strbuf_addstr(&path, "/info/commit-graphs");
1940         dir = opendir(path.buf);
1941
1942         if (!dir)
1943                 goto out;
1944
1945         strbuf_addch(&path, '/');
1946         dirnamelen = path.len;
1947         while ((de = readdir(dir)) != NULL) {
1948                 struct stat st;
1949                 uint32_t i, found = 0;
1950
1951                 strbuf_setlen(&path, dirnamelen);
1952                 strbuf_addstr(&path, de->d_name);
1953
1954                 stat(path.buf, &st);
1955
1956                 if (st.st_mtime > expire_time)
1957                         continue;
1958                 if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
1959                         continue;
1960
1961                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
1962                         if (!strcmp(ctx->commit_graph_filenames_after[i],
1963                                     path.buf)) {
1964                                 found = 1;
1965                                 break;
1966                         }
1967                 }
1968
1969                 if (!found)
1970                         unlink(path.buf);
1971         }
1972
1973 out:
1974         strbuf_release(&path);
1975 }
1976
1977 int write_commit_graph(struct object_directory *odb,
1978                        struct string_list *pack_indexes,
1979                        struct oidset *commits,
1980                        enum commit_graph_write_flags flags,
1981                        const struct split_commit_graph_opts *split_opts)
1982 {
1983         struct write_commit_graph_context *ctx;
1984         uint32_t i, count_distinct = 0;
1985         int res = 0;
1986         int replace = 0;
1987
1988         if (!commit_graph_compatible(the_repository))
1989                 return 0;
1990
1991         ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
1992         ctx->r = the_repository;
1993         ctx->odb = odb;
1994         ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
1995         ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
1996         ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
1997         ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
1998         ctx->split_opts = split_opts;
1999         ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
2000         ctx->total_bloom_filter_data_size = 0;
2001
2002         if (ctx->split) {
2003                 struct commit_graph *g;
2004                 prepare_commit_graph(ctx->r);
2005
2006                 g = ctx->r->objects->commit_graph;
2007
2008                 while (g) {
2009                         ctx->num_commit_graphs_before++;
2010                         g = g->base_graph;
2011                 }
2012
2013                 if (ctx->num_commit_graphs_before) {
2014                         ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before);
2015                         i = ctx->num_commit_graphs_before;
2016                         g = ctx->r->objects->commit_graph;
2017
2018                         while (g) {
2019                                 ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename);
2020                                 g = g->base_graph;
2021                         }
2022                 }
2023
2024                 if (ctx->split_opts)
2025                         replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE;
2026         }
2027
2028         ctx->approx_nr_objects = approximate_object_count();
2029         ctx->oids.alloc = ctx->approx_nr_objects / 32;
2030
2031         if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits)
2032                 ctx->oids.alloc = split_opts->max_commits;
2033
2034         if (ctx->append) {
2035                 prepare_commit_graph_one(ctx->r, ctx->odb);
2036                 if (ctx->r->objects->commit_graph)
2037                         ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits;
2038         }
2039
2040         if (ctx->oids.alloc < 1024)
2041                 ctx->oids.alloc = 1024;
2042         ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc);
2043
2044         if (ctx->append && ctx->r->objects->commit_graph) {
2045                 struct commit_graph *g = ctx->r->objects->commit_graph;
2046                 for (i = 0; i < g->num_commits; i++) {
2047                         const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i;
2048                         hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash);
2049                 }
2050         }
2051
2052         if (pack_indexes) {
2053                 ctx->order_by_pack = 1;
2054                 if ((res = fill_oids_from_packs(ctx, pack_indexes)))
2055                         goto cleanup;
2056         }
2057
2058         if (commits) {
2059                 if ((res = fill_oids_from_commits(ctx, commits)))
2060                         goto cleanup;
2061         }
2062
2063         if (!pack_indexes && !commits) {
2064                 ctx->order_by_pack = 1;
2065                 fill_oids_from_all_packs(ctx);
2066         }
2067
2068         close_reachable(ctx);
2069
2070         count_distinct = count_distinct_commits(ctx);
2071
2072         if (count_distinct >= GRAPH_EDGE_LAST_MASK) {
2073                 error(_("the commit graph format cannot write %d commits"), count_distinct);
2074                 res = -1;
2075                 goto cleanup;
2076         }
2077
2078         ctx->commits.alloc = count_distinct;
2079         ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc);
2080
2081         copy_oids_to_commits(ctx);
2082
2083         if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
2084                 error(_("too many commits to write graph"));
2085                 res = -1;
2086                 goto cleanup;
2087         }
2088
2089         if (!ctx->commits.nr && !replace)
2090                 goto cleanup;
2091
2092         if (ctx->split) {
2093                 split_graph_merge_strategy(ctx);
2094
2095                 if (!replace)
2096                         merge_commit_graphs(ctx);
2097         } else
2098                 ctx->num_commit_graphs_after = 1;
2099
2100         compute_generation_numbers(ctx);
2101
2102         if (ctx->changed_paths)
2103                 compute_bloom_filters(ctx);
2104
2105         res = write_commit_graph_file(ctx);
2106
2107         if (ctx->split)
2108                 mark_commit_graphs(ctx);
2109
2110         expire_commit_graphs(ctx);
2111
2112 cleanup:
2113         free(ctx->graph_name);
2114         free(ctx->commits.list);
2115         free(ctx->oids.list);
2116
2117         if (ctx->commit_graph_filenames_after) {
2118                 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2119                         free(ctx->commit_graph_filenames_after[i]);
2120                         free(ctx->commit_graph_hash_after[i]);
2121                 }
2122
2123                 for (i = 0; i < ctx->num_commit_graphs_before; i++)
2124                         free(ctx->commit_graph_filenames_before[i]);
2125
2126                 free(ctx->commit_graph_filenames_after);
2127                 free(ctx->commit_graph_filenames_before);
2128                 free(ctx->commit_graph_hash_after);
2129         }
2130
2131         free(ctx);
2132
2133         return res;
2134 }
2135
2136 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
2137 static int verify_commit_graph_error;
2138
2139 static void graph_report(const char *fmt, ...)
2140 {
2141         va_list ap;
2142
2143         verify_commit_graph_error = 1;
2144         va_start(ap, fmt);
2145         vfprintf(stderr, fmt, ap);
2146         fprintf(stderr, "\n");
2147         va_end(ap);
2148 }
2149
2150 #define GENERATION_ZERO_EXISTS 1
2151 #define GENERATION_NUMBER_EXISTS 2
2152
2153 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
2154 {
2155         uint32_t i, cur_fanout_pos = 0;
2156         struct object_id prev_oid, cur_oid, checksum;
2157         int generation_zero = 0;
2158         struct hashfile *f;
2159         int devnull;
2160         struct progress *progress = NULL;
2161         int local_error = 0;
2162
2163         if (!g) {
2164                 graph_report("no commit-graph file loaded");
2165                 return 1;
2166         }
2167
2168         verify_commit_graph_error = verify_commit_graph_lite(g);
2169         if (verify_commit_graph_error)
2170                 return verify_commit_graph_error;
2171
2172         devnull = open("/dev/null", O_WRONLY);
2173         f = hashfd(devnull, NULL);
2174         hashwrite(f, g->data, g->data_len - g->hash_len);
2175         finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
2176         if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) {
2177                 graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
2178                 verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
2179         }
2180
2181         for (i = 0; i < g->num_commits; i++) {
2182                 struct commit *graph_commit;
2183
2184                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2185
2186                 if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
2187                         graph_report(_("commit-graph has incorrect OID order: %s then %s"),
2188                                      oid_to_hex(&prev_oid),
2189                                      oid_to_hex(&cur_oid));
2190
2191                 oidcpy(&prev_oid, &cur_oid);
2192
2193                 while (cur_oid.hash[0] > cur_fanout_pos) {
2194                         uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2195
2196                         if (i != fanout_value)
2197                                 graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2198                                              cur_fanout_pos, fanout_value, i);
2199                         cur_fanout_pos++;
2200                 }
2201
2202                 graph_commit = lookup_commit(r, &cur_oid);
2203                 if (!parse_commit_in_graph_one(r, g, graph_commit))
2204                         graph_report(_("failed to parse commit %s from commit-graph"),
2205                                      oid_to_hex(&cur_oid));
2206         }
2207
2208         while (cur_fanout_pos < 256) {
2209                 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2210
2211                 if (g->num_commits != fanout_value)
2212                         graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2213                                      cur_fanout_pos, fanout_value, i);
2214
2215                 cur_fanout_pos++;
2216         }
2217
2218         if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
2219                 return verify_commit_graph_error;
2220
2221         if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
2222                 progress = start_progress(_("Verifying commits in commit graph"),
2223                                         g->num_commits);
2224
2225         for (i = 0; i < g->num_commits; i++) {
2226                 struct commit *graph_commit, *odb_commit;
2227                 struct commit_list *graph_parents, *odb_parents;
2228                 uint32_t max_generation = 0;
2229
2230                 display_progress(progress, i + 1);
2231                 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2232
2233                 graph_commit = lookup_commit(r, &cur_oid);
2234                 odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));
2235                 if (parse_commit_internal(odb_commit, 0, 0)) {
2236                         graph_report(_("failed to parse commit %s from object database for commit-graph"),
2237                                      oid_to_hex(&cur_oid));
2238                         continue;
2239                 }
2240
2241                 if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
2242                            get_commit_tree_oid(odb_commit)))
2243                         graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"),
2244                                      oid_to_hex(&cur_oid),
2245                                      oid_to_hex(get_commit_tree_oid(graph_commit)),
2246                                      oid_to_hex(get_commit_tree_oid(odb_commit)));
2247
2248                 graph_parents = graph_commit->parents;
2249                 odb_parents = odb_commit->parents;
2250
2251                 while (graph_parents) {
2252                         if (odb_parents == NULL) {
2253                                 graph_report(_("commit-graph parent list for commit %s is too long"),
2254                                              oid_to_hex(&cur_oid));
2255                                 break;
2256                         }
2257
2258                         /* parse parent in case it is in a base graph */
2259                         parse_commit_in_graph_one(r, g, graph_parents->item);
2260
2261                         if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
2262                                 graph_report(_("commit-graph parent for %s is %s != %s"),
2263                                              oid_to_hex(&cur_oid),
2264                                              oid_to_hex(&graph_parents->item->object.oid),
2265                                              oid_to_hex(&odb_parents->item->object.oid));
2266
2267                         if (graph_parents->item->generation > max_generation)
2268                                 max_generation = graph_parents->item->generation;
2269
2270                         graph_parents = graph_parents->next;
2271                         odb_parents = odb_parents->next;
2272                 }
2273
2274                 if (odb_parents != NULL)
2275                         graph_report(_("commit-graph parent list for commit %s terminates early"),
2276                                      oid_to_hex(&cur_oid));
2277
2278                 if (!graph_commit->generation) {
2279                         if (generation_zero == GENERATION_NUMBER_EXISTS)
2280                                 graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
2281                                              oid_to_hex(&cur_oid));
2282                         generation_zero = GENERATION_ZERO_EXISTS;
2283                 } else if (generation_zero == GENERATION_ZERO_EXISTS)
2284                         graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
2285                                      oid_to_hex(&cur_oid));
2286
2287                 if (generation_zero == GENERATION_ZERO_EXISTS)
2288                         continue;
2289
2290                 /*
2291                  * If one of our parents has generation GENERATION_NUMBER_MAX, then
2292                  * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
2293                  * extra logic in the following condition.
2294                  */
2295                 if (max_generation == GENERATION_NUMBER_MAX)
2296                         max_generation--;
2297
2298                 if (graph_commit->generation != max_generation + 1)
2299                         graph_report(_("commit-graph generation for commit %s is %u != %u"),
2300                                      oid_to_hex(&cur_oid),
2301                                      graph_commit->generation,
2302                                      max_generation + 1);
2303
2304                 if (graph_commit->date != odb_commit->date)
2305                         graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
2306                                      oid_to_hex(&cur_oid),
2307                                      graph_commit->date,
2308                                      odb_commit->date);
2309         }
2310         stop_progress(&progress);
2311
2312         local_error = verify_commit_graph_error;
2313
2314         if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph)
2315                 local_error |= verify_commit_graph(r, g->base_graph, flags);
2316
2317         return local_error;
2318 }
2319
2320 void free_commit_graph(struct commit_graph *g)
2321 {
2322         if (!g)
2323                 return;
2324         if (g->data) {
2325                 munmap((void *)g->data, g->data_len);
2326                 g->data = NULL;
2327         }
2328         free(g->filename);
2329         free(g->bloom_filter_settings);
2330         free(g);
2331 }
2332
2333 void disable_commit_graph(struct repository *r)
2334 {
2335         r->commit_graph_disabled = 1;
2336 }