commit-graph: implement git commit-graph read
[git] / commit-graph.c
1 #include "cache.h"
2 #include "config.h"
3 #include "git-compat-util.h"
4 #include "lockfile.h"
5 #include "pack.h"
6 #include "packfile.h"
7 #include "commit.h"
8 #include "object.h"
9 #include "revision.h"
10 #include "sha1-lookup.h"
11 #include "commit-graph.h"
12
13 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
14 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
15 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
16 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
17 #define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
18
19 #define GRAPH_DATA_WIDTH 36
20
21 #define GRAPH_VERSION_1 0x1
22 #define GRAPH_VERSION GRAPH_VERSION_1
23
24 #define GRAPH_OID_VERSION_SHA1 1
25 #define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
26 #define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
27 #define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
28
29 #define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
30 #define GRAPH_PARENT_MISSING 0x7fffffff
31 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
32 #define GRAPH_PARENT_NONE 0x70000000
33
34 #define GRAPH_LAST_EDGE 0x80000000
35
36 #define GRAPH_FANOUT_SIZE (4 * 256)
37 #define GRAPH_CHUNKLOOKUP_WIDTH 12
38 #define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \
39                         GRAPH_OID_LEN + 8)
40
41
42 char *get_commit_graph_filename(const char *obj_dir)
43 {
44         return xstrfmt("%s/info/commit-graph", obj_dir);
45 }
46
47 static struct commit_graph *alloc_commit_graph(void)
48 {
49         struct commit_graph *g = xcalloc(1, sizeof(*g));
50         g->graph_fd = -1;
51
52         return g;
53 }
54
55 struct commit_graph *load_commit_graph_one(const char *graph_file)
56 {
57         void *graph_map;
58         const unsigned char *data, *chunk_lookup;
59         size_t graph_size;
60         struct stat st;
61         uint32_t i;
62         struct commit_graph *graph;
63         int fd = git_open(graph_file);
64         uint64_t last_chunk_offset;
65         uint32_t last_chunk_id;
66         uint32_t graph_signature;
67         unsigned char graph_version, hash_version;
68
69         if (fd < 0)
70                 return NULL;
71         if (fstat(fd, &st)) {
72                 close(fd);
73                 return NULL;
74         }
75         graph_size = xsize_t(st.st_size);
76
77         if (graph_size < GRAPH_MIN_SIZE) {
78                 close(fd);
79                 die("graph file %s is too small", graph_file);
80         }
81         graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
82         data = (const unsigned char *)graph_map;
83
84         graph_signature = get_be32(data);
85         if (graph_signature != GRAPH_SIGNATURE) {
86                 error("graph signature %X does not match signature %X",
87                       graph_signature, GRAPH_SIGNATURE);
88                 goto cleanup_fail;
89         }
90
91         graph_version = *(unsigned char*)(data + 4);
92         if (graph_version != GRAPH_VERSION) {
93                 error("graph version %X does not match version %X",
94                       graph_version, GRAPH_VERSION);
95                 goto cleanup_fail;
96         }
97
98         hash_version = *(unsigned char*)(data + 5);
99         if (hash_version != GRAPH_OID_VERSION) {
100                 error("hash version %X does not match version %X",
101                       hash_version, GRAPH_OID_VERSION);
102                 goto cleanup_fail;
103         }
104
105         graph = alloc_commit_graph();
106
107         graph->hash_len = GRAPH_OID_LEN;
108         graph->num_chunks = *(unsigned char*)(data + 6);
109         graph->graph_fd = fd;
110         graph->data = graph_map;
111         graph->data_len = graph_size;
112
113         last_chunk_id = 0;
114         last_chunk_offset = 8;
115         chunk_lookup = data + 8;
116         for (i = 0; i < graph->num_chunks; i++) {
117                 uint32_t chunk_id = get_be32(chunk_lookup + 0);
118                 uint64_t chunk_offset = get_be64(chunk_lookup + 4);
119                 int chunk_repeated = 0;
120
121                 chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
122
123                 if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
124                         error("improper chunk offset %08x%08x", (uint32_t)(chunk_offset >> 32),
125                               (uint32_t)chunk_offset);
126                         goto cleanup_fail;
127                 }
128
129                 switch (chunk_id) {
130                 case GRAPH_CHUNKID_OIDFANOUT:
131                         if (graph->chunk_oid_fanout)
132                                 chunk_repeated = 1;
133                         else
134                                 graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
135                         break;
136
137                 case GRAPH_CHUNKID_OIDLOOKUP:
138                         if (graph->chunk_oid_lookup)
139                                 chunk_repeated = 1;
140                         else
141                                 graph->chunk_oid_lookup = data + chunk_offset;
142                         break;
143
144                 case GRAPH_CHUNKID_DATA:
145                         if (graph->chunk_commit_data)
146                                 chunk_repeated = 1;
147                         else
148                                 graph->chunk_commit_data = data + chunk_offset;
149                         break;
150
151                 case GRAPH_CHUNKID_LARGEEDGES:
152                         if (graph->chunk_large_edges)
153                                 chunk_repeated = 1;
154                         else
155                                 graph->chunk_large_edges = data + chunk_offset;
156                         break;
157                 }
158
159                 if (chunk_repeated) {
160                         error("chunk id %08x appears multiple times", chunk_id);
161                         goto cleanup_fail;
162                 }
163
164                 if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
165                 {
166                         graph->num_commits = (chunk_offset - last_chunk_offset)
167                                              / graph->hash_len;
168                 }
169
170                 last_chunk_id = chunk_id;
171                 last_chunk_offset = chunk_offset;
172         }
173
174         return graph;
175
176 cleanup_fail:
177         munmap(graph_map, graph_size);
178         close(fd);
179         exit(1);
180 }
181
182 static void write_graph_chunk_fanout(struct hashfile *f,
183                                      struct commit **commits,
184                                      int nr_commits)
185 {
186         int i, count = 0;
187         struct commit **list = commits;
188
189         /*
190          * Write the first-level table (the list is sorted,
191          * but we use a 256-entry lookup to be able to avoid
192          * having to do eight extra binary search iterations).
193          */
194         for (i = 0; i < 256; i++) {
195                 while (count < nr_commits) {
196                         if ((*list)->object.oid.hash[0] != i)
197                                 break;
198                         count++;
199                         list++;
200                 }
201
202                 hashwrite_be32(f, count);
203         }
204 }
205
206 static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
207                                    struct commit **commits, int nr_commits)
208 {
209         struct commit **list = commits;
210         int count;
211         for (count = 0; count < nr_commits; count++, list++)
212                 hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
213 }
214
215 static const unsigned char *commit_to_sha1(size_t index, void *table)
216 {
217         struct commit **commits = table;
218         return commits[index]->object.oid.hash;
219 }
220
221 static void write_graph_chunk_data(struct hashfile *f, int hash_len,
222                                    struct commit **commits, int nr_commits)
223 {
224         struct commit **list = commits;
225         struct commit **last = commits + nr_commits;
226         uint32_t num_extra_edges = 0;
227
228         while (list < last) {
229                 struct commit_list *parent;
230                 int edge_value;
231                 uint32_t packedDate[2];
232
233                 parse_commit(*list);
234                 hashwrite(f, (*list)->tree->object.oid.hash, hash_len);
235
236                 parent = (*list)->parents;
237
238                 if (!parent)
239                         edge_value = GRAPH_PARENT_NONE;
240                 else {
241                         edge_value = sha1_pos(parent->item->object.oid.hash,
242                                               commits,
243                                               nr_commits,
244                                               commit_to_sha1);
245
246                         if (edge_value < 0)
247                                 edge_value = GRAPH_PARENT_MISSING;
248                 }
249
250                 hashwrite_be32(f, edge_value);
251
252                 if (parent)
253                         parent = parent->next;
254
255                 if (!parent)
256                         edge_value = GRAPH_PARENT_NONE;
257                 else if (parent->next)
258                         edge_value = GRAPH_OCTOPUS_EDGES_NEEDED | num_extra_edges;
259                 else {
260                         edge_value = sha1_pos(parent->item->object.oid.hash,
261                                               commits,
262                                               nr_commits,
263                                               commit_to_sha1);
264                         if (edge_value < 0)
265                                 edge_value = GRAPH_PARENT_MISSING;
266                 }
267
268                 hashwrite_be32(f, edge_value);
269
270                 if (edge_value & GRAPH_OCTOPUS_EDGES_NEEDED) {
271                         do {
272                                 num_extra_edges++;
273                                 parent = parent->next;
274                         } while (parent);
275                 }
276
277                 if (sizeof((*list)->date) > 4)
278                         packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
279                 else
280                         packedDate[0] = 0;
281
282                 packedDate[1] = htonl((*list)->date);
283                 hashwrite(f, packedDate, 8);
284
285                 list++;
286         }
287 }
288
289 static void write_graph_chunk_large_edges(struct hashfile *f,
290                                           struct commit **commits,
291                                           int nr_commits)
292 {
293         struct commit **list = commits;
294         struct commit **last = commits + nr_commits;
295         struct commit_list *parent;
296
297         while (list < last) {
298                 int num_parents = 0;
299                 for (parent = (*list)->parents; num_parents < 3 && parent;
300                      parent = parent->next)
301                         num_parents++;
302
303                 if (num_parents <= 2) {
304                         list++;
305                         continue;
306                 }
307
308                 /* Since num_parents > 2, this initializer is safe. */
309                 for (parent = (*list)->parents->next; parent; parent = parent->next) {
310                         int edge_value = sha1_pos(parent->item->object.oid.hash,
311                                                   commits,
312                                                   nr_commits,
313                                                   commit_to_sha1);
314
315                         if (edge_value < 0)
316                                 edge_value = GRAPH_PARENT_MISSING;
317                         else if (!parent->next)
318                                 edge_value |= GRAPH_LAST_EDGE;
319
320                         hashwrite_be32(f, edge_value);
321                 }
322
323                 list++;
324         }
325 }
326
327 static int commit_compare(const void *_a, const void *_b)
328 {
329         const struct object_id *a = (const struct object_id *)_a;
330         const struct object_id *b = (const struct object_id *)_b;
331         return oidcmp(a, b);
332 }
333
334 struct packed_commit_list {
335         struct commit **list;
336         int nr;
337         int alloc;
338 };
339
340 struct packed_oid_list {
341         struct object_id *list;
342         int nr;
343         int alloc;
344 };
345
346 static int add_packed_commits(const struct object_id *oid,
347                               struct packed_git *pack,
348                               uint32_t pos,
349                               void *data)
350 {
351         struct packed_oid_list *list = (struct packed_oid_list*)data;
352         enum object_type type;
353         off_t offset = nth_packed_object_offset(pack, pos);
354         struct object_info oi = OBJECT_INFO_INIT;
355
356         oi.typep = &type;
357         if (packed_object_info(pack, offset, &oi) < 0)
358                 die("unable to get type of object %s", oid_to_hex(oid));
359
360         if (type != OBJ_COMMIT)
361                 return 0;
362
363         ALLOC_GROW(list->list, list->nr + 1, list->alloc);
364         oidcpy(&(list->list[list->nr]), oid);
365         list->nr++;
366
367         return 0;
368 }
369
370 void write_commit_graph(const char *obj_dir)
371 {
372         struct packed_oid_list oids;
373         struct packed_commit_list commits;
374         struct hashfile *f;
375         uint32_t i, count_distinct = 0;
376         char *graph_name;
377         int fd;
378         struct lock_file lk = LOCK_INIT;
379         uint32_t chunk_ids[5];
380         uint64_t chunk_offsets[5];
381         int num_chunks;
382         int num_extra_edges;
383         struct commit_list *parent;
384
385         oids.nr = 0;
386         oids.alloc = approximate_object_count() / 4;
387
388         if (oids.alloc < 1024)
389                 oids.alloc = 1024;
390         ALLOC_ARRAY(oids.list, oids.alloc);
391
392         for_each_packed_object(add_packed_commits, &oids, 0);
393
394         QSORT(oids.list, oids.nr, commit_compare);
395
396         count_distinct = 1;
397         for (i = 1; i < oids.nr; i++) {
398                 if (oidcmp(&oids.list[i-1], &oids.list[i]))
399                         count_distinct++;
400         }
401
402         if (count_distinct >= GRAPH_PARENT_MISSING)
403                 die(_("the commit graph format cannot write %d commits"), count_distinct);
404
405         commits.nr = 0;
406         commits.alloc = count_distinct;
407         ALLOC_ARRAY(commits.list, commits.alloc);
408
409         num_extra_edges = 0;
410         for (i = 0; i < oids.nr; i++) {
411                 int num_parents = 0;
412                 if (i > 0 && !oidcmp(&oids.list[i-1], &oids.list[i]))
413                         continue;
414
415                 commits.list[commits.nr] = lookup_commit(&oids.list[i]);
416                 parse_commit(commits.list[commits.nr]);
417
418                 for (parent = commits.list[commits.nr]->parents;
419                      parent; parent = parent->next)
420                         num_parents++;
421
422                 if (num_parents > 2)
423                         num_extra_edges += num_parents - 1;
424
425                 commits.nr++;
426         }
427         num_chunks = num_extra_edges ? 4 : 3;
428
429         if (commits.nr >= GRAPH_PARENT_MISSING)
430                 die(_("too many commits to write graph"));
431
432         graph_name = get_commit_graph_filename(obj_dir);
433         fd = hold_lock_file_for_update(&lk, graph_name, 0);
434
435         if (fd < 0) {
436                 struct strbuf folder = STRBUF_INIT;
437                 strbuf_addstr(&folder, graph_name);
438                 strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf);
439
440                 if (mkdir(folder.buf, 0777) < 0)
441                         die_errno(_("cannot mkdir %s"), folder.buf);
442                 strbuf_release(&folder);
443
444                 fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
445
446                 if (fd < 0)
447                         die_errno("unable to create '%s'", graph_name);
448         }
449
450         f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
451
452         hashwrite_be32(f, GRAPH_SIGNATURE);
453
454         hashwrite_u8(f, GRAPH_VERSION);
455         hashwrite_u8(f, GRAPH_OID_VERSION);
456         hashwrite_u8(f, num_chunks);
457         hashwrite_u8(f, 0); /* unused padding byte */
458
459         chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
460         chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
461         chunk_ids[2] = GRAPH_CHUNKID_DATA;
462         if (num_extra_edges)
463                 chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES;
464         else
465                 chunk_ids[3] = 0;
466         chunk_ids[4] = 0;
467
468         chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
469         chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
470         chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.nr;
471         chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.nr;
472         chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
473
474         for (i = 0; i <= num_chunks; i++) {
475                 uint32_t chunk_write[3];
476
477                 chunk_write[0] = htonl(chunk_ids[i]);
478                 chunk_write[1] = htonl(chunk_offsets[i] >> 32);
479                 chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
480                 hashwrite(f, chunk_write, 12);
481         }
482
483         write_graph_chunk_fanout(f, commits.list, commits.nr);
484         write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
485         write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
486         write_graph_chunk_large_edges(f, commits.list, commits.nr);
487
488         finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
489         commit_lock_file(&lk);
490
491         free(oids.list);
492         oids.alloc = 0;
493         oids.nr = 0;
494 }