The sixth batch
[git] / delta-islands.c
1 #include "cache.h"
2 #include "attr.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "commit.h"
6 #include "tag.h"
7 #include "tree.h"
8 #include "delta.h"
9 #include "pack.h"
10 #include "tree-walk.h"
11 #include "diff.h"
12 #include "revision.h"
13 #include "list-objects.h"
14 #include "progress.h"
15 #include "refs.h"
16 #include "khash.h"
17 #include "pack-bitmap.h"
18 #include "pack-objects.h"
19 #include "delta-islands.h"
20 #include "sha1-array.h"
21 #include "config.h"
22
23 KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
24
25 static khash_sha1 *island_marks;
26 static unsigned island_counter;
27 static unsigned island_counter_core;
28
29 static kh_str_t *remote_islands;
30
31 struct remote_island {
32         uint64_t hash;
33         struct oid_array oids;
34 };
35
36 struct island_bitmap {
37         uint32_t refcount;
38         uint32_t bits[FLEX_ARRAY];
39 };
40
41 static uint32_t island_bitmap_size;
42
43 /*
44  * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
45  * of "old". Otherwise, the new bitmap is empty.
46  */
47 static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
48 {
49         size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
50         struct island_bitmap *b = xcalloc(1, size);
51
52         if (old)
53                 memcpy(b, old, size);
54
55         b->refcount = 1;
56         return b;
57 }
58
59 static void island_bitmap_or(struct island_bitmap *a, const struct island_bitmap *b)
60 {
61         uint32_t i;
62
63         for (i = 0; i < island_bitmap_size; ++i)
64                 a->bits[i] |= b->bits[i];
65 }
66
67 static int island_bitmap_is_subset(struct island_bitmap *self,
68                 struct island_bitmap *super)
69 {
70         uint32_t i;
71
72         if (self == super)
73                 return 1;
74
75         for (i = 0; i < island_bitmap_size; ++i) {
76                 if ((self->bits[i] & super->bits[i]) != self->bits[i])
77                         return 0;
78         }
79
80         return 1;
81 }
82
83 #define ISLAND_BITMAP_BLOCK(x) (x / 32)
84 #define ISLAND_BITMAP_MASK(x) (1 << (x % 32))
85
86 static void island_bitmap_set(struct island_bitmap *self, uint32_t i)
87 {
88         self->bits[ISLAND_BITMAP_BLOCK(i)] |= ISLAND_BITMAP_MASK(i);
89 }
90
91 static int island_bitmap_get(struct island_bitmap *self, uint32_t i)
92 {
93         return (self->bits[ISLAND_BITMAP_BLOCK(i)] & ISLAND_BITMAP_MASK(i)) != 0;
94 }
95
96 int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid)
97 {
98         khiter_t trg_pos, src_pos;
99
100         /* If we aren't using islands, assume everything goes together. */
101         if (!island_marks)
102                 return 1;
103
104         /*
105          * If we don't have a bitmap for the target, we can delta it
106          * against anything -- it's not an important object
107          */
108         trg_pos = kh_get_sha1(island_marks, trg_oid->hash);
109         if (trg_pos >= kh_end(island_marks))
110                 return 1;
111
112         /*
113          * if the source (our delta base) doesn't have a bitmap,
114          * we don't want to base any deltas on it!
115          */
116         src_pos = kh_get_sha1(island_marks, src_oid->hash);
117         if (src_pos >= kh_end(island_marks))
118                 return 0;
119
120         return island_bitmap_is_subset(kh_value(island_marks, trg_pos),
121                                 kh_value(island_marks, src_pos));
122 }
123
124 int island_delta_cmp(const struct object_id *a, const struct object_id *b)
125 {
126         khiter_t a_pos, b_pos;
127         struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL;
128
129         if (!island_marks)
130                 return 0;
131
132         a_pos = kh_get_sha1(island_marks, a->hash);
133         if (a_pos < kh_end(island_marks))
134                 a_bitmap = kh_value(island_marks, a_pos);
135
136         b_pos = kh_get_sha1(island_marks, b->hash);
137         if (b_pos < kh_end(island_marks))
138                 b_bitmap = kh_value(island_marks, b_pos);
139
140         if (a_bitmap) {
141                 if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap))
142                         return -1;
143         }
144         if (b_bitmap) {
145                 if (!a_bitmap || !island_bitmap_is_subset(b_bitmap, a_bitmap))
146                         return 1;
147         }
148
149         return 0;
150 }
151
152 static struct island_bitmap *create_or_get_island_marks(struct object *obj)
153 {
154         khiter_t pos;
155         int hash_ret;
156
157         pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
158         if (hash_ret)
159                 kh_value(island_marks, pos) = island_bitmap_new(NULL);
160
161         return kh_value(island_marks, pos);
162 }
163
164 static void set_island_marks(struct object *obj, struct island_bitmap *marks)
165 {
166         struct island_bitmap *b;
167         khiter_t pos;
168         int hash_ret;
169
170         pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
171         if (hash_ret) {
172                 /*
173                  * We don't have one yet; make a copy-on-write of the
174                  * parent.
175                  */
176                 marks->refcount++;
177                 kh_value(island_marks, pos) = marks;
178                 return;
179         }
180
181         /*
182          * We do have it. Make sure we split any copy-on-write before
183          * updating.
184          */
185         b = kh_value(island_marks, pos);
186         if (b->refcount > 1) {
187                 b->refcount--;
188                 b = kh_value(island_marks, pos) = island_bitmap_new(b);
189         }
190         island_bitmap_or(b, marks);
191 }
192
193 static void mark_remote_island_1(struct repository *r,
194                                  struct remote_island *rl,
195                                  int is_core_island)
196 {
197         uint32_t i;
198
199         for (i = 0; i < rl->oids.nr; ++i) {
200                 struct island_bitmap *marks;
201                 struct object *obj = parse_object(r, &rl->oids.oid[i]);
202
203                 if (!obj)
204                         continue;
205
206                 marks = create_or_get_island_marks(obj);
207                 island_bitmap_set(marks, island_counter);
208
209                 if (is_core_island && obj->type == OBJ_COMMIT)
210                         obj->flags |= NEEDS_BITMAP;
211
212                 /* If it was a tag, also make sure we hit the underlying object. */
213                 while (obj && obj->type == OBJ_TAG) {
214                         obj = ((struct tag *)obj)->tagged;
215                         if (obj) {
216                                 parse_object(r, &obj->oid);
217                                 marks = create_or_get_island_marks(obj);
218                                 island_bitmap_set(marks, island_counter);
219                         }
220                 }
221         }
222
223         if (is_core_island)
224                 island_counter_core = island_counter;
225
226         island_counter++;
227 }
228
229 struct tree_islands_todo {
230         struct object_entry *entry;
231         unsigned int depth;
232 };
233
234 static int tree_depth_compare(const void *a, const void *b)
235 {
236         const struct tree_islands_todo *todo_a = a;
237         const struct tree_islands_todo *todo_b = b;
238
239         return todo_a->depth - todo_b->depth;
240 }
241
242 void resolve_tree_islands(struct repository *r,
243                           int progress,
244                           struct packing_data *to_pack)
245 {
246         struct progress *progress_state = NULL;
247         struct tree_islands_todo *todo;
248         int nr = 0;
249         int i;
250
251         if (!island_marks)
252                 return;
253
254         /*
255          * We process only trees, as commits and tags have already been handled
256          * (and passed their marks on to root trees, as well. We must make sure
257          * to process them in descending tree-depth order so that marks
258          * propagate down the tree properly, even if a sub-tree is found in
259          * multiple parent trees.
260          */
261         ALLOC_ARRAY(todo, to_pack->nr_objects);
262         for (i = 0; i < to_pack->nr_objects; i++) {
263                 if (oe_type(&to_pack->objects[i]) == OBJ_TREE) {
264                         todo[nr].entry = &to_pack->objects[i];
265                         todo[nr].depth = oe_tree_depth(to_pack, &to_pack->objects[i]);
266                         nr++;
267                 }
268         }
269         QSORT(todo, nr, tree_depth_compare);
270
271         if (progress)
272                 progress_state = start_progress(_("Propagating island marks"), nr);
273
274         for (i = 0; i < nr; i++) {
275                 struct object_entry *ent = todo[i].entry;
276                 struct island_bitmap *root_marks;
277                 struct tree *tree;
278                 struct tree_desc desc;
279                 struct name_entry entry;
280                 khiter_t pos;
281
282                 pos = kh_get_sha1(island_marks, ent->idx.oid.hash);
283                 if (pos >= kh_end(island_marks))
284                         continue;
285
286                 root_marks = kh_value(island_marks, pos);
287
288                 tree = lookup_tree(r, &ent->idx.oid);
289                 if (!tree || parse_tree(tree) < 0)
290                         die(_("bad tree object %s"), oid_to_hex(&ent->idx.oid));
291
292                 init_tree_desc(&desc, tree->buffer, tree->size);
293                 while (tree_entry(&desc, &entry)) {
294                         struct object *obj;
295
296                         if (S_ISGITLINK(entry.mode))
297                                 continue;
298
299                         obj = lookup_object(r, entry.oid.hash);
300                         if (!obj)
301                                 continue;
302
303                         set_island_marks(obj, root_marks);
304                 }
305
306                 free_tree_buffer(tree);
307
308                 display_progress(progress_state, i+1);
309         }
310
311         stop_progress(&progress_state);
312         free(todo);
313 }
314
315 static regex_t *island_regexes;
316 static unsigned int island_regexes_alloc, island_regexes_nr;
317 static const char *core_island_name;
318
319 static int island_config_callback(const char *k, const char *v, void *cb)
320 {
321         if (!strcmp(k, "pack.island")) {
322                 struct strbuf re = STRBUF_INIT;
323
324                 if (!v)
325                         return config_error_nonbool(k);
326
327                 ALLOC_GROW(island_regexes, island_regexes_nr + 1, island_regexes_alloc);
328
329                 if (*v != '^')
330                         strbuf_addch(&re, '^');
331                 strbuf_addstr(&re, v);
332
333                 if (regcomp(&island_regexes[island_regexes_nr], re.buf, REG_EXTENDED))
334                         die(_("failed to load island regex for '%s': %s"), k, re.buf);
335
336                 strbuf_release(&re);
337                 island_regexes_nr++;
338                 return 0;
339         }
340
341         if (!strcmp(k, "pack.islandcore"))
342                 return git_config_string(&core_island_name, k, v);
343
344         return 0;
345 }
346
347 static void add_ref_to_island(const char *island_name, const struct object_id *oid)
348 {
349         uint64_t sha_core;
350         struct remote_island *rl = NULL;
351
352         int hash_ret;
353         khiter_t pos = kh_put_str(remote_islands, island_name, &hash_ret);
354
355         if (hash_ret) {
356                 kh_key(remote_islands, pos) = xstrdup(island_name);
357                 kh_value(remote_islands, pos) = xcalloc(1, sizeof(struct remote_island));
358         }
359
360         rl = kh_value(remote_islands, pos);
361         oid_array_append(&rl->oids, oid);
362
363         memcpy(&sha_core, oid->hash, sizeof(uint64_t));
364         rl->hash += sha_core;
365 }
366
367 static int find_island_for_ref(const char *refname, const struct object_id *oid,
368                                int flags, void *data)
369 {
370         /*
371          * We should advertise 'ARRAY_SIZE(matches) - 2' as the max,
372          * so we can diagnose below a config with more capture groups
373          * than we support.
374          */
375         regmatch_t matches[16];
376         int i, m;
377         struct strbuf island_name = STRBUF_INIT;
378
379         /* walk backwards to get last-one-wins ordering */
380         for (i = island_regexes_nr - 1; i >= 0; i--) {
381                 if (!regexec(&island_regexes[i], refname,
382                              ARRAY_SIZE(matches), matches, 0))
383                         break;
384         }
385
386         if (i < 0)
387                 return 0;
388
389         if (matches[ARRAY_SIZE(matches) - 1].rm_so != -1)
390                 warning(_("island regex from config has "
391                           "too many capture groups (max=%d)"),
392                         (int)ARRAY_SIZE(matches) - 2);
393
394         for (m = 1; m < ARRAY_SIZE(matches); m++) {
395                 regmatch_t *match = &matches[m];
396
397                 if (match->rm_so == -1)
398                         continue;
399
400                 if (island_name.len)
401                         strbuf_addch(&island_name, '-');
402
403                 strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so);
404         }
405
406         add_ref_to_island(island_name.buf, oid);
407         strbuf_release(&island_name);
408         return 0;
409 }
410
411 static struct remote_island *get_core_island(void)
412 {
413         if (core_island_name) {
414                 khiter_t pos = kh_get_str(remote_islands, core_island_name);
415                 if (pos < kh_end(remote_islands))
416                         return kh_value(remote_islands, pos);
417         }
418
419         return NULL;
420 }
421
422 static void deduplicate_islands(struct repository *r)
423 {
424         struct remote_island *island, *core = NULL, **list;
425         unsigned int island_count, dst, src, ref, i = 0;
426
427         island_count = kh_size(remote_islands);
428         ALLOC_ARRAY(list, island_count);
429
430         kh_foreach_value(remote_islands, island, {
431                 list[i++] = island;
432         });
433
434         for (ref = 0; ref + 1 < island_count; ref++) {
435                 for (src = ref + 1, dst = src; src < island_count; src++) {
436                         if (list[ref]->hash == list[src]->hash)
437                                 continue;
438
439                         if (src != dst)
440                                 list[dst] = list[src];
441
442                         dst++;
443                 }
444                 island_count = dst;
445         }
446
447         island_bitmap_size = (island_count / 32) + 1;
448         core = get_core_island();
449
450         for (i = 0; i < island_count; ++i) {
451                 mark_remote_island_1(r, list[i], core && list[i]->hash == core->hash);
452         }
453
454         free(list);
455 }
456
457 void load_delta_islands(struct repository *r)
458 {
459         island_marks = kh_init_sha1();
460         remote_islands = kh_init_str();
461
462         git_config(island_config_callback, NULL);
463         for_each_ref(find_island_for_ref, NULL);
464         deduplicate_islands(r);
465
466         fprintf(stderr, _("Marked %d islands, done.\n"), island_counter);
467 }
468
469 void propagate_island_marks(struct commit *commit)
470 {
471         khiter_t pos = kh_get_sha1(island_marks, commit->object.oid.hash);
472
473         if (pos < kh_end(island_marks)) {
474                 struct commit_list *p;
475                 struct island_bitmap *root_marks = kh_value(island_marks, pos);
476
477                 parse_commit(commit);
478                 set_island_marks(&get_commit_tree(commit)->object, root_marks);
479                 for (p = commit->parents; p; p = p->next)
480                         set_island_marks(&p->item->object, root_marks);
481         }
482 }
483
484 int compute_pack_layers(struct packing_data *to_pack)
485 {
486         uint32_t i;
487
488         if (!core_island_name || !island_marks)
489                 return 1;
490
491         for (i = 0; i < to_pack->nr_objects; ++i) {
492                 struct object_entry *entry = &to_pack->objects[i];
493                 khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash);
494
495                 oe_set_layer(to_pack, entry, 1);
496
497                 if (pos < kh_end(island_marks)) {
498                         struct island_bitmap *bitmap = kh_value(island_marks, pos);
499
500                         if (island_bitmap_get(bitmap, island_counter_core))
501                                 oe_set_layer(to_pack, entry, 0);
502                 }
503         }
504
505         return 2;
506 }