administrative functions for rev-cache, start of integration into git
[git] / rev-cache.c
1 #include "cache.h"
2 #include "object.h"
3 #include "commit.h"
4 #include "tree.h"
5 #include "tree-walk.h"
6 #include "blob.h"
7 #include "tag.h"
8 #include "diff.h"
9 #include "revision.h"
10 #include "rev-cache.h"
11 #include "run-command.h"
12 #include "string-list.h"
13
/* On-disk redirection record: stored in place of a cache slice when the
 * slice data itself lives outside the rev-cache directory. */
struct cache_slice_pointer {
	char signature[8]; /* REVCOPTR */
	char version;
	char path[PATH_MAX + 1]; /* NUL-terminated path to the real slice file */
};
19
/* list resembles pack index format */
static uint32_t fanout[0xff + 2]; /* byte offsets per first sha1 byte; [0x100] = file length */

static unsigned char *idx_map;	/* mmap of rev-cache/index, or 0 when not loaded */
static int idx_size;		/* length of idx_map in bytes */
static struct rc_index_header idx_head;
static unsigned char *idx_caches;	/* cache-slice sha1s, 20 bytes each, cache_nr entries */
static char no_idx;		/* set once init_index() has failed; suppresses retries */

static struct strbuf *acc_buffer; /* accumulation buffer used during slice generation */

#define SLOP			5
32
33 /* initialization */
34
35 struct rc_index_entry *from_disked_rc_index_entry(struct rc_index_entry_ondisk *src, struct rc_index_entry *dst)
36 {
37         static struct rc_index_entry entry[4];
38         static int cur;
39
40         if (!dst)
41                 dst = &entry[cur++ & 0x3];
42
43         dst->sha1 = src->sha1;
44         dst->is_start = !!(src->flags & 0x80);
45         dst->cache_index = src->flags & 0x7f;
46         dst->pos = ntohl(src->pos);
47
48         return dst;
49 }
50
51 struct rc_index_entry_ondisk *to_disked_rc_index_entry(struct rc_index_entry *src, struct rc_index_entry_ondisk *dst)
52 {
53         static struct rc_index_entry_ondisk entry[4];
54         static int cur;
55
56         if (!dst)
57                 dst = &entry[cur++ & 0x3];
58
59         if (dst->sha1 != src->sha1)
60                 hashcpy(dst->sha1, src->sha1);
61         dst->flags = (unsigned char)src->is_start << 7 | (unsigned char)src->cache_index;
62         dst->pos = htonl(src->pos);
63
64         return dst;
65 }
66
67 struct rc_object_entry *from_disked_rc_object_entry(struct rc_object_entry_ondisk *src, struct rc_object_entry *dst)
68 {
69         static struct rc_object_entry entry[4];
70         static int cur;
71
72         if (!dst)
73                 dst = &entry[cur++ & 0x3];
74
75         dst->type = src->flags >> 5;
76         dst->is_end = !!(src->flags & 0x10);
77         dst->is_start = !!(src->flags & 0x08);
78         dst->uninteresting = !!(src->flags & 0x04);
79         dst->include = !!(src->flags & 0x02);
80         dst->flag = !!(src->flags & 0x01);
81
82         dst->sha1 = src->sha1;
83         dst->merge_nr = src->merge_nr;
84         dst->split_nr = src->split_nr;
85
86         dst->size_size = src->sizes >> 5;
87         dst->padding = src->sizes & 0x1f;
88
89         dst->date = ntohl(src->date);
90         dst->path = ntohs(src->path);
91
92         return dst;
93 }
94
95 struct rc_object_entry_ondisk *to_disked_rc_object_entry(struct rc_object_entry *src, struct rc_object_entry_ondisk *dst)
96 {
97         static struct rc_object_entry_ondisk entry[4];
98         static int cur;
99
100         if (!dst)
101                 dst = &entry[cur++ & 0x3];
102
103         dst->flags  = (unsigned char)src->type << 5;
104         dst->flags |= (unsigned char)src->is_end << 4;
105         dst->flags |= (unsigned char)src->is_start << 3;
106         dst->flags |= (unsigned char)src->uninteresting << 2;
107         dst->flags |= (unsigned char)src->include << 1;
108         dst->flags |= (unsigned char)src->flag;
109
110         if (dst->sha1 != src->sha1)
111                 hashcpy(dst->sha1, src->sha1);
112         dst->merge_nr = src->merge_nr;
113         dst->split_nr = src->split_nr;
114
115         dst->sizes  = (unsigned char)src->size_size << 5;
116         dst->sizes |= (unsigned char)src->padding;
117
118         dst->date = htonl(src->date);
119         dst->path = htons(src->path);
120
121         return dst;
122 }
123
124 static int get_index_head(unsigned char *map, int len, struct rc_index_header *head, uint32_t *fanout, unsigned char **caches)
125 {
126         struct rc_index_header whead;
127         int i, index = sizeof(struct rc_index_header);
128
129         memcpy(&whead, map, sizeof(struct rc_index_header));
130         if (memcmp(whead.signature, "REVINDEX", 8) || whead.version != SUPPORTED_REVINDEX_VERSION)
131                 return -1;
132
133         memcpy(head->signature, "REVINDEX", 8);
134         head->version = whead.version;
135         head->ofs_objects = ntohl(whead.ofs_objects);
136         head->object_nr = ntohl(whead.object_nr);
137         head->cache_nr = whead.cache_nr;
138         head->max_date = ntohl(whead.max_date);
139
140         if (len < index + head->cache_nr * 20 + 0x100 * sizeof(uint32_t))
141                 return -2;
142
143         *caches = xmalloc(head->cache_nr * 20);
144         memcpy(*caches, map + index, head->cache_nr * 20);
145         index += head->cache_nr * 20;
146
147         memcpy(fanout, map + index, 0x100 * sizeof(uint32_t));
148         for (i = 0; i <= 0xff; i++)
149                 fanout[i] = ntohl(fanout[i]);
150         fanout[0x100] = len;
151
152         return 0;
153 }
154
155 /* added in init_index */
156 static void cleanup_cache_slices(void)
157 {
158         if (idx_map) {
159                 free(idx_caches);
160                 munmap(idx_map, idx_size);
161                 idx_map = 0;
162         }
163
164 }
165
166 static int init_index(void)
167 {
168         int fd;
169         struct stat fi;
170
171         fd = open(git_path("rev-cache/index"), O_RDONLY);
172         if (fd == -1 || fstat(fd, &fi))
173                 goto end;
174         if (fi.st_size < sizeof(struct rc_index_header))
175                 goto end;
176
177         idx_size = fi.st_size;
178         idx_map = xmmap(0, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
179         close(fd);
180         if (idx_map == MAP_FAILED)
181                 goto end;
182         if (get_index_head(idx_map, fi.st_size, &idx_head, fanout, &idx_caches))
183                 goto end;
184
185         atexit(cleanup_cache_slices);
186
187         return 0;
188
189 end:
190         idx_map = 0;
191         no_idx = 1;
192         return -1;
193 }
194
/* this assumes index is already loaded */
static struct rc_index_entry_ondisk *search_index_1(unsigned char *sha1)
{
	int start, end, starti, endi, i, len, r;
	struct rc_index_entry_ondisk *ie;

	if (!idx_map)
		return 0;

	/* binary search within the fanout bucket for sha1's first byte */
	start = fanout[(int)sha1[0]];
	end = fanout[(int)sha1[0] + 1];
	len = (end - start) / sizeof(struct rc_index_entry_ondisk);
	if (!len || len * sizeof(struct rc_index_entry_ondisk) != end - start)
		return 0; /* empty bucket, or size not a multiple of the entry size (corrupt) */

	starti = 0;
	endi = len - 1;
	for (;;) {
		i = (endi + starti) / 2;
		ie = (struct rc_index_entry_ondisk *)(idx_map + start + i * sizeof(struct rc_index_entry_ondisk));
		r = hashcmp(sha1, ie->sha1);

		if (r) {
			/* non-standard convergence: with two candidates left the probe
			 * above tested starti, so bump to endi for one final probe;
			 * once starti == endi and that probe misses, give up */
			if (starti + 1 == endi) {
				starti++;
				continue;
			} else if (starti == endi)
				break;

			if (r > 0)
				starti = i;
			else /* r < 0 */
				endi = i;
		} else
			return ie;
	}

	return 0;
}
235
/* Look up sha1 in the index; returns a decoded (in-memory) entry in one
 * of from_disked_rc_index_entry's static slots, or 0 when absent. */
static struct rc_index_entry *search_index(unsigned char *sha1)
{
	struct rc_index_entry_ondisk *ondisk;

	ondisk = search_index_1(sha1);
	if (!ondisk)
		return 0;

	return from_disked_rc_index_entry(ondisk, 0);
}
245
246 unsigned char *get_cache_slice(struct commit *commit)
247 {
248         struct rc_index_entry *ie;
249
250         if (!idx_map) {
251                 if (no_idx)
252                         return 0;
253                 init_index();
254         }
255
256         if (commit->date > idx_head.max_date)
257                 return 0;
258
259         ie = search_index(commit->object.sha1);
260         if (ie && ie->cache_index < idx_head.cache_nr)
261                 return idx_caches + ie->cache_index * 20;
262
263         return 0;
264 }
265
266
267 /* traversal */
268
269 static unsigned long decode_size(unsigned char *str, int len);
270
271 static void handle_noncommit(struct rev_info *revs, struct commit *commit, unsigned char *ptr, struct rc_object_entry *entry)
272 {
273         struct blob *blob;
274         struct tree *tree;
275         struct object *obj;
276         unsigned long size;
277
278         size = decode_size(ptr + RC_ENTRY_SIZE_OFFSET(entry), entry->size_size);
279         switch (entry->type) {
280         case OBJ_TREE :
281                 if (!revs->tree_objects)
282                         return;
283
284                 tree = lookup_tree(entry->sha1);
285                 if (!tree)
286                         return;
287
288                 tree->size = size;
289                 commit->tree = tree;
290                 obj = (struct object *)tree;
291                 break;
292
293         case OBJ_BLOB :
294                 if (!revs->blob_objects)
295                         return;
296
297                 blob = lookup_blob(entry->sha1);
298                 if (!blob)
299                         return;
300
301                 obj = (struct object *)blob;
302                 break;
303
304         default :
305                 /* tag objects aren't really supposed to be here */
306                 return;
307         }
308
309         obj->flags |= FACE_VALUE;
310         add_pending_object(revs, obj, "");
311 }
312
313 static int setup_traversal(struct rc_slice_header *head, unsigned char *map, struct commit *commit, struct commit_list **work)
314 {
315         struct rc_index_entry *iep;
316         struct rc_object_entry *oep;
317         struct commit_list *prev, *wp, **wpp;
318         int retval;
319
320         iep = search_index(commit->object.sha1), 0;
321         oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
322
323         /* the .uniniteresting bit isn't strictly necessary, as we check the object during traversal as well,
324          * but we might as well initialize it while we're at it */
325         oep->include = 1;
326         oep->uninteresting = !!(commit->object.flags & UNINTERESTING);
327         to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
328         retval = iep->pos;
329
330         /* include any others in the work array */
331         prev = 0;
332         wpp = work;
333         wp = *work;
334         while (wp) {
335                 struct object *obj = &wp->item->object;
336                 struct commit *co;
337
338                 /* is this in our cache slice? */
339                 iep = search_index(obj->sha1);
340                 if (!iep || hashcmp(idx_caches + iep->cache_index * 20, head->sha1)) {
341                         prev = wp;
342                         wp = wp->next;
343                         wpp = &wp;
344                         continue;
345                 }
346
347                 if (iep->pos < retval)
348                         retval = iep->pos;
349
350                 oep = RC_OBTAIN_OBJECT_ENTRY(map + iep->pos);
351
352                 /* mark this for later */
353                 oep->include = 1;
354                 oep->uninteresting = !!(obj->flags & UNINTERESTING);
355                 to_disked_rc_object_entry(oep, (struct rc_object_entry_ondisk *)(map + iep->pos));
356
357                 /* remove from work list */
358                 co = pop_commit(wpp);
359                 wp = *wpp;
360                 if (prev)
361                         prev->next = wp;
362         }
363
364         return retval;
365 }
366
/* per-path state byte: flag bits in the top two bits... */
#define IPATH				0x40	/* path has seen an interesting commit */
#define UPATH				0x80	/* path has seen an uninteresting commit */

/* ...and a pending-merge-parent count in the low 6 bits */
#define GET_COUNT(x)		((x) & 0x3f)
#define SET_COUNT(x, s)		((x) = ((x) & ~0x3f) | ((s) & 0x3f))
372
/*
 * Walk the object entries of a mapped cache slice, reconstructing commits,
 * re-establishing parent/child topology via the stored path numbers, and
 * pushing interesting commits onto *queue (non-end) or *work (end entries).
 * Non-commit objects that follow an emitted commit are handed to
 * handle_noncommit.  Returns 0 on success, -1 on error.
 *
 * NOTE(review): date_so_far and slop_so_far are accepted but unused here.
 * The local `paths` shadows the file-scope generation-time `paths`; it is
 * sized with sizeof(uint16_t) per path although only one byte per path is
 * ever addressed — presumably intentional slack, confirm.
 */
static int traverse_cache_slice_1(struct rc_slice_header *head, unsigned char *map,
	struct rev_info *revs, struct commit *commit,
	unsigned long *date_so_far, int *slop_so_far,
	struct commit_list ***queue, struct commit_list **work)
{
	struct commit_list *insert_cache = 0;
	struct commit **last_objects, *co;
	int i, total_path_nr = head->path_nr, retval = -1;
	char consume_children = 0;
	unsigned char *paths;

	i = setup_traversal(head, map, commit, work);
	if (i < 0)
		return -1;

	paths = xcalloc(total_path_nr, sizeof(uint16_t));
	last_objects = xcalloc(total_path_nr, sizeof(struct commit *));

	/* i already set */
	while (i < head->size) {
		struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
		int path = entry->path;
		struct object *obj;
		int index = i;

		i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);

		/* add extra objects if necessary; co is safe to use here because
		 * consume_children starts 0 and is only set after co is assigned */
		if (entry->type != OBJ_COMMIT) {
			if (consume_children)
				handle_noncommit(revs, co, map + index, entry);

			continue;
		} else
			consume_children = 0;

		if (path >= total_path_nr)
			goto end;

		/* in one of our branches?
		 * uninteresting trumps interesting */
		if (entry->include)
			paths[path] |= entry->uninteresting ? UPATH : IPATH;
		else if (!paths[path])
			continue;

		/* date stuff */
		if (revs->max_age != -1 && entry->date < revs->max_age)
			paths[path] |= UPATH;

		/* lookup object */
		co = lookup_commit(entry->sha1);
		obj = &co->object;

		if (obj->flags & UNINTERESTING)
			paths[path] |= UPATH;

		/* a path that is both interesting and uninteresting collapses to
		 * uninteresting, and the commit above it becomes an edge */
		if ((paths[path] & IPATH) && (paths[path] & UPATH)) {
			paths[path] = UPATH;

			/* mark edge */
			if (last_objects[path]) {
				parse_commit(last_objects[path]);

				/* we needn't worry about the unique field; that will be valid as
				 * long as we're not an end entry */
				last_objects[path]->object.flags &= ~FACE_VALUE;
				last_objects[path] = 0;
			}
		}

		/* now we gotta re-assess the whole interesting thing... */
		entry->uninteresting = !!(paths[path] & UPATH);

		/* first close paths */
		if (entry->split_nr) {
			/* split path numbers live after the entry header and merge list */
			int j, off = index + sizeof(struct rc_object_entry_ondisk) + RC_PATH_SIZE(entry->merge_nr);

			for (j = 0; j < entry->split_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				/* boundary commit? */
				if ((paths[p] & IPATH) && entry->uninteresting) {
					if (last_objects[p]) {
						parse_commit(last_objects[p]);

						last_objects[p]->object.flags &= ~FACE_VALUE;
						last_objects[p] = 0;
					}
				} else if (last_objects[p] && !last_objects[p]->object.parsed)
					commit_list_insert(co, &last_objects[p]->parents);

				/* can't close a merge path until all of its parents have been encountered */
				if (GET_COUNT(paths[p])) {
					SET_COUNT(paths[p], GET_COUNT(paths[p]) - 1);

					if (GET_COUNT(paths[p]))
						continue;
				}

				paths[p] = 0;
				last_objects[p] = 0;
			}
		}

		/* make topo relations */
		if (last_objects[path] && !last_objects[path]->object.parsed)
			commit_list_insert(co, &last_objects[path]->parents);

		/* initialize commit: non-end commits are populated straight from
		 * the cache; end commits must be parsed from the object store */
		if (!entry->is_end) {
			co->date = entry->date;
			obj->flags |= ADDED | FACE_VALUE;
		} else
			parse_commit(co);

		obj->flags |= SEEN;

		if (entry->uninteresting)
			obj->flags |= UNINTERESTING;

		/* we need to know what the edges are */
		last_objects[path] = co;

		/* add to list */
		if (!(obj->flags & UNINTERESTING) || revs->show_all) {
			if (entry->is_end)
				insert_by_date_cached(co, work, insert_cache, &insert_cache);
			else
				*queue = &commit_list_insert(co, *queue)->next;

			/* add children to list as well */
			if (obj->flags & UNINTERESTING)
				consume_children = 0;
			else
				consume_children = 1;
		}

		/* open parents */
		if (entry->merge_nr) {
			int j, off = index + sizeof(struct rc_object_entry_ondisk);
			char flag = entry->uninteresting ? UPATH : IPATH;

			for (j = 0; j < entry->merge_nr; j++) {
				unsigned short p = ntohs(*(uint16_t *)(map + off + RC_PATH_SIZE(j)));

				if (p >= total_path_nr)
					goto end;

				if (paths[p] & flag)
					continue;

				paths[p] |= flag;
			}

			/* make sure we don't use this path before all our parents have had their say */
			SET_COUNT(paths[path], entry->merge_nr);
		}

	}

	retval = 0;

end:
	free(paths);
	free(last_objects);

	return retval;
}
545
/*
 * Validate a mapped cache slice against the sha1 recorded for it in the
 * index.  Fills *head in host byte order.  Returns 0 if the slice is
 * usable, negative on signature/version/sha1/layout mismatch.
 */
static int get_cache_slice_header(unsigned char *cache_sha1, unsigned char *map, int len, struct rc_slice_header *head)
{
	int t;

	memcpy(head, map, sizeof(struct rc_slice_header));
	head->ofs_objects = ntohl(head->ofs_objects);
	head->object_nr = ntohl(head->object_nr);
	head->size = ntohl(head->size);
	head->path_nr = ntohs(head->path_nr);

	if (memcmp(head->signature, "REVCACHE", 8))
		return -1;
	if (head->version != SUPPORTED_REVCACHE_VERSION)
		return -2;
	if (hashcmp(head->sha1, cache_sha1))
		return -3;
	/* objects must start immediately after the header */
	t = sizeof(struct rc_slice_header);
	if (t != head->ofs_objects || t >= len)
		return -4;

	/* NOTE(review): the size recorded in the file is discarded in favor of
	 * the mapped length, so traversal walks to the end of the file even if
	 * the two disagree — confirm this is intentional */
	head->size = len;

	return 0;
}
570
571 int open_cache_slice(unsigned char *sha1, int flags)
572 {
573         int fd;
574         char signature[8];
575
576         fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), flags);
577         if (fd <= 0)
578                 goto end;
579
580         if (read(fd, signature, 8) != 8)
581                 goto end;
582
583         /* a normal revision slice */
584         if (!memcmp(signature, "REVCACHE", 8)) {
585                 lseek(fd, 0, SEEK_SET);
586                 return fd;
587         }
588
589         /* slice pointer */
590         if (!memcmp(signature, "REVCOPTR", 8)) {
591                 struct cache_slice_pointer ptr;
592
593                 lseek(fd, 0, SEEK_SET);
594                 if (read_in_full(fd, &ptr, sizeof(ptr)) != sizeof(ptr))
595                         goto end;
596
597                 if (ptr.version != SUPPORTED_REVCOPTR_VERSION)
598                         goto end;
599
600                 close(fd);
601                 fd = open(ptr.path, flags);
602
603                 return fd;
604         }
605
606 end:
607         if (fd > 0)
608                 close(fd);
609
610         return -1;
611 }
612
613 int traverse_cache_slice(struct rev_info *revs,
614         unsigned char *cache_sha1, struct commit *commit,
615         unsigned long *date_so_far, int *slop_so_far,
616         struct commit_list ***queue, struct commit_list **work)
617 {
618         int fd = -1, retval = -3;
619         struct stat fi;
620         struct rc_slice_header head;
621         struct rev_cache_info *rci;
622         unsigned char *map = MAP_FAILED;
623
624         /* the index should've been loaded already to find cache_sha1, but it's good
625          * to be absolutely sure... */
626         if (!idx_map)
627                 init_index();
628         if (!idx_map)
629                 return -1;
630
631         /* load options */
632         rci = &revs->rev_cache_info;
633
634         memset(&head, 0, sizeof(struct rc_slice_header));
635
636         fd = open_cache_slice(cache_sha1, O_RDONLY);
637         if (fd == -1)
638                 goto end;
639         if (fstat(fd, &fi) || fi.st_size < sizeof(struct rc_slice_header))
640                 goto end;
641
642         map = xmmap(0, fi.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
643         if (map == MAP_FAILED)
644                 goto end;
645         if (get_cache_slice_header(cache_sha1, map, fi.st_size, &head))
646                 goto end;
647
648         retval = traverse_cache_slice_1(&head, map, revs, commit, date_so_far, slop_so_far, queue, work);
649
650 end:
651         if (map != MAP_FAILED)
652                 munmap(map, fi.st_size);
653         if (fd != -1)
654                 close(fd);
655
656         return retval;
657 }
658
659
660
661 /* generation */
662
663 static int is_endpoint(struct commit *commit)
664 {
665         struct commit_list *list = commit->parents;
666
667         while (list) {
668                 if (!(list->item->object.flags & UNINTERESTING))
669                         return 0;
670
671                 list = list->next;
672         }
673
674         return 1;
675 }
676
677 /* ensures branch is self-contained: parents are either all interesting or all uninteresting */
678 static void make_legs(struct rev_info *revs)
679 {
680         struct commit_list *list, **plist;
681         int total = 0;
682
683         /* attach plist to end of commits list */
684         list = revs->commits;
685         while (list && list->next)
686                 list = list->next;
687
688         if (list)
689                 plist = &list->next;
690         else
691                 return;
692
693         /* duplicates don't matter, as get_revision() ignores them */
694         for (list = revs->commits; list; list = list->next) {
695                 struct commit *item = list->item;
696                 struct commit_list *parents = item->parents;
697
698                 if (item->object.flags & UNINTERESTING)
699                         continue;
700                 if (is_endpoint(item))
701                         continue;
702
703                 while (parents) {
704                         struct commit *p = parents->item;
705                         parents = parents->next;
706
707                         if (!(p->object.flags & UNINTERESTING))
708                                 continue;
709
710                         p->object.flags &= ~UNINTERESTING;
711                         parse_commit(p);
712                         plist = &commit_list_insert(p, plist)->next;
713
714                         if (!(p->object.flags & SEEN))
715                                 total++;
716                 }
717         }
718
719         if (total)
720                 sort_in_topological_order(&revs->commits, 1);
721
722 }
723
724
/* node in a doubly-linked list tracking a commit whose path must stay
 * open until all of its children have been emitted */
struct path_track {
	struct commit *commit;
	int path; /* for keeping track of children */

	struct path_track *next, *prev;
};
731
/* generation-time path table: paths[i] is PATH_IN_USE or a pending child count */
static unsigned char *paths;
static int path_nr = 1, path_sz; /* path 0 is never handed out */

static struct path_track *path_track;		/* active trackers */
static struct path_track *path_track_alloc;	/* free list of recycled nodes */

#define PATH_IN_USE			0x80 /* biggest bit we can get as a char */
739
740 static int get_new_path(void)
741 {
742         int i;
743
744         for (i = 1; i < path_nr; i++)
745                 if (!paths[i])
746                         break;
747
748         if (i == path_nr) {
749                 if (path_nr >= path_sz) {
750                         path_sz += 50;
751                         paths = xrealloc(paths, path_sz);
752                         memset(paths + path_sz - 50, 0, 50);
753                 }
754                 path_nr++;
755         }
756
757         paths[i] = PATH_IN_USE;
758         return i;
759 }
760
/* Unlink *ppt from its doubly-linked list and advance *ppt to the next
 * node.  With total_free the node is freed outright; otherwise it is
 * pushed onto the path_track_alloc free list for reuse. */
static void remove_path_track(struct path_track **ppt, char total_free)
{
	struct path_track *t = *ppt;

	if (t->next)
		t->next->prev = t->prev;
	if (t->prev)
		t->prev->next = t->next;

	t = t->next;

	if (total_free)
		free(*ppt);
	else {
		/* recycle: the node's next now threads the free list */
		(*ppt)->next = path_track_alloc;
		path_track_alloc = *ppt;
	}

	*ppt = t;
}
781
782 static struct path_track *make_path_track(struct path_track **head, struct commit *commit)
783 {
784         struct path_track *pt;
785
786         if (path_track_alloc) {
787                 pt = path_track_alloc;
788                 path_track_alloc = pt->next;
789         } else
790                 pt = xmalloc(sizeof(struct path_track));
791
792         memset(pt, 0, sizeof(struct path_track));
793         pt->commit = commit;
794
795         pt->next = *head;
796         if (*head)
797                 (*head)->prev = pt;
798         *head = pt;
799
800         return pt;
801 }
802
803 static void add_path_to_track(struct commit *commit, int path)
804 {
805         make_path_track(&path_track, commit);
806         path_track->path = path;
807 }
808
/*
 * Generation-time path bookkeeping for one commit: close any child paths
 * waiting on this commit (emitting their path numbers into split_str),
 * assign this commit its own path, and open/record parent paths
 * (emitting them into merge_str).  Updates object's is_start/is_end,
 * path, merge_nr, and split_nr fields.
 *
 * NOTE(review): commit->indegree is repurposed here to carry the commit's
 * assigned path number during cache generation.
 */
static void handle_paths(struct commit *commit, struct rc_object_entry *object, struct strbuf *merge_str, struct strbuf *split_str)
{
	int child_nr, parent_nr, open_parent_nr, this_path;
	struct commit_list *list;
	struct commit *first_parent;
	struct path_track **ppt, *pt;

	/* we can only re-use a closed path once all its children have been encountered,
	 * as we need to keep track of commit boundaries */
	ppt = &path_track;
	pt = *ppt;
	child_nr = 0;
	while (pt) {
		if (pt->commit == commit) {
			uint16_t write_path;

			/* a non-PATH_IN_USE value is a pending child count */
			if (paths[pt->path] != PATH_IN_USE)
				paths[pt->path]--;

			/* make sure we can handle this */
			child_nr++;
			if (child_nr > 0x7f)
				die("%s: too many branches!  rev-cache can only handle %d parents/children per commit",
					sha1_to_hex(object->sha1), 0x7f);

			/* add to split list */
			object->split_nr++;
			write_path = htons((uint16_t)pt->path);
			strbuf_add(split_str, &write_path, sizeof(uint16_t));

			remove_path_track(ppt, 0);
			pt = *ppt;
		} else {
			/* NOTE(review): ppt is aimed at the local pt here, so a
			 * later remove_path_track(ppt, ...) would unlink via the
			 * node's own prev/next, not via this pointer — verify */
			pt = pt->next;
			ppt = &pt;
		}
	}

	/* initialize our self! */
	if (!commit->indegree) {
		commit->indegree = get_new_path();
		object->is_start = 1;
	}

	this_path = commit->indegree;
	paths[this_path] = PATH_IN_USE;
	object->path = this_path;

	/* count interesting parents */
	parent_nr = open_parent_nr = 0;
	first_parent = 0;
	for (list = commit->parents; list; list = list->next) {
		if (list->item->object.flags & UNINTERESTING) {
			object->is_end = 1;
			continue;
		}

		parent_nr++;
		if (!list->item->indegree)
			open_parent_nr++;
		if (!first_parent)
			first_parent = list->item;
	}

	if (!parent_nr)
		return;

	/* single unassigned parent: it simply continues on our path */
	if (parent_nr == 1 && open_parent_nr == 1) {
		first_parent->indegree = this_path;
		return;
	}

	/* bail out on obscene parent/child #s */
	if (parent_nr > 0x7f)
		die("%s: too many parents in merge!  rev-cache can only handle %d parents/children per commit",
			sha1_to_hex(object->sha1), 0x7f);

	/* make merge list; our path now carries a pending-parent count */
	object->merge_nr = parent_nr;
	paths[this_path] = parent_nr;

	for (list = commit->parents; list; list = list->next) {
		struct commit *p = list->item;
		uint16_t write_path;

		if (p->object.flags & UNINTERESTING)
			continue;

		/* unfortunately due to boundary tracking we can't re-use merge paths
		 * (unable to guarantee last parent path = this -> last won't always be able to
		 * set this as a boundary object */
		if (!p->indegree)
			p->indegree = get_new_path();

		write_path = htons((uint16_t)p->indegree);
		strbuf_add(merge_str, &write_path, sizeof(uint16_t));

		/* make sure path is properly ended */
		add_path_to_track(p, this_path);
	}

}
911
912
/* Write `size` as little-endian bytes, dropping leading zero bytes;
 * returns the number of bytes written (0 when size is 0). */
static int encode_size(unsigned long size, unsigned char *out)
{
	int len;

	for (len = 0; size; len++, size >>= 8)
		out[len] = (unsigned char)(size & 0xff);

	return len;
}
925
/*
 * Inverse of encode_size(): rebuild an unsigned long from `len'
 * little-endian bytes at `str'.  Returns 0 when len == 0.
 */
static unsigned long decode_size(unsigned char *str, int len)
{
	unsigned long size = 0;
	int i;

	for (i = 0; i < len; i++)
		size |= (unsigned long)str[i] << (8 * i);

	return size;
}
939
940 static void add_object_entry(const unsigned char *sha1, struct rc_object_entry *entryp,
941         struct strbuf *merge_str, struct strbuf *split_str)
942 {
943         struct rc_object_entry entry;
944         unsigned char size_str[7];
945         unsigned long size;
946         enum object_type type;
947         void *data;
948
949         if (entryp)
950                 sha1 = entryp->sha1;
951
952         /* retrieve size data */
953         data = read_sha1_file(sha1, &type, &size);
954
955         if (data)
956                 free(data);
957
958         /* initialize! */
959         if (!entryp) {
960                 memset(&entry, 0, sizeof(entry));
961                 entry.sha1 = (unsigned char *)sha1;
962                 entry.type = type;
963
964                 if (merge_str)
965                         entry.merge_nr = merge_str->len / sizeof(uint16_t);
966                 if (split_str)
967                         entry.split_nr = split_str->len / sizeof(uint16_t);
968
969                 entryp = &entry;
970         }
971
972         entryp->size_size = encode_size(size, size_str);
973
974         /* write the muvabitch */
975         strbuf_add(acc_buffer, to_disked_rc_object_entry(entryp, 0), sizeof(struct rc_object_entry_ondisk));
976
977         if (merge_str)
978                 strbuf_add(acc_buffer, merge_str->buf, merge_str->len);
979         if (split_str)
980                 strbuf_add(acc_buffer, split_str->buf, split_str->len);
981
982         strbuf_add(acc_buffer, size_str, entryp->size_size);
983 }
984
/* callback for dump_tree(): invoked once per tree entry; returns non-zero
 * to continue parsing, 0 to skip the entry (and, for a directory, its
 * entire subtree) */
typedef int (*dump_tree_fn)(const unsigned char *, const char *, unsigned int); /* sha1, path, mode */
987
988 /* we need to walk the trees by hash, so unfortunately we can't use traverse_trees in tree-walk.c */
989 static int dump_tree(struct tree *tree, dump_tree_fn fn)
990 {
991         struct tree_desc desc;
992         struct name_entry entry;
993         struct tree *subtree;
994         int r;
995
996         if (parse_tree(tree))
997                 return -1;
998
999         init_tree_desc(&desc, tree->buffer, tree->size);
1000         while (tree_entry(&desc, &entry)) {
1001                 switch (fn(entry.sha1, entry.path, entry.mode)) {
1002                 case 0 :
1003                         goto continue_loop;
1004                 default :
1005                         break;
1006                 }
1007
1008                 if (S_ISDIR(entry.mode)) {
1009                         subtree = lookup_tree(entry.sha1);
1010                         if (!subtree)
1011                                 return -2;
1012
1013                         if ((r = dump_tree(subtree, fn)) < 0)
1014                                 return r;
1015                 }
1016
1017 continue_loop:
1018                 continue;
1019         }
1020
1021         return 0;
1022 }
1023
/* dump_tree_fn that records every entry's sha1 into the global acc_buffer;
 * always returns non-zero so the walk never skips anything */
static int dump_tree_callback(const unsigned char *sha1, const char *path, unsigned int mode)
{
	strbuf_add(acc_buffer, sha1, 20);

	return 1;
}
1030
1031 static void tree_addremove(struct diff_options *options,
1032         int whatnow, unsigned mode,
1033         const unsigned char *sha1,
1034         const char *concatpath)
1035 {
1036         if (whatnow != '+')
1037                 return;
1038
1039         strbuf_add(acc_buffer, sha1, 20);
1040 }
1041
1042 static void tree_change(struct diff_options *options,
1043         unsigned old_mode, unsigned new_mode,
1044         const unsigned char *old_sha1,
1045         const unsigned char *new_sha1,
1046         const char *concatpath)
1047 {
1048         if (!hashcmp(old_sha1, new_sha1))
1049                 return;
1050
1051         strbuf_add(acc_buffer, new_sha1, 20);
1052 }
1053
/*
 * Compute the set of objects introduced by `commit' relative to ALL of its
 * parents (the intersection of per-parent tree diffs), append an object
 * entry for each of them -- plus the commit's root tree -- to the global
 * acc_buffer, and return the number of entries added.
 *
 * Only called for non-end commits, i.e. every parent is interesting
 * (see the caller in make_cache_slice()).
 */
static int add_unique_objects(struct commit *commit)
{
	struct commit_list *list;
	struct strbuf os, ost, *orig_buf;
	struct diff_options opts;
	int i, j, next;
	char is_first = 1;

	/* os holds the running intersection; ost the current parent's diff */
	strbuf_init(&os, 0);
	strbuf_init(&ost, 0);
	orig_buf = acc_buffer;

	diff_setup(&opts);
	DIFF_OPT_SET(&opts, RECURSIVE);
	DIFF_OPT_SET(&opts, TREE_IN_RECURSIVE);
	opts.change = tree_change;
	opts.add_remove = tree_addremove;

	/* this is only called for non-ends (ie. all parents interesting) */
	for (list = commit->parents; list; list = list->next) {
		/* the callbacks write 20-byte sha1s into acc_buffer, so point
		 * it at os for the first parent, ost for each later one */
		if (is_first)
			acc_buffer = &os;
		else
			acc_buffer = &ost;

		strbuf_setlen(acc_buffer, 0);
		diff_tree_sha1(list->item->tree->object.sha1, commit->tree->object.sha1, "", &opts);
		/* sort so the intersection below is a linear merge */
		qsort(acc_buffer->buf, acc_buffer->len / 20, 20, (int (*)(const void *, const void *))hashcmp);

		/* take intersection: keep only the os entries also present in
		 * ost, compacting os in place via `next' */
		if (!is_first) {
			for (next = i = j = 0; i < os.len; i += 20) {
				while (j < ost.len && hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)) < 0)
					j += 20;

				if (j >= ost.len || hashcmp((unsigned char *)(ost.buf + j), (unsigned char *)(os.buf + i)))
					continue;

				if (next != i)
					memcpy(os.buf + next, os.buf + i, 20);
				next += 20;
			}

			if (next != i)
				strbuf_setlen(&os, next);
		} else
			is_first = 0;
	}

	/* no parents (!): every object in the root tree is "new" */
	if (is_first) {
		acc_buffer = &os;
		dump_tree(commit->tree, dump_tree_callback);
	}

	/* the ordering of non-commit objects doesn't really matter, so we're not gonna bother */
	acc_buffer = orig_buf;
	for (i = 0; i < os.len; i += 20)
		add_object_entry((unsigned char *)(os.buf + i), 0, 0, 0);

	/* last but not least, the main tree */
	add_object_entry(commit->tree->object.sha1, 0, 0, 0);

	/* NOTE(review): os and ost are never strbuf_release()d -- looks like
	 * a leak; confirm before changing since os.buf is read above */
	return i / 20 + 1;
}
1120
1121 static int add_objects_verbatim_1(struct rev_cache_slice_map *mapping, int *index)
1122 {
1123         unsigned char *map = mapping->map;
1124         int i = *index, object_nr = 0;
1125         struct rc_object_entry *entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
1126
1127         i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
1128         while (i < mapping->size) {
1129                 int pos = i;
1130
1131                 entry = RC_OBTAIN_OBJECT_ENTRY(map + i;
1132                 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(entry);
1133
1134                 if (entry->type == OBJ_COMMIT) {
1135                         *index = pos;
1136                         return object_nr;
1137                 }
1138
1139                 strbuf_add(acc_buffer, map + pos, i - pos);
1140                 object_nr++;
1141         }
1142
1143         *index = 0;
1144         return object_nr;
1145 }
1146
1147 static int add_objects_verbatim(struct rev_cache_info *rci, struct commit *commit)
1148 {
1149         struct rev_cache_slice_map *map;
1150         char found = 0;
1151         struct rc_index_entry *ie;
1152         struct rc_object_entry *entry;
1153         int object_nr, i;
1154
1155         if (!rci->maps)
1156                 return -1;
1157
1158         /* check if we can continue where we left off */
1159         map = rci->last_map;
1160         if (!map)
1161                 goto search_me;
1162
1163         i = map->last_index;
1164         entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
1165         if (hashcmp(entry->sha1, commit->object.sha1))
1166                 goto search_me;
1167
1168         found = 1;
1169
1170 search_me:
1171         if (!found) {
1172                 ie = search_index(commit->object.sha1);
1173                 if (!ie || ie->cache_index >= idx_head.cache_nr)
1174                         return -2;
1175
1176                 map = rci->maps + ie->cache_index;
1177                 if (!map->size)
1178                         return -3;
1179
1180                 i = ie->pos;
1181                 entry = RC_OBTAIN_OBJECT_ENTRY(map->map + i);
1182                 if (entry->type != OBJ_COMMIT || hashcmp(entry->sha1, commit->object.sha1))
1183                         return -4;
1184         }
1185
1186         /* can't handle end commits */
1187         if (entry->is_end)
1188                 return -5;
1189
1190         object_nr = add_objects_verbatim_1(map, &i);
1191
1192         /* remember this */
1193         if (i) {
1194                 rci->last_map = map;
1195                 map->last_index = i;
1196         } else
1197                 rci->last_map = 0;
1198
1199         return object_nr;
1200 }
1201
/* ensure $GIT_DIR/rev-cache exists as a directory, creating it if needed */
static void init_revcache_directory(void)
{
	struct stat fi;

	if (!stat(git_path("rev-cache"), &fi) && S_ISDIR(fi.st_mode))
		return;

	if (mkdir(git_path("rev-cache"), 0777))
		die("can't make rev-cache directory");
}
1211
1212 void init_rev_cache_info(struct rev_cache_info *rci)
1213 {
1214         memset(rci, 0, sizeof(struct rev_cache_info));
1215
1216         rci->objects = 1;
1217         rci->legs = 0;
1218         rci->make_index = 1;
1219         rci->fuse_me = 0;
1220
1221         rci->overwrite_all = 0;
1222
1223         rci->add_to_pending = 1;
1224
1225         rci->ignore_size = 0;
1226 }
1227
/*
 * Intended to substitute a default-initialized rev_cache_info when the
 * caller passes NULL.
 *
 * NOTE(review): this cannot work as written.  `rci' is the function's
 * local copy of the caller's pointer, so the `rci = &def_rci' assignment
 * below is lost on return -- a caller that passed NULL still holds NULL
 * afterwards and will crash on the first dereference.  Fixing it requires
 * returning the pointer (rci = maybe_fill_with_defaults(rci)) or taking a
 * struct rev_cache_info **; confirm all call sites before changing.
 */
void maybe_fill_with_defaults(struct rev_cache_info *rci)
{
	static struct rev_cache_info def_rci;

	if (rci)
		return;

	init_rev_cache_info(&def_rci);
	rci = &def_rci;
}
1238
/*
 * Generate a new cache slice covering the revisions described by `revs',
 * or -- when revs is NULL -- by the `starts' (interesting) and `ends'
 * (uninteresting) commit lists.  The slice is written to a temp file in
 * $GIT_DIR/rev-cache and then renamed to the hex of its content-derived
 * sha1; that sha1 is copied to `cache_sha1' if non-NULL.  On return the
 * start/end boundary commits found during the walk are pushed onto
 * *starts / *ends (when provided).  Returns 0 on success (fatal errors
 * die()).
 *
 * NOTE(review): if rci is NULL, maybe_fill_with_defaults() does not
 * actually substitute a default struct (see the note on that function),
 * so the rci->legs access below would dereference NULL.
 */
int make_cache_slice(struct rev_cache_info *rci,
	struct rev_info *revs, struct commit_list **starts, struct commit_list **ends,
	unsigned char *cache_sha1)
{
	struct rev_info therevs;
	struct strbuf buffer, startlist, endlist;
	struct rc_slice_header head;
	struct commit *commit;
	unsigned char sha1[20];
	struct strbuf merge_paths, split_paths;
	int object_nr, total_sz, fd;
	char file[PATH_MAX], *newfile;
	struct rev_cache_info *trci;
	git_SHA_CTX ctx;

	maybe_fill_with_defaults(rci);

	init_revcache_directory();
	strcpy(file, git_path("rev-cache/XXXXXX"));
	fd = xmkstemp(file);

	strbuf_init(&buffer, 0);
	strbuf_init(&startlist, 0);
	strbuf_init(&endlist, 0);
	strbuf_init(&merge_paths, 0);
	strbuf_init(&split_paths, 0);
	acc_buffer = &buffer;	/* add_object_entry() et al. write here */

	/* no rev_info supplied: build one from the starts/ends lists */
	if (!revs) {
		revs = &therevs;
		init_revisions(revs, 0);

		/* we're gonna assume no one else has already traversed this... */
		while ((commit = pop_commit(starts)))
			add_pending_object(revs, &commit->object, 0);

		while ((commit = pop_commit(ends))) {
			commit->object.flags |= UNINTERESTING;
			add_pending_object(revs, &commit->object, 0);
		}
	}

	/* write head placeholder; the real header is rewritten at the end,
	 * once object_nr/size/path_nr are known */
	memset(&head, 0, sizeof(head));
	head.ofs_objects = htonl(sizeof(head));
	xwrite(fd, &head, sizeof(head));

	/* init revisions: we need trees+blobs and a topo (lifo) order */
	revs->tree_objects = 1;
	revs->blob_objects = 1;
	revs->topo_order = 1;
	revs->lifo = 1;

	/* re-use info from other caches if possible */
	trci = &revs->rev_cache_info;
	init_rev_cache_info(trci);
	trci->add_to_pending = 0;

	setup_revisions(0, 0, revs, 0);
	if (prepare_revision_walk(revs))
		die("died preparing revision walk");

	if (rci->legs)
		make_legs(revs);

	object_nr = total_sz = 0;
	while ((commit = get_revision(revs)) != 0) {
		struct rc_object_entry object;
		int t;

		strbuf_setlen(&merge_paths, 0);
		strbuf_setlen(&split_paths, 0);

		memset(&object, 0, sizeof(object));
		object.type = OBJ_COMMIT;
		object.date = commit->date;
		object.sha1 = commit->object.sha1;

		/* handle_paths() (defined earlier in this file) fills in the
		 * path/merge/split topology and the is_start/is_end flags */
		handle_paths(commit, &object, &merge_paths, &split_paths);

		if (object.is_end) {
			strbuf_add(&endlist, object.sha1, 20);
			if (ends)
				commit_list_insert(commit, ends);
		}
		/* the two *aren't* mutually exclusive */
		if (object.is_start) {
			strbuf_add(&startlist, object.sha1, 20);
			if (starts)
				commit_list_insert(commit, starts);
		}

		/* indegree was borrowed as path storage; reset it */
		commit->indegree = 0;

		add_object_entry(0, &object, &merge_paths, &split_paths);
		object_nr++;

		if (rci->objects && !object.is_end) {
			if (rci->fuse_me && (t = add_objects_verbatim(rci, commit)) >= 0)
				/* reused an existing slice's object list verbatim */
				object_nr += t;
			else
				/* add all unique children for this commit */
				object_nr += add_unique_objects(commit);
		}

		/* flush to disk every ~1MB or so */
		if (buffer.len > 1000000) {
			write_in_full(fd, buffer.buf, buffer.len);
			total_sz += buffer.len;

			strbuf_setlen(&buffer, 0);
		}
	}

	if (buffer.len) {
		write_in_full(fd, buffer.buf, buffer.len);
		total_sz += buffer.len;
	}

	/* go ahead and free some stuff... (paths/path_track_alloc are
	 * file globals maintained by the path-tracking code earlier in
	 * this file) */
	strbuf_release(&buffer);
	strbuf_release(&merge_paths);
	strbuf_release(&split_paths);
	if (path_sz)
		free(paths);
	while (path_track_alloc)
		remove_path_track(&path_track_alloc, 1);

	/* the meaning of the hash name is more or less irrelevant, it's the uniqueness that matters:
	 * the slice is named after the sha1 of its boundary (end + start) commits */
	strbuf_add(&endlist, startlist.buf, startlist.len);
	git_SHA1_Init(&ctx);
	git_SHA1_Update(&ctx, endlist.buf, endlist.len);
	git_SHA1_Final(sha1, &ctx);

	/* now actually initialize header */
	strcpy(head.signature, "REVCACHE");
	head.version = SUPPORTED_REVCACHE_VERSION;

	head.object_nr = htonl(object_nr);
	head.size = htonl(ntohl(head.ofs_objects) + total_sz);
	head.path_nr = htons(path_nr);
	hashcpy(head.sha1, sha1);

	/* some info! */
	fprintf(stderr, "objects: %d\n", object_nr);
	fprintf(stderr, "paths: %d\n", path_nr);

	/* rewrite the placeholder header at the front of the file */
	lseek(fd, 0, SEEK_SET);
	xwrite(fd, &head, sizeof(head));

	if (rci->make_index && make_cache_index(rci, sha1, fd, ntohl(head.size)) < 0)
		die("can't update index");

	close(fd);

	/* atomically move the temp file to its content-derived name */
	newfile = git_path("rev-cache/%s", sha1_to_hex(sha1));
	if (rename(file, newfile))
		die("can't move temp file");

	/* let our caller know what we've just made */
	if (cache_sha1)
		hashcpy(cache_sha1, sha1);

	strbuf_release(&endlist);
	strbuf_release(&startlist);

	return 0;
}
1408
1409
1410 static int index_sort_hash(const void *a, const void *b)
1411 {
1412         return hashcmp(((struct rc_index_entry_ondisk *)a)->sha1, ((struct rc_index_entry_ondisk *)b)->sha1);
1413 }
1414
1415 static int write_cache_index(struct strbuf *body)
1416 {
1417         struct rc_index_header whead;
1418         struct lock_file *lk;
1419         int fd, i;
1420
1421         /* clear index map if loaded */
1422         if (idx_map) {
1423                 munmap(idx_map, idx_size);
1424                 idx_map = 0;
1425         }
1426
1427         lk = xcalloc(sizeof(struct lock_file), 1);
1428         fd = hold_lock_file_for_update(lk, git_path("rev-cache/index"), 0);
1429         if (fd < 0) {
1430                 free(lk);
1431                 return -1;
1432         }
1433
1434         /* endianness yay! */
1435         memset(&whead, 0, sizeof(whead));
1436         memcpy(whead.signature, "REVINDEX", 8);
1437         whead.version = idx_head.version;
1438         whead.ofs_objects = htonl(idx_head.ofs_objects);
1439         whead.object_nr = htonl(idx_head.object_nr);
1440         whead.cache_nr = idx_head.cache_nr;
1441         whead.max_date = htonl(idx_head.max_date);
1442
1443         write(fd, &whead, sizeof(struct rc_index_header));
1444         write_in_full(fd, idx_caches, idx_head.cache_nr * 20);
1445
1446         for (i = 0; i <= 0xff; i++)
1447                 fanout[i] = htonl(fanout[i]);
1448         write_in_full(fd, fanout, 0x100 * sizeof(uint32_t));
1449
1450         write_in_full(fd, body->buf, body->len);
1451
1452         if (commit_lock_file(lk) < 0)
1453                 return -2;
1454
1455         /* lk freed by lockfile.c */
1456
1457         return 0;
1458 }
1459
1460 int make_cache_index(struct rev_cache_info *rci, unsigned char *cache_sha1,
1461         int fd, unsigned int size)
1462 {
1463         struct strbuf buffer;
1464         int i, cache_index, cur;
1465         unsigned char *map;
1466         unsigned long max_date;
1467
1468         maybe_fill_with_defaults(rci);
1469
1470         if (!idx_map)
1471                 init_index();
1472
1473         lseek(fd, 0, SEEK_SET);
1474         map = xmmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1475         if (map == MAP_FAILED)
1476                 return -1;
1477
1478         strbuf_init(&buffer, 0);
1479         if (idx_map) {
1480                 strbuf_add(&buffer, idx_map + fanout[0], fanout[0x100] - fanout[0]);
1481         } else {
1482                 /* not an update */
1483                 memset(&idx_head, 0, sizeof(struct rc_index_header));
1484                 idx_caches = 0;
1485
1486                 strcpy(idx_head.signature, "REVINDEX");
1487                 idx_head.version = SUPPORTED_REVINDEX_VERSION;
1488                 idx_head.ofs_objects = sizeof(struct rc_index_header) + 0x100 * sizeof(uint32_t);
1489         }
1490
1491         /* are we remaking a slice? */
1492         for (i = 0; i < idx_head.cache_nr; i++)
1493                 if (!hashcmp(idx_caches + i * 20, cache_sha1))
1494                         break;
1495
1496         if (i == idx_head.cache_nr) {
1497                 cache_index = idx_head.cache_nr++;
1498                 idx_head.ofs_objects += 20;
1499
1500                 idx_caches = xrealloc(idx_caches, idx_head.cache_nr * 20);
1501                 hashcpy(idx_caches + cache_index * 20, cache_sha1);
1502         } else
1503                 cache_index = i;
1504
1505         i = sizeof(struct rc_slice_header); /* offset */
1506         max_date = idx_head.max_date;
1507         while (i < size) {
1508                 struct rc_index_entry index_entry, *entry;
1509                 struct rc_index_entry_ondisk *disked_entry;
1510                 struct rc_object_entry *object_entry = RC_OBTAIN_OBJECT_ENTRY(map + i);
1511                 unsigned long date;
1512                 int off, pos = i;
1513
1514                 i += RC_ACTUAL_OBJECT_ENTRY_SIZE(object_entry);
1515
1516                 if (object_entry->type != OBJ_COMMIT)
1517                         continue;
1518
1519                 /* don't include ends; otherwise we'll find ourselves in loops */
1520                 if (object_entry->is_end)
1521                         continue;
1522
1523                 /* handle index duplication
1524                  * -> keep old copy unless new one is a start -- based on expected usage, older ones will be more
1525                  * likely to lead to greater slice traversals than new ones */
1526                 date = object_entry->date;
1527                 if (date > idx_head.max_date) {
1528                         disked_entry = 0;
1529                         if (date > max_date)
1530                                 max_date = date;
1531                 } else
1532                         disked_entry = search_index_1(object_entry->sha1);
1533
1534                 if (disked_entry && !object_entry->is_start && !rci->overwrite_all)
1535                         continue;
1536                 else if (disked_entry) {
1537                         /* mmm, pointer arithmetic... tasty */  /* (entry - idx_map = offset, so cast is valid) */
1538                         off = (unsigned int)((unsigned char *)disked_entry - idx_map) - fanout[0];
1539                         disked_entry = (struct rc_index_entry_ondisk *)(buffer.buf + off);
1540                         entry = from_disked_rc_index_entry(disked_entry, 0);
1541                 } else
1542                         entry = &index_entry;
1543
1544                 memset(entry, 0, sizeof(index_entry));
1545                 entry->sha1 = object_entry->sha1;
1546                 entry->is_start = object_entry->is_start;
1547                 entry->cache_index = cache_index;
1548                 entry->pos = pos;
1549
1550                 if (entry == &index_entry) {
1551                         strbuf_add(&buffer, to_disked_rc_index_entry(entry, 0), sizeof(struct rc_index_entry_ondisk));
1552                         idx_head.object_nr++;
1553                 } else
1554                         to_disked_rc_index_entry(entry, disked_entry);
1555
1556         }
1557
1558         idx_head.max_date = max_date;
1559         qsort(buffer.buf, buffer.len / sizeof(struct rc_index_entry_ondisk), sizeof(struct rc_index_entry_ondisk), index_sort_hash);
1560
1561         /* generate fanout */
1562         cur = 0x00;
1563         for (i = 0; i < buffer.len; i += sizeof(struct rc_index_entry_ondisk)) {
1564                 struct rc_index_entry_ondisk *entry = (struct rc_index_entry_ondisk *)(buffer.buf + i);
1565
1566                 while (cur <= entry->sha1[0])
1567                         fanout[cur++] = i + idx_head.ofs_objects;
1568         }
1569
1570         while (cur <= 0xff)
1571                 fanout[cur++] = idx_head.ofs_objects + buffer.len;
1572
1573         /* BOOM! */
1574         if (write_cache_index(&buffer))
1575                 return -1;
1576
1577         munmap(map, size);
1578         strbuf_release(&buffer);
1579
1580         /* idx_map is unloaded without cleanup_cache_slices(), so regardless of previous index existence
1581          * we can still free this up */
1582         free(idx_caches);
1583
1584         return 0;
1585 }
1586
1587
1588 void starts_from_slices(struct rev_info *revs, unsigned int flags, unsigned char *which, int n)
1589 {
1590         struct commit *commit;
1591         int i;
1592
1593         if (!idx_map)
1594                 init_index();
1595         if (!idx_map)
1596                 return;
1597
1598         for (i = idx_head.ofs_objects; i < idx_size; i += sizeof(struct rc_index_entry_ondisk)) {
1599                 struct rc_index_entry *entry = RC_OBTAIN_INDEX_ENTRY(idx_map + i);
1600
1601                 if (!entry->is_start)
1602                         continue;
1603
1604                 /* only include entries in 'which' slices */
1605                 if (n) {
1606                         int j;
1607
1608                         for (j = 0; j < n; j++)
1609                                 if (!hashcmp(idx_caches + entry->cache_index * 20, which + j * 20))
1610                                         break;
1611
1612                         if (j == n)
1613                                 continue;
1614                 }
1615
1616                 commit = lookup_commit(entry->sha1);
1617                 if (!commit)
1618                         continue;
1619
1620                 commit->object.flags |= flags;
1621                 add_pending_object(revs, &commit->object, 0);
1622         }
1623
1624 }
1625
1626
/* a cache slice's identity plus its open fd and stat data, used to order
 * slices by creation time during index regeneration */
struct slice_fd_time {
	unsigned char sha1[20];
	int fd;
	struct stat fi;
};

/* qsort comparator: oldest slice (by st_ctime) first */
int slice_time_sort(const void *a, const void *b)
{
	unsigned long ta = ((const struct slice_fd_time *)a)->fi.st_ctime;
	unsigned long tb = ((const struct slice_fd_time *)b)->fi.st_ctime;

	if (ta < tb)
		return -1;
	if (ta > tb)
		return 1;
	return 0;
}
1645
1646 int regenerate_cache_index(struct rev_cache_info *rci)
1647 {
1648         DIR *dirh;
1649         int i;
1650         struct slice_fd_time info;
1651         struct strbuf slices;
1652
1653         /* first remove old index if it exists */
1654         unlink_or_warn(git_path("rev-cache/index"));
1655
1656         strbuf_init(&slices, 0);
1657
1658         dirh = opendir(git_path("rev-cache"));
1659         if (dirh) {
1660                 struct dirent *de;
1661                 struct stat fi;
1662                 int fd;
1663                 unsigned char sha1[20];
1664
1665                 while ((de = readdir(dirh))) {
1666                         if (de->d_name[0] == '.')
1667                                 continue;
1668
1669                         if (get_sha1_hex(de->d_name, sha1))
1670                                 continue;
1671
1672                         /* open with RDWR because of mmap call in make_cache_index() */
1673                         fd = open_cache_slice(sha1, O_RDONLY);
1674                         if (fd < 0 || fstat(fd, &fi)) {
1675                                 warning("bad cache found [%s]; fuse recommended", de->d_name);
1676                                 if (fd > 0)
1677                                         close(fd);
1678                                 continue;
1679                         }
1680
1681                         hashcpy(info.sha1, sha1);
1682                         info.fd = fd;
1683                         memcpy(&info.fi, &fi, sizeof(struct stat));
1684
1685                         strbuf_add(&slices, &info, sizeof(info));
1686                 }
1687
1688                 closedir(dirh);
1689         }
1690
1691         /* we want oldest first -> upon overlap, older slices are more likely to have a larger section,
1692          * as of the overlapped commit */
1693         qsort(slices.buf, slices.len / sizeof(info), sizeof(info), slice_time_sort);
1694
1695         for (i = 0; i < slices.len; i += sizeof(info)) {
1696                 struct slice_fd_time *infop = (struct slice_fd_time *)(slices.buf + i);
1697                 struct stat *fip = &infop->fi;
1698                 int fd = infop->fd;
1699
1700                 if (make_cache_index(rci, infop->sha1, fd, fip->st_size) < 0)
1701                         die("error writing cache");
1702
1703                 close(fd);
1704         }
1705
1706         strbuf_release(&slices);
1707
1708         return 0;
1709 }
1710
1711 static int add_slices_for_fuse(struct rev_cache_info *rci, struct string_list *files, struct strbuf *ignore)
1712 {
1713         unsigned char sha1[20];
1714         char base[PATH_MAX];
1715         int baselen, i, slice_nr = 0;
1716         struct stat fi;
1717         DIR *dirh;
1718         struct dirent *de;
1719
1720         strncpy(base, git_path("rev-cache"), sizeof(base));
1721         baselen = strlen(base);
1722
1723         dirh = opendir(base);
1724         if (!dirh)
1725                 return 0;
1726
1727         while ((de = readdir(dirh))) {
1728                 if (de->d_name[0] == '.')
1729                         continue;
1730
1731                 base[baselen] = '/';
1732                 strncpy(base + baselen + 1, de->d_name, sizeof(base) - baselen - 1);
1733
1734                 if (get_sha1_hex(de->d_name, sha1)) {
1735                         /* whatever it is, we don't need it... */
1736                         string_list_insert(base, files);
1737                         continue;
1738                 }
1739
1740                 /* _theoretically_ it is possible a slice < ignore_size to map objects not covered by, yet reachable from,
1741                  * a slice >= ignore_size, meaning that we could potentially delete an 'unfused' slice; but if that
1742                  * ever *did* happen their cache structure'd be so fucked up they might as well refuse the entire thing.
1743                  * and at any rate the worst it'd do is make rev-list revert to standard walking in that (small) bit.
1744                  */
1745                 if (rci->ignore_size) {
1746                         if (stat(base, &fi))
1747                                 warning("can't query file %s\n", base);
1748                         else if (fi.st_size >= rci->ignore_size) {
1749                                 strbuf_add(ignore, sha1, 20);
1750                                 continue;
1751                         }
1752                 } else {
1753                         /* check if a pointer */
1754                         struct cache_slice_pointer ptr;
1755                         int fd = open(base, O_RDONLY);
1756
1757                         if (fd < 0)
1758                                 goto dont_save;
1759                         if (sizeof(ptr) != read_in_full(fd, &ptr, sizeof(ptr)))
1760                                 goto dont_save;
1761
1762                         close(fd);
1763                         if (!strcmp(ptr.signature, "REVCOPTR")) {
1764                                 strbuf_add(ignore, sha1, 20);
1765                                 continue;
1766                         }
1767                 }
1768
1769 dont_save:
1770                 for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1771                         if (!hashcmp(idx_caches + i * 20, sha1))
1772                                 break;
1773                 }
1774
1775                 if (i >= 0)
1776                         rci->maps[i].size = 1;
1777
1778                 string_list_insert(base, files);
1779                 slice_nr++;
1780         }
1781
1782         closedir(dirh);
1783
1784         return slice_nr;
1785 }
1786
1787 /* the most work-intensive attributes in the cache are the unique objects and size, both
1788  * of which can be re-used.  although path structures will be isomorphic, path generation is
1789  * not particularly expensive, and at any rate we need to re-sort the commits */
1790 int fuse_cache_slices(struct rev_cache_info *rci, struct rev_info *revs)
1791 {
1792         unsigned char cache_sha1[20];
1793         struct string_list files = {0, 0, 0, 1}; /* dup */
1794         struct strbuf ignore;
1795         int i;
1796
1797         maybe_fill_with_defaults(rci);
1798
1799         if (!idx_map)
1800                 init_index();
1801         if (!idx_map)
1802                 return -1;
1803
1804         strbuf_init(&ignore, 0);
1805         rci->maps = xcalloc(idx_head.cache_nr, sizeof(struct rev_cache_slice_map));
1806         if (add_slices_for_fuse(rci, &files, &ignore) <= 1) {
1807                 printf("nothing to fuse\n");
1808                 return 1;
1809         }
1810
1811         if (ignore.len) {
1812                 starts_from_slices(revs, UNINTERESTING, (unsigned char *)ignore.buf, ignore.len / 20);
1813                 strbuf_release(&ignore);
1814         }
1815
1816         /* initialize mappings */
1817         for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1818                 struct rev_cache_slice_map *map = rci->maps + i;
1819                 struct stat fi;
1820                 int fd;
1821
1822                 if (!map->size)
1823                         continue;
1824                 map->size = 0;
1825
1826                 /* pointers are never fused, so we can use open directly */
1827                 fd = open(git_path("rev-cache/%s", sha1_to_hex(idx_caches + i * 20)), O_RDONLY);
1828                 if (fd <= 0 || fstat(fd, &fi))
1829                         continue;
1830                 if (fi.st_size < sizeof(struct rc_slice_header))
1831                         continue;
1832
1833                 map->map = xmmap(0, fi.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
1834                 if (map->map == MAP_FAILED)
1835                         continue;
1836
1837                 close(fd);
1838                 map->size = fi.st_size;
1839         }
1840
1841         rci->make_index = 0;
1842         rci->fuse_me = 1;
1843         if (make_cache_slice(rci, revs, 0, 0, cache_sha1) < 0)
1844                 die("can't make cache slice");
1845
1846         printf("%s\n", sha1_to_hex(cache_sha1));
1847
1848         /* clean up time! */
1849         for (i = idx_head.cache_nr - 1; i >= 0; i--) {
1850                 struct rev_cache_slice_map *map = rci->maps + i;
1851
1852                 if (!map->size)
1853                         continue;
1854
1855                 munmap(map->map, map->size);
1856         }
1857         free(rci->maps);
1858         cleanup_cache_slices();
1859
1860         for (i = 0; i < files.nr; i++) {
1861                 char *name = files.items[i].string;
1862
1863                 fprintf(stderr, "removing %s\n", name);
1864                 unlink_or_warn(name);
1865         }
1866
1867         string_list_clear(&files, 0);
1868
1869         return regenerate_cache_index(rci);
1870 }
1871
1872 static int verify_cache_slice(const char *slice_path, unsigned char *sha1)
1873 {
1874         struct rc_slice_header head;
1875         int fd, len, retval = -1;
1876         unsigned char *map = MAP_FAILED;
1877         struct stat fi;
1878
1879         len = strlen(slice_path);
1880         if (len < 40)
1881                 return -2;
1882         if (get_sha1_hex(slice_path + len - 40, sha1))
1883                 return -3;
1884
1885         fd = open(slice_path, O_RDONLY);
1886         if (fd == -1)
1887                 goto end;
1888         if (fstat(fd, &fi) || fi.st_size < sizeof(head))
1889                 goto end;
1890
1891         map = xmmap(0, sizeof(head), PROT_READ, MAP_PRIVATE, fd, 0);
1892         if (map == MAP_FAILED)
1893                 goto end;
1894         if (get_cache_slice_header(sha1, map, fi.st_size, &head))
1895                 goto end;
1896
1897         retval = 0;
1898
1899 end:
1900         if (map != MAP_FAILED)
1901                 munmap(map, sizeof(head));
1902         if (fd > 0)
1903                 close(fd);
1904
1905         return retval;
1906 }
1907
1908 int make_cache_slice_pointer(struct rev_cache_info *rci, const char *slice_path)
1909 {
1910         struct cache_slice_pointer ptr;
1911         int fd;
1912         unsigned char sha1[20];
1913
1914         maybe_fill_with_defaults(rci);
1915         rci->overwrite_all = 1;
1916
1917         if (verify_cache_slice(slice_path, sha1) < 0)
1918                 return -1;
1919
1920         strcpy(ptr.signature, "REVCOPTR");
1921         ptr.version = SUPPORTED_REVCOPTR_VERSION;
1922         strcpy(ptr.path, make_nonrelative_path(slice_path));
1923
1924         fd = open(git_path("rev-cache/%s", sha1_to_hex(sha1)), O_RDWR | O_CREAT | O_TRUNC, 0666);
1925         if (fd < 0)
1926                 return -2;
1927
1928         write_in_full(fd, &ptr, sizeof(ptr));
1929         make_cache_index(rci, sha1, fd, sizeof(ptr));
1930
1931         close(fd);
1932
1933         return 0;
1934 }