Add a test showing that 'git repack' throws away grafted-away parents
[git] / bisect.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "revision.h"
5 #include "refs.h"
6 #include "list-objects.h"
7 #include "quote.h"
8 #include "sha1-lookup.h"
9 #include "bisect.h"
10
11 static unsigned char (*skipped_sha1)[20];
12 static int skipped_sha1_nr;
13 static int skipped_sha1_alloc;
14
15 static const char **rev_argv;
16 static int rev_argv_nr;
17 static int rev_argv_alloc;
18
19 /* bits #0-15 in revision.h */
20
21 #define COUNTED         (1u<<16)
22
23 /*
24  * This is a truly stupid algorithm, but it's only
25  * used for bisection, and we just don't care enough.
26  *
27  * We care just barely enough to avoid recursing for
28  * non-merge entries.
29  */
30 static int count_distance(struct commit_list *entry)
31 {
32         int nr = 0;
33
34         while (entry) {
35                 struct commit *commit = entry->item;
36                 struct commit_list *p;
37
38                 if (commit->object.flags & (UNINTERESTING | COUNTED))
39                         break;
40                 if (!(commit->object.flags & TREESAME))
41                         nr++;
42                 commit->object.flags |= COUNTED;
43                 p = commit->parents;
44                 entry = p;
45                 if (p) {
46                         p = p->next;
47                         while (p) {
48                                 nr += count_distance(p);
49                                 p = p->next;
50                         }
51                 }
52         }
53
54         return nr;
55 }
56
57 static void clear_distance(struct commit_list *list)
58 {
59         while (list) {
60                 struct commit *commit = list->item;
61                 commit->object.flags &= ~COUNTED;
62                 list = list->next;
63         }
64 }
65
66 #define DEBUG_BISECT 0
67
68 static inline int weight(struct commit_list *elem)
69 {
70         return *((int*)(elem->item->util));
71 }
72
73 static inline void weight_set(struct commit_list *elem, int weight)
74 {
75         *((int*)(elem->item->util)) = weight;
76 }
77
78 static int count_interesting_parents(struct commit *commit)
79 {
80         struct commit_list *p;
81         int count;
82
83         for (count = 0, p = commit->parents; p; p = p->next) {
84                 if (p->item->object.flags & UNINTERESTING)
85                         continue;
86                 count++;
87         }
88         return count;
89 }
90
91 static inline int halfway(struct commit_list *p, int nr)
92 {
93         /*
94          * Don't short-cut something we are not going to return!
95          */
96         if (p->item->object.flags & TREESAME)
97                 return 0;
98         if (DEBUG_BISECT)
99                 return 0;
100         /*
101          * 2 and 3 are halfway of 5.
102          * 3 is halfway of 6 but 2 and 4 are not.
103          */
104         switch (2 * weight(p) - nr) {
105         case -1: case 0: case 1:
106                 return 1;
107         default:
108                 return 0;
109         }
110 }
111
112 #if !DEBUG_BISECT
113 #define show_list(a,b,c,d) do { ; } while (0)
114 #else
115 static void show_list(const char *debug, int counted, int nr,
116                       struct commit_list *list)
117 {
118         struct commit_list *p;
119
120         fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
121
122         for (p = list; p; p = p->next) {
123                 struct commit_list *pp;
124                 struct commit *commit = p->item;
125                 unsigned flags = commit->object.flags;
126                 enum object_type type;
127                 unsigned long size;
128                 char *buf = read_sha1_file(commit->object.sha1, &type, &size);
129                 char *ep, *sp;
130
131                 fprintf(stderr, "%c%c%c ",
132                         (flags & TREESAME) ? ' ' : 'T',
133                         (flags & UNINTERESTING) ? 'U' : ' ',
134                         (flags & COUNTED) ? 'C' : ' ');
135                 if (commit->util)
136                         fprintf(stderr, "%3d", weight(p));
137                 else
138                         fprintf(stderr, "---");
139                 fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
140                 for (pp = commit->parents; pp; pp = pp->next)
141                         fprintf(stderr, " %.*s", 8,
142                                 sha1_to_hex(pp->item->object.sha1));
143
144                 sp = strstr(buf, "\n\n");
145                 if (sp) {
146                         sp += 2;
147                         for (ep = sp; *ep && *ep != '\n'; ep++)
148                                 ;
149                         fprintf(stderr, " %.*s", (int)(ep - sp), sp);
150                 }
151                 fprintf(stderr, "\n");
152         }
153 }
154 #endif /* DEBUG_BISECT */
155
156 static struct commit_list *best_bisection(struct commit_list *list, int nr)
157 {
158         struct commit_list *p, *best;
159         int best_distance = -1;
160
161         best = list;
162         for (p = list; p; p = p->next) {
163                 int distance;
164                 unsigned flags = p->item->object.flags;
165
166                 if (flags & TREESAME)
167                         continue;
168                 distance = weight(p);
169                 if (nr - distance < distance)
170                         distance = nr - distance;
171                 if (distance > best_distance) {
172                         best = p;
173                         best_distance = distance;
174                 }
175         }
176
177         return best;
178 }
179
180 struct commit_dist {
181         struct commit *commit;
182         int distance;
183 };
184
185 static int compare_commit_dist(const void *a_, const void *b_)
186 {
187         struct commit_dist *a, *b;
188
189         a = (struct commit_dist *)a_;
190         b = (struct commit_dist *)b_;
191         if (a->distance != b->distance)
192                 return b->distance - a->distance; /* desc sort */
193         return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
194 }
195
196 static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
197 {
198         struct commit_list *p;
199         struct commit_dist *array = xcalloc(nr, sizeof(*array));
200         int cnt, i;
201
202         for (p = list, cnt = 0; p; p = p->next) {
203                 int distance;
204                 unsigned flags = p->item->object.flags;
205
206                 if (flags & TREESAME)
207                         continue;
208                 distance = weight(p);
209                 if (nr - distance < distance)
210                         distance = nr - distance;
211                 array[cnt].commit = p->item;
212                 array[cnt].distance = distance;
213                 cnt++;
214         }
215         qsort(array, cnt, sizeof(*array), compare_commit_dist);
216         for (p = list, i = 0; i < cnt; i++) {
217                 struct name_decoration *r = xmalloc(sizeof(*r) + 100);
218                 struct object *obj = &(array[i].commit->object);
219
220                 sprintf(r->name, "dist=%d", array[i].distance);
221                 r->next = add_decoration(&name_decoration, obj, r);
222                 p->item = array[i].commit;
223                 p = p->next;
224         }
225         if (p)
226                 p->next = NULL;
227         free(array);
228         return list;
229 }
230
231 /*
232  * zero or positive weight is the number of interesting commits it can
233  * reach, including itself.  Especially, weight = 0 means it does not
234  * reach any tree-changing commits (e.g. just above uninteresting one
235  * but traversal is with pathspec).
236  *
237  * weight = -1 means it has one parent and its distance is yet to
238  * be computed.
239  *
240  * weight = -2 means it has more than one parent and its distance is
241  * unknown.  After running count_distance() first, they will get zero
242  * or positive distance.
243  */
244 static struct commit_list *do_find_bisection(struct commit_list *list,
245                                              int nr, int *weights,
246                                              int find_all)
247 {
248         int n, counted;
249         struct commit_list *p;
250
251         counted = 0;
252
253         for (n = 0, p = list; p; p = p->next) {
254                 struct commit *commit = p->item;
255                 unsigned flags = commit->object.flags;
256
257                 p->item->util = &weights[n++];
258                 switch (count_interesting_parents(commit)) {
259                 case 0:
260                         if (!(flags & TREESAME)) {
261                                 weight_set(p, 1);
262                                 counted++;
263                                 show_list("bisection 2 count one",
264                                           counted, nr, list);
265                         }
266                         /*
267                          * otherwise, it is known not to reach any
268                          * tree-changing commit and gets weight 0.
269                          */
270                         break;
271                 case 1:
272                         weight_set(p, -1);
273                         break;
274                 default:
275                         weight_set(p, -2);
276                         break;
277                 }
278         }
279
280         show_list("bisection 2 initialize", counted, nr, list);
281
282         /*
283          * If you have only one parent in the resulting set
284          * then you can reach one commit more than that parent
285          * can reach.  So we do not have to run the expensive
286          * count_distance() for single strand of pearls.
287          *
288          * However, if you have more than one parents, you cannot
289          * just add their distance and one for yourself, since
290          * they usually reach the same ancestor and you would
291          * end up counting them twice that way.
292          *
293          * So we will first count distance of merges the usual
294          * way, and then fill the blanks using cheaper algorithm.
295          */
296         for (p = list; p; p = p->next) {
297                 if (p->item->object.flags & UNINTERESTING)
298                         continue;
299                 if (weight(p) != -2)
300                         continue;
301                 weight_set(p, count_distance(p));
302                 clear_distance(list);
303
304                 /* Does it happen to be at exactly half-way? */
305                 if (!find_all && halfway(p, nr))
306                         return p;
307                 counted++;
308         }
309
310         show_list("bisection 2 count_distance", counted, nr, list);
311
312         while (counted < nr) {
313                 for (p = list; p; p = p->next) {
314                         struct commit_list *q;
315                         unsigned flags = p->item->object.flags;
316
317                         if (0 <= weight(p))
318                                 continue;
319                         for (q = p->item->parents; q; q = q->next) {
320                                 if (q->item->object.flags & UNINTERESTING)
321                                         continue;
322                                 if (0 <= weight(q))
323                                         break;
324                         }
325                         if (!q)
326                                 continue;
327
328                         /*
329                          * weight for p is unknown but q is known.
330                          * add one for p itself if p is to be counted,
331                          * otherwise inherit it from q directly.
332                          */
333                         if (!(flags & TREESAME)) {
334                                 weight_set(p, weight(q)+1);
335                                 counted++;
336                                 show_list("bisection 2 count one",
337                                           counted, nr, list);
338                         }
339                         else
340                                 weight_set(p, weight(q));
341
342                         /* Does it happen to be at exactly half-way? */
343                         if (!find_all && halfway(p, nr))
344                                 return p;
345                 }
346         }
347
348         show_list("bisection 2 counted all", counted, nr, list);
349
350         if (!find_all)
351                 return best_bisection(list, nr);
352         else
353                 return best_bisection_sorted(list, nr);
354 }
355
356 struct commit_list *find_bisection(struct commit_list *list,
357                                           int *reaches, int *all,
358                                           int find_all)
359 {
360         int nr, on_list;
361         struct commit_list *p, *best, *next, *last;
362         int *weights;
363
364         show_list("bisection 2 entry", 0, 0, list);
365
366         /*
367          * Count the number of total and tree-changing items on the
368          * list, while reversing the list.
369          */
370         for (nr = on_list = 0, last = NULL, p = list;
371              p;
372              p = next) {
373                 unsigned flags = p->item->object.flags;
374
375                 next = p->next;
376                 if (flags & UNINTERESTING)
377                         continue;
378                 p->next = last;
379                 last = p;
380                 if (!(flags & TREESAME))
381                         nr++;
382                 on_list++;
383         }
384         list = last;
385         show_list("bisection 2 sorted", 0, nr, list);
386
387         *all = nr;
388         weights = xcalloc(on_list, sizeof(*weights));
389
390         /* Do the real work of finding bisection commit. */
391         best = do_find_bisection(list, nr, weights, find_all);
392         if (best) {
393                 if (!find_all)
394                         best->next = NULL;
395                 *reaches = weight(best);
396         }
397         free(weights);
398         return best;
399 }
400
401 static int register_ref(const char *refname, const unsigned char *sha1,
402                         int flags, void *cb_data)
403 {
404         if (!strcmp(refname, "bad")) {
405                 ALLOC_GROW(rev_argv, rev_argv_nr + 1, rev_argv_alloc);
406                 rev_argv[rev_argv_nr++] = xstrdup(sha1_to_hex(sha1));
407         } else if (!prefixcmp(refname, "good-")) {
408                 const char *hex = sha1_to_hex(sha1);
409                 char *good = xmalloc(strlen(hex) + 2);
410                 *good = '^';
411                 memcpy(good + 1, hex, strlen(hex) + 1);
412                 ALLOC_GROW(rev_argv, rev_argv_nr + 1, rev_argv_alloc);
413                 rev_argv[rev_argv_nr++] = good;
414         } else if (!prefixcmp(refname, "skip-")) {
415                 ALLOC_GROW(skipped_sha1, skipped_sha1_nr + 1,
416                            skipped_sha1_alloc);
417                 hashcpy(skipped_sha1[skipped_sha1_nr++], sha1);
418         }
419
420         return 0;
421 }
422
423 static int read_bisect_refs(void)
424 {
425         return for_each_ref_in("refs/bisect/", register_ref, NULL);
426 }
427
428 void read_bisect_paths(void)
429 {
430         struct strbuf str = STRBUF_INIT;
431         const char *filename = git_path("BISECT_NAMES");
432         FILE *fp = fopen(filename, "r");
433
434         if (!fp)
435                 die("Could not open file '%s': %s", filename, strerror(errno));
436
437         while (strbuf_getline(&str, fp, '\n') != EOF) {
438                 char *quoted;
439                 int res;
440
441                 strbuf_trim(&str);
442                 quoted = strbuf_detach(&str, NULL);
443                 res = sq_dequote_to_argv(quoted, &rev_argv,
444                                          &rev_argv_nr, &rev_argv_alloc);
445                 if (res)
446                         die("Badly quoted content in file '%s': %s",
447                             filename, quoted);
448         }
449
450         strbuf_release(&str);
451         fclose(fp);
452 }
453
454 static int skipcmp(const void *a, const void *b)
455 {
456         return hashcmp(a, b);
457 }
458
459 static void prepare_skipped(void)
460 {
461         qsort(skipped_sha1, skipped_sha1_nr, sizeof(*skipped_sha1), skipcmp);
462 }
463
464 static const unsigned char *skipped_sha1_access(size_t index, void *table)
465 {
466         unsigned char (*skipped)[20] = table;
467         return skipped[index];
468 }
469
470 static int lookup_skipped(unsigned char *sha1)
471 {
472         return sha1_pos(sha1, skipped_sha1, skipped_sha1_nr,
473                         skipped_sha1_access);
474 }
475
476 struct commit_list *filter_skipped(struct commit_list *list,
477                                    struct commit_list **tried,
478                                    int show_all)
479 {
480         struct commit_list *filtered = NULL, **f = &filtered;
481
482         *tried = NULL;
483
484         if (!skipped_sha1_nr)
485                 return list;
486
487         prepare_skipped();
488
489         while (list) {
490                 struct commit_list *next = list->next;
491                 list->next = NULL;
492                 if (0 <= lookup_skipped(list->item->object.sha1)) {
493                         /* Move current to tried list */
494                         *tried = list;
495                         tried = &list->next;
496                 } else {
497                         if (!show_all)
498                                 return list;
499                         /* Move current to filtered list */
500                         *f = list;
501                         f = &list->next;
502                 }
503                 list = next;
504         }
505
506         return filtered;
507 }
508
509 static void bisect_rev_setup(struct rev_info *revs, const char *prefix)
510 {
511         init_revisions(revs, prefix);
512         revs->abbrev = 0;
513         revs->commit_format = CMIT_FMT_UNSPECIFIED;
514
515         /* argv[0] will be ignored by setup_revisions */
516         ALLOC_GROW(rev_argv, rev_argv_nr + 1, rev_argv_alloc);
517         rev_argv[rev_argv_nr++] = xstrdup("bisect_rev_setup");
518
519         if (read_bisect_refs())
520                 die("reading bisect refs failed");
521
522         ALLOC_GROW(rev_argv, rev_argv_nr + 1, rev_argv_alloc);
523         rev_argv[rev_argv_nr++] = xstrdup("--");
524
525         read_bisect_paths();
526
527         ALLOC_GROW(rev_argv, rev_argv_nr + 1, rev_argv_alloc);
528         rev_argv[rev_argv_nr++] = NULL;
529
530         setup_revisions(rev_argv_nr, rev_argv, revs, NULL);
531
532         revs->limited = 1;
533 }
534
535 int bisect_next_vars(const char *prefix)
536 {
537         struct rev_info revs;
538         struct rev_list_info info;
539         int reaches = 0, all = 0;
540
541         memset(&info, 0, sizeof(info));
542         info.revs = &revs;
543         info.bisect_show_flags = BISECT_SHOW_TRIED | BISECT_SHOW_STRINGED;
544
545         bisect_rev_setup(&revs, prefix);
546
547         if (prepare_revision_walk(&revs))
548                 die("revision walk setup failed");
549         if (revs.tree_objects)
550                 mark_edges_uninteresting(revs.commits, &revs, NULL);
551
552         revs.commits = find_bisection(revs.commits, &reaches, &all,
553                                       !!skipped_sha1_nr);
554
555         return show_bisect_vars(&info, reaches, all);
556 }