rev-list: move bisect related code into its own file
[git] / bisect.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "diff.h"
4 #include "revision.h"
5 #include "bisect.h"
6
7 /* bits #0-15 in revision.h */
8
9 #define COUNTED         (1u<<16)
10
11 /*
12  * This is a truly stupid algorithm, but it's only
13  * used for bisection, and we just don't care enough.
14  *
15  * We care just barely enough to avoid recursing for
16  * non-merge entries.
17  */
18 static int count_distance(struct commit_list *entry)
19 {
20         int nr = 0;
21
22         while (entry) {
23                 struct commit *commit = entry->item;
24                 struct commit_list *p;
25
26                 if (commit->object.flags & (UNINTERESTING | COUNTED))
27                         break;
28                 if (!(commit->object.flags & TREESAME))
29                         nr++;
30                 commit->object.flags |= COUNTED;
31                 p = commit->parents;
32                 entry = p;
33                 if (p) {
34                         p = p->next;
35                         while (p) {
36                                 nr += count_distance(p);
37                                 p = p->next;
38                         }
39                 }
40         }
41
42         return nr;
43 }
44
45 static void clear_distance(struct commit_list *list)
46 {
47         while (list) {
48                 struct commit *commit = list->item;
49                 commit->object.flags &= ~COUNTED;
50                 list = list->next;
51         }
52 }
53
54 #define DEBUG_BISECT 0
55
56 static inline int weight(struct commit_list *elem)
57 {
58         return *((int*)(elem->item->util));
59 }
60
61 static inline void weight_set(struct commit_list *elem, int weight)
62 {
63         *((int*)(elem->item->util)) = weight;
64 }
65
66 static int count_interesting_parents(struct commit *commit)
67 {
68         struct commit_list *p;
69         int count;
70
71         for (count = 0, p = commit->parents; p; p = p->next) {
72                 if (p->item->object.flags & UNINTERESTING)
73                         continue;
74                 count++;
75         }
76         return count;
77 }
78
79 static inline int halfway(struct commit_list *p, int nr)
80 {
81         /*
82          * Don't short-cut something we are not going to return!
83          */
84         if (p->item->object.flags & TREESAME)
85                 return 0;
86         if (DEBUG_BISECT)
87                 return 0;
88         /*
89          * 2 and 3 are halfway of 5.
90          * 3 is halfway of 6 but 2 and 4 are not.
91          */
92         switch (2 * weight(p) - nr) {
93         case -1: case 0: case 1:
94                 return 1;
95         default:
96                 return 0;
97         }
98 }
99
100 #if !DEBUG_BISECT
101 #define show_list(a,b,c,d) do { ; } while (0)
102 #else
103 static void show_list(const char *debug, int counted, int nr,
104                       struct commit_list *list)
105 {
106         struct commit_list *p;
107
108         fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
109
110         for (p = list; p; p = p->next) {
111                 struct commit_list *pp;
112                 struct commit *commit = p->item;
113                 unsigned flags = commit->object.flags;
114                 enum object_type type;
115                 unsigned long size;
116                 char *buf = read_sha1_file(commit->object.sha1, &type, &size);
117                 char *ep, *sp;
118
119                 fprintf(stderr, "%c%c%c ",
120                         (flags & TREESAME) ? ' ' : 'T',
121                         (flags & UNINTERESTING) ? 'U' : ' ',
122                         (flags & COUNTED) ? 'C' : ' ');
123                 if (commit->util)
124                         fprintf(stderr, "%3d", weight(p));
125                 else
126                         fprintf(stderr, "---");
127                 fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
128                 for (pp = commit->parents; pp; pp = pp->next)
129                         fprintf(stderr, " %.*s", 8,
130                                 sha1_to_hex(pp->item->object.sha1));
131
132                 sp = strstr(buf, "\n\n");
133                 if (sp) {
134                         sp += 2;
135                         for (ep = sp; *ep && *ep != '\n'; ep++)
136                                 ;
137                         fprintf(stderr, " %.*s", (int)(ep - sp), sp);
138                 }
139                 fprintf(stderr, "\n");
140         }
141 }
142 #endif /* DEBUG_BISECT */
143
144 static struct commit_list *best_bisection(struct commit_list *list, int nr)
145 {
146         struct commit_list *p, *best;
147         int best_distance = -1;
148
149         best = list;
150         for (p = list; p; p = p->next) {
151                 int distance;
152                 unsigned flags = p->item->object.flags;
153
154                 if (flags & TREESAME)
155                         continue;
156                 distance = weight(p);
157                 if (nr - distance < distance)
158                         distance = nr - distance;
159                 if (distance > best_distance) {
160                         best = p;
161                         best_distance = distance;
162                 }
163         }
164
165         return best;
166 }
167
168 struct commit_dist {
169         struct commit *commit;
170         int distance;
171 };
172
173 static int compare_commit_dist(const void *a_, const void *b_)
174 {
175         struct commit_dist *a, *b;
176
177         a = (struct commit_dist *)a_;
178         b = (struct commit_dist *)b_;
179         if (a->distance != b->distance)
180                 return b->distance - a->distance; /* desc sort */
181         return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
182 }
183
184 static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
185 {
186         struct commit_list *p;
187         struct commit_dist *array = xcalloc(nr, sizeof(*array));
188         int cnt, i;
189
190         for (p = list, cnt = 0; p; p = p->next) {
191                 int distance;
192                 unsigned flags = p->item->object.flags;
193
194                 if (flags & TREESAME)
195                         continue;
196                 distance = weight(p);
197                 if (nr - distance < distance)
198                         distance = nr - distance;
199                 array[cnt].commit = p->item;
200                 array[cnt].distance = distance;
201                 cnt++;
202         }
203         qsort(array, cnt, sizeof(*array), compare_commit_dist);
204         for (p = list, i = 0; i < cnt; i++) {
205                 struct name_decoration *r = xmalloc(sizeof(*r) + 100);
206                 struct object *obj = &(array[i].commit->object);
207
208                 sprintf(r->name, "dist=%d", array[i].distance);
209                 r->next = add_decoration(&name_decoration, obj, r);
210                 p->item = array[i].commit;
211                 p = p->next;
212         }
213         if (p)
214                 p->next = NULL;
215         free(array);
216         return list;
217 }
218
219 /*
220  * zero or positive weight is the number of interesting commits it can
221  * reach, including itself.  Especially, weight = 0 means it does not
222  * reach any tree-changing commits (e.g. just above uninteresting one
223  * but traversal is with pathspec).
224  *
225  * weight = -1 means it has one parent and its distance is yet to
226  * be computed.
227  *
228  * weight = -2 means it has more than one parent and its distance is
229  * unknown.  After running count_distance() first, they will get zero
230  * or positive distance.
231  */
232 static struct commit_list *do_find_bisection(struct commit_list *list,
233                                              int nr, int *weights,
234                                              int find_all)
235 {
236         int n, counted;
237         struct commit_list *p;
238
239         counted = 0;
240
241         for (n = 0, p = list; p; p = p->next) {
242                 struct commit *commit = p->item;
243                 unsigned flags = commit->object.flags;
244
245                 p->item->util = &weights[n++];
246                 switch (count_interesting_parents(commit)) {
247                 case 0:
248                         if (!(flags & TREESAME)) {
249                                 weight_set(p, 1);
250                                 counted++;
251                                 show_list("bisection 2 count one",
252                                           counted, nr, list);
253                         }
254                         /*
255                          * otherwise, it is known not to reach any
256                          * tree-changing commit and gets weight 0.
257                          */
258                         break;
259                 case 1:
260                         weight_set(p, -1);
261                         break;
262                 default:
263                         weight_set(p, -2);
264                         break;
265                 }
266         }
267
268         show_list("bisection 2 initialize", counted, nr, list);
269
270         /*
271          * If you have only one parent in the resulting set
272          * then you can reach one commit more than that parent
273          * can reach.  So we do not have to run the expensive
274          * count_distance() for single strand of pearls.
275          *
276          * However, if you have more than one parents, you cannot
277          * just add their distance and one for yourself, since
278          * they usually reach the same ancestor and you would
279          * end up counting them twice that way.
280          *
281          * So we will first count distance of merges the usual
282          * way, and then fill the blanks using cheaper algorithm.
283          */
284         for (p = list; p; p = p->next) {
285                 if (p->item->object.flags & UNINTERESTING)
286                         continue;
287                 if (weight(p) != -2)
288                         continue;
289                 weight_set(p, count_distance(p));
290                 clear_distance(list);
291
292                 /* Does it happen to be at exactly half-way? */
293                 if (!find_all && halfway(p, nr))
294                         return p;
295                 counted++;
296         }
297
298         show_list("bisection 2 count_distance", counted, nr, list);
299
300         while (counted < nr) {
301                 for (p = list; p; p = p->next) {
302                         struct commit_list *q;
303                         unsigned flags = p->item->object.flags;
304
305                         if (0 <= weight(p))
306                                 continue;
307                         for (q = p->item->parents; q; q = q->next) {
308                                 if (q->item->object.flags & UNINTERESTING)
309                                         continue;
310                                 if (0 <= weight(q))
311                                         break;
312                         }
313                         if (!q)
314                                 continue;
315
316                         /*
317                          * weight for p is unknown but q is known.
318                          * add one for p itself if p is to be counted,
319                          * otherwise inherit it from q directly.
320                          */
321                         if (!(flags & TREESAME)) {
322                                 weight_set(p, weight(q)+1);
323                                 counted++;
324                                 show_list("bisection 2 count one",
325                                           counted, nr, list);
326                         }
327                         else
328                                 weight_set(p, weight(q));
329
330                         /* Does it happen to be at exactly half-way? */
331                         if (!find_all && halfway(p, nr))
332                                 return p;
333                 }
334         }
335
336         show_list("bisection 2 counted all", counted, nr, list);
337
338         if (!find_all)
339                 return best_bisection(list, nr);
340         else
341                 return best_bisection_sorted(list, nr);
342 }
343
344 struct commit_list *find_bisection(struct commit_list *list,
345                                           int *reaches, int *all,
346                                           int find_all)
347 {
348         int nr, on_list;
349         struct commit_list *p, *best, *next, *last;
350         int *weights;
351
352         show_list("bisection 2 entry", 0, 0, list);
353
354         /*
355          * Count the number of total and tree-changing items on the
356          * list, while reversing the list.
357          */
358         for (nr = on_list = 0, last = NULL, p = list;
359              p;
360              p = next) {
361                 unsigned flags = p->item->object.flags;
362
363                 next = p->next;
364                 if (flags & UNINTERESTING)
365                         continue;
366                 p->next = last;
367                 last = p;
368                 if (!(flags & TREESAME))
369                         nr++;
370                 on_list++;
371         }
372         list = last;
373         show_list("bisection 2 sorted", 0, nr, list);
374
375         *all = nr;
376         weights = xcalloc(on_list, sizeof(*weights));
377
378         /* Do the real work of finding bisection commit. */
379         best = do_find_bisection(list, nr, weights, find_all);
380         if (best) {
381                 if (!find_all)
382                         best->next = NULL;
383                 *reaches = weight(best);
384         }
385         free(weights);
386         return best;
387 }
388