Implement line-history search (git log -L)
[git] / line-log.c
1 #include "git-compat-util.h"
2 #include "line-range.h"
3 #include "cache.h"
4 #include "tag.h"
5 #include "blob.h"
6 #include "tree.h"
7 #include "diff.h"
8 #include "commit.h"
9 #include "decorate.h"
10 #include "revision.h"
11 #include "xdiff-interface.h"
12 #include "strbuf.h"
13 #include "log-tree.h"
14 #include "graph.h"
15 #include "line-log.h"
16
17 static void range_set_grow(struct range_set *rs, size_t extra)
18 {
19         ALLOC_GROW(rs->ranges, rs->nr + extra, rs->alloc);
20 }
21
22 /* Either initialization would be fine */
23 #define RANGE_SET_INIT {0}
24
25 static void range_set_init(struct range_set *rs, size_t prealloc)
26 {
27         rs->alloc = rs->nr = 0;
28         rs->ranges = NULL;
29         if (prealloc)
30                 range_set_grow(rs, prealloc);
31 }
32
33 static void range_set_release(struct range_set *rs)
34 {
35         free(rs->ranges);
36         rs->alloc = rs->nr = 0;
37         rs->ranges = NULL;
38 }
39
40 /* dst must be uninitialized! */
41 static void range_set_copy(struct range_set *dst, struct range_set *src)
42 {
43         range_set_init(dst, src->nr);
44         memcpy(dst->ranges, src->ranges, src->nr*sizeof(struct range_set));
45         dst->nr = src->nr;
46 }
47 static void range_set_move(struct range_set *dst, struct range_set *src)
48 {
49         range_set_release(dst);
50         dst->ranges = src->ranges;
51         dst->nr = src->nr;
52         dst->alloc = src->alloc;
53         src->ranges = NULL;
54         src->alloc = src->nr = 0;
55 }
56
57 /* tack on a _new_ range _at the end_ */
58 static void range_set_append(struct range_set *rs, long a, long b)
59 {
60         assert(a <= b);
61         assert(rs->nr == 0 || rs->ranges[rs->nr-1].end <= a);
62         range_set_grow(rs, 1);
63         rs->ranges[rs->nr].start = a;
64         rs->ranges[rs->nr].end = b;
65         rs->nr++;
66 }
67
68 static int range_cmp(const void *_r, const void *_s)
69 {
70         const struct range *r = _r;
71         const struct range *s = _s;
72
73         /* this could be simply 'return r.start-s.start', but for the types */
74         if (r->start == s->start)
75                 return 0;
76         if (r->start < s->start)
77                 return -1;
78         return 1;
79 }
80
81 /*
82  * Helper: In-place pass of sorting and merging the ranges in the
83  * range set, to re-establish the invariants after another operation
84  *
85  * NEEDSWORK currently not needed
86  */
87 static void sort_and_merge_range_set(struct range_set *rs)
88 {
89         int i;
90         int o = 1; /* output cursor */
91
92         qsort(rs->ranges, rs->nr, sizeof(struct range), range_cmp);
93
94         for (i = 1; i < rs->nr; i++) {
95                 if (rs->ranges[i].start <= rs->ranges[o-1].end) {
96                         rs->ranges[o-1].end = rs->ranges[i].end;
97                 } else {
98                         rs->ranges[o].start = rs->ranges[i].start;
99                         rs->ranges[o].end = rs->ranges[i].end;
100                         o++;
101                 }
102         }
103         assert(o <= rs->nr);
104         rs->nr = o;
105 }
106
107 /*
108  * Union of range sets (i.e., sets of line numbers).  Used to merge
109  * them when searches meet at a common ancestor.
110  *
111  * This is also where the ranges are consolidated into canonical form:
112  * overlapping and adjacent ranges are merged, and empty ranges are
113  * removed.
114  */
115 static void range_set_union(struct range_set *out,
116                              struct range_set *a, struct range_set *b)
117 {
118         int i = 0, j = 0, o = 0;
119         struct range *ra = a->ranges;
120         struct range *rb = b->ranges;
121         /* cannot make an alias of out->ranges: it may change during grow */
122
123         assert(out->nr == 0);
124         while (i < a->nr || j < b->nr) {
125                 struct range *new;
126                 if (i < a->nr && j < b->nr) {
127                         if (ra[i].start < rb[j].start)
128                                 new = &ra[i++];
129                         else if (ra[i].start > rb[j].start)
130                                 new = &rb[j++];
131                         else if (ra[i].end < rb[j].end)
132                                 new = &ra[i++];
133                         else
134                                 new = &rb[j++];
135                 } else if (i < a->nr)      /* b exhausted */
136                         new = &ra[i++];
137                 else                       /* a exhausted */
138                         new = &rb[j++];
139                 if (new->start == new->end)
140                         ; /* empty range */
141                 else if (!o || out->ranges[o-1].end < new->start) {
142                         range_set_grow(out, 1);
143                         out->ranges[o].start = new->start;
144                         out->ranges[o].end = new->end;
145                         o++;
146                 } else if (out->ranges[o-1].end < new->end) {
147                         out->ranges[o-1].end = new->end;
148                 }
149         }
150         out->nr = o;
151 }
152
153 /*
154  * Difference of range sets (out = a \ b).  Pass the "interesting"
155  * ranges as 'a' and the target side of the diff as 'b': it removes
156  * the ranges for which the commit is responsible.
157  */
158 static void range_set_difference(struct range_set *out,
159                                   struct range_set *a, struct range_set *b)
160 {
161         int i, j =  0;
162         for (i = 0; i < a->nr; i++) {
163                 long start = a->ranges[i].start;
164                 long end = a->ranges[i].end;
165                 while (start < end) {
166                         while (j < b->nr && start >= b->ranges[j].end)
167                                 /*
168                                  * a:         |-------
169                                  * b: ------|
170                                  */
171                                 j++;
172                         if (j >= b->nr || end < b->ranges[j].start) {
173                                 /*
174                                  * b exhausted, or
175                                  * a:  ----|
176                                  * b:         |----
177                                  */
178                                 range_set_append(out, start, end);
179                                 break;
180                         }
181                         if (start >= b->ranges[j].start) {
182                                 /*
183                                  * a:     |--????
184                                  * b: |------|
185                                  */
186                                 start = b->ranges[j].end;
187                         } else if (end > b->ranges[j].start) {
188                                 /*
189                                  * a: |-----|
190                                  * b:    |--?????
191                                  */
192                                 if (start < b->ranges[j].start)
193                                         range_set_append(out, start, b->ranges[j].start);
194                                 start = b->ranges[j].end;
195                         }
196                 }
197         }
198 }
199
200 static void diff_ranges_init(struct diff_ranges *diff)
201 {
202         range_set_init(&diff->parent, 0);
203         range_set_init(&diff->target, 0);
204 }
205
206 static void diff_ranges_release(struct diff_ranges *diff)
207 {
208         range_set_release(&diff->parent);
209         range_set_release(&diff->target);
210 }
211
212 void line_log_data_init(struct line_log_data *r)
213 {
214         memset(r, 0, sizeof(struct line_log_data));
215         range_set_init(&r->ranges, 0);
216 }
217
218 static void line_log_data_clear(struct line_log_data *r)
219 {
220         range_set_release(&r->ranges);
221         if (r->pair)
222                 diff_free_filepair(r->pair);
223 }
224
225 static void free_line_log_data(struct line_log_data *r)
226 {
227         while (r) {
228                 struct line_log_data *next = r->next;
229                 line_log_data_clear(r);
230                 free(r);
231                 r = next;
232         }
233 }
234
235 static struct line_log_data *
236 search_line_log_data(struct line_log_data *list, const char *path,
237                      struct line_log_data **insertion_point)
238 {
239         struct line_log_data *p = list;
240         if (insertion_point)
241                 *insertion_point = NULL;
242         while (p) {
243                 int cmp = strcmp(p->spec->path, path);
244                 if (!cmp)
245                         return p;
246                 if (insertion_point && cmp < 0)
247                         *insertion_point = p;
248                 p = p->next;
249         }
250         return NULL;
251 }
252
253 static void line_log_data_insert(struct line_log_data **list,
254                                  struct diff_filespec *spec,
255                                  long begin, long end)
256 {
257         struct line_log_data *ip;
258         struct line_log_data *p = search_line_log_data(*list, spec->path, &ip);
259
260         if (p) {
261                 range_set_append(&p->ranges, begin, end);
262                 sort_and_merge_range_set(&p->ranges);
263                 free_filespec(spec);
264                 return;
265         }
266
267         p = xcalloc(1, sizeof(struct line_log_data));
268         p->spec = spec;
269         range_set_append(&p->ranges, begin, end);
270         if (ip) {
271                 p->next = ip->next;
272                 ip->next = p;
273         } else {
274                 p->next = *list;
275                 *list = p;
276         }
277 }
278
279 struct collect_diff_cbdata {
280         struct diff_ranges *diff;
281 };
282
283 static int collect_diff_cb(long start_a, long count_a,
284                            long start_b, long count_b,
285                            void *data)
286 {
287         struct collect_diff_cbdata *d = data;
288
289         if (count_a >= 0)
290                 range_set_append(&d->diff->parent, start_a, start_a + count_a);
291         if (count_b >= 0)
292                 range_set_append(&d->diff->target, start_b, start_b + count_b);
293
294         return 0;
295 }
296
297 static void collect_diff(mmfile_t *parent, mmfile_t *target, struct diff_ranges *out)
298 {
299         struct collect_diff_cbdata cbdata = {NULL};
300         xpparam_t xpp;
301         xdemitconf_t xecfg;
302         xdemitcb_t ecb;
303
304         memset(&xpp, 0, sizeof(xpp));
305         memset(&xecfg, 0, sizeof(xecfg));
306         xecfg.ctxlen = xecfg.interhunkctxlen = 0;
307
308         cbdata.diff = out;
309         xecfg.hunk_func = collect_diff_cb;
310         memset(&ecb, 0, sizeof(ecb));
311         ecb.priv = &cbdata;
312         xdi_diff(parent, target, &xpp, &xecfg, &ecb);
313 }
314
315 /*
316  * These are handy for debugging.  Removing them with #if 0 silences
317  * the "unused function" warning.
318  */
319 #if 0
320 static void dump_range_set(struct range_set *rs, const char *desc)
321 {
322         int i;
323         printf("range set %s (%d items):\n", desc, rs->nr);
324         for (i = 0; i < rs->nr; i++)
325                 printf("\t[%ld,%ld]\n", rs->ranges[i].start, rs->ranges[i].end);
326 }
327
328 static void dump_line_log_data(struct line_log_data *r)
329 {
330         char buf[4096];
331         while (r) {
332                 snprintf(buf, 4096, "file %s\n", r->spec->path);
333                 dump_range_set(&r->ranges, buf);
334                 r = r->next;
335         }
336 }
337
338 static void dump_diff_ranges(struct diff_ranges *diff, const char *desc)
339 {
340         int i;
341         assert(diff->parent.nr == diff->target.nr);
342         printf("diff ranges %s (%d items):\n", desc, diff->parent.nr);
343         printf("\tparent\ttarget\n");
344         for (i = 0; i < diff->parent.nr; i++) {
345                 printf("\t[%ld,%ld]\t[%ld,%ld]\n",
346                        diff->parent.ranges[i].start,
347                        diff->parent.ranges[i].end,
348                        diff->target.ranges[i].start,
349                        diff->target.ranges[i].end);
350         }
351 }
352 #endif
353
354
355 static int ranges_overlap(struct range *a, struct range *b)
356 {
357         return !(a->end <= b->start || b->end <= a->start);
358 }
359
360 /*
361  * Given a diff and the set of interesting ranges, determine all hunks
362  * of the diff which touch (overlap) at least one of the interesting
363  * ranges in the target.
364  */
365 static void diff_ranges_filter_touched(struct diff_ranges *out,
366                                        struct diff_ranges *diff,
367                                        struct range_set *rs)
368 {
369         int i, j = 0;
370
371         assert(out->target.nr == 0);
372
373         for (i = 0; i < diff->target.nr; i++) {
374                 while (diff->target.ranges[i].start > rs->ranges[j].end) {
375                         j++;
376                         if (j == rs->nr)
377                                 return;
378                 }
379                 if (ranges_overlap(&diff->target.ranges[i], &rs->ranges[j])) {
380                         range_set_append(&out->parent,
381                                          diff->parent.ranges[i].start,
382                                          diff->parent.ranges[i].end);
383                         range_set_append(&out->target,
384                                          diff->target.ranges[i].start,
385                                          diff->target.ranges[i].end);
386                 }
387         }
388 }
389
390 /*
391  * Adjust the line counts in 'rs' to account for the lines
392  * added/removed in the diff.
393  */
394 static void range_set_shift_diff(struct range_set *out,
395                                  struct range_set *rs,
396                                  struct diff_ranges *diff)
397 {
398         int i, j = 0;
399         long offset = 0;
400         struct range *src = rs->ranges;
401         struct range *target = diff->target.ranges;
402         struct range *parent = diff->parent.ranges;
403
404         for (i = 0; i < rs->nr; i++) {
405                 while (j < diff->target.nr && src[i].start >= target[j].start) {
406                         offset += (parent[j].end-parent[j].start)
407                                 - (target[j].end-target[j].start);
408                         j++;
409                 }
410                 range_set_append(out, src[i].start+offset, src[i].end+offset);
411         }
412 }
413
414 /*
415  * Given a diff and the set of interesting ranges, map the ranges
416  * across the diff.  That is: observe that the target commit takes
417  * blame for all the + (target-side) ranges.  So for every pair of
418  * ranges in the diff that was touched, we remove the latter and add
419  * its parent side.
420  */
421 static void range_set_map_across_diff(struct range_set *out,
422                                       struct range_set *rs,
423                                       struct diff_ranges *diff,
424                                       struct diff_ranges **touched_out)
425 {
426         struct diff_ranges *touched = xmalloc(sizeof(*touched));
427         struct range_set tmp1 = RANGE_SET_INIT;
428         struct range_set tmp2 = RANGE_SET_INIT;
429
430         diff_ranges_init(touched);
431         diff_ranges_filter_touched(touched, diff, rs);
432         range_set_difference(&tmp1, rs, &touched->target);
433         range_set_shift_diff(&tmp2, &tmp1, diff);
434         range_set_union(out, &tmp2, &touched->parent);
435         range_set_release(&tmp1);
436         range_set_release(&tmp2);
437
438         *touched_out = touched;
439 }
440
441
442 static struct commit *check_single_commit(struct rev_info *revs)
443 {
444         struct object *commit = NULL;
445         int found = -1;
446         int i;
447
448         for (i = 0; i < revs->pending.nr; i++) {
449                 struct object *obj = revs->pending.objects[i].item;
450                 if (obj->flags & UNINTERESTING)
451                         continue;
452                 while (obj->type == OBJ_TAG)
453                         obj = deref_tag(obj, NULL, 0);
454                 if (obj->type != OBJ_COMMIT)
455                         die("Non commit %s?", revs->pending.objects[i].name);
456                 if (commit)
457                         die("More than one commit to dig from: %s and %s?",
458                             revs->pending.objects[i].name,
459                             revs->pending.objects[found].name);
460                 commit = obj;
461                 found = i;
462         }
463
464         if (!commit)
465                 die("No commit specified?");
466
467         return (struct commit *) commit;
468 }
469
470 static void fill_blob_sha1(struct commit *commit, struct diff_filespec *spec)
471 {
472         unsigned mode;
473         unsigned char sha1[20];
474
475         if (get_tree_entry(commit->object.sha1, spec->path,
476                            sha1, &mode))
477                 die("There is no path %s in the commit", spec->path);
478         fill_filespec(spec, sha1, 1, mode);
479
480         return;
481 }
482
483 static void fill_line_ends(struct diff_filespec *spec, long *lines,
484                            unsigned long **line_ends)
485 {
486         int num = 0, size = 50;
487         long cur = 0;
488         unsigned long *ends = NULL;
489         char *data = NULL;
490
491         if (diff_populate_filespec(spec, 0))
492                 die("Cannot read blob %s", sha1_to_hex(spec->sha1));
493
494         ends = xmalloc(size * sizeof(*ends));
495         ends[cur++] = 0;
496         data = spec->data;
497         while (num < spec->size) {
498                 if (data[num] == '\n' || num == spec->size - 1) {
499                         ALLOC_GROW(ends, (cur + 1), size);
500                         ends[cur++] = num;
501                 }
502                 num++;
503         }
504
505         /* shrink the array to fit the elements */
506         ends = xrealloc(ends, cur * sizeof(*ends));
507         *lines = cur-1;
508         *line_ends = ends;
509 }
510
511 struct nth_line_cb {
512         struct diff_filespec *spec;
513         long lines;
514         unsigned long *line_ends;
515 };
516
517 static const char *nth_line(void *data, long line)
518 {
519         struct nth_line_cb *d = data;
520         assert(d && line <= d->lines);
521         assert(d->spec && d->spec->data);
522
523         if (line == 0)
524                 return (char *)d->spec->data;
525         else
526                 return (char *)d->spec->data + d->line_ends[line] + 1;
527 }
528
529 static struct line_log_data *
530 parse_lines(struct commit *commit, const char *prefix, struct string_list *args)
531 {
532         long lines = 0;
533         unsigned long *ends = NULL;
534         struct nth_line_cb cb_data;
535         struct string_list_item *item;
536         struct line_log_data *ranges = NULL;
537
538         for_each_string_list_item(item, args) {
539                 const char *name_part, *range_part;
540                 const char *full_name;
541                 struct diff_filespec *spec;
542                 long begin = 0, end = 0;
543
544                 name_part = skip_range_arg(item->string);
545                 if (!name_part || *name_part != ':' || !name_part[1])
546                         die("-L argument '%s' not of the form start,end:file",
547                             item->string);
548                 range_part = xstrndup(item->string, name_part - item->string);
549                 name_part++;
550
551                 full_name = prefix_path(prefix, prefix ? strlen(prefix) : 0,
552                                         name_part);
553
554                 spec = alloc_filespec(full_name);
555                 fill_blob_sha1(commit, spec);
556                 fill_line_ends(spec, &lines, &ends);
557                 cb_data.spec = spec;
558                 cb_data.lines = lines;
559                 cb_data.line_ends = ends;
560
561                 if (parse_range_arg(range_part, nth_line, &cb_data,
562                                     lines, &begin, &end))
563                         die("malformed -L argument '%s'", range_part);
564                 if (begin < 1)
565                         begin = 1;
566                 if (end < 1)
567                         end = lines;
568                 begin--;
569                 if (lines < end || lines < begin)
570                         die("file %s has only %ld lines", name_part, lines);
571                 line_log_data_insert(&ranges, spec, begin, end);
572
573                 free(ends);
574                 ends = NULL;
575         }
576
577         return ranges;
578 }
579
580 static struct line_log_data *line_log_data_copy_one(struct line_log_data *r)
581 {
582         struct line_log_data *ret = xmalloc(sizeof(*ret));
583
584         assert(r);
585         line_log_data_init(ret);
586         range_set_copy(&ret->ranges, &r->ranges);
587
588         ret->spec = r->spec;
589         assert(ret->spec);
590         ret->spec->count++;
591
592         return ret;
593 }
594
595 static struct line_log_data *
596 line_log_data_copy(struct line_log_data *r)
597 {
598         struct line_log_data *ret = NULL;
599         struct line_log_data *tmp = NULL, *prev = NULL;
600
601         assert(r);
602         ret = tmp = prev = line_log_data_copy_one(r);
603         r = r->next;
604         while (r) {
605                 tmp = line_log_data_copy_one(r);
606                 prev->next = tmp;
607                 prev = tmp;
608                 r = r->next;
609         }
610
611         return ret;
612 }
613
614 /* merge two range sets across files */
615 static struct line_log_data *line_log_data_merge(struct line_log_data *a,
616                                                  struct line_log_data *b)
617 {
618         struct line_log_data *head = NULL, **pp = &head;
619
620         while (a || b) {
621                 struct line_log_data *src;
622                 struct line_log_data *src2 = NULL;
623                 struct line_log_data *d;
624                 int cmp;
625                 if (!a)
626                         cmp = 1;
627                 else if (!b)
628                         cmp = -1;
629                 else
630                         cmp = strcmp(a->spec->path, b->spec->path);
631                 if (cmp < 0) {
632                         src = a;
633                         a = a->next;
634                 } else if (cmp == 0) {
635                         src = a;
636                         a = a->next;
637                         src2 = b;
638                         b = b->next;
639                 } else {
640                         src = b;
641                         b = b->next;
642                 }
643                 d = xmalloc(sizeof(struct line_log_data));
644                 line_log_data_init(d);
645                 d->spec = src->spec;
646                 d->spec->count++;
647                 *pp = d;
648                 pp = &d->next;
649                 if (src2)
650                         range_set_union(&d->ranges, &src->ranges, &src2->ranges);
651                 else
652                         range_set_copy(&d->ranges, &src->ranges);
653         }
654
655         return head;
656 }
657
658 static void add_line_range(struct rev_info *revs, struct commit *commit,
659                            struct line_log_data *range)
660 {
661         struct line_log_data *old = NULL;
662         struct line_log_data *new = NULL;
663
664         old = lookup_decoration(&revs->line_log_data, &commit->object);
665         if (old && range) {
666                 new = line_log_data_merge(old, range);
667                 free_line_log_data(old);
668         } else if (range)
669                 new = line_log_data_copy(range);
670
671         if (new)
672                 add_decoration(&revs->line_log_data, &commit->object, new);
673 }
674
675 static void clear_commit_line_range(struct rev_info *revs, struct commit *commit)
676 {
677         struct line_log_data *r;
678         r = lookup_decoration(&revs->line_log_data, &commit->object);
679         if (!r)
680                 return;
681         free_line_log_data(r);
682         add_decoration(&revs->line_log_data, &commit->object, NULL);
683 }
684
685 static struct line_log_data *lookup_line_range(struct rev_info *revs,
686                                                struct commit *commit)
687 {
688         struct line_log_data *ret = NULL;
689
690         ret = lookup_decoration(&revs->line_log_data, &commit->object);
691         return ret;
692 }
693
694 void line_log_init(struct rev_info *rev, const char *prefix, struct string_list *args)
695 {
696         struct commit *commit = NULL;
697         struct line_log_data *range;
698
699         commit = check_single_commit(rev);
700         range = parse_lines(commit, prefix, args);
701         add_line_range(rev, commit, range);
702
703         if (!rev->diffopt.detect_rename) {
704                 int i, count = 0;
705                 struct line_log_data *r = range;
706                 const char **paths;
707                 while (r) {
708                         count++;
709                         r = r->next;
710                 }
711                 paths = xmalloc((count+1)*sizeof(char *));
712                 r = range;
713                 for (i = 0; i < count; i++) {
714                         paths[i] = xstrdup(r->spec->path);
715                         r = r->next;
716                 }
717                 paths[count] = NULL;
718                 init_pathspec(&rev->diffopt.pathspec, paths);
719                 free(paths);
720         }
721 }
722
723 static void load_tree_desc(struct tree_desc *desc, void **tree,
724                            const unsigned char *sha1)
725 {
726         unsigned long size;
727         *tree = read_object_with_reference(sha1, tree_type, &size, NULL);
728         if (!*tree)
729                 die("Unable to read tree (%s)", sha1_to_hex(sha1));
730         init_tree_desc(desc, *tree, size);
731 }
732
733 static int count_parents(struct commit *commit)
734 {
735         struct commit_list *parents = commit->parents;
736         int count = 0;
737         while (parents) {
738                 count++;
739                 parents = parents->next;
740         }
741         return count;
742 }
743
744 static void move_diff_queue(struct diff_queue_struct *dst,
745                             struct diff_queue_struct *src)
746 {
747         assert(src != dst);
748         memcpy(dst, src, sizeof(struct diff_queue_struct));
749         DIFF_QUEUE_CLEAR(src);
750 }
751
752 static void queue_diffs(struct diff_options *opt,
753                         struct diff_queue_struct *queue,
754                         struct commit *commit, struct commit *parent)
755 {
756         void *tree1 = NULL, *tree2 = NULL;
757         struct tree_desc desc1, desc2;
758
759         assert(commit);
760         load_tree_desc(&desc2, &tree2, commit->tree->object.sha1);
761         if (parent)
762                 load_tree_desc(&desc1, &tree1, parent->tree->object.sha1);
763         else
764                 init_tree_desc(&desc1, "", 0);
765
766         DIFF_QUEUE_CLEAR(&diff_queued_diff);
767         diff_tree(&desc1, &desc2, "", opt);
768         diffcore_std(opt);
769         move_diff_queue(queue, &diff_queued_diff);
770
771         if (tree1)
772                 free(tree1);
773         if (tree2)
774                 free(tree2);
775 }
776
777 static char *get_nth_line(long line, unsigned long *ends, void *data)
778 {
779         if (line == 0)
780                 return (char *)data;
781         else
782                 return (char *)data + ends[line] + 1;
783 }
784
785 static void print_line(const char *prefix, char first,
786                        long line, unsigned long *ends, void *data,
787                        const char *color, const char *reset)
788 {
789         char *begin = get_nth_line(line, ends, data);
790         char *end = get_nth_line(line+1, ends, data);
791         int had_nl = 0;
792
793         if (end > begin && end[-1] == '\n') {
794                 end--;
795                 had_nl = 1;
796         }
797
798         fputs(prefix, stdout);
799         fputs(color, stdout);
800         putchar(first);
801         fwrite(begin, 1, end-begin, stdout);
802         fputs(reset, stdout);
803         putchar('\n');
804         if (!had_nl)
805                 fputs("\\ No newline at end of file\n", stdout);
806 }
807
808 static char *output_prefix(struct diff_options *opt)
809 {
810         char *prefix = "";
811
812         if (opt->output_prefix) {
813                 struct strbuf *sb = opt->output_prefix(opt, opt->output_prefix_data);
814                 prefix = sb->buf;
815         }
816
817         return prefix;
818 }
819
820 static void dump_diff_hacky_one(struct rev_info *rev, struct line_log_data *range)
821 {
822         int i, j = 0;
823         long p_lines, t_lines;
824         unsigned long *p_ends = NULL, *t_ends = NULL;
825         struct diff_filepair *pair = range->pair;
826         struct diff_ranges *diff = &range->diff;
827
828         struct diff_options *opt = &rev->diffopt;
829         char *prefix = output_prefix(opt);
830         const char *c_reset = diff_get_color(opt->use_color, DIFF_RESET);
831         const char *c_frag = diff_get_color(opt->use_color, DIFF_FRAGINFO);
832         const char *c_meta = diff_get_color(opt->use_color, DIFF_METAINFO);
833         const char *c_old = diff_get_color(opt->use_color, DIFF_FILE_OLD);
834         const char *c_new = diff_get_color(opt->use_color, DIFF_FILE_NEW);
835         const char *c_plain = diff_get_color(opt->use_color, DIFF_PLAIN);
836
837         if (!pair || !diff)
838                 return;
839
840         if (pair->one->sha1_valid)
841                 fill_line_ends(pair->one, &p_lines, &p_ends);
842         fill_line_ends(pair->two, &t_lines, &t_ends);
843
844         printf("%s%sdiff --git a/%s b/%s%s\n", prefix, c_meta, pair->one->path, pair->two->path, c_reset);
845         printf("%s%s--- %s%s%s\n", prefix, c_meta,
846                pair->one->sha1_valid ? "a/" : "",
847                pair->one->sha1_valid ? pair->one->path : "/dev/null",
848                c_reset);
849         printf("%s%s+++ b/%s%s\n", prefix, c_meta, pair->two->path, c_reset);
850         for (i = 0; i < range->ranges.nr; i++) {
851                 long p_start, p_end;
852                 long t_start = range->ranges.ranges[i].start;
853                 long t_end = range->ranges.ranges[i].end;
854                 long t_cur = t_start;
855                 int j_last;
856
857                 while (j < diff->target.nr && diff->target.ranges[j].end < t_start)
858                         j++;
859                 if (j == diff->target.nr || diff->target.ranges[j].start > t_end)
860                         continue;
861
862                 /* Scan ahead to determine the last diff that falls in this range */
863                 j_last = j;
864                 while (j_last < diff->target.nr && diff->target.ranges[j_last].start < t_end)
865                         j_last++;
866                 if (j_last > j)
867                         j_last--;
868
869                 /*
870                  * Compute parent hunk headers: we know that the diff
871                  * has the correct line numbers (but not all hunks).
872                  * So it suffices to shift the start/end according to
873                  * the line numbers of the first/last hunk(s) that
874                  * fall in this range.
875                  */
876                 if (t_start < diff->target.ranges[j].start)
877                         p_start = diff->parent.ranges[j].start - (diff->target.ranges[j].start-t_start);
878                 else
879                         p_start = diff->parent.ranges[j].start;
880                 if (t_end > diff->target.ranges[j_last].end)
881                         p_end = diff->parent.ranges[j_last].end + (t_end-diff->target.ranges[j_last].end);
882                 else
883                         p_end = diff->parent.ranges[j_last].end;
884
885                 if (!p_start && !p_end) {
886                         p_start = -1;
887                         p_end = -1;
888                 }
889
890                 /* Now output a diff hunk for this range */
891                 printf("%s%s@@ -%ld,%ld +%ld,%ld @@%s\n",
892                        prefix, c_frag,
893                        p_start+1, p_end-p_start, t_start+1, t_end-t_start,
894                        c_reset);
895                 while (j < diff->target.nr && diff->target.ranges[j].start < t_end) {
896                         int k;
897                         for (; t_cur < diff->target.ranges[j].start; t_cur++)
898                                 print_line(prefix, ' ', t_cur, t_ends, pair->two->data,
899                                            c_plain, c_reset);
900                         for (k = diff->parent.ranges[j].start; k < diff->parent.ranges[j].end; k++)
901                                 print_line(prefix, '-', k, p_ends, pair->one->data,
902                                            c_old, c_reset);
903                         for (; t_cur < diff->target.ranges[j].end && t_cur < t_end; t_cur++)
904                                 print_line(prefix, '+', t_cur, t_ends, pair->two->data,
905                                            c_new, c_reset);
906                         j++;
907                 }
908                 for (; t_cur < t_end; t_cur++)
909                         print_line(prefix, ' ', t_cur, t_ends, pair->two->data,
910                                    c_plain, c_reset);
911         }
912
913         free(p_ends);
914         free(t_ends);
915 }
916
917 /*
918  * NEEDSWORK: manually building a diff here is not the Right
919  * Thing(tm).  log -L should be built into the diff pipeline.
920  */
921 static void dump_diff_hacky(struct rev_info *rev, struct line_log_data *range)
922 {
923         puts(output_prefix(&rev->diffopt));
924         while (range) {
925                 dump_diff_hacky_one(rev, range);
926                 range = range->next;
927         }
928 }
929
930 /*
931  * Unlike most other functions, this destructively operates on
932  * 'range'.
933  */
934 static int process_diff_filepair(struct rev_info *rev,
935                                  struct diff_filepair *pair,
936                                  struct line_log_data *range,
937                                  struct diff_ranges **diff_out)
938 {
939         struct line_log_data *rg = range;
940         struct range_set tmp;
941         struct diff_ranges diff;
942         mmfile_t file_parent, file_target;
943
944         assert(pair->two->path);
945         while (rg) {
946                 assert(rg->spec->path);
947                 if (!strcmp(rg->spec->path, pair->two->path))
948                         break;
949                 rg = rg->next;
950         }
951
952         if (!rg)
953                 return 0;
954         if (rg->ranges.nr == 0)
955                 return 0;
956
957         assert(pair->two->sha1_valid);
958         diff_populate_filespec(pair->two, 0);
959         file_target.ptr = pair->two->data;
960         file_target.size = pair->two->size;
961
962         if (pair->one->sha1_valid) {
963                 diff_populate_filespec(pair->one, 0);
964                 file_parent.ptr = pair->one->data;
965                 file_parent.size = pair->one->size;
966         } else {
967                 file_parent.ptr = "";
968                 file_parent.size = 0;
969         }
970
971         diff_ranges_init(&diff);
972         collect_diff(&file_parent, &file_target, &diff);
973
974         /* NEEDSWORK should apply some heuristics to prevent mismatches */
975         rg->spec->path = xstrdup(pair->one->path);
976
977         range_set_init(&tmp, 0);
978         range_set_map_across_diff(&tmp, &rg->ranges, &diff, diff_out);
979         range_set_release(&rg->ranges);
980         range_set_move(&rg->ranges, &tmp);
981
982         diff_ranges_release(&diff);
983
984         return ((*diff_out)->parent.nr > 0);
985 }
986
987 static struct diff_filepair *diff_filepair_dup(struct diff_filepair *pair)
988 {
989         struct diff_filepair *new = xmalloc(sizeof(struct diff_filepair));
990         new->one = pair->one;
991         new->two = pair->two;
992         new->one->count++;
993         new->two->count++;
994         return new;
995 }
996
997 static void free_diffqueues(int n, struct diff_queue_struct *dq)
998 {
999         int i, j;
1000         for (i = 0; i < n; i++)
1001                 for (j = 0; j < dq[i].nr; j++)
1002                         diff_free_filepair(dq[i].queue[j]);
1003         free(dq);
1004 }
1005
1006 static int process_all_files(struct line_log_data **range_out,
1007                              struct rev_info *rev,
1008                              struct diff_queue_struct *queue,
1009                              struct line_log_data *range)
1010 {
1011         int i, changed = 0;
1012
1013         *range_out = line_log_data_copy(range);
1014
1015         for (i = 0; i < queue->nr; i++) {
1016                 struct diff_ranges *pairdiff = NULL;
1017                 if (process_diff_filepair(rev, queue->queue[i], *range_out, &pairdiff)) {
1018                         struct line_log_data *rg = range;
1019                         changed++;
1020                         /* NEEDSWORK tramples over data structures not owned here */
1021                         while (rg && strcmp(rg->spec->path, queue->queue[i]->two->path))
1022                                 rg = rg->next;
1023                         assert(rg);
1024                         rg->pair = diff_filepair_dup(queue->queue[i]);
1025                         memcpy(&rg->diff, pairdiff, sizeof(struct diff_ranges));
1026                 }
1027         }
1028
1029         return changed;
1030 }
1031
1032 int line_log_print(struct rev_info *rev, struct commit *commit)
1033 {
1034         struct line_log_data *range = lookup_line_range(rev, commit);
1035
1036         show_log(rev);
1037         dump_diff_hacky(rev, range);
1038         return 1;
1039 }
1040
1041 static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *commit,
1042                                           struct line_log_data *range)
1043 {
1044         struct commit *parent = NULL;
1045         struct diff_queue_struct queue;
1046         struct line_log_data *parent_range;
1047         int changed;
1048
1049         if (commit->parents)
1050                 parent = commit->parents->item;
1051
1052         queue_diffs(&rev->diffopt, &queue, commit, parent);
1053         changed = process_all_files(&parent_range, rev, &queue, range);
1054         if (parent)
1055                 add_line_range(rev, parent, parent_range);
1056         return changed;
1057 }
1058
1059 static int process_ranges_merge_commit(struct rev_info *rev, struct commit *commit,
1060                                        struct line_log_data *range)
1061 {
1062         struct diff_queue_struct *diffqueues;
1063         struct line_log_data **cand;
1064         struct commit **parents;
1065         struct commit_list *p;
1066         int i;
1067         int nparents = count_parents(commit);
1068
1069         diffqueues = xmalloc(nparents * sizeof(*diffqueues));
1070         cand = xmalloc(nparents * sizeof(*cand));
1071         parents = xmalloc(nparents * sizeof(*parents));
1072
1073         p = commit->parents;
1074         for (i = 0; i < nparents; i++) {
1075                 parents[i] = p->item;
1076                 p = p->next;
1077                 queue_diffs(&rev->diffopt, &diffqueues[i], commit, parents[i]);
1078         }
1079
1080         for (i = 0; i < nparents; i++) {
1081                 int changed;
1082                 cand[i] = NULL;
1083                 changed = process_all_files(&cand[i], rev, &diffqueues[i], range);
1084                 if (!changed) {
1085                         /*
1086                          * This parent can take all the blame, so we
1087                          * don't follow any other path in history
1088                          */
1089                         add_line_range(rev, parents[i], cand[i]);
1090                         clear_commit_line_range(rev, commit);
1091                         commit->parents = xmalloc(sizeof(struct commit_list));
1092                         commit->parents->item = parents[i];
1093                         commit->parents->next = NULL;
1094                         free(parents);
1095                         free(cand);
1096                         free_diffqueues(nparents, diffqueues);
1097                         /* NEEDSWORK leaking like a sieve */
1098                         return 0;
1099                 }
1100         }
1101
1102         /*
1103          * No single parent took the blame.  We add the candidates
1104          * from the above loop to the parents.
1105          */
1106         for (i = 0; i < nparents; i++) {
1107                 add_line_range(rev, parents[i], cand[i]);
1108         }
1109
1110         clear_commit_line_range(rev, commit);
1111         free(parents);
1112         free(cand);
1113         free_diffqueues(nparents, diffqueues);
1114         return 1;
1115
1116         /* NEEDSWORK evil merge detection stuff */
1117         /* NEEDSWORK leaking like a sieve */
1118 }
1119
1120 static int process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit)
1121 {
1122         struct line_log_data *range = lookup_line_range(rev, commit);
1123         int changed = 0;
1124
1125         if (range) {
1126                 if (!commit->parents || !commit->parents->next)
1127                         changed = process_ranges_ordinary_commit(rev, commit, range);
1128                 else
1129                         changed = process_ranges_merge_commit(rev, commit, range);
1130         }
1131
1132         if (!changed)
1133                 commit->object.flags |= TREESAME;
1134
1135         return changed;
1136 }
1137
1138 static enum rewrite_result line_log_rewrite_one(struct rev_info *rev, struct commit **pp)
1139 {
1140         for (;;) {
1141                 struct commit *p = *pp;
1142                 if (p->parents && p->parents->next)
1143                         return rewrite_one_ok;
1144                 if (p->object.flags & UNINTERESTING)
1145                         return rewrite_one_ok;
1146                 if (!(p->object.flags & TREESAME))
1147                         return rewrite_one_ok;
1148                 if (!p->parents)
1149                         return rewrite_one_noparents;
1150                 *pp = p->parents->item;
1151         }
1152 }
1153
1154 int line_log_filter(struct rev_info *rev)
1155 {
1156         struct commit *commit;
1157         struct commit_list *list = rev->commits;
1158         struct commit_list *out = NULL, **pp = &out;
1159
1160         while (list) {
1161                 struct commit_list *to_free = NULL;
1162                 commit = list->item;
1163                 if (process_ranges_arbitrary_commit(rev, commit)) {
1164                         *pp = list;
1165                         pp = &list->next;
1166                 } else
1167                         to_free = list;
1168                 list = list->next;
1169                 free(to_free);
1170         }
1171         *pp = NULL;
1172
1173         for (list = out; list; list = list->next)
1174                 rewrite_parents(rev, list->item, line_log_rewrite_one);
1175
1176         rev->commits = out;
1177
1178         return 0;
1179 }