Merge branch 'nd/fetch-into-shallow'
[git] / builtin / blame.c
1 /*
2  * Blame
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19 #include "string-list.h"
20 #include "mailmap.h"
21 #include "parse-options.h"
22 #include "utf8.h"
23 #include "userdiff.h"
24 #include "line-range.h"
25 #include "line-log.h"
26
27 static char blame_usage[] = N_("git blame [options] [rev-opts] [rev] [--] file");
28
29 static const char *blame_opt_usage[] = {
30         blame_usage,
31         "",
32         N_("[rev-opts] are documented in git-rev-list(1)"),
33         NULL
34 };
35
36 static int longest_file;
37 static int longest_author;
38 static int max_orig_digits;
39 static int max_digits;
40 static int max_score_digits;
41 static int show_root;
42 static int reverse;
43 static int blank_boundary;
44 static int incremental;
45 static int xdl_opts;
46 static int abbrev = -1;
47 static int no_whole_file_rename;
48
49 static enum date_mode blame_date_mode = DATE_ISO8601;
50 static size_t blame_date_width;
51
52 static struct string_list mailmap;
53
54 #ifndef DEBUG
55 #define DEBUG 0
56 #endif
57
58 /* stats */
59 static int num_read_blob;
60 static int num_get_patch;
61 static int num_commits;
62
63 #define PICKAXE_BLAME_MOVE              01
64 #define PICKAXE_BLAME_COPY              02
65 #define PICKAXE_BLAME_COPY_HARDER       04
66 #define PICKAXE_BLAME_COPY_HARDEST      010
67
68 /*
69  * blame for a blame_entry with score lower than these thresholds
70  * is not passed to the parent using move/copy logic.
71  */
72 static unsigned blame_move_score;
73 static unsigned blame_copy_score;
74 #define BLAME_DEFAULT_MOVE_SCORE        20
75 #define BLAME_DEFAULT_COPY_SCORE        40
76
77 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
78 #define METAINFO_SHOWN          (1u<<12)
79 #define MORE_THAN_ONE_PATH      (1u<<13)
80
81 /*
82  * One blob in a commit that is being suspected
83  */
84 struct origin {
85         int refcnt;
86         struct origin *previous;
87         struct commit *commit;
88         mmfile_t file;
89         unsigned char blob_sha1[20];
90         unsigned mode;
91         char path[FLEX_ARRAY];
92 };
93
94 static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, long ctxlen,
95                       xdl_emit_hunk_consume_func_t hunk_func, void *cb_data)
96 {
97         xpparam_t xpp = {0};
98         xdemitconf_t xecfg = {0};
99         xdemitcb_t ecb = {NULL};
100
101         xpp.flags = xdl_opts;
102         xecfg.ctxlen = ctxlen;
103         xecfg.hunk_func = hunk_func;
104         ecb.priv = cb_data;
105         return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
106 }
107
108 /*
109  * Prepare diff_filespec and convert it using diff textconv API
110  * if the textconv driver exists.
111  * Return 1 if the conversion succeeds, 0 otherwise.
112  */
113 int textconv_object(const char *path,
114                     unsigned mode,
115                     const unsigned char *sha1,
116                     int sha1_valid,
117                     char **buf,
118                     unsigned long *buf_size)
119 {
120         struct diff_filespec *df;
121         struct userdiff_driver *textconv;
122
123         df = alloc_filespec(path);
124         fill_filespec(df, sha1, sha1_valid, mode);
125         textconv = get_textconv(df);
126         if (!textconv) {
127                 free_filespec(df);
128                 return 0;
129         }
130
131         *buf_size = fill_textconv(textconv, df, buf);
132         free_filespec(df);
133         return 1;
134 }
135
136 /*
137  * Given an origin, prepare mmfile_t structure to be used by the
138  * diff machinery
139  */
140 static void fill_origin_blob(struct diff_options *opt,
141                              struct origin *o, mmfile_t *file)
142 {
143         if (!o->file.ptr) {
144                 enum object_type type;
145                 unsigned long file_size;
146
147                 num_read_blob++;
148                 if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
149                     textconv_object(o->path, o->mode, o->blob_sha1, 1, &file->ptr, &file_size))
150                         ;
151                 else
152                         file->ptr = read_sha1_file(o->blob_sha1, &type, &file_size);
153                 file->size = file_size;
154
155                 if (!file->ptr)
156                         die("Cannot read blob %s for path %s",
157                             sha1_to_hex(o->blob_sha1),
158                             o->path);
159                 o->file = *file;
160         }
161         else
162                 *file = o->file;
163 }
164
165 /*
166  * Origin is refcounted and usually we keep the blob contents to be
167  * reused.
168  */
169 static inline struct origin *origin_incref(struct origin *o)
170 {
171         if (o)
172                 o->refcnt++;
173         return o;
174 }
175
176 static void origin_decref(struct origin *o)
177 {
178         if (o && --o->refcnt <= 0) {
179                 if (o->previous)
180                         origin_decref(o->previous);
181                 free(o->file.ptr);
182                 free(o);
183         }
184 }
185
186 static void drop_origin_blob(struct origin *o)
187 {
188         if (o->file.ptr) {
189                 free(o->file.ptr);
190                 o->file.ptr = NULL;
191         }
192 }
193
194 /*
195  * Each group of lines is described by a blame_entry; it can be split
196  * as we pass blame to the parents.  They form a linked list in the
197  * scoreboard structure, sorted by the target line number.
198  */
199 struct blame_entry {
200         struct blame_entry *prev;
201         struct blame_entry *next;
202
203         /* the first line of this group in the final image;
204          * internally all line numbers are 0 based.
205          */
206         int lno;
207
208         /* how many lines this group has */
209         int num_lines;
210
211         /* the commit that introduced this group into the final image */
212         struct origin *suspect;
213
214         /* true if the suspect is truly guilty; false while we have not
215          * checked if the group came from one of its parents.
216          */
217         char guilty;
218
219         /* true if the entry has been scanned for copies in the current parent
220          */
221         char scanned;
222
223         /* the line number of the first line of this group in the
224          * suspect's file; internally all line numbers are 0 based.
225          */
226         int s_lno;
227
228         /* how significant this entry is -- cached to avoid
229          * scanning the lines over and over.
230          */
231         unsigned score;
232 };
233
234 /*
235  * The current state of the blame assignment.
236  */
237 struct scoreboard {
238         /* the final commit (i.e. where we started digging from) */
239         struct commit *final;
240         struct rev_info *revs;
241         const char *path;
242
243         /*
244          * The contents in the final image.
245          * Used by many functions to obtain contents of the nth line,
246          * indexed with scoreboard.lineno[blame_entry.lno].
247          */
248         const char *final_buf;
249         unsigned long final_buf_size;
250
251         /* linked list of blames */
252         struct blame_entry *ent;
253
254         /* look-up a line in the final buffer */
255         int num_lines;
256         int *lineno;
257 };
258
259 static inline int same_suspect(struct origin *a, struct origin *b)
260 {
261         if (a == b)
262                 return 1;
263         if (a->commit != b->commit)
264                 return 0;
265         return !strcmp(a->path, b->path);
266 }
267
268 static void sanity_check_refcnt(struct scoreboard *);
269
270 /*
271  * If two blame entries that are next to each other came from
272  * contiguous lines in the same origin (i.e. <commit, path> pair),
273  * merge them together.
274  */
275 static void coalesce(struct scoreboard *sb)
276 {
277         struct blame_entry *ent, *next;
278
279         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
280                 if (same_suspect(ent->suspect, next->suspect) &&
281                     ent->guilty == next->guilty &&
282                     ent->s_lno + ent->num_lines == next->s_lno) {
283                         ent->num_lines += next->num_lines;
284                         ent->next = next->next;
285                         if (ent->next)
286                                 ent->next->prev = ent;
287                         origin_decref(next->suspect);
288                         free(next);
289                         ent->score = 0;
290                         next = ent; /* again */
291                 }
292         }
293
294         if (DEBUG) /* sanity */
295                 sanity_check_refcnt(sb);
296 }
297
298 /*
299  * Given a commit and a path in it, create a new origin structure.
300  * The callers that add blame to the scoreboard should use
301  * get_origin() to obtain shared, refcounted copy instead of calling
302  * this function directly.
303  */
304 static struct origin *make_origin(struct commit *commit, const char *path)
305 {
306         struct origin *o;
307         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
308         o->commit = commit;
309         o->refcnt = 1;
310         strcpy(o->path, path);
311         return o;
312 }
313
314 /*
315  * Locate an existing origin or create a new one.
316  */
317 static struct origin *get_origin(struct scoreboard *sb,
318                                  struct commit *commit,
319                                  const char *path)
320 {
321         struct blame_entry *e;
322
323         for (e = sb->ent; e; e = e->next) {
324                 if (e->suspect->commit == commit &&
325                     !strcmp(e->suspect->path, path))
326                         return origin_incref(e->suspect);
327         }
328         return make_origin(commit, path);
329 }
330
331 /*
332  * Fill the blob_sha1 field of an origin if it hasn't, so that later
333  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
334  * for an origin is also used to pass the blame for the entire file to
335  * the parent to detect the case where a child's blob is identical to
336  * that of its parent's.
337  *
338  * This also fills origin->mode for corresponding tree path.
339  */
340 static int fill_blob_sha1_and_mode(struct origin *origin)
341 {
342         if (!is_null_sha1(origin->blob_sha1))
343                 return 0;
344         if (get_tree_entry(origin->commit->object.sha1,
345                            origin->path,
346                            origin->blob_sha1, &origin->mode))
347                 goto error_out;
348         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
349                 goto error_out;
350         return 0;
351  error_out:
352         hashclr(origin->blob_sha1);
353         origin->mode = S_IFINVALID;
354         return -1;
355 }
356
357 /*
358  * We have an origin -- check if the same path exists in the
359  * parent and return an origin structure to represent it.
360  */
361 static struct origin *find_origin(struct scoreboard *sb,
362                                   struct commit *parent,
363                                   struct origin *origin)
364 {
365         struct origin *porigin = NULL;
366         struct diff_options diff_opts;
367         const char *paths[2];
368
369         if (parent->util) {
370                 /*
371                  * Each commit object can cache one origin in that
372                  * commit.  This is a freestanding copy of origin and
373                  * not refcounted.
374                  */
375                 struct origin *cached = parent->util;
376                 if (!strcmp(cached->path, origin->path)) {
377                         /*
378                          * The same path between origin and its parent
379                          * without renaming -- the most common case.
380                          */
381                         porigin = get_origin(sb, parent, cached->path);
382
383                         /*
384                          * If the origin was newly created (i.e. get_origin
385                          * would call make_origin if none is found in the
386                          * scoreboard), it does not know the blob_sha1/mode,
387                          * so copy it.  Otherwise porigin was in the
388                          * scoreboard and already knows blob_sha1/mode.
389                          */
390                         if (porigin->refcnt == 1) {
391                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
392                                 porigin->mode = cached->mode;
393                         }
394                         return porigin;
395                 }
396                 /* otherwise it was not very useful; free it */
397                 free(parent->util);
398                 parent->util = NULL;
399         }
400
401         /* See if the origin->path is different between parent
402          * and origin first.  Most of the time they are the
403          * same and diff-tree is fairly efficient about this.
404          */
405         diff_setup(&diff_opts);
406         DIFF_OPT_SET(&diff_opts, RECURSIVE);
407         diff_opts.detect_rename = 0;
408         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
409         paths[0] = origin->path;
410         paths[1] = NULL;
411
412         parse_pathspec(&diff_opts.pathspec, PATHSPEC_ALL_MAGIC, 0, "", paths);
413         diff_setup_done(&diff_opts);
414
415         if (is_null_sha1(origin->commit->object.sha1))
416                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
417         else
418                 diff_tree_sha1(parent->tree->object.sha1,
419                                origin->commit->tree->object.sha1,
420                                "", &diff_opts);
421         diffcore_std(&diff_opts);
422
423         if (!diff_queued_diff.nr) {
424                 /* The path is the same as parent */
425                 porigin = get_origin(sb, parent, origin->path);
426                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
427                 porigin->mode = origin->mode;
428         } else {
429                 /*
430                  * Since origin->path is a pathspec, if the parent
431                  * commit had it as a directory, we will see a whole
432                  * bunch of deletion of files in the directory that we
433                  * do not care about.
434                  */
435                 int i;
436                 struct diff_filepair *p = NULL;
437                 for (i = 0; i < diff_queued_diff.nr; i++) {
438                         const char *name;
439                         p = diff_queued_diff.queue[i];
440                         name = p->one->path ? p->one->path : p->two->path;
441                         if (!strcmp(name, origin->path))
442                                 break;
443                 }
444                 if (!p)
445                         die("internal error in blame::find_origin");
446                 switch (p->status) {
447                 default:
448                         die("internal error in blame::find_origin (%c)",
449                             p->status);
450                 case 'M':
451                         porigin = get_origin(sb, parent, origin->path);
452                         hashcpy(porigin->blob_sha1, p->one->sha1);
453                         porigin->mode = p->one->mode;
454                         break;
455                 case 'A':
456                 case 'T':
457                         /* Did not exist in parent, or type changed */
458                         break;
459                 }
460         }
461         diff_flush(&diff_opts);
462         free_pathspec(&diff_opts.pathspec);
463         if (porigin) {
464                 /*
465                  * Create a freestanding copy that is not part of
466                  * the refcounted origin found in the scoreboard, and
467                  * cache it in the commit.
468                  */
469                 struct origin *cached;
470
471                 cached = make_origin(porigin->commit, porigin->path);
472                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
473                 cached->mode = porigin->mode;
474                 parent->util = cached;
475         }
476         return porigin;
477 }
478
479 /*
480  * We have an origin -- find the path that corresponds to it in its
481  * parent and return an origin structure to represent it.
482  */
483 static struct origin *find_rename(struct scoreboard *sb,
484                                   struct commit *parent,
485                                   struct origin *origin)
486 {
487         struct origin *porigin = NULL;
488         struct diff_options diff_opts;
489         int i;
490
491         diff_setup(&diff_opts);
492         DIFF_OPT_SET(&diff_opts, RECURSIVE);
493         diff_opts.detect_rename = DIFF_DETECT_RENAME;
494         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
495         diff_opts.single_follow = origin->path;
496         diff_setup_done(&diff_opts);
497
498         if (is_null_sha1(origin->commit->object.sha1))
499                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
500         else
501                 diff_tree_sha1(parent->tree->object.sha1,
502                                origin->commit->tree->object.sha1,
503                                "", &diff_opts);
504         diffcore_std(&diff_opts);
505
506         for (i = 0; i < diff_queued_diff.nr; i++) {
507                 struct diff_filepair *p = diff_queued_diff.queue[i];
508                 if ((p->status == 'R' || p->status == 'C') &&
509                     !strcmp(p->two->path, origin->path)) {
510                         porigin = get_origin(sb, parent, p->one->path);
511                         hashcpy(porigin->blob_sha1, p->one->sha1);
512                         porigin->mode = p->one->mode;
513                         break;
514                 }
515         }
516         diff_flush(&diff_opts);
517         free_pathspec(&diff_opts.pathspec);
518         return porigin;
519 }
520
521 /*
522  * Link in a new blame entry to the scoreboard.  Entries that cover the
523  * same line range have been removed from the scoreboard previously.
524  */
525 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
526 {
527         struct blame_entry *ent, *prev = NULL;
528
529         origin_incref(e->suspect);
530
531         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
532                 prev = ent;
533
534         /* prev, if not NULL, is the last one that is below e */
535         e->prev = prev;
536         if (prev) {
537                 e->next = prev->next;
538                 prev->next = e;
539         }
540         else {
541                 e->next = sb->ent;
542                 sb->ent = e;
543         }
544         if (e->next)
545                 e->next->prev = e;
546 }
547
548 /*
549  * src typically is on-stack; we want to copy the information in it to
550  * a malloced blame_entry that is already on the linked list of the
551  * scoreboard.  The origin of dst loses a refcnt while the origin of src
552  * gains one.
553  */
554 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
555 {
556         struct blame_entry *p, *n;
557
558         p = dst->prev;
559         n = dst->next;
560         origin_incref(src->suspect);
561         origin_decref(dst->suspect);
562         memcpy(dst, src, sizeof(*src));
563         dst->prev = p;
564         dst->next = n;
565         dst->score = 0;
566 }
567
568 static const char *nth_line(struct scoreboard *sb, long lno)
569 {
570         return sb->final_buf + sb->lineno[lno];
571 }
572
573 static const char *nth_line_cb(void *data, long lno)
574 {
575         return nth_line((struct scoreboard *)data, lno);
576 }
577
578 /*
579  * It is known that lines between tlno to same came from parent, and e
580  * has an overlap with that range.  it also is known that parent's
581  * line plno corresponds to e's line tlno.
582  *
583  *                <---- e ----->
584  *                   <------>
585  *                   <------------>
586  *             <------------>
587  *             <------------------>
588  *
589  * Split e into potentially three parts; before this chunk, the chunk
590  * to be blamed for the parent, and after that portion.
591  */
592 static void split_overlap(struct blame_entry *split,
593                           struct blame_entry *e,
594                           int tlno, int plno, int same,
595                           struct origin *parent)
596 {
597         int chunk_end_lno;
598         memset(split, 0, sizeof(struct blame_entry [3]));
599
600         if (e->s_lno < tlno) {
601                 /* there is a pre-chunk part not blamed on parent */
602                 split[0].suspect = origin_incref(e->suspect);
603                 split[0].lno = e->lno;
604                 split[0].s_lno = e->s_lno;
605                 split[0].num_lines = tlno - e->s_lno;
606                 split[1].lno = e->lno + tlno - e->s_lno;
607                 split[1].s_lno = plno;
608         }
609         else {
610                 split[1].lno = e->lno;
611                 split[1].s_lno = plno + (e->s_lno - tlno);
612         }
613
614         if (same < e->s_lno + e->num_lines) {
615                 /* there is a post-chunk part not blamed on parent */
616                 split[2].suspect = origin_incref(e->suspect);
617                 split[2].lno = e->lno + (same - e->s_lno);
618                 split[2].s_lno = e->s_lno + (same - e->s_lno);
619                 split[2].num_lines = e->s_lno + e->num_lines - same;
620                 chunk_end_lno = split[2].lno;
621         }
622         else
623                 chunk_end_lno = e->lno + e->num_lines;
624         split[1].num_lines = chunk_end_lno - split[1].lno;
625
626         /*
627          * if it turns out there is nothing to blame the parent for,
628          * forget about the splitting.  !split[1].suspect signals this.
629          */
630         if (split[1].num_lines < 1)
631                 return;
632         split[1].suspect = origin_incref(parent);
633 }
634
635 /*
636  * split_overlap() divided an existing blame e into up to three parts
637  * in split.  Adjust the linked list of blames in the scoreboard to
638  * reflect the split.
639  */
640 static void split_blame(struct scoreboard *sb,
641                         struct blame_entry *split,
642                         struct blame_entry *e)
643 {
644         struct blame_entry *new_entry;
645
646         if (split[0].suspect && split[2].suspect) {
647                 /* The first part (reuse storage for the existing entry e) */
648                 dup_entry(e, &split[0]);
649
650                 /* The last part -- me */
651                 new_entry = xmalloc(sizeof(*new_entry));
652                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
653                 add_blame_entry(sb, new_entry);
654
655                 /* ... and the middle part -- parent */
656                 new_entry = xmalloc(sizeof(*new_entry));
657                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
658                 add_blame_entry(sb, new_entry);
659         }
660         else if (!split[0].suspect && !split[2].suspect)
661                 /*
662                  * The parent covers the entire area; reuse storage for
663                  * e and replace it with the parent.
664                  */
665                 dup_entry(e, &split[1]);
666         else if (split[0].suspect) {
667                 /* me and then parent */
668                 dup_entry(e, &split[0]);
669
670                 new_entry = xmalloc(sizeof(*new_entry));
671                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
672                 add_blame_entry(sb, new_entry);
673         }
674         else {
675                 /* parent and then me */
676                 dup_entry(e, &split[1]);
677
678                 new_entry = xmalloc(sizeof(*new_entry));
679                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
680                 add_blame_entry(sb, new_entry);
681         }
682
683         if (DEBUG) { /* sanity */
684                 struct blame_entry *ent;
685                 int lno = sb->ent->lno, corrupt = 0;
686
687                 for (ent = sb->ent; ent; ent = ent->next) {
688                         if (lno != ent->lno)
689                                 corrupt = 1;
690                         if (ent->s_lno < 0)
691                                 corrupt = 1;
692                         lno += ent->num_lines;
693                 }
694                 if (corrupt) {
695                         lno = sb->ent->lno;
696                         for (ent = sb->ent; ent; ent = ent->next) {
697                                 printf("L %8d l %8d n %8d\n",
698                                        lno, ent->lno, ent->num_lines);
699                                 lno = ent->lno + ent->num_lines;
700                         }
701                         die("oops");
702                 }
703         }
704 }
705
706 /*
707  * After splitting the blame, the origins used by the
708  * on-stack blame_entry should lose one refcnt each.
709  */
710 static void decref_split(struct blame_entry *split)
711 {
712         int i;
713
714         for (i = 0; i < 3; i++)
715                 origin_decref(split[i].suspect);
716 }
717
718 /*
719  * Helper for blame_chunk().  blame_entry e is known to overlap with
720  * the patch hunk; split it and pass blame to the parent.
721  */
722 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
723                           int tlno, int plno, int same,
724                           struct origin *parent)
725 {
726         struct blame_entry split[3];
727
728         split_overlap(split, e, tlno, plno, same, parent);
729         if (split[1].suspect)
730                 split_blame(sb, split, e);
731         decref_split(split);
732 }
733
734 /*
735  * Find the line number of the last line the target is suspected for.
736  */
737 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
738 {
739         struct blame_entry *e;
740         int last_in_target = -1;
741
742         for (e = sb->ent; e; e = e->next) {
743                 if (e->guilty || !same_suspect(e->suspect, target))
744                         continue;
745                 if (last_in_target < e->s_lno + e->num_lines)
746                         last_in_target = e->s_lno + e->num_lines;
747         }
748         return last_in_target;
749 }
750
751 /*
752  * Process one hunk from the patch between the current suspect for
753  * blame_entry e and its parent.  Find and split the overlap, and
754  * pass blame to the overlapping part to the parent.
755  */
756 static void blame_chunk(struct scoreboard *sb,
757                         int tlno, int plno, int same,
758                         struct origin *target, struct origin *parent)
759 {
760         struct blame_entry *e;
761
762         for (e = sb->ent; e; e = e->next) {
763                 if (e->guilty || !same_suspect(e->suspect, target))
764                         continue;
765                 if (same <= e->s_lno)
766                         continue;
767                 if (tlno < e->s_lno + e->num_lines)
768                         blame_overlap(sb, e, tlno, plno, same, parent);
769         }
770 }
771
772 struct blame_chunk_cb_data {
773         struct scoreboard *sb;
774         struct origin *target;
775         struct origin *parent;
776         long plno;
777         long tlno;
778 };
779
780 static int blame_chunk_cb(long start_a, long count_a,
781                           long start_b, long count_b, void *data)
782 {
783         struct blame_chunk_cb_data *d = data;
784         blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent);
785         d->plno = start_a + count_a;
786         d->tlno = start_b + count_b;
787         return 0;
788 }
789
790 /*
791  * We are looking at the origin 'target' and aiming to pass blame
792  * for the lines it is suspected to its parent.  Run diff to find
793  * which lines came from parent and pass blame for them.
794  */
795 static int pass_blame_to_parent(struct scoreboard *sb,
796                                 struct origin *target,
797                                 struct origin *parent)
798 {
799         int last_in_target;
800         mmfile_t file_p, file_o;
801         struct blame_chunk_cb_data d;
802
803         memset(&d, 0, sizeof(d));
804         d.sb = sb; d.target = target; d.parent = parent;
805         last_in_target = find_last_in_target(sb, target);
806         if (last_in_target < 0)
807                 return 1; /* nothing remains for this target */
808
809         fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
810         fill_origin_blob(&sb->revs->diffopt, target, &file_o);
811         num_get_patch++;
812
813         diff_hunks(&file_p, &file_o, 0, blame_chunk_cb, &d);
814         /* The rest (i.e. anything after tlno) are the same as the parent */
815         blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
816
817         return 0;
818 }
819
820 /*
821  * The lines in blame_entry after splitting blames many times can become
822  * very small and trivial, and at some point it becomes pointless to
823  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
824  * ordinary C program, and it is not worth to say it was copied from
825  * totally unrelated file in the parent.
826  *
827  * Compute how trivial the lines in the blame_entry are.
828  */
829 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
830 {
831         unsigned score;
832         const char *cp, *ep;
833
834         if (e->score)
835                 return e->score;
836
837         score = 1;
838         cp = nth_line(sb, e->lno);
839         ep = nth_line(sb, e->lno + e->num_lines);
840         while (cp < ep) {
841                 unsigned ch = *((unsigned char *)cp);
842                 if (isalnum(ch))
843                         score++;
844                 cp++;
845         }
846         e->score = score;
847         return score;
848 }
849
850 /*
851  * best_so_far[] and this[] are both a split of an existing blame_entry
852  * that passes blame to the parent.  Maintain best_so_far the best split
853  * so far, by comparing this and best_so_far and copying this into
854  * bst_so_far as needed.
855  */
856 static void copy_split_if_better(struct scoreboard *sb,
857                                  struct blame_entry *best_so_far,
858                                  struct blame_entry *this)
859 {
860         int i;
861
862         if (!this[1].suspect)
863                 return;
864         if (best_so_far[1].suspect) {
865                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
866                         return;
867         }
868
869         for (i = 0; i < 3; i++)
870                 origin_incref(this[i].suspect);
871         decref_split(best_so_far);
872         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
873 }
874
875 /*
876  * We are looking at a part of the final image represented by
877  * ent (tlno and same are offset by ent->s_lno).
878  * tlno is where we are looking at in the final image.
879  * up to (but not including) same match preimage.
880  * plno is where we are looking at in the preimage.
881  *
882  * <-------------- final image ---------------------->
883  *       <------ent------>
884  *         ^tlno ^same
885  *    <---------preimage----->
886  *         ^plno
887  *
888  * All line numbers are 0-based.
889  */
890 static void handle_split(struct scoreboard *sb,
891                          struct blame_entry *ent,
892                          int tlno, int plno, int same,
893                          struct origin *parent,
894                          struct blame_entry *split)
895 {
896         if (ent->num_lines <= tlno)
897                 return;
898         if (tlno < same) {
899                 struct blame_entry this[3];
900                 tlno += ent->s_lno;
901                 same += ent->s_lno;
902                 split_overlap(this, ent, tlno, plno, same, parent);
903                 copy_split_if_better(sb, split, this);
904                 decref_split(this);
905         }
906 }
907
908 struct handle_split_cb_data {
909         struct scoreboard *sb;
910         struct blame_entry *ent;
911         struct origin *parent;
912         struct blame_entry *split;
913         long plno;
914         long tlno;
915 };
916
917 static int handle_split_cb(long start_a, long count_a,
918                            long start_b, long count_b, void *data)
919 {
920         struct handle_split_cb_data *d = data;
921         handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent,
922                      d->split);
923         d->plno = start_a + count_a;
924         d->tlno = start_b + count_b;
925         return 0;
926 }
927
928 /*
929  * Find the lines from parent that are the same as ent so that
930  * we can pass blames to it.  file_p has the blob contents for
931  * the parent.
932  */
933 static void find_copy_in_blob(struct scoreboard *sb,
934                               struct blame_entry *ent,
935                               struct origin *parent,
936                               struct blame_entry *split,
937                               mmfile_t *file_p)
938 {
939         const char *cp;
940         int cnt;
941         mmfile_t file_o;
942         struct handle_split_cb_data d;
943
944         memset(&d, 0, sizeof(d));
945         d.sb = sb; d.ent = ent; d.parent = parent; d.split = split;
946         /*
947          * Prepare mmfile that contains only the lines in ent.
948          */
949         cp = nth_line(sb, ent->lno);
950         file_o.ptr = (char *) cp;
951         cnt = ent->num_lines;
952
953         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
954                 if (*cp++ == '\n')
955                         cnt--;
956         }
957         file_o.size = cp - file_o.ptr;
958
959         /*
960          * file_o is a part of final image we are annotating.
961          * file_p partially may match that image.
962          */
963         memset(split, 0, sizeof(struct blame_entry [3]));
964         diff_hunks(file_p, &file_o, 1, handle_split_cb, &d);
965         /* remainder, if any, all match the preimage */
966         handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
967 }
968
969 /*
970  * See if lines currently target is suspected for can be attributed to
971  * parent.
972  */
973 static int find_move_in_parent(struct scoreboard *sb,
974                                struct origin *target,
975                                struct origin *parent)
976 {
977         int last_in_target, made_progress;
978         struct blame_entry *e, split[3];
979         mmfile_t file_p;
980
981         last_in_target = find_last_in_target(sb, target);
982         if (last_in_target < 0)
983                 return 1; /* nothing remains for this target */
984
985         fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
986         if (!file_p.ptr)
987                 return 0;
988
989         made_progress = 1;
990         while (made_progress) {
991                 made_progress = 0;
992                 for (e = sb->ent; e; e = e->next) {
993                         if (e->guilty || !same_suspect(e->suspect, target) ||
994                             ent_score(sb, e) < blame_move_score)
995                                 continue;
996                         find_copy_in_blob(sb, e, parent, split, &file_p);
997                         if (split[1].suspect &&
998                             blame_move_score < ent_score(sb, &split[1])) {
999                                 split_blame(sb, split, e);
1000                                 made_progress = 1;
1001                         }
1002                         decref_split(split);
1003                 }
1004         }
1005         return 0;
1006 }
1007
1008 struct blame_list {
1009         struct blame_entry *ent;
1010         struct blame_entry split[3];
1011 };
1012
1013 /*
1014  * Count the number of entries the target is suspected for,
1015  * and prepare a list of entry and the best split.
1016  */
1017 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1018                                            struct origin *target,
1019                                            int min_score,
1020                                            int *num_ents_p)
1021 {
1022         struct blame_entry *e;
1023         int num_ents, i;
1024         struct blame_list *blame_list = NULL;
1025
1026         for (e = sb->ent, num_ents = 0; e; e = e->next)
1027                 if (!e->scanned && !e->guilty &&
1028                     same_suspect(e->suspect, target) &&
1029                     min_score < ent_score(sb, e))
1030                         num_ents++;
1031         if (num_ents) {
1032                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1033                 for (e = sb->ent, i = 0; e; e = e->next)
1034                         if (!e->scanned && !e->guilty &&
1035                             same_suspect(e->suspect, target) &&
1036                             min_score < ent_score(sb, e))
1037                                 blame_list[i++].ent = e;
1038         }
1039         *num_ents_p = num_ents;
1040         return blame_list;
1041 }
1042
1043 /*
1044  * Reset the scanned status on all entries.
1045  */
1046 static void reset_scanned_flag(struct scoreboard *sb)
1047 {
1048         struct blame_entry *e;
1049         for (e = sb->ent; e; e = e->next)
1050                 e->scanned = 0;
1051 }
1052
1053 /*
1054  * For lines target is suspected for, see if we can find code movement
1055  * across file boundary from the parent commit.  porigin is the path
1056  * in the parent we already tried.
1057  */
1058 static int find_copy_in_parent(struct scoreboard *sb,
1059                                struct origin *target,
1060                                struct commit *parent,
1061                                struct origin *porigin,
1062                                int opt)
1063 {
1064         struct diff_options diff_opts;
1065         int i, j;
1066         int retval;
1067         struct blame_list *blame_list;
1068         int num_ents;
1069
1070         blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents);
1071         if (!blame_list)
1072                 return 1; /* nothing remains for this target */
1073
1074         diff_setup(&diff_opts);
1075         DIFF_OPT_SET(&diff_opts, RECURSIVE);
1076         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1077
1078         diff_setup_done(&diff_opts);
1079
1080         /* Try "find copies harder" on new path if requested;
1081          * we do not want to use diffcore_rename() actually to
1082          * match things up; find_copies_harder is set only to
1083          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1084          * and this code needs to be after diff_setup_done(), which
1085          * usually makes find-copies-harder imply copy detection.
1086          */
1087         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1088             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1089                 && (!porigin || strcmp(target->path, porigin->path))))
1090                 DIFF_OPT_SET(&diff_opts, FIND_COPIES_HARDER);
1091
1092         if (is_null_sha1(target->commit->object.sha1))
1093                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1094         else
1095                 diff_tree_sha1(parent->tree->object.sha1,
1096                                target->commit->tree->object.sha1,
1097                                "", &diff_opts);
1098
1099         if (!DIFF_OPT_TST(&diff_opts, FIND_COPIES_HARDER))
1100                 diffcore_std(&diff_opts);
1101
1102         retval = 0;
1103         while (1) {
1104                 int made_progress = 0;
1105
1106                 for (i = 0; i < diff_queued_diff.nr; i++) {
1107                         struct diff_filepair *p = diff_queued_diff.queue[i];
1108                         struct origin *norigin;
1109                         mmfile_t file_p;
1110                         struct blame_entry this[3];
1111
1112                         if (!DIFF_FILE_VALID(p->one))
1113                                 continue; /* does not exist in parent */
1114                         if (S_ISGITLINK(p->one->mode))
1115                                 continue; /* ignore git links */
1116                         if (porigin && !strcmp(p->one->path, porigin->path))
1117                                 /* find_move already dealt with this path */
1118                                 continue;
1119
1120                         norigin = get_origin(sb, parent, p->one->path);
1121                         hashcpy(norigin->blob_sha1, p->one->sha1);
1122                         norigin->mode = p->one->mode;
1123                         fill_origin_blob(&sb->revs->diffopt, norigin, &file_p);
1124                         if (!file_p.ptr)
1125                                 continue;
1126
1127                         for (j = 0; j < num_ents; j++) {
1128                                 find_copy_in_blob(sb, blame_list[j].ent,
1129                                                   norigin, this, &file_p);
1130                                 copy_split_if_better(sb, blame_list[j].split,
1131                                                      this);
1132                                 decref_split(this);
1133                         }
1134                         origin_decref(norigin);
1135                 }
1136
1137                 for (j = 0; j < num_ents; j++) {
1138                         struct blame_entry *split = blame_list[j].split;
1139                         if (split[1].suspect &&
1140                             blame_copy_score < ent_score(sb, &split[1])) {
1141                                 split_blame(sb, split, blame_list[j].ent);
1142                                 made_progress = 1;
1143                         }
1144                         else
1145                                 blame_list[j].ent->scanned = 1;
1146                         decref_split(split);
1147                 }
1148                 free(blame_list);
1149
1150                 if (!made_progress)
1151                         break;
1152                 blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents);
1153                 if (!blame_list) {
1154                         retval = 1;
1155                         break;
1156                 }
1157         }
1158         reset_scanned_flag(sb);
1159         diff_flush(&diff_opts);
1160         free_pathspec(&diff_opts.pathspec);
1161         return retval;
1162 }
1163
1164 /*
1165  * The blobs of origin and porigin exactly match, so everything
1166  * origin is suspected for can be blamed on the parent.
1167  */
1168 static void pass_whole_blame(struct scoreboard *sb,
1169                              struct origin *origin, struct origin *porigin)
1170 {
1171         struct blame_entry *e;
1172
1173         if (!porigin->file.ptr && origin->file.ptr) {
1174                 /* Steal its file */
1175                 porigin->file = origin->file;
1176                 origin->file.ptr = NULL;
1177         }
1178         for (e = sb->ent; e; e = e->next) {
1179                 if (!same_suspect(e->suspect, origin))
1180                         continue;
1181                 origin_incref(porigin);
1182                 origin_decref(e->suspect);
1183                 e->suspect = porigin;
1184         }
1185 }
1186
1187 /*
1188  * We pass blame from the current commit to its parents.  We keep saying
1189  * "parent" (and "porigin"), but what we mean is to find scapegoat to
1190  * exonerate ourselves.
1191  */
1192 static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit *commit)
1193 {
1194         if (!reverse)
1195                 return commit->parents;
1196         return lookup_decoration(&revs->children, &commit->object);
1197 }
1198
1199 static int num_scapegoats(struct rev_info *revs, struct commit *commit)
1200 {
1201         int cnt;
1202         struct commit_list *l = first_scapegoat(revs, commit);
1203         for (cnt = 0; l; l = l->next)
1204                 cnt++;
1205         return cnt;
1206 }
1207
1208 #define MAXSG 16
1209
1210 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1211 {
1212         struct rev_info *revs = sb->revs;
1213         int i, pass, num_sg;
1214         struct commit *commit = origin->commit;
1215         struct commit_list *sg;
1216         struct origin *sg_buf[MAXSG];
1217         struct origin *porigin, **sg_origin = sg_buf;
1218
1219         num_sg = num_scapegoats(revs, commit);
1220         if (!num_sg)
1221                 goto finish;
1222         else if (num_sg < ARRAY_SIZE(sg_buf))
1223                 memset(sg_buf, 0, sizeof(sg_buf));
1224         else
1225                 sg_origin = xcalloc(num_sg, sizeof(*sg_origin));
1226
1227         /*
1228          * The first pass looks for unrenamed path to optimize for
1229          * common cases, then we look for renames in the second pass.
1230          */
1231         for (pass = 0; pass < 2 - no_whole_file_rename; pass++) {
1232                 struct origin *(*find)(struct scoreboard *,
1233                                        struct commit *, struct origin *);
1234                 find = pass ? find_rename : find_origin;
1235
1236                 for (i = 0, sg = first_scapegoat(revs, commit);
1237                      i < num_sg && sg;
1238                      sg = sg->next, i++) {
1239                         struct commit *p = sg->item;
1240                         int j, same;
1241
1242                         if (sg_origin[i])
1243                                 continue;
1244                         if (parse_commit(p))
1245                                 continue;
1246                         porigin = find(sb, p, origin);
1247                         if (!porigin)
1248                                 continue;
1249                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1250                                 pass_whole_blame(sb, origin, porigin);
1251                                 origin_decref(porigin);
1252                                 goto finish;
1253                         }
1254                         for (j = same = 0; j < i; j++)
1255                                 if (sg_origin[j] &&
1256                                     !hashcmp(sg_origin[j]->blob_sha1,
1257                                              porigin->blob_sha1)) {
1258                                         same = 1;
1259                                         break;
1260                                 }
1261                         if (!same)
1262                                 sg_origin[i] = porigin;
1263                         else
1264                                 origin_decref(porigin);
1265                 }
1266         }
1267
1268         num_commits++;
1269         for (i = 0, sg = first_scapegoat(revs, commit);
1270              i < num_sg && sg;
1271              sg = sg->next, i++) {
1272                 struct origin *porigin = sg_origin[i];
1273                 if (!porigin)
1274                         continue;
1275                 if (!origin->previous) {
1276                         origin_incref(porigin);
1277                         origin->previous = porigin;
1278                 }
1279                 if (pass_blame_to_parent(sb, origin, porigin))
1280                         goto finish;
1281         }
1282
1283         /*
1284          * Optionally find moves in parents' files.
1285          */
1286         if (opt & PICKAXE_BLAME_MOVE)
1287                 for (i = 0, sg = first_scapegoat(revs, commit);
1288                      i < num_sg && sg;
1289                      sg = sg->next, i++) {
1290                         struct origin *porigin = sg_origin[i];
1291                         if (!porigin)
1292                                 continue;
1293                         if (find_move_in_parent(sb, origin, porigin))
1294                                 goto finish;
1295                 }
1296
1297         /*
1298          * Optionally find copies from parents' files.
1299          */
1300         if (opt & PICKAXE_BLAME_COPY)
1301                 for (i = 0, sg = first_scapegoat(revs, commit);
1302                      i < num_sg && sg;
1303                      sg = sg->next, i++) {
1304                         struct origin *porigin = sg_origin[i];
1305                         if (find_copy_in_parent(sb, origin, sg->item,
1306                                                 porigin, opt))
1307                                 goto finish;
1308                 }
1309
1310  finish:
1311         for (i = 0; i < num_sg; i++) {
1312                 if (sg_origin[i]) {
1313                         drop_origin_blob(sg_origin[i]);
1314                         origin_decref(sg_origin[i]);
1315                 }
1316         }
1317         drop_origin_blob(origin);
1318         if (sg_buf != sg_origin)
1319                 free(sg_origin);
1320 }
1321
1322 /*
1323  * Information on commits, used for output.
1324  */
1325 struct commit_info {
1326         struct strbuf author;
1327         struct strbuf author_mail;
1328         unsigned long author_time;
1329         struct strbuf author_tz;
1330
1331         /* filled only when asked for details */
1332         struct strbuf committer;
1333         struct strbuf committer_mail;
1334         unsigned long committer_time;
1335         struct strbuf committer_tz;
1336
1337         struct strbuf summary;
1338 };
1339
1340 /*
1341  * Parse author/committer line in the commit object buffer
1342  */
1343 static void get_ac_line(const char *inbuf, const char *what,
1344         struct strbuf *name, struct strbuf *mail,
1345         unsigned long *time, struct strbuf *tz)
1346 {
1347         struct ident_split ident;
1348         size_t len, maillen, namelen;
1349         char *tmp, *endp;
1350         const char *namebuf, *mailbuf;
1351
1352         tmp = strstr(inbuf, what);
1353         if (!tmp)
1354                 goto error_out;
1355         tmp += strlen(what);
1356         endp = strchr(tmp, '\n');
1357         if (!endp)
1358                 len = strlen(tmp);
1359         else
1360                 len = endp - tmp;
1361
1362         if (split_ident_line(&ident, tmp, len)) {
1363         error_out:
1364                 /* Ugh */
1365                 tmp = "(unknown)";
1366                 strbuf_addstr(name, tmp);
1367                 strbuf_addstr(mail, tmp);
1368                 strbuf_addstr(tz, tmp);
1369                 *time = 0;
1370                 return;
1371         }
1372
1373         namelen = ident.name_end - ident.name_begin;
1374         namebuf = ident.name_begin;
1375
1376         maillen = ident.mail_end - ident.mail_begin;
1377         mailbuf = ident.mail_begin;
1378
1379         if (ident.date_begin && ident.date_end)
1380                 *time = strtoul(ident.date_begin, NULL, 10);
1381         else
1382                 *time = 0;
1383
1384         if (ident.tz_begin && ident.tz_end)
1385                 strbuf_add(tz, ident.tz_begin, ident.tz_end - ident.tz_begin);
1386         else
1387                 strbuf_addstr(tz, "(unknown)");
1388
1389         /*
1390          * Now, convert both name and e-mail using mailmap
1391          */
1392         map_user(&mailmap, &mailbuf, &maillen,
1393                  &namebuf, &namelen);
1394
1395         strbuf_addf(mail, "<%.*s>", (int)maillen, mailbuf);
1396         strbuf_add(name, namebuf, namelen);
1397 }
1398
1399 static void commit_info_init(struct commit_info *ci)
1400 {
1401
1402         strbuf_init(&ci->author, 0);
1403         strbuf_init(&ci->author_mail, 0);
1404         strbuf_init(&ci->author_tz, 0);
1405         strbuf_init(&ci->committer, 0);
1406         strbuf_init(&ci->committer_mail, 0);
1407         strbuf_init(&ci->committer_tz, 0);
1408         strbuf_init(&ci->summary, 0);
1409 }
1410
1411 static void commit_info_destroy(struct commit_info *ci)
1412 {
1413
1414         strbuf_release(&ci->author);
1415         strbuf_release(&ci->author_mail);
1416         strbuf_release(&ci->author_tz);
1417         strbuf_release(&ci->committer);
1418         strbuf_release(&ci->committer_mail);
1419         strbuf_release(&ci->committer_tz);
1420         strbuf_release(&ci->summary);
1421 }
1422
1423 static void get_commit_info(struct commit *commit,
1424                             struct commit_info *ret,
1425                             int detailed)
1426 {
1427         int len;
1428         const char *subject, *encoding;
1429         char *message;
1430
1431         commit_info_init(ret);
1432
1433         encoding = get_log_output_encoding();
1434         message = logmsg_reencode(commit, NULL, encoding);
1435         get_ac_line(message, "\nauthor ",
1436                     &ret->author, &ret->author_mail,
1437                     &ret->author_time, &ret->author_tz);
1438
1439         if (!detailed) {
1440                 logmsg_free(message, commit);
1441                 return;
1442         }
1443
1444         get_ac_line(message, "\ncommitter ",
1445                     &ret->committer, &ret->committer_mail,
1446                     &ret->committer_time, &ret->committer_tz);
1447
1448         len = find_commit_subject(message, &subject);
1449         if (len)
1450                 strbuf_add(&ret->summary, subject, len);
1451         else
1452                 strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1));
1453
1454         logmsg_free(message, commit);
1455 }
1456
1457 /*
1458  * To allow LF and other nonportable characters in pathnames,
1459  * they are c-style quoted as needed.
1460  */
1461 static void write_filename_info(const char *path)
1462 {
1463         printf("filename ");
1464         write_name_quoted(path, stdout, '\n');
1465 }
1466
1467 /*
1468  * Porcelain/Incremental format wants to show a lot of details per
1469  * commit.  Instead of repeating this every line, emit it only once,
1470  * the first time each commit appears in the output (unless the
1471  * user has specifically asked for us to repeat).
1472  */
1473 static int emit_one_suspect_detail(struct origin *suspect, int repeat)
1474 {
1475         struct commit_info ci;
1476
1477         if (!repeat && (suspect->commit->object.flags & METAINFO_SHOWN))
1478                 return 0;
1479
1480         suspect->commit->object.flags |= METAINFO_SHOWN;
1481         get_commit_info(suspect->commit, &ci, 1);
1482         printf("author %s\n", ci.author.buf);
1483         printf("author-mail %s\n", ci.author_mail.buf);
1484         printf("author-time %lu\n", ci.author_time);
1485         printf("author-tz %s\n", ci.author_tz.buf);
1486         printf("committer %s\n", ci.committer.buf);
1487         printf("committer-mail %s\n", ci.committer_mail.buf);
1488         printf("committer-time %lu\n", ci.committer_time);
1489         printf("committer-tz %s\n", ci.committer_tz.buf);
1490         printf("summary %s\n", ci.summary.buf);
1491         if (suspect->commit->object.flags & UNINTERESTING)
1492                 printf("boundary\n");
1493         if (suspect->previous) {
1494                 struct origin *prev = suspect->previous;
1495                 printf("previous %s ", sha1_to_hex(prev->commit->object.sha1));
1496                 write_name_quoted(prev->path, stdout, '\n');
1497         }
1498
1499         commit_info_destroy(&ci);
1500
1501         return 1;
1502 }
1503
1504 /*
1505  * The blame_entry is found to be guilty for the range.  Mark it
1506  * as such, and show it in incremental output.
1507  */
1508 static void found_guilty_entry(struct blame_entry *ent)
1509 {
1510         if (ent->guilty)
1511                 return;
1512         ent->guilty = 1;
1513         if (incremental) {
1514                 struct origin *suspect = ent->suspect;
1515
1516                 printf("%s %d %d %d\n",
1517                        sha1_to_hex(suspect->commit->object.sha1),
1518                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1519                 emit_one_suspect_detail(suspect, 0);
1520                 write_filename_info(suspect->path);
1521                 maybe_flush_or_die(stdout, "stdout");
1522         }
1523 }
1524
1525 /*
1526  * The main loop -- while the scoreboard has lines whose true origin
1527  * is still unknown, pick one blame_entry, and allow its current
1528  * suspect to pass blames to its parents.
1529  */
1530 static void assign_blame(struct scoreboard *sb, int opt)
1531 {
1532         struct rev_info *revs = sb->revs;
1533
1534         while (1) {
1535                 struct blame_entry *ent;
1536                 struct commit *commit;
1537                 struct origin *suspect = NULL;
1538
1539                 /* find one suspect to break down */
1540                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1541                         if (!ent->guilty)
1542                                 suspect = ent->suspect;
1543                 if (!suspect)
1544                         return; /* all done */
1545
1546                 /*
1547                  * We will use this suspect later in the loop,
1548                  * so hold onto it in the meantime.
1549                  */
1550                 origin_incref(suspect);
1551                 commit = suspect->commit;
1552                 if (!commit->object.parsed)
1553                         parse_commit(commit);
1554                 if (reverse ||
1555                     (!(commit->object.flags & UNINTERESTING) &&
1556                      !(revs->max_age != -1 && commit->date < revs->max_age)))
1557                         pass_blame(sb, suspect, opt);
1558                 else {
1559                         commit->object.flags |= UNINTERESTING;
1560                         if (commit->object.parsed)
1561                                 mark_parents_uninteresting(commit);
1562                 }
1563                 /* treat root commit as boundary */
1564                 if (!commit->parents && !show_root)
1565                         commit->object.flags |= UNINTERESTING;
1566
1567                 /* Take responsibility for the remaining entries */
1568                 for (ent = sb->ent; ent; ent = ent->next)
1569                         if (same_suspect(ent->suspect, suspect))
1570                                 found_guilty_entry(ent);
1571                 origin_decref(suspect);
1572
1573                 if (DEBUG) /* sanity */
1574                         sanity_check_refcnt(sb);
1575         }
1576 }
1577
1578 static const char *format_time(unsigned long time, const char *tz_str,
1579                                int show_raw_time)
1580 {
1581         static char time_buf[128];
1582         const char *time_str;
1583         int time_len;
1584         int tz;
1585
1586         if (show_raw_time) {
1587                 snprintf(time_buf, sizeof(time_buf), "%lu %s", time, tz_str);
1588         }
1589         else {
1590                 tz = atoi(tz_str);
1591                 time_str = show_date(time, tz, blame_date_mode);
1592                 time_len = strlen(time_str);
1593                 memcpy(time_buf, time_str, time_len);
1594                 memset(time_buf + time_len, ' ', blame_date_width - time_len);
1595         }
1596         return time_buf;
1597 }
1598
1599 #define OUTPUT_ANNOTATE_COMPAT  001
1600 #define OUTPUT_LONG_OBJECT_NAME 002
1601 #define OUTPUT_RAW_TIMESTAMP    004
1602 #define OUTPUT_PORCELAIN        010
1603 #define OUTPUT_SHOW_NAME        020
1604 #define OUTPUT_SHOW_NUMBER      040
1605 #define OUTPUT_SHOW_SCORE      0100
1606 #define OUTPUT_NO_AUTHOR       0200
1607 #define OUTPUT_SHOW_EMAIL       0400
1608 #define OUTPUT_LINE_PORCELAIN 01000
1609
1610 static void emit_porcelain_details(struct origin *suspect, int repeat)
1611 {
1612         if (emit_one_suspect_detail(suspect, repeat) ||
1613             (suspect->commit->object.flags & MORE_THAN_ONE_PATH))
1614                 write_filename_info(suspect->path);
1615 }
1616
1617 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent,
1618                            int opt)
1619 {
1620         int repeat = opt & OUTPUT_LINE_PORCELAIN;
1621         int cnt;
1622         const char *cp;
1623         struct origin *suspect = ent->suspect;
1624         char hex[41];
1625
1626         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1627         printf("%s%c%d %d %d\n",
1628                hex,
1629                ent->guilty ? ' ' : '*', /* purely for debugging */
1630                ent->s_lno + 1,
1631                ent->lno + 1,
1632                ent->num_lines);
1633         emit_porcelain_details(suspect, repeat);
1634
1635         cp = nth_line(sb, ent->lno);
1636         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1637                 char ch;
1638                 if (cnt) {
1639                         printf("%s %d %d\n", hex,
1640                                ent->s_lno + 1 + cnt,
1641                                ent->lno + 1 + cnt);
1642                         if (repeat)
1643                                 emit_porcelain_details(suspect, 1);
1644                 }
1645                 putchar('\t');
1646                 do {
1647                         ch = *cp++;
1648                         putchar(ch);
1649                 } while (ch != '\n' &&
1650                          cp < sb->final_buf + sb->final_buf_size);
1651         }
1652
1653         if (sb->final_buf_size && cp[-1] != '\n')
1654                 putchar('\n');
1655 }
1656
1657 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1658 {
1659         int cnt;
1660         const char *cp;
1661         struct origin *suspect = ent->suspect;
1662         struct commit_info ci;
1663         char hex[41];
1664         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1665
1666         get_commit_info(suspect->commit, &ci, 1);
1667         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1668
1669         cp = nth_line(sb, ent->lno);
1670         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1671                 char ch;
1672                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : abbrev;
1673
1674                 if (suspect->commit->object.flags & UNINTERESTING) {
1675                         if (blank_boundary)
1676                                 memset(hex, ' ', length);
1677                         else if (!(opt & OUTPUT_ANNOTATE_COMPAT)) {
1678                                 length--;
1679                                 putchar('^');
1680                         }
1681                 }
1682
1683                 printf("%.*s", length, hex);
1684                 if (opt & OUTPUT_ANNOTATE_COMPAT) {
1685                         const char *name;
1686                         if (opt & OUTPUT_SHOW_EMAIL)
1687                                 name = ci.author_mail.buf;
1688                         else
1689                                 name = ci.author.buf;
1690                         printf("\t(%10s\t%10s\t%d)", name,
1691                                format_time(ci.author_time, ci.author_tz.buf,
1692                                            show_raw_time),
1693                                ent->lno + 1 + cnt);
1694                 } else {
1695                         if (opt & OUTPUT_SHOW_SCORE)
1696                                 printf(" %*d %02d",
1697                                        max_score_digits, ent->score,
1698                                        ent->suspect->refcnt);
1699                         if (opt & OUTPUT_SHOW_NAME)
1700                                 printf(" %-*.*s", longest_file, longest_file,
1701                                        suspect->path);
1702                         if (opt & OUTPUT_SHOW_NUMBER)
1703                                 printf(" %*d", max_orig_digits,
1704                                        ent->s_lno + 1 + cnt);
1705
1706                         if (!(opt & OUTPUT_NO_AUTHOR)) {
1707                                 const char *name;
1708                                 int pad;
1709                                 if (opt & OUTPUT_SHOW_EMAIL)
1710                                         name = ci.author_mail.buf;
1711                                 else
1712                                         name = ci.author.buf;
1713                                 pad = longest_author - utf8_strwidth(name);
1714                                 printf(" (%s%*s %10s",
1715                                        name, pad, "",
1716                                        format_time(ci.author_time,
1717                                                    ci.author_tz.buf,
1718                                                    show_raw_time));
1719                         }
1720                         printf(" %*d) ",
1721                                max_digits, ent->lno + 1 + cnt);
1722                 }
1723                 do {
1724                         ch = *cp++;
1725                         putchar(ch);
1726                 } while (ch != '\n' &&
1727                          cp < sb->final_buf + sb->final_buf_size);
1728         }
1729
1730         if (sb->final_buf_size && cp[-1] != '\n')
1731                 putchar('\n');
1732
1733         commit_info_destroy(&ci);
1734 }
1735
1736 static void output(struct scoreboard *sb, int option)
1737 {
1738         struct blame_entry *ent;
1739
1740         if (option & OUTPUT_PORCELAIN) {
1741                 for (ent = sb->ent; ent; ent = ent->next) {
1742                         struct blame_entry *oth;
1743                         struct origin *suspect = ent->suspect;
1744                         struct commit *commit = suspect->commit;
1745                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1746                                 continue;
1747                         for (oth = ent->next; oth; oth = oth->next) {
1748                                 if ((oth->suspect->commit != commit) ||
1749                                     !strcmp(oth->suspect->path, suspect->path))
1750                                         continue;
1751                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1752                                 break;
1753                         }
1754                 }
1755         }
1756
1757         for (ent = sb->ent; ent; ent = ent->next) {
1758                 if (option & OUTPUT_PORCELAIN)
1759                         emit_porcelain(sb, ent, option);
1760                 else {
1761                         emit_other(sb, ent, option);
1762                 }
1763         }
1764 }
1765
1766 /*
1767  * To allow quick access to the contents of nth line in the
1768  * final image, prepare an index in the scoreboard.
1769  */
1770 static int prepare_lines(struct scoreboard *sb)
1771 {
1772         const char *buf = sb->final_buf;
1773         unsigned long len = sb->final_buf_size;
1774         int num = 0, incomplete = 0, bol = 1;
1775
1776         if (len && buf[len-1] != '\n')
1777                 incomplete++; /* incomplete line at the end */
1778         while (len--) {
1779                 if (bol) {
1780                         sb->lineno = xrealloc(sb->lineno,
1781                                               sizeof(int *) * (num + 1));
1782                         sb->lineno[num] = buf - sb->final_buf;
1783                         bol = 0;
1784                 }
1785                 if (*buf++ == '\n') {
1786                         num++;
1787                         bol = 1;
1788                 }
1789         }
1790         sb->lineno = xrealloc(sb->lineno,
1791                               sizeof(int *) * (num + incomplete + 1));
1792         sb->lineno[num + incomplete] = buf - sb->final_buf;
1793         sb->num_lines = num + incomplete;
1794         return sb->num_lines;
1795 }
1796
1797 /*
1798  * Add phony grafts for use with -S; this is primarily to
1799  * support git's cvsserver that wants to give a linear history
1800  * to its clients.
1801  */
1802 static int read_ancestry(const char *graft_file)
1803 {
1804         FILE *fp = fopen(graft_file, "r");
1805         char buf[1024];
1806         if (!fp)
1807                 return -1;
1808         while (fgets(buf, sizeof(buf), fp)) {
1809                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1810                 int len = strlen(buf);
1811                 struct commit_graft *graft = read_graft_line(buf, len);
1812                 if (graft)
1813                         register_commit_graft(graft, 0);
1814         }
1815         fclose(fp);
1816         return 0;
1817 }
1818
1819 static int update_auto_abbrev(int auto_abbrev, struct origin *suspect)
1820 {
1821         const char *uniq = find_unique_abbrev(suspect->commit->object.sha1,
1822                                               auto_abbrev);
1823         int len = strlen(uniq);
1824         if (auto_abbrev < len)
1825                 return len;
1826         return auto_abbrev;
1827 }
1828
1829 /*
1830  * How many columns do we need to show line numbers, authors,
1831  * and filenames?
1832  */
1833 static void find_alignment(struct scoreboard *sb, int *option)
1834 {
1835         int longest_src_lines = 0;
1836         int longest_dst_lines = 0;
1837         unsigned largest_score = 0;
1838         struct blame_entry *e;
1839         int compute_auto_abbrev = (abbrev < 0);
1840         int auto_abbrev = default_abbrev;
1841
1842         for (e = sb->ent; e; e = e->next) {
1843                 struct origin *suspect = e->suspect;
1844                 struct commit_info ci;
1845                 int num;
1846
1847                 if (compute_auto_abbrev)
1848                         auto_abbrev = update_auto_abbrev(auto_abbrev, suspect);
1849                 if (strcmp(suspect->path, sb->path))
1850                         *option |= OUTPUT_SHOW_NAME;
1851                 num = strlen(suspect->path);
1852                 if (longest_file < num)
1853                         longest_file = num;
1854                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1855                         suspect->commit->object.flags |= METAINFO_SHOWN;
1856                         get_commit_info(suspect->commit, &ci, 1);
1857                         if (*option & OUTPUT_SHOW_EMAIL)
1858                                 num = utf8_strwidth(ci.author_mail.buf);
1859                         else
1860                                 num = utf8_strwidth(ci.author.buf);
1861                         if (longest_author < num)
1862                                 longest_author = num;
1863                 }
1864                 num = e->s_lno + e->num_lines;
1865                 if (longest_src_lines < num)
1866                         longest_src_lines = num;
1867                 num = e->lno + e->num_lines;
1868                 if (longest_dst_lines < num)
1869                         longest_dst_lines = num;
1870                 if (largest_score < ent_score(sb, e))
1871                         largest_score = ent_score(sb, e);
1872
1873                 commit_info_destroy(&ci);
1874         }
1875         max_orig_digits = decimal_width(longest_src_lines);
1876         max_digits = decimal_width(longest_dst_lines);
1877         max_score_digits = decimal_width(largest_score);
1878
1879         if (compute_auto_abbrev)
1880                 /* one more abbrev length is needed for the boundary commit */
1881                 abbrev = auto_abbrev + 1;
1882 }
1883
1884 /*
1885  * For debugging -- origin is refcounted, and this asserts that
1886  * we do not underflow.
1887  */
1888 static void sanity_check_refcnt(struct scoreboard *sb)
1889 {
1890         int baa = 0;
1891         struct blame_entry *ent;
1892
1893         for (ent = sb->ent; ent; ent = ent->next) {
1894                 /* Nobody should have zero or negative refcnt */
1895                 if (ent->suspect->refcnt <= 0) {
1896                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1897                                 ent->suspect->path,
1898                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1899                                 ent->suspect->refcnt);
1900                         baa = 1;
1901                 }
1902         }
1903         if (baa) {
1904                 int opt = 0160;
1905                 find_alignment(sb, &opt);
1906                 output(sb, opt);
1907                 die("Baa %d!", baa);
1908         }
1909 }
1910
1911 /*
1912  * Used for the command line parsing; check if the path exists
1913  * in the working tree.
1914  */
1915 static int has_string_in_work_tree(const char *path)
1916 {
1917         struct stat st;
1918         return !lstat(path, &st);
1919 }
1920
1921 static unsigned parse_score(const char *arg)
1922 {
1923         char *end;
1924         unsigned long score = strtoul(arg, &end, 10);
1925         if (*end)
1926                 return 0;
1927         return score;
1928 }
1929
1930 static const char *add_prefix(const char *prefix, const char *path)
1931 {
1932         return prefix_path(prefix, prefix ? strlen(prefix) : 0, path);
1933 }
1934
1935 static int git_blame_config(const char *var, const char *value, void *cb)
1936 {
1937         if (!strcmp(var, "blame.showroot")) {
1938                 show_root = git_config_bool(var, value);
1939                 return 0;
1940         }
1941         if (!strcmp(var, "blame.blankboundary")) {
1942                 blank_boundary = git_config_bool(var, value);
1943                 return 0;
1944         }
1945         if (!strcmp(var, "blame.date")) {
1946                 if (!value)
1947                         return config_error_nonbool(var);
1948                 blame_date_mode = parse_date_format(value);
1949                 return 0;
1950         }
1951
1952         if (userdiff_config(var, value) < 0)
1953                 return -1;
1954
1955         return git_default_config(var, value, cb);
1956 }
1957
1958 static void verify_working_tree_path(struct commit *work_tree, const char *path)
1959 {
1960         struct commit_list *parents;
1961
1962         for (parents = work_tree->parents; parents; parents = parents->next) {
1963                 const unsigned char *commit_sha1 = parents->item->object.sha1;
1964                 unsigned char blob_sha1[20];
1965                 unsigned mode;
1966
1967                 if (!get_tree_entry(commit_sha1, path, blob_sha1, &mode) &&
1968                     sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
1969                         return;
1970         }
1971         die("no such path '%s' in HEAD", path);
1972 }
1973
1974 static struct commit_list **append_parent(struct commit_list **tail, const unsigned char *sha1)
1975 {
1976         struct commit *parent;
1977
1978         parent = lookup_commit_reference(sha1);
1979         if (!parent)
1980                 die("no such commit %s", sha1_to_hex(sha1));
1981         return &commit_list_insert(parent, tail)->next;
1982 }
1983
1984 static void append_merge_parents(struct commit_list **tail)
1985 {
1986         int merge_head;
1987         const char *merge_head_file = git_path("MERGE_HEAD");
1988         struct strbuf line = STRBUF_INIT;
1989
1990         merge_head = open(merge_head_file, O_RDONLY);
1991         if (merge_head < 0) {
1992                 if (errno == ENOENT)
1993                         return;
1994                 die("cannot open '%s' for reading", merge_head_file);
1995         }
1996
1997         while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) {
1998                 unsigned char sha1[20];
1999                 if (line.len < 40 || get_sha1_hex(line.buf, sha1))
2000                         die("unknown line in '%s': %s", merge_head_file, line.buf);
2001                 tail = append_parent(tail, sha1);
2002         }
2003         close(merge_head);
2004         strbuf_release(&line);
2005 }
2006
2007 /*
2008  * Prepare a dummy commit that represents the work tree (or staged) item.
2009  * Note that annotating work tree item never works in the reverse.
2010  */
2011 static struct commit *fake_working_tree_commit(struct diff_options *opt,
2012                                                const char *path,
2013                                                const char *contents_from)
2014 {
2015         struct commit *commit;
2016         struct origin *origin;
2017         struct commit_list **parent_tail, *parent;
2018         unsigned char head_sha1[20];
2019         struct strbuf buf = STRBUF_INIT;
2020         const char *ident;
2021         time_t now;
2022         int size, len;
2023         struct cache_entry *ce;
2024         unsigned mode;
2025         struct strbuf msg = STRBUF_INIT;
2026
2027         time(&now);
2028         commit = xcalloc(1, sizeof(*commit));
2029         commit->object.parsed = 1;
2030         commit->date = now;
2031         commit->object.type = OBJ_COMMIT;
2032         parent_tail = &commit->parents;
2033
2034         if (!resolve_ref_unsafe("HEAD", head_sha1, 1, NULL))
2035                 die("no such ref: HEAD");
2036
2037         parent_tail = append_parent(parent_tail, head_sha1);
2038         append_merge_parents(parent_tail);
2039         verify_working_tree_path(commit, path);
2040
2041         origin = make_origin(commit, path);
2042
2043         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2044         strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n");
2045         for (parent = commit->parents; parent; parent = parent->next)
2046                 strbuf_addf(&msg, "parent %s\n",
2047                             sha1_to_hex(parent->item->object.sha1));
2048         strbuf_addf(&msg,
2049                     "author %s\n"
2050                     "committer %s\n\n"
2051                     "Version of %s from %s\n",
2052                     ident, ident, path,
2053                     (!contents_from ? path :
2054                      (!strcmp(contents_from, "-") ? "standard input" : contents_from)));
2055         commit->buffer = strbuf_detach(&msg, NULL);
2056
2057         if (!contents_from || strcmp("-", contents_from)) {
2058                 struct stat st;
2059                 const char *read_from;
2060                 char *buf_ptr;
2061                 unsigned long buf_len;
2062
2063                 if (contents_from) {
2064                         if (stat(contents_from, &st) < 0)
2065                                 die_errno("Cannot stat '%s'", contents_from);
2066                         read_from = contents_from;
2067                 }
2068                 else {
2069                         if (lstat(path, &st) < 0)
2070                                 die_errno("Cannot lstat '%s'", path);
2071                         read_from = path;
2072                 }
2073                 mode = canon_mode(st.st_mode);
2074
2075                 switch (st.st_mode & S_IFMT) {
2076                 case S_IFREG:
2077                         if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
2078                             textconv_object(read_from, mode, null_sha1, 0, &buf_ptr, &buf_len))
2079                                 strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1);
2080                         else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
2081                                 die_errno("cannot open or read '%s'", read_from);
2082                         break;
2083                 case S_IFLNK:
2084                         if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
2085                                 die_errno("cannot readlink '%s'", read_from);
2086                         break;
2087                 default:
2088                         die("unsupported file type %s", read_from);
2089                 }
2090         }
2091         else {
2092                 /* Reading from stdin */
2093                 mode = 0;
2094                 if (strbuf_read(&buf, 0, 0) < 0)
2095                         die_errno("failed to read from stdin");
2096         }
2097         convert_to_git(path, buf.buf, buf.len, &buf, 0);
2098         origin->file.ptr = buf.buf;
2099         origin->file.size = buf.len;
2100         pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
2101         commit->util = origin;
2102
2103         /*
2104          * Read the current index, replace the path entry with
2105          * origin->blob_sha1 without mucking with its mode or type
2106          * bits; we are not going to write this index out -- we just
2107          * want to run "diff-index --cached".
2108          */
2109         discard_cache();
2110         read_cache();
2111
2112         len = strlen(path);
2113         if (!mode) {
2114                 int pos = cache_name_pos(path, len);
2115                 if (0 <= pos)
2116                         mode = active_cache[pos]->ce_mode;
2117                 else
2118                         /* Let's not bother reading from HEAD tree */
2119                         mode = S_IFREG | 0644;
2120         }
2121         size = cache_entry_size(len);
2122         ce = xcalloc(1, size);
2123         hashcpy(ce->sha1, origin->blob_sha1);
2124         memcpy(ce->name, path, len);
2125         ce->ce_flags = create_ce_flags(0);
2126         ce->ce_namelen = len;
2127         ce->ce_mode = create_ce_mode(mode);
2128         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2129
2130         /*
2131          * We are not going to write this out, so this does not matter
2132          * right now, but someday we might optimize diff-index --cached
2133          * with cache-tree information.
2134          */
2135         cache_tree_invalidate_path(active_cache_tree, path);
2136
2137         return commit;
2138 }
2139
2140 static const char *prepare_final(struct scoreboard *sb)
2141 {
2142         int i;
2143         const char *final_commit_name = NULL;
2144         struct rev_info *revs = sb->revs;
2145
2146         /*
2147          * There must be one and only one positive commit in the
2148          * revs->pending array.
2149          */
2150         for (i = 0; i < revs->pending.nr; i++) {
2151                 struct object *obj = revs->pending.objects[i].item;
2152                 if (obj->flags & UNINTERESTING)
2153                         continue;
2154                 while (obj->type == OBJ_TAG)
2155                         obj = deref_tag(obj, NULL, 0);
2156                 if (obj->type != OBJ_COMMIT)
2157                         die("Non commit %s?", revs->pending.objects[i].name);
2158                 if (sb->final)
2159                         die("More than one commit to dig from %s and %s?",
2160                             revs->pending.objects[i].name,
2161                             final_commit_name);
2162                 sb->final = (struct commit *) obj;
2163                 final_commit_name = revs->pending.objects[i].name;
2164         }
2165         return final_commit_name;
2166 }
2167
2168 static const char *prepare_initial(struct scoreboard *sb)
2169 {
2170         int i;
2171         const char *final_commit_name = NULL;
2172         struct rev_info *revs = sb->revs;
2173
2174         /*
2175          * There must be one and only one negative commit, and it must be
2176          * the boundary.
2177          */
2178         for (i = 0; i < revs->pending.nr; i++) {
2179                 struct object *obj = revs->pending.objects[i].item;
2180                 if (!(obj->flags & UNINTERESTING))
2181                         continue;
2182                 while (obj->type == OBJ_TAG)
2183                         obj = deref_tag(obj, NULL, 0);
2184                 if (obj->type != OBJ_COMMIT)
2185                         die("Non commit %s?", revs->pending.objects[i].name);
2186                 if (sb->final)
2187                         die("More than one commit to dig down to %s and %s?",
2188                             revs->pending.objects[i].name,
2189                             final_commit_name);
2190                 sb->final = (struct commit *) obj;
2191                 final_commit_name = revs->pending.objects[i].name;
2192         }
2193         if (!final_commit_name)
2194                 die("No commit to dig down to?");
2195         return final_commit_name;
2196 }
2197
2198 static int blame_copy_callback(const struct option *option, const char *arg, int unset)
2199 {
2200         int *opt = option->value;
2201
2202         /*
2203          * -C enables copy from removed files;
2204          * -C -C enables copy from existing files, but only
2205          *       when blaming a new file;
2206          * -C -C -C enables copy from existing files for
2207          *          everybody
2208          */
2209         if (*opt & PICKAXE_BLAME_COPY_HARDER)
2210                 *opt |= PICKAXE_BLAME_COPY_HARDEST;
2211         if (*opt & PICKAXE_BLAME_COPY)
2212                 *opt |= PICKAXE_BLAME_COPY_HARDER;
2213         *opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2214
2215         if (arg)
2216                 blame_copy_score = parse_score(arg);
2217         return 0;
2218 }
2219
2220 static int blame_move_callback(const struct option *option, const char *arg, int unset)
2221 {
2222         int *opt = option->value;
2223
2224         *opt |= PICKAXE_BLAME_MOVE;
2225
2226         if (arg)
2227                 blame_move_score = parse_score(arg);
2228         return 0;
2229 }
2230
2231 int cmd_blame(int argc, const char **argv, const char *prefix)
2232 {
2233         struct rev_info revs;
2234         const char *path;
2235         struct scoreboard sb;
2236         struct origin *o;
2237         struct blame_entry *ent = NULL;
2238         long dashdash_pos, lno;
2239         const char *final_commit_name = NULL;
2240         enum object_type type;
2241
2242         static struct string_list range_list;
2243         static int output_option = 0, opt = 0;
2244         static int show_stats = 0;
2245         static const char *revs_file = NULL;
2246         static const char *contents_from = NULL;
2247         static const struct option options[] = {
2248                 OPT_BOOL(0, "incremental", &incremental, N_("Show blame entries as we find them, incrementally")),
2249                 OPT_BOOL('b', NULL, &blank_boundary, N_("Show blank SHA-1 for boundary commits (Default: off)")),
2250                 OPT_BOOL(0, "root", &show_root, N_("Do not treat root commits as boundaries (Default: off)")),
2251                 OPT_BOOL(0, "show-stats", &show_stats, N_("Show work cost statistics")),
2252                 OPT_BIT(0, "score-debug", &output_option, N_("Show output score for blame entries"), OUTPUT_SHOW_SCORE),
2253                 OPT_BIT('f', "show-name", &output_option, N_("Show original filename (Default: auto)"), OUTPUT_SHOW_NAME),
2254                 OPT_BIT('n', "show-number", &output_option, N_("Show original linenumber (Default: off)"), OUTPUT_SHOW_NUMBER),
2255                 OPT_BIT('p', "porcelain", &output_option, N_("Show in a format designed for machine consumption"), OUTPUT_PORCELAIN),
2256                 OPT_BIT(0, "line-porcelain", &output_option, N_("Show porcelain format with per-line commit information"), OUTPUT_PORCELAIN|OUTPUT_LINE_PORCELAIN),
2257                 OPT_BIT('c', NULL, &output_option, N_("Use the same output mode as git-annotate (Default: off)"), OUTPUT_ANNOTATE_COMPAT),
2258                 OPT_BIT('t', NULL, &output_option, N_("Show raw timestamp (Default: off)"), OUTPUT_RAW_TIMESTAMP),
2259                 OPT_BIT('l', NULL, &output_option, N_("Show long commit SHA1 (Default: off)"), OUTPUT_LONG_OBJECT_NAME),
2260                 OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
2261                 OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
2262                 OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
2263                 OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL),
2264                 OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from <file> instead of calling git-rev-list")),
2265                 OPT_STRING(0, "contents", &contents_from, N_("file"), N_("Use <file>'s contents as the final image")),
2266                 { OPTION_CALLBACK, 'C', NULL, &opt, N_("score"), N_("Find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback },
2267                 { OPTION_CALLBACK, 'M', NULL, &opt, N_("score"), N_("Find line movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback },
2268                 OPT_STRING_LIST('L', NULL, &range_list, N_("n,m"), N_("Process only line range n,m, counting from 1")),
2269                 OPT__ABBREV(&abbrev),
2270                 OPT_END()
2271         };
2272
2273         struct parse_opt_ctx_t ctx;
2274         int cmd_is_annotate = !strcmp(argv[0], "annotate");
2275         struct range_set ranges;
2276         unsigned int range_i;
2277         long anchor;
2278
2279         git_config(git_blame_config, NULL);
2280         init_revisions(&revs, NULL);
2281         revs.date_mode = blame_date_mode;
2282         DIFF_OPT_SET(&revs.diffopt, ALLOW_TEXTCONV);
2283         DIFF_OPT_SET(&revs.diffopt, FOLLOW_RENAMES);
2284
2285         save_commit_buffer = 0;
2286         dashdash_pos = 0;
2287
2288         parse_options_start(&ctx, argc, argv, prefix, options,
2289                             PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
2290         for (;;) {
2291                 switch (parse_options_step(&ctx, options, blame_opt_usage)) {
2292                 case PARSE_OPT_HELP:
2293                         exit(129);
2294                 case PARSE_OPT_DONE:
2295                         if (ctx.argv[0])
2296                                 dashdash_pos = ctx.cpidx;
2297                         goto parse_done;
2298                 }
2299
2300                 if (!strcmp(ctx.argv[0], "--reverse")) {
2301                         ctx.argv[0] = "--children";
2302                         reverse = 1;
2303                 }
2304                 parse_revision_opt(&revs, &ctx, options, blame_opt_usage);
2305         }
2306 parse_done:
2307         no_whole_file_rename = !DIFF_OPT_TST(&revs.diffopt, FOLLOW_RENAMES);
2308         DIFF_OPT_CLR(&revs.diffopt, FOLLOW_RENAMES);
2309         argc = parse_options_end(&ctx);
2310
2311         if (0 < abbrev)
2312                 /* one more abbrev length is needed for the boundary commit */
2313                 abbrev++;
2314
2315         if (revs_file && read_ancestry(revs_file))
2316                 die_errno("reading graft file '%s' failed", revs_file);
2317
2318         if (cmd_is_annotate) {
2319                 output_option |= OUTPUT_ANNOTATE_COMPAT;
2320                 blame_date_mode = DATE_ISO8601;
2321         } else {
2322                 blame_date_mode = revs.date_mode;
2323         }
2324
2325         /* The maximum width used to show the dates */
2326         switch (blame_date_mode) {
2327         case DATE_RFC2822:
2328                 blame_date_width = sizeof("Thu, 19 Oct 2006 16:00:04 -0700");
2329                 break;
2330         case DATE_ISO8601:
2331                 blame_date_width = sizeof("2006-10-19 16:00:04 -0700");
2332                 break;
2333         case DATE_RAW:
2334                 blame_date_width = sizeof("1161298804 -0700");
2335                 break;
2336         case DATE_SHORT:
2337                 blame_date_width = sizeof("2006-10-19");
2338                 break;
2339         case DATE_RELATIVE:
2340                 /* "normal" is used as the fallback for "relative" */
2341         case DATE_LOCAL:
2342         case DATE_NORMAL:
2343                 blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700");
2344                 break;
2345         }
2346         blame_date_width -= 1; /* strip the null */
2347
2348         if (DIFF_OPT_TST(&revs.diffopt, FIND_COPIES_HARDER))
2349                 opt |= (PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE |
2350                         PICKAXE_BLAME_COPY_HARDER);
2351
2352         if (!blame_move_score)
2353                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2354         if (!blame_copy_score)
2355                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2356
2357         /*
2358          * We have collected options unknown to us in argv[1..unk]
2359          * which are to be passed to revision machinery if we are
2360          * going to do the "bottom" processing.
2361          *
2362          * The remaining are:
2363          *
2364          * (1) if dashdash_pos != 0, it is either
2365          *     "blame [revisions] -- <path>" or
2366          *     "blame -- <path> <rev>"
2367          *
2368          * (2) otherwise, it is one of the two:
2369          *     "blame [revisions] <path>"
2370          *     "blame <path> <rev>"
2371          *
2372          * Note that we must strip out <path> from the arguments: we do not
2373          * want the path pruning but we may want "bottom" processing.
2374          */
2375         if (dashdash_pos) {
2376                 switch (argc - dashdash_pos - 1) {
2377                 case 2: /* (1b) */
2378                         if (argc != 4)
2379                                 usage_with_options(blame_opt_usage, options);
2380                         /* reorder for the new way: <rev> -- <path> */
2381                         argv[1] = argv[3];
2382                         argv[3] = argv[2];
2383                         argv[2] = "--";
2384                         /* FALLTHROUGH */
2385                 case 1: /* (1a) */
2386                         path = add_prefix(prefix, argv[--argc]);
2387                         argv[argc] = NULL;
2388                         break;
2389                 default:
2390                         usage_with_options(blame_opt_usage, options);
2391                 }
2392         } else {
2393                 if (argc < 2)
2394                         usage_with_options(blame_opt_usage, options);
2395                 path = add_prefix(prefix, argv[argc - 1]);
2396                 if (argc == 3 && !has_string_in_work_tree(path)) { /* (2b) */
2397                         path = add_prefix(prefix, argv[1]);
2398                         argv[1] = argv[2];
2399                 }
2400                 argv[argc - 1] = "--";
2401
2402                 setup_work_tree();
2403                 if (!has_string_in_work_tree(path))
2404                         die_errno("cannot stat path '%s'", path);
2405         }
2406
2407         revs.disable_stdin = 1;
2408         setup_revisions(argc, argv, &revs, NULL);
2409         memset(&sb, 0, sizeof(sb));
2410
2411         sb.revs = &revs;
2412         if (!reverse)
2413                 final_commit_name = prepare_final(&sb);
2414         else if (contents_from)
2415                 die("--contents and --children do not blend well.");
2416         else
2417                 final_commit_name = prepare_initial(&sb);
2418
2419         if (!sb.final) {
2420                 /*
2421                  * "--not A B -- path" without anything positive;
2422                  * do not default to HEAD, but use the working tree
2423                  * or "--contents".
2424                  */
2425                 setup_work_tree();
2426                 sb.final = fake_working_tree_commit(&sb.revs->diffopt,
2427                                                     path, contents_from);
2428                 add_pending_object(&revs, &(sb.final->object), ":");
2429         }
2430         else if (contents_from)
2431                 die("Cannot use --contents with final commit object name");
2432
2433         /*
2434          * If we have bottom, this will mark the ancestors of the
2435          * bottom commits we would reach while traversing as
2436          * uninteresting.
2437          */
2438         if (prepare_revision_walk(&revs))
2439                 die("revision walk setup failed");
2440
2441         if (is_null_sha1(sb.final->object.sha1)) {
2442                 char *buf;
2443                 o = sb.final->util;
2444                 buf = xmalloc(o->file.size + 1);
2445                 memcpy(buf, o->file.ptr, o->file.size + 1);
2446                 sb.final_buf = buf;
2447                 sb.final_buf_size = o->file.size;
2448         }
2449         else {
2450                 o = get_origin(&sb, sb.final, path);
2451                 if (fill_blob_sha1_and_mode(o))
2452                         die("no such path %s in %s", path, final_commit_name);
2453
2454                 if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) &&
2455                     textconv_object(path, o->mode, o->blob_sha1, 1, (char **) &sb.final_buf,
2456                                     &sb.final_buf_size))
2457                         ;
2458                 else
2459                         sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2460                                                       &sb.final_buf_size);
2461
2462                 if (!sb.final_buf)
2463                         die("Cannot read blob %s for path %s",
2464                             sha1_to_hex(o->blob_sha1),
2465                             path);
2466         }
2467         num_read_blob++;
2468         lno = prepare_lines(&sb);
2469
2470         if (lno && !range_list.nr)
2471                 string_list_append(&range_list, xstrdup("1"));
2472
2473         anchor = 1;
2474         range_set_init(&ranges, range_list.nr);
2475         for (range_i = 0; range_i < range_list.nr; ++range_i) {
2476                 long bottom, top;
2477                 if (parse_range_arg(range_list.items[range_i].string,
2478                                     nth_line_cb, &sb, lno, anchor,
2479                                     &bottom, &top, sb.path))
2480                         usage(blame_usage);
2481                 if (lno < top || ((lno || bottom) && lno < bottom))
2482                         die("file %s has only %lu lines", path, lno);
2483                 if (bottom < 1)
2484                         bottom = 1;
2485                 if (top < 1)
2486                         top = lno;
2487                 bottom--;
2488                 range_set_append_unsafe(&ranges, bottom, top);
2489                 anchor = top + 1;
2490         }
2491         sort_and_merge_range_set(&ranges);
2492
2493         for (range_i = ranges.nr; range_i > 0; --range_i) {
2494                 const struct range *r = &ranges.ranges[range_i - 1];
2495                 long bottom = r->start;
2496                 long top = r->end;
2497                 struct blame_entry *next = ent;
2498                 ent = xcalloc(1, sizeof(*ent));
2499                 ent->lno = bottom;
2500                 ent->num_lines = top - bottom;
2501                 ent->suspect = o;
2502                 ent->s_lno = bottom;
2503                 ent->next = next;
2504                 if (next)
2505                         next->prev = ent;
2506                 origin_incref(o);
2507         }
2508         origin_decref(o);
2509
2510         range_set_release(&ranges);
2511         string_list_clear(&range_list, 0);
2512
2513         sb.ent = ent;
2514         sb.path = path;
2515
2516         read_mailmap(&mailmap, NULL);
2517
2518         if (!incremental)
2519                 setup_pager();
2520
2521         assign_blame(&sb, opt);
2522
2523         if (incremental)
2524                 return 0;
2525
2526         coalesce(&sb);
2527
2528         if (!(output_option & OUTPUT_PORCELAIN))
2529                 find_alignment(&sb, &output_option);
2530
2531         output(&sb, output_option);
2532         free((void *)sb.final_buf);
2533         for (ent = sb.ent; ent; ) {
2534                 struct blame_entry *e = ent->next;
2535                 free(ent);
2536                 ent = e;
2537         }
2538
2539         if (show_stats) {
2540                 printf("num read blob: %d\n", num_read_blob);
2541                 printf("num get patch: %d\n", num_get_patch);
2542                 printf("num commits: %d\n", num_commits);
2543         }
2544         return 0;
2545 }