Merge branch 'jc/maint-rev-list-topo-doc'
[git] / builtin / blame.c
1 /*
2  * Blame
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19 #include "string-list.h"
20 #include "mailmap.h"
21 #include "parse-options.h"
22 #include "utf8.h"
23 #include "userdiff.h"
24
25 static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file";
26
27 static const char *blame_opt_usage[] = {
28         blame_usage,
29         "",
30         "[rev-opts] are documented in git-rev-list(1)",
31         NULL
32 };
33
34 static int longest_file;
35 static int longest_author;
36 static int max_orig_digits;
37 static int max_digits;
38 static int max_score_digits;
39 static int show_root;
40 static int reverse;
41 static int blank_boundary;
42 static int incremental;
43 static int xdl_opts;
44 static int abbrev = -1;
45
46 static enum date_mode blame_date_mode = DATE_ISO8601;
47 static size_t blame_date_width;
48
49 static struct string_list mailmap;
50
51 #ifndef DEBUG
52 #define DEBUG 0
53 #endif
54
55 /* stats */
56 static int num_read_blob;
57 static int num_get_patch;
58 static int num_commits;
59
60 #define PICKAXE_BLAME_MOVE              01
61 #define PICKAXE_BLAME_COPY              02
62 #define PICKAXE_BLAME_COPY_HARDER       04
63 #define PICKAXE_BLAME_COPY_HARDEST      010
64
65 /*
66  * blame for a blame_entry with score lower than these thresholds
67  * is not passed to the parent using move/copy logic.
68  */
69 static unsigned blame_move_score;
70 static unsigned blame_copy_score;
71 #define BLAME_DEFAULT_MOVE_SCORE        20
72 #define BLAME_DEFAULT_COPY_SCORE        40
73
74 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
75 #define METAINFO_SHOWN          (1u<<12)
76 #define MORE_THAN_ONE_PATH      (1u<<13)
77
78 /*
79  * One blob in a commit that is being suspected
80  */
81 struct origin {
82         int refcnt;
83         struct origin *previous;
84         struct commit *commit;
85         mmfile_t file;
86         unsigned char blob_sha1[20];
87         unsigned mode;
88         char path[FLEX_ARRAY];
89 };
90
91 static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, long ctxlen,
92                       xdl_emit_hunk_consume_func_t hunk_func, void *cb_data)
93 {
94         xpparam_t xpp = {0};
95         xdemitconf_t xecfg = {0};
96         xdemitcb_t ecb = {NULL};
97
98         xpp.flags = xdl_opts;
99         xecfg.ctxlen = ctxlen;
100         xecfg.hunk_func = hunk_func;
101         ecb.priv = cb_data;
102         return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
103 }
104
105 /*
106  * Prepare diff_filespec and convert it using diff textconv API
107  * if the textconv driver exists.
108  * Return 1 if the conversion succeeds, 0 otherwise.
109  */
110 int textconv_object(const char *path,
111                     unsigned mode,
112                     const unsigned char *sha1,
113                     char **buf,
114                     unsigned long *buf_size)
115 {
116         struct diff_filespec *df;
117         struct userdiff_driver *textconv;
118
119         df = alloc_filespec(path);
120         fill_filespec(df, sha1, mode);
121         textconv = get_textconv(df);
122         if (!textconv) {
123                 free_filespec(df);
124                 return 0;
125         }
126
127         *buf_size = fill_textconv(textconv, df, buf);
128         free_filespec(df);
129         return 1;
130 }
131
132 /*
133  * Given an origin, prepare mmfile_t structure to be used by the
134  * diff machinery
135  */
136 static void fill_origin_blob(struct diff_options *opt,
137                              struct origin *o, mmfile_t *file)
138 {
139         if (!o->file.ptr) {
140                 enum object_type type;
141                 unsigned long file_size;
142
143                 num_read_blob++;
144                 if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
145                     textconv_object(o->path, o->mode, o->blob_sha1, &file->ptr, &file_size))
146                         ;
147                 else
148                         file->ptr = read_sha1_file(o->blob_sha1, &type, &file_size);
149                 file->size = file_size;
150
151                 if (!file->ptr)
152                         die("Cannot read blob %s for path %s",
153                             sha1_to_hex(o->blob_sha1),
154                             o->path);
155                 o->file = *file;
156         }
157         else
158                 *file = o->file;
159 }
160
161 /*
162  * Origin is refcounted and usually we keep the blob contents to be
163  * reused.
164  */
165 static inline struct origin *origin_incref(struct origin *o)
166 {
167         if (o)
168                 o->refcnt++;
169         return o;
170 }
171
172 static void origin_decref(struct origin *o)
173 {
174         if (o && --o->refcnt <= 0) {
175                 if (o->previous)
176                         origin_decref(o->previous);
177                 free(o->file.ptr);
178                 free(o);
179         }
180 }
181
182 static void drop_origin_blob(struct origin *o)
183 {
184         if (o->file.ptr) {
185                 free(o->file.ptr);
186                 o->file.ptr = NULL;
187         }
188 }
189
190 /*
191  * Each group of lines is described by a blame_entry; it can be split
192  * as we pass blame to the parents.  They form a linked list in the
193  * scoreboard structure, sorted by the target line number.
194  */
195 struct blame_entry {
196         struct blame_entry *prev;
197         struct blame_entry *next;
198
199         /* the first line of this group in the final image;
200          * internally all line numbers are 0 based.
201          */
202         int lno;
203
204         /* how many lines this group has */
205         int num_lines;
206
207         /* the commit that introduced this group into the final image */
208         struct origin *suspect;
209
210         /* true if the suspect is truly guilty; false while we have not
211          * checked if the group came from one of its parents.
212          */
213         char guilty;
214
215         /* true if the entry has been scanned for copies in the current parent
216          */
217         char scanned;
218
219         /* the line number of the first line of this group in the
220          * suspect's file; internally all line numbers are 0 based.
221          */
222         int s_lno;
223
224         /* how significant this entry is -- cached to avoid
225          * scanning the lines over and over.
226          */
227         unsigned score;
228 };
229
230 /*
231  * The current state of the blame assignment.
232  */
233 struct scoreboard {
234         /* the final commit (i.e. where we started digging from) */
235         struct commit *final;
236         struct rev_info *revs;
237         const char *path;
238
239         /*
240          * The contents in the final image.
241          * Used by many functions to obtain contents of the nth line,
242          * indexed with scoreboard.lineno[blame_entry.lno].
243          */
244         const char *final_buf;
245         unsigned long final_buf_size;
246
247         /* linked list of blames */
248         struct blame_entry *ent;
249
250         /* look-up a line in the final buffer */
251         int num_lines;
252         int *lineno;
253 };
254
255 static inline int same_suspect(struct origin *a, struct origin *b)
256 {
257         if (a == b)
258                 return 1;
259         if (a->commit != b->commit)
260                 return 0;
261         return !strcmp(a->path, b->path);
262 }
263
264 static void sanity_check_refcnt(struct scoreboard *);
265
266 /*
267  * If two blame entries that are next to each other came from
268  * contiguous lines in the same origin (i.e. <commit, path> pair),
269  * merge them together.
270  */
271 static void coalesce(struct scoreboard *sb)
272 {
273         struct blame_entry *ent, *next;
274
275         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
276                 if (same_suspect(ent->suspect, next->suspect) &&
277                     ent->guilty == next->guilty &&
278                     ent->s_lno + ent->num_lines == next->s_lno) {
279                         ent->num_lines += next->num_lines;
280                         ent->next = next->next;
281                         if (ent->next)
282                                 ent->next->prev = ent;
283                         origin_decref(next->suspect);
284                         free(next);
285                         ent->score = 0;
286                         next = ent; /* again */
287                 }
288         }
289
290         if (DEBUG) /* sanity */
291                 sanity_check_refcnt(sb);
292 }
293
294 /*
295  * Given a commit and a path in it, create a new origin structure.
296  * The callers that add blame to the scoreboard should use
297  * get_origin() to obtain shared, refcounted copy instead of calling
298  * this function directly.
299  */
300 static struct origin *make_origin(struct commit *commit, const char *path)
301 {
302         struct origin *o;
303         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
304         o->commit = commit;
305         o->refcnt = 1;
306         strcpy(o->path, path);
307         return o;
308 }
309
310 /*
311  * Locate an existing origin or create a new one.
312  */
313 static struct origin *get_origin(struct scoreboard *sb,
314                                  struct commit *commit,
315                                  const char *path)
316 {
317         struct blame_entry *e;
318
319         for (e = sb->ent; e; e = e->next) {
320                 if (e->suspect->commit == commit &&
321                     !strcmp(e->suspect->path, path))
322                         return origin_incref(e->suspect);
323         }
324         return make_origin(commit, path);
325 }
326
327 /*
328  * Fill the blob_sha1 field of an origin if it hasn't, so that later
329  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
330  * for an origin is also used to pass the blame for the entire file to
331  * the parent to detect the case where a child's blob is identical to
332  * that of its parent's.
333  *
334  * This also fills origin->mode for corresponding tree path.
335  */
336 static int fill_blob_sha1_and_mode(struct origin *origin)
337 {
338         if (!is_null_sha1(origin->blob_sha1))
339                 return 0;
340         if (get_tree_entry(origin->commit->object.sha1,
341                            origin->path,
342                            origin->blob_sha1, &origin->mode))
343                 goto error_out;
344         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
345                 goto error_out;
346         return 0;
347  error_out:
348         hashclr(origin->blob_sha1);
349         origin->mode = S_IFINVALID;
350         return -1;
351 }
352
353 /*
354  * We have an origin -- check if the same path exists in the
355  * parent and return an origin structure to represent it.
356  */
357 static struct origin *find_origin(struct scoreboard *sb,
358                                   struct commit *parent,
359                                   struct origin *origin)
360 {
361         struct origin *porigin = NULL;
362         struct diff_options diff_opts;
363         const char *paths[2];
364
365         if (parent->util) {
366                 /*
367                  * Each commit object can cache one origin in that
368                  * commit.  This is a freestanding copy of origin and
369                  * not refcounted.
370                  */
371                 struct origin *cached = parent->util;
372                 if (!strcmp(cached->path, origin->path)) {
373                         /*
374                          * The same path between origin and its parent
375                          * without renaming -- the most common case.
376                          */
377                         porigin = get_origin(sb, parent, cached->path);
378
379                         /*
380                          * If the origin was newly created (i.e. get_origin
381                          * would call make_origin if none is found in the
382                          * scoreboard), it does not know the blob_sha1/mode,
383                          * so copy it.  Otherwise porigin was in the
384                          * scoreboard and already knows blob_sha1/mode.
385                          */
386                         if (porigin->refcnt == 1) {
387                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
388                                 porigin->mode = cached->mode;
389                         }
390                         return porigin;
391                 }
392                 /* otherwise it was not very useful; free it */
393                 free(parent->util);
394                 parent->util = NULL;
395         }
396
397         /* See if the origin->path is different between parent
398          * and origin first.  Most of the time they are the
399          * same and diff-tree is fairly efficient about this.
400          */
401         diff_setup(&diff_opts);
402         DIFF_OPT_SET(&diff_opts, RECURSIVE);
403         diff_opts.detect_rename = 0;
404         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
405         paths[0] = origin->path;
406         paths[1] = NULL;
407
408         diff_tree_setup_paths(paths, &diff_opts);
409         diff_setup_done(&diff_opts);
410
411         if (is_null_sha1(origin->commit->object.sha1))
412                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
413         else
414                 diff_tree_sha1(parent->tree->object.sha1,
415                                origin->commit->tree->object.sha1,
416                                "", &diff_opts);
417         diffcore_std(&diff_opts);
418
419         if (!diff_queued_diff.nr) {
420                 /* The path is the same as parent */
421                 porigin = get_origin(sb, parent, origin->path);
422                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
423                 porigin->mode = origin->mode;
424         } else {
425                 /*
426                  * Since origin->path is a pathspec, if the parent
427                  * commit had it as a directory, we will see a whole
428                  * bunch of deletion of files in the directory that we
429                  * do not care about.
430                  */
431                 int i;
432                 struct diff_filepair *p = NULL;
433                 for (i = 0; i < diff_queued_diff.nr; i++) {
434                         const char *name;
435                         p = diff_queued_diff.queue[i];
436                         name = p->one->path ? p->one->path : p->two->path;
437                         if (!strcmp(name, origin->path))
438                                 break;
439                 }
440                 if (!p)
441                         die("internal error in blame::find_origin");
442                 switch (p->status) {
443                 default:
444                         die("internal error in blame::find_origin (%c)",
445                             p->status);
446                 case 'M':
447                         porigin = get_origin(sb, parent, origin->path);
448                         hashcpy(porigin->blob_sha1, p->one->sha1);
449                         porigin->mode = p->one->mode;
450                         break;
451                 case 'A':
452                 case 'T':
453                         /* Did not exist in parent, or type changed */
454                         break;
455                 }
456         }
457         diff_flush(&diff_opts);
458         diff_tree_release_paths(&diff_opts);
459         if (porigin) {
460                 /*
461                  * Create a freestanding copy that is not part of
462                  * the refcounted origin found in the scoreboard, and
463                  * cache it in the commit.
464                  */
465                 struct origin *cached;
466
467                 cached = make_origin(porigin->commit, porigin->path);
468                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
469                 cached->mode = porigin->mode;
470                 parent->util = cached;
471         }
472         return porigin;
473 }
474
475 /*
476  * We have an origin -- find the path that corresponds to it in its
477  * parent and return an origin structure to represent it.
478  */
479 static struct origin *find_rename(struct scoreboard *sb,
480                                   struct commit *parent,
481                                   struct origin *origin)
482 {
483         struct origin *porigin = NULL;
484         struct diff_options diff_opts;
485         int i;
486         const char *paths[2];
487
488         diff_setup(&diff_opts);
489         DIFF_OPT_SET(&diff_opts, RECURSIVE);
490         diff_opts.detect_rename = DIFF_DETECT_RENAME;
491         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
492         diff_opts.single_follow = origin->path;
493         paths[0] = NULL;
494         diff_tree_setup_paths(paths, &diff_opts);
495         diff_setup_done(&diff_opts);
496
497         if (is_null_sha1(origin->commit->object.sha1))
498                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
499         else
500                 diff_tree_sha1(parent->tree->object.sha1,
501                                origin->commit->tree->object.sha1,
502                                "", &diff_opts);
503         diffcore_std(&diff_opts);
504
505         for (i = 0; i < diff_queued_diff.nr; i++) {
506                 struct diff_filepair *p = diff_queued_diff.queue[i];
507                 if ((p->status == 'R' || p->status == 'C') &&
508                     !strcmp(p->two->path, origin->path)) {
509                         porigin = get_origin(sb, parent, p->one->path);
510                         hashcpy(porigin->blob_sha1, p->one->sha1);
511                         porigin->mode = p->one->mode;
512                         break;
513                 }
514         }
515         diff_flush(&diff_opts);
516         diff_tree_release_paths(&diff_opts);
517         return porigin;
518 }
519
520 /*
521  * Link in a new blame entry to the scoreboard.  Entries that cover the
522  * same line range have been removed from the scoreboard previously.
523  */
524 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
525 {
526         struct blame_entry *ent, *prev = NULL;
527
528         origin_incref(e->suspect);
529
530         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
531                 prev = ent;
532
533         /* prev, if not NULL, is the last one that is below e */
534         e->prev = prev;
535         if (prev) {
536                 e->next = prev->next;
537                 prev->next = e;
538         }
539         else {
540                 e->next = sb->ent;
541                 sb->ent = e;
542         }
543         if (e->next)
544                 e->next->prev = e;
545 }
546
547 /*
548  * src typically is on-stack; we want to copy the information in it to
549  * a malloced blame_entry that is already on the linked list of the
550  * scoreboard.  The origin of dst loses a refcnt while the origin of src
551  * gains one.
552  */
553 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
554 {
555         struct blame_entry *p, *n;
556
557         p = dst->prev;
558         n = dst->next;
559         origin_incref(src->suspect);
560         origin_decref(dst->suspect);
561         memcpy(dst, src, sizeof(*src));
562         dst->prev = p;
563         dst->next = n;
564         dst->score = 0;
565 }
566
567 static const char *nth_line(struct scoreboard *sb, int lno)
568 {
569         return sb->final_buf + sb->lineno[lno];
570 }
571
572 /*
573  * It is known that lines between tlno to same came from parent, and e
574  * has an overlap with that range.  it also is known that parent's
575  * line plno corresponds to e's line tlno.
576  *
577  *                <---- e ----->
578  *                   <------>
579  *                   <------------>
580  *             <------------>
581  *             <------------------>
582  *
583  * Split e into potentially three parts; before this chunk, the chunk
584  * to be blamed for the parent, and after that portion.
585  */
586 static void split_overlap(struct blame_entry *split,
587                           struct blame_entry *e,
588                           int tlno, int plno, int same,
589                           struct origin *parent)
590 {
591         int chunk_end_lno;
592         memset(split, 0, sizeof(struct blame_entry [3]));
593
594         if (e->s_lno < tlno) {
595                 /* there is a pre-chunk part not blamed on parent */
596                 split[0].suspect = origin_incref(e->suspect);
597                 split[0].lno = e->lno;
598                 split[0].s_lno = e->s_lno;
599                 split[0].num_lines = tlno - e->s_lno;
600                 split[1].lno = e->lno + tlno - e->s_lno;
601                 split[1].s_lno = plno;
602         }
603         else {
604                 split[1].lno = e->lno;
605                 split[1].s_lno = plno + (e->s_lno - tlno);
606         }
607
608         if (same < e->s_lno + e->num_lines) {
609                 /* there is a post-chunk part not blamed on parent */
610                 split[2].suspect = origin_incref(e->suspect);
611                 split[2].lno = e->lno + (same - e->s_lno);
612                 split[2].s_lno = e->s_lno + (same - e->s_lno);
613                 split[2].num_lines = e->s_lno + e->num_lines - same;
614                 chunk_end_lno = split[2].lno;
615         }
616         else
617                 chunk_end_lno = e->lno + e->num_lines;
618         split[1].num_lines = chunk_end_lno - split[1].lno;
619
620         /*
621          * if it turns out there is nothing to blame the parent for,
622          * forget about the splitting.  !split[1].suspect signals this.
623          */
624         if (split[1].num_lines < 1)
625                 return;
626         split[1].suspect = origin_incref(parent);
627 }
628
629 /*
630  * split_overlap() divided an existing blame e into up to three parts
631  * in split.  Adjust the linked list of blames in the scoreboard to
632  * reflect the split.
633  */
634 static void split_blame(struct scoreboard *sb,
635                         struct blame_entry *split,
636                         struct blame_entry *e)
637 {
638         struct blame_entry *new_entry;
639
640         if (split[0].suspect && split[2].suspect) {
641                 /* The first part (reuse storage for the existing entry e) */
642                 dup_entry(e, &split[0]);
643
644                 /* The last part -- me */
645                 new_entry = xmalloc(sizeof(*new_entry));
646                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
647                 add_blame_entry(sb, new_entry);
648
649                 /* ... and the middle part -- parent */
650                 new_entry = xmalloc(sizeof(*new_entry));
651                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
652                 add_blame_entry(sb, new_entry);
653         }
654         else if (!split[0].suspect && !split[2].suspect)
655                 /*
656                  * The parent covers the entire area; reuse storage for
657                  * e and replace it with the parent.
658                  */
659                 dup_entry(e, &split[1]);
660         else if (split[0].suspect) {
661                 /* me and then parent */
662                 dup_entry(e, &split[0]);
663
664                 new_entry = xmalloc(sizeof(*new_entry));
665                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
666                 add_blame_entry(sb, new_entry);
667         }
668         else {
669                 /* parent and then me */
670                 dup_entry(e, &split[1]);
671
672                 new_entry = xmalloc(sizeof(*new_entry));
673                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
674                 add_blame_entry(sb, new_entry);
675         }
676
677         if (DEBUG) { /* sanity */
678                 struct blame_entry *ent;
679                 int lno = sb->ent->lno, corrupt = 0;
680
681                 for (ent = sb->ent; ent; ent = ent->next) {
682                         if (lno != ent->lno)
683                                 corrupt = 1;
684                         if (ent->s_lno < 0)
685                                 corrupt = 1;
686                         lno += ent->num_lines;
687                 }
688                 if (corrupt) {
689                         lno = sb->ent->lno;
690                         for (ent = sb->ent; ent; ent = ent->next) {
691                                 printf("L %8d l %8d n %8d\n",
692                                        lno, ent->lno, ent->num_lines);
693                                 lno = ent->lno + ent->num_lines;
694                         }
695                         die("oops");
696                 }
697         }
698 }
699
700 /*
701  * After splitting the blame, the origins used by the
702  * on-stack blame_entry should lose one refcnt each.
703  */
704 static void decref_split(struct blame_entry *split)
705 {
706         int i;
707
708         for (i = 0; i < 3; i++)
709                 origin_decref(split[i].suspect);
710 }
711
712 /*
713  * Helper for blame_chunk().  blame_entry e is known to overlap with
714  * the patch hunk; split it and pass blame to the parent.
715  */
716 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
717                           int tlno, int plno, int same,
718                           struct origin *parent)
719 {
720         struct blame_entry split[3];
721
722         split_overlap(split, e, tlno, plno, same, parent);
723         if (split[1].suspect)
724                 split_blame(sb, split, e);
725         decref_split(split);
726 }
727
728 /*
729  * Find the line number of the last line the target is suspected for.
730  */
731 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
732 {
733         struct blame_entry *e;
734         int last_in_target = -1;
735
736         for (e = sb->ent; e; e = e->next) {
737                 if (e->guilty || !same_suspect(e->suspect, target))
738                         continue;
739                 if (last_in_target < e->s_lno + e->num_lines)
740                         last_in_target = e->s_lno + e->num_lines;
741         }
742         return last_in_target;
743 }
744
745 /*
746  * Process one hunk from the patch between the current suspect for
747  * blame_entry e and its parent.  Find and split the overlap, and
748  * pass blame to the overlapping part to the parent.
749  */
750 static void blame_chunk(struct scoreboard *sb,
751                         int tlno, int plno, int same,
752                         struct origin *target, struct origin *parent)
753 {
754         struct blame_entry *e;
755
756         for (e = sb->ent; e; e = e->next) {
757                 if (e->guilty || !same_suspect(e->suspect, target))
758                         continue;
759                 if (same <= e->s_lno)
760                         continue;
761                 if (tlno < e->s_lno + e->num_lines)
762                         blame_overlap(sb, e, tlno, plno, same, parent);
763         }
764 }
765
766 struct blame_chunk_cb_data {
767         struct scoreboard *sb;
768         struct origin *target;
769         struct origin *parent;
770         long plno;
771         long tlno;
772 };
773
774 static int blame_chunk_cb(long start_a, long count_a,
775                           long start_b, long count_b, void *data)
776 {
777         struct blame_chunk_cb_data *d = data;
778         blame_chunk(d->sb, d->tlno, d->plno, start_b, d->target, d->parent);
779         d->plno = start_a + count_a;
780         d->tlno = start_b + count_b;
781         return 0;
782 }
783
784 /*
785  * We are looking at the origin 'target' and aiming to pass blame
786  * for the lines it is suspected to its parent.  Run diff to find
787  * which lines came from parent and pass blame for them.
788  */
789 static int pass_blame_to_parent(struct scoreboard *sb,
790                                 struct origin *target,
791                                 struct origin *parent)
792 {
793         int last_in_target;
794         mmfile_t file_p, file_o;
795         struct blame_chunk_cb_data d;
796
797         memset(&d, 0, sizeof(d));
798         d.sb = sb; d.target = target; d.parent = parent;
799         last_in_target = find_last_in_target(sb, target);
800         if (last_in_target < 0)
801                 return 1; /* nothing remains for this target */
802
803         fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
804         fill_origin_blob(&sb->revs->diffopt, target, &file_o);
805         num_get_patch++;
806
807         diff_hunks(&file_p, &file_o, 0, blame_chunk_cb, &d);
808         /* The rest (i.e. anything after tlno) are the same as the parent */
809         blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent);
810
811         return 0;
812 }
813
814 /*
815  * The lines in blame_entry after splitting blames many times can become
816  * very small and trivial, and at some point it becomes pointless to
817  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
818  * ordinary C program, and it is not worth to say it was copied from
819  * totally unrelated file in the parent.
820  *
821  * Compute how trivial the lines in the blame_entry are.
822  */
823 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
824 {
825         unsigned score;
826         const char *cp, *ep;
827
828         if (e->score)
829                 return e->score;
830
831         score = 1;
832         cp = nth_line(sb, e->lno);
833         ep = nth_line(sb, e->lno + e->num_lines);
834         while (cp < ep) {
835                 unsigned ch = *((unsigned char *)cp);
836                 if (isalnum(ch))
837                         score++;
838                 cp++;
839         }
840         e->score = score;
841         return score;
842 }
843
844 /*
845  * best_so_far[] and this[] are both a split of an existing blame_entry
846  * that passes blame to the parent.  Maintain best_so_far the best split
847  * so far, by comparing this and best_so_far and copying this into
848  * bst_so_far as needed.
849  */
850 static void copy_split_if_better(struct scoreboard *sb,
851                                  struct blame_entry *best_so_far,
852                                  struct blame_entry *this)
853 {
854         int i;
855
856         if (!this[1].suspect)
857                 return;
858         if (best_so_far[1].suspect) {
859                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
860                         return;
861         }
862
863         for (i = 0; i < 3; i++)
864                 origin_incref(this[i].suspect);
865         decref_split(best_so_far);
866         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
867 }
868
869 /*
870  * We are looking at a part of the final image represented by
871  * ent (tlno and same are offset by ent->s_lno).
872  * tlno is where we are looking at in the final image.
873  * up to (but not including) same match preimage.
874  * plno is where we are looking at in the preimage.
875  *
876  * <-------------- final image ---------------------->
877  *       <------ent------>
878  *         ^tlno ^same
879  *    <---------preimage----->
880  *         ^plno
881  *
882  * All line numbers are 0-based.
883  */
884 static void handle_split(struct scoreboard *sb,
885                          struct blame_entry *ent,
886                          int tlno, int plno, int same,
887                          struct origin *parent,
888                          struct blame_entry *split)
889 {
890         if (ent->num_lines <= tlno)
891                 return;
892         if (tlno < same) {
893                 struct blame_entry this[3];
894                 tlno += ent->s_lno;
895                 same += ent->s_lno;
896                 split_overlap(this, ent, tlno, plno, same, parent);
897                 copy_split_if_better(sb, split, this);
898                 decref_split(this);
899         }
900 }
901
902 struct handle_split_cb_data {
903         struct scoreboard *sb;
904         struct blame_entry *ent;
905         struct origin *parent;
906         struct blame_entry *split;
907         long plno;
908         long tlno;
909 };
910
911 static int handle_split_cb(long start_a, long count_a,
912                            long start_b, long count_b, void *data)
913 {
914         struct handle_split_cb_data *d = data;
915         handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent,
916                      d->split);
917         d->plno = start_a + count_a;
918         d->tlno = start_b + count_b;
919         return 0;
920 }
921
922 /*
923  * Find the lines from parent that are the same as ent so that
924  * we can pass blames to it.  file_p has the blob contents for
925  * the parent.
926  */
927 static void find_copy_in_blob(struct scoreboard *sb,
928                               struct blame_entry *ent,
929                               struct origin *parent,
930                               struct blame_entry *split,
931                               mmfile_t *file_p)
932 {
933         const char *cp;
934         int cnt;
935         mmfile_t file_o;
936         struct handle_split_cb_data d;
937
938         memset(&d, 0, sizeof(d));
939         d.sb = sb; d.ent = ent; d.parent = parent; d.split = split;
940         /*
941          * Prepare mmfile that contains only the lines in ent.
942          */
943         cp = nth_line(sb, ent->lno);
944         file_o.ptr = (char *) cp;
945         cnt = ent->num_lines;
946
947         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
948                 if (*cp++ == '\n')
949                         cnt--;
950         }
951         file_o.size = cp - file_o.ptr;
952
953         /*
954          * file_o is a part of final image we are annotating.
955          * file_p partially may match that image.
956          */
957         memset(split, 0, sizeof(struct blame_entry [3]));
958         diff_hunks(file_p, &file_o, 1, handle_split_cb, &d);
959         /* remainder, if any, all match the preimage */
960         handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split);
961 }
962
963 /*
964  * See if lines currently target is suspected for can be attributed to
965  * parent.
966  */
967 static int find_move_in_parent(struct scoreboard *sb,
968                                struct origin *target,
969                                struct origin *parent)
970 {
971         int last_in_target, made_progress;
972         struct blame_entry *e, split[3];
973         mmfile_t file_p;
974
975         last_in_target = find_last_in_target(sb, target);
976         if (last_in_target < 0)
977                 return 1; /* nothing remains for this target */
978
979         fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
980         if (!file_p.ptr)
981                 return 0;
982
983         made_progress = 1;
984         while (made_progress) {
985                 made_progress = 0;
986                 for (e = sb->ent; e; e = e->next) {
987                         if (e->guilty || !same_suspect(e->suspect, target) ||
988                             ent_score(sb, e) < blame_move_score)
989                                 continue;
990                         find_copy_in_blob(sb, e, parent, split, &file_p);
991                         if (split[1].suspect &&
992                             blame_move_score < ent_score(sb, &split[1])) {
993                                 split_blame(sb, split, e);
994                                 made_progress = 1;
995                         }
996                         decref_split(split);
997                 }
998         }
999         return 0;
1000 }
1001
1002 struct blame_list {
1003         struct blame_entry *ent;
1004         struct blame_entry split[3];
1005 };
1006
1007 /*
1008  * Count the number of entries the target is suspected for,
1009  * and prepare a list of entry and the best split.
1010  */
1011 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1012                                            struct origin *target,
1013                                            int min_score,
1014                                            int *num_ents_p)
1015 {
1016         struct blame_entry *e;
1017         int num_ents, i;
1018         struct blame_list *blame_list = NULL;
1019
1020         for (e = sb->ent, num_ents = 0; e; e = e->next)
1021                 if (!e->scanned && !e->guilty &&
1022                     same_suspect(e->suspect, target) &&
1023                     min_score < ent_score(sb, e))
1024                         num_ents++;
1025         if (num_ents) {
1026                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1027                 for (e = sb->ent, i = 0; e; e = e->next)
1028                         if (!e->scanned && !e->guilty &&
1029                             same_suspect(e->suspect, target) &&
1030                             min_score < ent_score(sb, e))
1031                                 blame_list[i++].ent = e;
1032         }
1033         *num_ents_p = num_ents;
1034         return blame_list;
1035 }
1036
1037 /*
1038  * Reset the scanned status on all entries.
1039  */
1040 static void reset_scanned_flag(struct scoreboard *sb)
1041 {
1042         struct blame_entry *e;
1043         for (e = sb->ent; e; e = e->next)
1044                 e->scanned = 0;
1045 }
1046
1047 /*
1048  * For lines target is suspected for, see if we can find code movement
1049  * across file boundary from the parent commit.  porigin is the path
1050  * in the parent we already tried.
1051  */
1052 static int find_copy_in_parent(struct scoreboard *sb,
1053                                struct origin *target,
1054                                struct commit *parent,
1055                                struct origin *porigin,
1056                                int opt)
1057 {
1058         struct diff_options diff_opts;
1059         const char *paths[1];
1060         int i, j;
1061         int retval;
1062         struct blame_list *blame_list;
1063         int num_ents;
1064
1065         blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents);
1066         if (!blame_list)
1067                 return 1; /* nothing remains for this target */
1068
1069         diff_setup(&diff_opts);
1070         DIFF_OPT_SET(&diff_opts, RECURSIVE);
1071         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1072
1073         paths[0] = NULL;
1074         diff_tree_setup_paths(paths, &diff_opts);
1075         diff_setup_done(&diff_opts);
1076
1077         /* Try "find copies harder" on new path if requested;
1078          * we do not want to use diffcore_rename() actually to
1079          * match things up; find_copies_harder is set only to
1080          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1081          * and this code needs to be after diff_setup_done(), which
1082          * usually makes find-copies-harder imply copy detection.
1083          */
1084         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1085             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1086                 && (!porigin || strcmp(target->path, porigin->path))))
1087                 DIFF_OPT_SET(&diff_opts, FIND_COPIES_HARDER);
1088
1089         if (is_null_sha1(target->commit->object.sha1))
1090                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1091         else
1092                 diff_tree_sha1(parent->tree->object.sha1,
1093                                target->commit->tree->object.sha1,
1094                                "", &diff_opts);
1095
1096         if (!DIFF_OPT_TST(&diff_opts, FIND_COPIES_HARDER))
1097                 diffcore_std(&diff_opts);
1098
1099         retval = 0;
1100         while (1) {
1101                 int made_progress = 0;
1102
1103                 for (i = 0; i < diff_queued_diff.nr; i++) {
1104                         struct diff_filepair *p = diff_queued_diff.queue[i];
1105                         struct origin *norigin;
1106                         mmfile_t file_p;
1107                         struct blame_entry this[3];
1108
1109                         if (!DIFF_FILE_VALID(p->one))
1110                                 continue; /* does not exist in parent */
1111                         if (S_ISGITLINK(p->one->mode))
1112                                 continue; /* ignore git links */
1113                         if (porigin && !strcmp(p->one->path, porigin->path))
1114                                 /* find_move already dealt with this path */
1115                                 continue;
1116
1117                         norigin = get_origin(sb, parent, p->one->path);
1118                         hashcpy(norigin->blob_sha1, p->one->sha1);
1119                         norigin->mode = p->one->mode;
1120                         fill_origin_blob(&sb->revs->diffopt, norigin, &file_p);
1121                         if (!file_p.ptr)
1122                                 continue;
1123
1124                         for (j = 0; j < num_ents; j++) {
1125                                 find_copy_in_blob(sb, blame_list[j].ent,
1126                                                   norigin, this, &file_p);
1127                                 copy_split_if_better(sb, blame_list[j].split,
1128                                                      this);
1129                                 decref_split(this);
1130                         }
1131                         origin_decref(norigin);
1132                 }
1133
1134                 for (j = 0; j < num_ents; j++) {
1135                         struct blame_entry *split = blame_list[j].split;
1136                         if (split[1].suspect &&
1137                             blame_copy_score < ent_score(sb, &split[1])) {
1138                                 split_blame(sb, split, blame_list[j].ent);
1139                                 made_progress = 1;
1140                         }
1141                         else
1142                                 blame_list[j].ent->scanned = 1;
1143                         decref_split(split);
1144                 }
1145                 free(blame_list);
1146
1147                 if (!made_progress)
1148                         break;
1149                 blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents);
1150                 if (!blame_list) {
1151                         retval = 1;
1152                         break;
1153                 }
1154         }
1155         reset_scanned_flag(sb);
1156         diff_flush(&diff_opts);
1157         diff_tree_release_paths(&diff_opts);
1158         return retval;
1159 }
1160
1161 /*
1162  * The blobs of origin and porigin exactly match, so everything
1163  * origin is suspected for can be blamed on the parent.
1164  */
1165 static void pass_whole_blame(struct scoreboard *sb,
1166                              struct origin *origin, struct origin *porigin)
1167 {
1168         struct blame_entry *e;
1169
1170         if (!porigin->file.ptr && origin->file.ptr) {
1171                 /* Steal its file */
1172                 porigin->file = origin->file;
1173                 origin->file.ptr = NULL;
1174         }
1175         for (e = sb->ent; e; e = e->next) {
1176                 if (!same_suspect(e->suspect, origin))
1177                         continue;
1178                 origin_incref(porigin);
1179                 origin_decref(e->suspect);
1180                 e->suspect = porigin;
1181         }
1182 }
1183
1184 /*
1185  * We pass blame from the current commit to its parents.  We keep saying
1186  * "parent" (and "porigin"), but what we mean is to find scapegoat to
1187  * exonerate ourselves.
1188  */
1189 static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit *commit)
1190 {
1191         if (!reverse)
1192                 return commit->parents;
1193         return lookup_decoration(&revs->children, &commit->object);
1194 }
1195
1196 static int num_scapegoats(struct rev_info *revs, struct commit *commit)
1197 {
1198         int cnt;
1199         struct commit_list *l = first_scapegoat(revs, commit);
1200         for (cnt = 0; l; l = l->next)
1201                 cnt++;
1202         return cnt;
1203 }
1204
1205 #define MAXSG 16
1206
1207 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1208 {
1209         struct rev_info *revs = sb->revs;
1210         int i, pass, num_sg;
1211         struct commit *commit = origin->commit;
1212         struct commit_list *sg;
1213         struct origin *sg_buf[MAXSG];
1214         struct origin *porigin, **sg_origin = sg_buf;
1215
1216         num_sg = num_scapegoats(revs, commit);
1217         if (!num_sg)
1218                 goto finish;
1219         else if (num_sg < ARRAY_SIZE(sg_buf))
1220                 memset(sg_buf, 0, sizeof(sg_buf));
1221         else
1222                 sg_origin = xcalloc(num_sg, sizeof(*sg_origin));
1223
1224         /*
1225          * The first pass looks for unrenamed path to optimize for
1226          * common cases, then we look for renames in the second pass.
1227          */
1228         for (pass = 0; pass < 2; pass++) {
1229                 struct origin *(*find)(struct scoreboard *,
1230                                        struct commit *, struct origin *);
1231                 find = pass ? find_rename : find_origin;
1232
1233                 for (i = 0, sg = first_scapegoat(revs, commit);
1234                      i < num_sg && sg;
1235                      sg = sg->next, i++) {
1236                         struct commit *p = sg->item;
1237                         int j, same;
1238
1239                         if (sg_origin[i])
1240                                 continue;
1241                         if (parse_commit(p))
1242                                 continue;
1243                         porigin = find(sb, p, origin);
1244                         if (!porigin)
1245                                 continue;
1246                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1247                                 pass_whole_blame(sb, origin, porigin);
1248                                 origin_decref(porigin);
1249                                 goto finish;
1250                         }
1251                         for (j = same = 0; j < i; j++)
1252                                 if (sg_origin[j] &&
1253                                     !hashcmp(sg_origin[j]->blob_sha1,
1254                                              porigin->blob_sha1)) {
1255                                         same = 1;
1256                                         break;
1257                                 }
1258                         if (!same)
1259                                 sg_origin[i] = porigin;
1260                         else
1261                                 origin_decref(porigin);
1262                 }
1263         }
1264
1265         num_commits++;
1266         for (i = 0, sg = first_scapegoat(revs, commit);
1267              i < num_sg && sg;
1268              sg = sg->next, i++) {
1269                 struct origin *porigin = sg_origin[i];
1270                 if (!porigin)
1271                         continue;
1272                 if (!origin->previous) {
1273                         origin_incref(porigin);
1274                         origin->previous = porigin;
1275                 }
1276                 if (pass_blame_to_parent(sb, origin, porigin))
1277                         goto finish;
1278         }
1279
1280         /*
1281          * Optionally find moves in parents' files.
1282          */
1283         if (opt & PICKAXE_BLAME_MOVE)
1284                 for (i = 0, sg = first_scapegoat(revs, commit);
1285                      i < num_sg && sg;
1286                      sg = sg->next, i++) {
1287                         struct origin *porigin = sg_origin[i];
1288                         if (!porigin)
1289                                 continue;
1290                         if (find_move_in_parent(sb, origin, porigin))
1291                                 goto finish;
1292                 }
1293
1294         /*
1295          * Optionally find copies from parents' files.
1296          */
1297         if (opt & PICKAXE_BLAME_COPY)
1298                 for (i = 0, sg = first_scapegoat(revs, commit);
1299                      i < num_sg && sg;
1300                      sg = sg->next, i++) {
1301                         struct origin *porigin = sg_origin[i];
1302                         if (find_copy_in_parent(sb, origin, sg->item,
1303                                                 porigin, opt))
1304                                 goto finish;
1305                 }
1306
1307  finish:
1308         for (i = 0; i < num_sg; i++) {
1309                 if (sg_origin[i]) {
1310                         drop_origin_blob(sg_origin[i]);
1311                         origin_decref(sg_origin[i]);
1312                 }
1313         }
1314         drop_origin_blob(origin);
1315         if (sg_buf != sg_origin)
1316                 free(sg_origin);
1317 }
1318
1319 /*
1320  * Information on commits, used for output.
1321  */
1322 struct commit_info {
1323         const char *author;
1324         const char *author_mail;
1325         unsigned long author_time;
1326         const char *author_tz;
1327
1328         /* filled only when asked for details */
1329         const char *committer;
1330         const char *committer_mail;
1331         unsigned long committer_time;
1332         const char *committer_tz;
1333
1334         const char *summary;
1335 };
1336
1337 /*
1338  * Parse author/committer line in the commit object buffer
1339  */
1340 static void get_ac_line(const char *inbuf, const char *what,
1341                         int person_len, char *person,
1342                         int mail_len, char *mail,
1343                         unsigned long *time, const char **tz)
1344 {
1345         int len, tzlen, maillen;
1346         char *tmp, *endp, *timepos, *mailpos;
1347
1348         tmp = strstr(inbuf, what);
1349         if (!tmp)
1350                 goto error_out;
1351         tmp += strlen(what);
1352         endp = strchr(tmp, '\n');
1353         if (!endp)
1354                 len = strlen(tmp);
1355         else
1356                 len = endp - tmp;
1357         if (person_len <= len) {
1358         error_out:
1359                 /* Ugh */
1360                 *tz = "(unknown)";
1361                 strcpy(person, *tz);
1362                 strcpy(mail, *tz);
1363                 *time = 0;
1364                 return;
1365         }
1366         memcpy(person, tmp, len);
1367
1368         tmp = person;
1369         tmp += len;
1370         *tmp = 0;
1371         while (person < tmp && *tmp != ' ')
1372                 tmp--;
1373         if (tmp <= person)
1374                 goto error_out;
1375         *tz = tmp+1;
1376         tzlen = (person+len)-(tmp+1);
1377
1378         *tmp = 0;
1379         while (person < tmp && *tmp != ' ')
1380                 tmp--;
1381         if (tmp <= person)
1382                 goto error_out;
1383         *time = strtoul(tmp, NULL, 10);
1384         timepos = tmp;
1385
1386         *tmp = 0;
1387         while (person < tmp && !(*tmp == ' ' && tmp[1] == '<'))
1388                 tmp--;
1389         if (tmp <= person)
1390                 return;
1391         mailpos = tmp + 1;
1392         *tmp = 0;
1393         maillen = timepos - tmp;
1394         memcpy(mail, mailpos, maillen);
1395
1396         if (!mailmap.nr)
1397                 return;
1398
1399         /*
1400          * mailmap expansion may make the name longer.
1401          * make room by pushing stuff down.
1402          */
1403         tmp = person + person_len - (tzlen + 1);
1404         memmove(tmp, *tz, tzlen);
1405         tmp[tzlen] = 0;
1406         *tz = tmp;
1407
1408         /*
1409          * Now, convert both name and e-mail using mailmap
1410          */
1411         if (map_user(&mailmap, mail+1, mail_len-1, person, tmp-person-1)) {
1412                 /* Add a trailing '>' to email, since map_user returns plain emails
1413                    Note: It already has '<', since we replace from mail+1 */
1414                 mailpos = memchr(mail, '\0', mail_len);
1415                 if (mailpos && mailpos-mail < mail_len - 1) {
1416                         *mailpos = '>';
1417                         *(mailpos+1) = '\0';
1418                 }
1419         }
1420 }
1421
1422 static void get_commit_info(struct commit *commit,
1423                             struct commit_info *ret,
1424                             int detailed)
1425 {
1426         int len;
1427         const char *subject;
1428         char *reencoded, *message;
1429         static char author_name[1024];
1430         static char author_mail[1024];
1431         static char committer_name[1024];
1432         static char committer_mail[1024];
1433         static char summary_buf[1024];
1434
1435         /*
1436          * We've operated without save_commit_buffer, so
1437          * we now need to populate them for output.
1438          */
1439         if (!commit->buffer) {
1440                 enum object_type type;
1441                 unsigned long size;
1442                 commit->buffer =
1443                         read_sha1_file(commit->object.sha1, &type, &size);
1444                 if (!commit->buffer)
1445                         die("Cannot read commit %s",
1446                             sha1_to_hex(commit->object.sha1));
1447         }
1448         reencoded = reencode_commit_message(commit, NULL);
1449         message   = reencoded ? reencoded : commit->buffer;
1450         ret->author = author_name;
1451         ret->author_mail = author_mail;
1452         get_ac_line(message, "\nauthor ",
1453                     sizeof(author_name), author_name,
1454                     sizeof(author_mail), author_mail,
1455                     &ret->author_time, &ret->author_tz);
1456
1457         if (!detailed) {
1458                 free(reencoded);
1459                 return;
1460         }
1461
1462         ret->committer = committer_name;
1463         ret->committer_mail = committer_mail;
1464         get_ac_line(message, "\ncommitter ",
1465                     sizeof(committer_name), committer_name,
1466                     sizeof(committer_mail), committer_mail,
1467                     &ret->committer_time, &ret->committer_tz);
1468
1469         ret->summary = summary_buf;
1470         len = find_commit_subject(message, &subject);
1471         if (len && len < sizeof(summary_buf)) {
1472                 memcpy(summary_buf, subject, len);
1473                 summary_buf[len] = 0;
1474         } else {
1475                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1476         }
1477         free(reencoded);
1478 }
1479
1480 /*
1481  * To allow LF and other nonportable characters in pathnames,
1482  * they are c-style quoted as needed.
1483  */
1484 static void write_filename_info(const char *path)
1485 {
1486         printf("filename ");
1487         write_name_quoted(path, stdout, '\n');
1488 }
1489
1490 /*
1491  * Porcelain/Incremental format wants to show a lot of details per
1492  * commit.  Instead of repeating this every line, emit it only once,
1493  * the first time each commit appears in the output (unless the
1494  * user has specifically asked for us to repeat).
1495  */
1496 static int emit_one_suspect_detail(struct origin *suspect, int repeat)
1497 {
1498         struct commit_info ci;
1499
1500         if (!repeat && (suspect->commit->object.flags & METAINFO_SHOWN))
1501                 return 0;
1502
1503         suspect->commit->object.flags |= METAINFO_SHOWN;
1504         get_commit_info(suspect->commit, &ci, 1);
1505         printf("author %s\n", ci.author);
1506         printf("author-mail %s\n", ci.author_mail);
1507         printf("author-time %lu\n", ci.author_time);
1508         printf("author-tz %s\n", ci.author_tz);
1509         printf("committer %s\n", ci.committer);
1510         printf("committer-mail %s\n", ci.committer_mail);
1511         printf("committer-time %lu\n", ci.committer_time);
1512         printf("committer-tz %s\n", ci.committer_tz);
1513         printf("summary %s\n", ci.summary);
1514         if (suspect->commit->object.flags & UNINTERESTING)
1515                 printf("boundary\n");
1516         if (suspect->previous) {
1517                 struct origin *prev = suspect->previous;
1518                 printf("previous %s ", sha1_to_hex(prev->commit->object.sha1));
1519                 write_name_quoted(prev->path, stdout, '\n');
1520         }
1521         return 1;
1522 }
1523
1524 /*
1525  * The blame_entry is found to be guilty for the range.  Mark it
1526  * as such, and show it in incremental output.
1527  */
1528 static void found_guilty_entry(struct blame_entry *ent)
1529 {
1530         if (ent->guilty)
1531                 return;
1532         ent->guilty = 1;
1533         if (incremental) {
1534                 struct origin *suspect = ent->suspect;
1535
1536                 printf("%s %d %d %d\n",
1537                        sha1_to_hex(suspect->commit->object.sha1),
1538                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1539                 emit_one_suspect_detail(suspect, 0);
1540                 write_filename_info(suspect->path);
1541                 maybe_flush_or_die(stdout, "stdout");
1542         }
1543 }
1544
1545 /*
1546  * The main loop -- while the scoreboard has lines whose true origin
1547  * is still unknown, pick one blame_entry, and allow its current
1548  * suspect to pass blames to its parents.
1549  */
1550 static void assign_blame(struct scoreboard *sb, int opt)
1551 {
1552         struct rev_info *revs = sb->revs;
1553
1554         while (1) {
1555                 struct blame_entry *ent;
1556                 struct commit *commit;
1557                 struct origin *suspect = NULL;
1558
1559                 /* find one suspect to break down */
1560                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1561                         if (!ent->guilty)
1562                                 suspect = ent->suspect;
1563                 if (!suspect)
1564                         return; /* all done */
1565
1566                 /*
1567                  * We will use this suspect later in the loop,
1568                  * so hold onto it in the meantime.
1569                  */
1570                 origin_incref(suspect);
1571                 commit = suspect->commit;
1572                 if (!commit->object.parsed)
1573                         parse_commit(commit);
1574                 if (reverse ||
1575                     (!(commit->object.flags & UNINTERESTING) &&
1576                      !(revs->max_age != -1 && commit->date < revs->max_age)))
1577                         pass_blame(sb, suspect, opt);
1578                 else {
1579                         commit->object.flags |= UNINTERESTING;
1580                         if (commit->object.parsed)
1581                                 mark_parents_uninteresting(commit);
1582                 }
1583                 /* treat root commit as boundary */
1584                 if (!commit->parents && !show_root)
1585                         commit->object.flags |= UNINTERESTING;
1586
1587                 /* Take responsibility for the remaining entries */
1588                 for (ent = sb->ent; ent; ent = ent->next)
1589                         if (same_suspect(ent->suspect, suspect))
1590                                 found_guilty_entry(ent);
1591                 origin_decref(suspect);
1592
1593                 if (DEBUG) /* sanity */
1594                         sanity_check_refcnt(sb);
1595         }
1596 }
1597
1598 static const char *format_time(unsigned long time, const char *tz_str,
1599                                int show_raw_time)
1600 {
1601         static char time_buf[128];
1602         const char *time_str;
1603         int time_len;
1604         int tz;
1605
1606         if (show_raw_time) {
1607                 snprintf(time_buf, sizeof(time_buf), "%lu %s", time, tz_str);
1608         }
1609         else {
1610                 tz = atoi(tz_str);
1611                 time_str = show_date(time, tz, blame_date_mode);
1612                 time_len = strlen(time_str);
1613                 memcpy(time_buf, time_str, time_len);
1614                 memset(time_buf + time_len, ' ', blame_date_width - time_len);
1615         }
1616         return time_buf;
1617 }
1618
1619 #define OUTPUT_ANNOTATE_COMPAT  001
1620 #define OUTPUT_LONG_OBJECT_NAME 002
1621 #define OUTPUT_RAW_TIMESTAMP    004
1622 #define OUTPUT_PORCELAIN        010
1623 #define OUTPUT_SHOW_NAME        020
1624 #define OUTPUT_SHOW_NUMBER      040
1625 #define OUTPUT_SHOW_SCORE      0100
1626 #define OUTPUT_NO_AUTHOR       0200
1627 #define OUTPUT_SHOW_EMAIL       0400
1628 #define OUTPUT_LINE_PORCELAIN 01000
1629
1630 static void emit_porcelain_details(struct origin *suspect, int repeat)
1631 {
1632         if (emit_one_suspect_detail(suspect, repeat) ||
1633             (suspect->commit->object.flags & MORE_THAN_ONE_PATH))
1634                 write_filename_info(suspect->path);
1635 }
1636
1637 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent,
1638                            int opt)
1639 {
1640         int repeat = opt & OUTPUT_LINE_PORCELAIN;
1641         int cnt;
1642         const char *cp;
1643         struct origin *suspect = ent->suspect;
1644         char hex[41];
1645
1646         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1647         printf("%s%c%d %d %d\n",
1648                hex,
1649                ent->guilty ? ' ' : '*', /* purely for debugging */
1650                ent->s_lno + 1,
1651                ent->lno + 1,
1652                ent->num_lines);
1653         emit_porcelain_details(suspect, repeat);
1654
1655         cp = nth_line(sb, ent->lno);
1656         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1657                 char ch;
1658                 if (cnt) {
1659                         printf("%s %d %d\n", hex,
1660                                ent->s_lno + 1 + cnt,
1661                                ent->lno + 1 + cnt);
1662                         if (repeat)
1663                                 emit_porcelain_details(suspect, 1);
1664                 }
1665                 putchar('\t');
1666                 do {
1667                         ch = *cp++;
1668                         putchar(ch);
1669                 } while (ch != '\n' &&
1670                          cp < sb->final_buf + sb->final_buf_size);
1671         }
1672
1673         if (sb->final_buf_size && cp[-1] != '\n')
1674                 putchar('\n');
1675 }
1676
1677 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1678 {
1679         int cnt;
1680         const char *cp;
1681         struct origin *suspect = ent->suspect;
1682         struct commit_info ci;
1683         char hex[41];
1684         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1685
1686         get_commit_info(suspect->commit, &ci, 1);
1687         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1688
1689         cp = nth_line(sb, ent->lno);
1690         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1691                 char ch;
1692                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : abbrev;
1693
1694                 if (suspect->commit->object.flags & UNINTERESTING) {
1695                         if (blank_boundary)
1696                                 memset(hex, ' ', length);
1697                         else if (!(opt & OUTPUT_ANNOTATE_COMPAT)) {
1698                                 length--;
1699                                 putchar('^');
1700                         }
1701                 }
1702
1703                 printf("%.*s", length, hex);
1704                 if (opt & OUTPUT_ANNOTATE_COMPAT) {
1705                         const char *name;
1706                         if (opt & OUTPUT_SHOW_EMAIL)
1707                                 name = ci.author_mail;
1708                         else
1709                                 name = ci.author;
1710                         printf("\t(%10s\t%10s\t%d)", name,
1711                                format_time(ci.author_time, ci.author_tz,
1712                                            show_raw_time),
1713                                ent->lno + 1 + cnt);
1714                 } else {
1715                         if (opt & OUTPUT_SHOW_SCORE)
1716                                 printf(" %*d %02d",
1717                                        max_score_digits, ent->score,
1718                                        ent->suspect->refcnt);
1719                         if (opt & OUTPUT_SHOW_NAME)
1720                                 printf(" %-*.*s", longest_file, longest_file,
1721                                        suspect->path);
1722                         if (opt & OUTPUT_SHOW_NUMBER)
1723                                 printf(" %*d", max_orig_digits,
1724                                        ent->s_lno + 1 + cnt);
1725
1726                         if (!(opt & OUTPUT_NO_AUTHOR)) {
1727                                 const char *name;
1728                                 int pad;
1729                                 if (opt & OUTPUT_SHOW_EMAIL)
1730                                         name = ci.author_mail;
1731                                 else
1732                                         name = ci.author;
1733                                 pad = longest_author - utf8_strwidth(name);
1734                                 printf(" (%s%*s %10s",
1735                                        name, pad, "",
1736                                        format_time(ci.author_time,
1737                                                    ci.author_tz,
1738                                                    show_raw_time));
1739                         }
1740                         printf(" %*d) ",
1741                                max_digits, ent->lno + 1 + cnt);
1742                 }
1743                 do {
1744                         ch = *cp++;
1745                         putchar(ch);
1746                 } while (ch != '\n' &&
1747                          cp < sb->final_buf + sb->final_buf_size);
1748         }
1749
1750         if (sb->final_buf_size && cp[-1] != '\n')
1751                 putchar('\n');
1752 }
1753
1754 static void output(struct scoreboard *sb, int option)
1755 {
1756         struct blame_entry *ent;
1757
1758         if (option & OUTPUT_PORCELAIN) {
1759                 for (ent = sb->ent; ent; ent = ent->next) {
1760                         struct blame_entry *oth;
1761                         struct origin *suspect = ent->suspect;
1762                         struct commit *commit = suspect->commit;
1763                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1764                                 continue;
1765                         for (oth = ent->next; oth; oth = oth->next) {
1766                                 if ((oth->suspect->commit != commit) ||
1767                                     !strcmp(oth->suspect->path, suspect->path))
1768                                         continue;
1769                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1770                                 break;
1771                         }
1772                 }
1773         }
1774
1775         for (ent = sb->ent; ent; ent = ent->next) {
1776                 if (option & OUTPUT_PORCELAIN)
1777                         emit_porcelain(sb, ent, option);
1778                 else {
1779                         emit_other(sb, ent, option);
1780                 }
1781         }
1782 }
1783
1784 /*
1785  * To allow quick access to the contents of nth line in the
1786  * final image, prepare an index in the scoreboard.
1787  */
1788 static int prepare_lines(struct scoreboard *sb)
1789 {
1790         const char *buf = sb->final_buf;
1791         unsigned long len = sb->final_buf_size;
1792         int num = 0, incomplete = 0, bol = 1;
1793
1794         if (len && buf[len-1] != '\n')
1795                 incomplete++; /* incomplete line at the end */
1796         while (len--) {
1797                 if (bol) {
1798                         sb->lineno = xrealloc(sb->lineno,
1799                                               sizeof(int *) * (num + 1));
1800                         sb->lineno[num] = buf - sb->final_buf;
1801                         bol = 0;
1802                 }
1803                 if (*buf++ == '\n') {
1804                         num++;
1805                         bol = 1;
1806                 }
1807         }
1808         sb->lineno = xrealloc(sb->lineno,
1809                               sizeof(int *) * (num + incomplete + 1));
1810         sb->lineno[num + incomplete] = buf - sb->final_buf;
1811         sb->num_lines = num + incomplete;
1812         return sb->num_lines;
1813 }
1814
1815 /*
1816  * Add phony grafts for use with -S; this is primarily to
1817  * support git's cvsserver that wants to give a linear history
1818  * to its clients.
1819  */
1820 static int read_ancestry(const char *graft_file)
1821 {
1822         FILE *fp = fopen(graft_file, "r");
1823         char buf[1024];
1824         if (!fp)
1825                 return -1;
1826         while (fgets(buf, sizeof(buf), fp)) {
1827                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1828                 int len = strlen(buf);
1829                 struct commit_graft *graft = read_graft_line(buf, len);
1830                 if (graft)
1831                         register_commit_graft(graft, 0);
1832         }
1833         fclose(fp);
1834         return 0;
1835 }
1836
1837 static int update_auto_abbrev(int auto_abbrev, struct origin *suspect)
1838 {
1839         const char *uniq = find_unique_abbrev(suspect->commit->object.sha1,
1840                                               auto_abbrev);
1841         int len = strlen(uniq);
1842         if (auto_abbrev < len)
1843                 return len;
1844         return auto_abbrev;
1845 }
1846
1847 /*
1848  * How many columns do we need to show line numbers, authors,
1849  * and filenames?
1850  */
1851 static void find_alignment(struct scoreboard *sb, int *option)
1852 {
1853         int longest_src_lines = 0;
1854         int longest_dst_lines = 0;
1855         unsigned largest_score = 0;
1856         struct blame_entry *e;
1857         int compute_auto_abbrev = (abbrev < 0);
1858         int auto_abbrev = default_abbrev;
1859
1860         for (e = sb->ent; e; e = e->next) {
1861                 struct origin *suspect = e->suspect;
1862                 struct commit_info ci;
1863                 int num;
1864
1865                 if (compute_auto_abbrev)
1866                         auto_abbrev = update_auto_abbrev(auto_abbrev, suspect);
1867                 if (strcmp(suspect->path, sb->path))
1868                         *option |= OUTPUT_SHOW_NAME;
1869                 num = strlen(suspect->path);
1870                 if (longest_file < num)
1871                         longest_file = num;
1872                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1873                         suspect->commit->object.flags |= METAINFO_SHOWN;
1874                         get_commit_info(suspect->commit, &ci, 1);
1875                         if (*option & OUTPUT_SHOW_EMAIL)
1876                                 num = utf8_strwidth(ci.author_mail);
1877                         else
1878                                 num = utf8_strwidth(ci.author);
1879                         if (longest_author < num)
1880                                 longest_author = num;
1881                 }
1882                 num = e->s_lno + e->num_lines;
1883                 if (longest_src_lines < num)
1884                         longest_src_lines = num;
1885                 num = e->lno + e->num_lines;
1886                 if (longest_dst_lines < num)
1887                         longest_dst_lines = num;
1888                 if (largest_score < ent_score(sb, e))
1889                         largest_score = ent_score(sb, e);
1890         }
1891         max_orig_digits = decimal_width(longest_src_lines);
1892         max_digits = decimal_width(longest_dst_lines);
1893         max_score_digits = decimal_width(largest_score);
1894
1895         if (compute_auto_abbrev)
1896                 /* one more abbrev length is needed for the boundary commit */
1897                 abbrev = auto_abbrev + 1;
1898 }
1899
1900 /*
1901  * For debugging -- origin is refcounted, and this asserts that
1902  * we do not underflow.
1903  */
1904 static void sanity_check_refcnt(struct scoreboard *sb)
1905 {
1906         int baa = 0;
1907         struct blame_entry *ent;
1908
1909         for (ent = sb->ent; ent; ent = ent->next) {
1910                 /* Nobody should have zero or negative refcnt */
1911                 if (ent->suspect->refcnt <= 0) {
1912                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1913                                 ent->suspect->path,
1914                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1915                                 ent->suspect->refcnt);
1916                         baa = 1;
1917                 }
1918         }
1919         if (baa) {
1920                 int opt = 0160;
1921                 find_alignment(sb, &opt);
1922                 output(sb, opt);
1923                 die("Baa %d!", baa);
1924         }
1925 }
1926
1927 /*
1928  * Used for the command line parsing; check if the path exists
1929  * in the working tree.
1930  */
1931 static int has_string_in_work_tree(const char *path)
1932 {
1933         struct stat st;
1934         return !lstat(path, &st);
1935 }
1936
1937 static unsigned parse_score(const char *arg)
1938 {
1939         char *end;
1940         unsigned long score = strtoul(arg, &end, 10);
1941         if (*end)
1942                 return 0;
1943         return score;
1944 }
1945
1946 static const char *add_prefix(const char *prefix, const char *path)
1947 {
1948         return prefix_path(prefix, prefix ? strlen(prefix) : 0, path);
1949 }
1950
1951 /*
1952  * Parsing of (comma separated) one item in the -L option
1953  */
1954 static const char *parse_loc(const char *spec,
1955                              struct scoreboard *sb, long lno,
1956                              long begin, long *ret)
1957 {
1958         char *term;
1959         const char *line;
1960         long num;
1961         int reg_error;
1962         regex_t regexp;
1963         regmatch_t match[1];
1964
1965         /* Allow "-L <something>,+20" to mean starting at <something>
1966          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1967          * <something>.
1968          */
1969         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1970                 num = strtol(spec + 1, &term, 10);
1971                 if (term != spec + 1) {
1972                         if (spec[0] == '-')
1973                                 num = 0 - num;
1974                         if (0 < num)
1975                                 *ret = begin + num - 2;
1976                         else if (!num)
1977                                 *ret = begin;
1978                         else
1979                                 *ret = begin + num;
1980                         return term;
1981                 }
1982                 return spec;
1983         }
1984         num = strtol(spec, &term, 10);
1985         if (term != spec) {
1986                 *ret = num;
1987                 return term;
1988         }
1989         if (spec[0] != '/')
1990                 return spec;
1991
1992         /* it could be a regexp of form /.../ */
1993         for (term = (char *) spec + 1; *term && *term != '/'; term++) {
1994                 if (*term == '\\')
1995                         term++;
1996         }
1997         if (*term != '/')
1998                 return spec;
1999
2000         /* try [spec+1 .. term-1] as regexp */
2001         *term = 0;
2002         begin--; /* input is in human terms */
2003         line = nth_line(sb, begin);
2004
2005         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
2006             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
2007                 const char *cp = line + match[0].rm_so;
2008                 const char *nline;
2009
2010                 while (begin++ < lno) {
2011                         nline = nth_line(sb, begin);
2012                         if (line <= cp && cp < nline)
2013                                 break;
2014                         line = nline;
2015                 }
2016                 *ret = begin;
2017                 regfree(&regexp);
2018                 *term++ = '/';
2019                 return term;
2020         }
2021         else {
2022                 char errbuf[1024];
2023                 regerror(reg_error, &regexp, errbuf, 1024);
2024                 die("-L parameter '%s': %s", spec + 1, errbuf);
2025         }
2026 }
2027
2028 /*
2029  * Parsing of -L option
2030  */
2031 static void prepare_blame_range(struct scoreboard *sb,
2032                                 const char *bottomtop,
2033                                 long lno,
2034                                 long *bottom, long *top)
2035 {
2036         const char *term;
2037
2038         term = parse_loc(bottomtop, sb, lno, 1, bottom);
2039         if (*term == ',') {
2040                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
2041                 if (*term)
2042                         usage(blame_usage);
2043         }
2044         if (*term)
2045                 usage(blame_usage);
2046 }
2047
2048 static int git_blame_config(const char *var, const char *value, void *cb)
2049 {
2050         if (!strcmp(var, "blame.showroot")) {
2051                 show_root = git_config_bool(var, value);
2052                 return 0;
2053         }
2054         if (!strcmp(var, "blame.blankboundary")) {
2055                 blank_boundary = git_config_bool(var, value);
2056                 return 0;
2057         }
2058         if (!strcmp(var, "blame.date")) {
2059                 if (!value)
2060                         return config_error_nonbool(var);
2061                 blame_date_mode = parse_date_format(value);
2062                 return 0;
2063         }
2064
2065         if (userdiff_config(var, value) < 0)
2066                 return -1;
2067
2068         return git_default_config(var, value, cb);
2069 }
2070
2071 /*
2072  * Prepare a dummy commit that represents the work tree (or staged) item.
2073  * Note that annotating work tree item never works in the reverse.
2074  */
2075 static struct commit *fake_working_tree_commit(struct diff_options *opt,
2076                                                const char *path,
2077                                                const char *contents_from)
2078 {
2079         struct commit *commit;
2080         struct origin *origin;
2081         unsigned char head_sha1[20];
2082         struct strbuf buf = STRBUF_INIT;
2083         const char *ident;
2084         time_t now;
2085         int size, len;
2086         struct cache_entry *ce;
2087         unsigned mode;
2088
2089         if (get_sha1("HEAD", head_sha1))
2090                 die("No such ref: HEAD");
2091
2092         time(&now);
2093         commit = xcalloc(1, sizeof(*commit));
2094         commit->parents = xcalloc(1, sizeof(*commit->parents));
2095         commit->parents->item = lookup_commit_reference(head_sha1);
2096         commit->object.parsed = 1;
2097         commit->date = now;
2098         commit->object.type = OBJ_COMMIT;
2099
2100         origin = make_origin(commit, path);
2101
2102         if (!contents_from || strcmp("-", contents_from)) {
2103                 struct stat st;
2104                 const char *read_from;
2105                 char *buf_ptr;
2106                 unsigned long buf_len;
2107
2108                 if (contents_from) {
2109                         if (stat(contents_from, &st) < 0)
2110                                 die_errno("Cannot stat '%s'", contents_from);
2111                         read_from = contents_from;
2112                 }
2113                 else {
2114                         if (lstat(path, &st) < 0)
2115                                 die_errno("Cannot lstat '%s'", path);
2116                         read_from = path;
2117                 }
2118                 mode = canon_mode(st.st_mode);
2119
2120                 switch (st.st_mode & S_IFMT) {
2121                 case S_IFREG:
2122                         if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
2123                             textconv_object(read_from, mode, null_sha1, &buf_ptr, &buf_len))
2124                                 strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1);
2125                         else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
2126                                 die_errno("cannot open or read '%s'", read_from);
2127                         break;
2128                 case S_IFLNK:
2129                         if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
2130                                 die_errno("cannot readlink '%s'", read_from);
2131                         break;
2132                 default:
2133                         die("unsupported file type %s", read_from);
2134                 }
2135         }
2136         else {
2137                 /* Reading from stdin */
2138                 contents_from = "standard input";
2139                 mode = 0;
2140                 if (strbuf_read(&buf, 0, 0) < 0)
2141                         die_errno("failed to read from stdin");
2142         }
2143         convert_to_git(path, buf.buf, buf.len, &buf, 0);
2144         origin->file.ptr = buf.buf;
2145         origin->file.size = buf.len;
2146         pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
2147         commit->util = origin;
2148
2149         /*
2150          * Read the current index, replace the path entry with
2151          * origin->blob_sha1 without mucking with its mode or type
2152          * bits; we are not going to write this index out -- we just
2153          * want to run "diff-index --cached".
2154          */
2155         discard_cache();
2156         read_cache();
2157
2158         len = strlen(path);
2159         if (!mode) {
2160                 int pos = cache_name_pos(path, len);
2161                 if (0 <= pos)
2162                         mode = active_cache[pos]->ce_mode;
2163                 else
2164                         /* Let's not bother reading from HEAD tree */
2165                         mode = S_IFREG | 0644;
2166         }
2167         size = cache_entry_size(len);
2168         ce = xcalloc(1, size);
2169         hashcpy(ce->sha1, origin->blob_sha1);
2170         memcpy(ce->name, path, len);
2171         ce->ce_flags = create_ce_flags(0);
2172         ce->ce_namelen = len;
2173         ce->ce_mode = create_ce_mode(mode);
2174         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2175
2176         /*
2177          * We are not going to write this out, so this does not matter
2178          * right now, but someday we might optimize diff-index --cached
2179          * with cache-tree information.
2180          */
2181         cache_tree_invalidate_path(active_cache_tree, path);
2182
2183         commit->buffer = xmalloc(400);
2184         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2185         snprintf(commit->buffer, 400,
2186                 "tree 0000000000000000000000000000000000000000\n"
2187                 "parent %s\n"
2188                 "author %s\n"
2189                 "committer %s\n\n"
2190                 "Version of %s from %s\n",
2191                 sha1_to_hex(head_sha1),
2192                 ident, ident, path, contents_from ? contents_from : path);
2193         return commit;
2194 }
2195
2196 static const char *prepare_final(struct scoreboard *sb)
2197 {
2198         int i;
2199         const char *final_commit_name = NULL;
2200         struct rev_info *revs = sb->revs;
2201
2202         /*
2203          * There must be one and only one positive commit in the
2204          * revs->pending array.
2205          */
2206         for (i = 0; i < revs->pending.nr; i++) {
2207                 struct object *obj = revs->pending.objects[i].item;
2208                 if (obj->flags & UNINTERESTING)
2209                         continue;
2210                 while (obj->type == OBJ_TAG)
2211                         obj = deref_tag(obj, NULL, 0);
2212                 if (obj->type != OBJ_COMMIT)
2213                         die("Non commit %s?", revs->pending.objects[i].name);
2214                 if (sb->final)
2215                         die("More than one commit to dig from %s and %s?",
2216                             revs->pending.objects[i].name,
2217                             final_commit_name);
2218                 sb->final = (struct commit *) obj;
2219                 final_commit_name = revs->pending.objects[i].name;
2220         }
2221         return final_commit_name;
2222 }
2223
2224 static const char *prepare_initial(struct scoreboard *sb)
2225 {
2226         int i;
2227         const char *final_commit_name = NULL;
2228         struct rev_info *revs = sb->revs;
2229
2230         /*
2231          * There must be one and only one negative commit, and it must be
2232          * the boundary.
2233          */
2234         for (i = 0; i < revs->pending.nr; i++) {
2235                 struct object *obj = revs->pending.objects[i].item;
2236                 if (!(obj->flags & UNINTERESTING))
2237                         continue;
2238                 while (obj->type == OBJ_TAG)
2239                         obj = deref_tag(obj, NULL, 0);
2240                 if (obj->type != OBJ_COMMIT)
2241                         die("Non commit %s?", revs->pending.objects[i].name);
2242                 if (sb->final)
2243                         die("More than one commit to dig down to %s and %s?",
2244                             revs->pending.objects[i].name,
2245                             final_commit_name);
2246                 sb->final = (struct commit *) obj;
2247                 final_commit_name = revs->pending.objects[i].name;
2248         }
2249         if (!final_commit_name)
2250                 die("No commit to dig down to?");
2251         return final_commit_name;
2252 }
2253
2254 static int blame_copy_callback(const struct option *option, const char *arg, int unset)
2255 {
2256         int *opt = option->value;
2257
2258         /*
2259          * -C enables copy from removed files;
2260          * -C -C enables copy from existing files, but only
2261          *       when blaming a new file;
2262          * -C -C -C enables copy from existing files for
2263          *          everybody
2264          */
2265         if (*opt & PICKAXE_BLAME_COPY_HARDER)
2266                 *opt |= PICKAXE_BLAME_COPY_HARDEST;
2267         if (*opt & PICKAXE_BLAME_COPY)
2268                 *opt |= PICKAXE_BLAME_COPY_HARDER;
2269         *opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2270
2271         if (arg)
2272                 blame_copy_score = parse_score(arg);
2273         return 0;
2274 }
2275
2276 static int blame_move_callback(const struct option *option, const char *arg, int unset)
2277 {
2278         int *opt = option->value;
2279
2280         *opt |= PICKAXE_BLAME_MOVE;
2281
2282         if (arg)
2283                 blame_move_score = parse_score(arg);
2284         return 0;
2285 }
2286
2287 static int blame_bottomtop_callback(const struct option *option, const char *arg, int unset)
2288 {
2289         const char **bottomtop = option->value;
2290         if (!arg)
2291                 return -1;
2292         if (*bottomtop)
2293                 die("More than one '-L n,m' option given");
2294         *bottomtop = arg;
2295         return 0;
2296 }
2297
2298 int cmd_blame(int argc, const char **argv, const char *prefix)
2299 {
2300         struct rev_info revs;
2301         const char *path;
2302         struct scoreboard sb;
2303         struct origin *o;
2304         struct blame_entry *ent;
2305         long dashdash_pos, bottom, top, lno;
2306         const char *final_commit_name = NULL;
2307         enum object_type type;
2308
2309         static const char *bottomtop = NULL;
2310         static int output_option = 0, opt = 0;
2311         static int show_stats = 0;
2312         static const char *revs_file = NULL;
2313         static const char *contents_from = NULL;
2314         static const struct option options[] = {
2315                 OPT_BOOLEAN(0, "incremental", &incremental, "Show blame entries as we find them, incrementally"),
2316                 OPT_BOOLEAN('b', NULL, &blank_boundary, "Show blank SHA-1 for boundary commits (Default: off)"),
2317                 OPT_BOOLEAN(0, "root", &show_root, "Do not treat root commits as boundaries (Default: off)"),
2318                 OPT_BOOLEAN(0, "show-stats", &show_stats, "Show work cost statistics"),
2319                 OPT_BIT(0, "score-debug", &output_option, "Show output score for blame entries", OUTPUT_SHOW_SCORE),
2320                 OPT_BIT('f', "show-name", &output_option, "Show original filename (Default: auto)", OUTPUT_SHOW_NAME),
2321                 OPT_BIT('n', "show-number", &output_option, "Show original linenumber (Default: off)", OUTPUT_SHOW_NUMBER),
2322                 OPT_BIT('p', "porcelain", &output_option, "Show in a format designed for machine consumption", OUTPUT_PORCELAIN),
2323                 OPT_BIT(0, "line-porcelain", &output_option, "Show porcelain format with per-line commit information", OUTPUT_PORCELAIN|OUTPUT_LINE_PORCELAIN),
2324                 OPT_BIT('c', NULL, &output_option, "Use the same output mode as git-annotate (Default: off)", OUTPUT_ANNOTATE_COMPAT),
2325                 OPT_BIT('t', NULL, &output_option, "Show raw timestamp (Default: off)", OUTPUT_RAW_TIMESTAMP),
2326                 OPT_BIT('l', NULL, &output_option, "Show long commit SHA1 (Default: off)", OUTPUT_LONG_OBJECT_NAME),
2327                 OPT_BIT('s', NULL, &output_option, "Suppress author name and timestamp (Default: off)", OUTPUT_NO_AUTHOR),
2328                 OPT_BIT('e', "show-email", &output_option, "Show author email instead of name (Default: off)", OUTPUT_SHOW_EMAIL),
2329                 OPT_BIT('w', NULL, &xdl_opts, "Ignore whitespace differences", XDF_IGNORE_WHITESPACE),
2330                 OPT_BIT(0, "minimal", &xdl_opts, "Spend extra cycles to find better match", XDF_NEED_MINIMAL),
2331                 OPT_STRING('S', NULL, &revs_file, "file", "Use revisions from <file> instead of calling git-rev-list"),
2332                 OPT_STRING(0, "contents", &contents_from, "file", "Use <file>'s contents as the final image"),
2333                 { OPTION_CALLBACK, 'C', NULL, &opt, "score", "Find line copies within and across files", PARSE_OPT_OPTARG, blame_copy_callback },
2334                 { OPTION_CALLBACK, 'M', NULL, &opt, "score", "Find line movements within and across files", PARSE_OPT_OPTARG, blame_move_callback },
2335                 OPT_CALLBACK('L', NULL, &bottomtop, "n,m", "Process only line range n,m, counting from 1", blame_bottomtop_callback),
2336                 OPT__ABBREV(&abbrev),
2337                 OPT_END()
2338         };
2339
2340         struct parse_opt_ctx_t ctx;
2341         int cmd_is_annotate = !strcmp(argv[0], "annotate");
2342
2343         git_config(git_blame_config, NULL);
2344         init_revisions(&revs, NULL);
2345         revs.date_mode = blame_date_mode;
2346         DIFF_OPT_SET(&revs.diffopt, ALLOW_TEXTCONV);
2347
2348         save_commit_buffer = 0;
2349         dashdash_pos = 0;
2350
2351         parse_options_start(&ctx, argc, argv, prefix, options,
2352                             PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
2353         for (;;) {
2354                 switch (parse_options_step(&ctx, options, blame_opt_usage)) {
2355                 case PARSE_OPT_HELP:
2356                         exit(129);
2357                 case PARSE_OPT_DONE:
2358                         if (ctx.argv[0])
2359                                 dashdash_pos = ctx.cpidx;
2360                         goto parse_done;
2361                 }
2362
2363                 if (!strcmp(ctx.argv[0], "--reverse")) {
2364                         ctx.argv[0] = "--children";
2365                         reverse = 1;
2366                 }
2367                 parse_revision_opt(&revs, &ctx, options, blame_opt_usage);
2368         }
2369 parse_done:
2370         argc = parse_options_end(&ctx);
2371
2372         if (0 < abbrev)
2373                 /* one more abbrev length is needed for the boundary commit */
2374                 abbrev++;
2375
2376         if (revs_file && read_ancestry(revs_file))
2377                 die_errno("reading graft file '%s' failed", revs_file);
2378
2379         if (cmd_is_annotate) {
2380                 output_option |= OUTPUT_ANNOTATE_COMPAT;
2381                 blame_date_mode = DATE_ISO8601;
2382         } else {
2383                 blame_date_mode = revs.date_mode;
2384         }
2385
2386         /* The maximum width used to show the dates */
2387         switch (blame_date_mode) {
2388         case DATE_RFC2822:
2389                 blame_date_width = sizeof("Thu, 19 Oct 2006 16:00:04 -0700");
2390                 break;
2391         case DATE_ISO8601:
2392                 blame_date_width = sizeof("2006-10-19 16:00:04 -0700");
2393                 break;
2394         case DATE_RAW:
2395                 blame_date_width = sizeof("1161298804 -0700");
2396                 break;
2397         case DATE_SHORT:
2398                 blame_date_width = sizeof("2006-10-19");
2399                 break;
2400         case DATE_RELATIVE:
2401                 /* "normal" is used as the fallback for "relative" */
2402         case DATE_LOCAL:
2403         case DATE_NORMAL:
2404                 blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700");
2405                 break;
2406         }
2407         blame_date_width -= 1; /* strip the null */
2408
2409         if (DIFF_OPT_TST(&revs.diffopt, FIND_COPIES_HARDER))
2410                 opt |= (PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE |
2411                         PICKAXE_BLAME_COPY_HARDER);
2412
2413         if (!blame_move_score)
2414                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2415         if (!blame_copy_score)
2416                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2417
2418         /*
2419          * We have collected options unknown to us in argv[1..unk]
2420          * which are to be passed to revision machinery if we are
2421          * going to do the "bottom" processing.
2422          *
2423          * The remaining are:
2424          *
2425          * (1) if dashdash_pos != 0, it is either
2426          *     "blame [revisions] -- <path>" or
2427          *     "blame -- <path> <rev>"
2428          *
2429          * (2) otherwise, it is one of the two:
2430          *     "blame [revisions] <path>"
2431          *     "blame <path> <rev>"
2432          *
2433          * Note that we must strip out <path> from the arguments: we do not
2434          * want the path pruning but we may want "bottom" processing.
2435          */
2436         if (dashdash_pos) {
2437                 switch (argc - dashdash_pos - 1) {
2438                 case 2: /* (1b) */
2439                         if (argc != 4)
2440                                 usage_with_options(blame_opt_usage, options);
2441                         /* reorder for the new way: <rev> -- <path> */
2442                         argv[1] = argv[3];
2443                         argv[3] = argv[2];
2444                         argv[2] = "--";
2445                         /* FALLTHROUGH */
2446                 case 1: /* (1a) */
2447                         path = add_prefix(prefix, argv[--argc]);
2448                         argv[argc] = NULL;
2449                         break;
2450                 default:
2451                         usage_with_options(blame_opt_usage, options);
2452                 }
2453         } else {
2454                 if (argc < 2)
2455                         usage_with_options(blame_opt_usage, options);
2456                 path = add_prefix(prefix, argv[argc - 1]);
2457                 if (argc == 3 && !has_string_in_work_tree(path)) { /* (2b) */
2458                         path = add_prefix(prefix, argv[1]);
2459                         argv[1] = argv[2];
2460                 }
2461                 argv[argc - 1] = "--";
2462
2463                 setup_work_tree();
2464                 if (!has_string_in_work_tree(path))
2465                         die_errno("cannot stat path '%s'", path);
2466         }
2467
2468         revs.disable_stdin = 1;
2469         setup_revisions(argc, argv, &revs, NULL);
2470         memset(&sb, 0, sizeof(sb));
2471
2472         sb.revs = &revs;
2473         if (!reverse)
2474                 final_commit_name = prepare_final(&sb);
2475         else if (contents_from)
2476                 die("--contents and --children do not blend well.");
2477         else
2478                 final_commit_name = prepare_initial(&sb);
2479
2480         if (!sb.final) {
2481                 /*
2482                  * "--not A B -- path" without anything positive;
2483                  * do not default to HEAD, but use the working tree
2484                  * or "--contents".
2485                  */
2486                 setup_work_tree();
2487                 sb.final = fake_working_tree_commit(&sb.revs->diffopt,
2488                                                     path, contents_from);
2489                 add_pending_object(&revs, &(sb.final->object), ":");
2490         }
2491         else if (contents_from)
2492                 die("Cannot use --contents with final commit object name");
2493
2494         /*
2495          * If we have bottom, this will mark the ancestors of the
2496          * bottom commits we would reach while traversing as
2497          * uninteresting.
2498          */
2499         if (prepare_revision_walk(&revs))
2500                 die("revision walk setup failed");
2501
2502         if (is_null_sha1(sb.final->object.sha1)) {
2503                 char *buf;
2504                 o = sb.final->util;
2505                 buf = xmalloc(o->file.size + 1);
2506                 memcpy(buf, o->file.ptr, o->file.size + 1);
2507                 sb.final_buf = buf;
2508                 sb.final_buf_size = o->file.size;
2509         }
2510         else {
2511                 o = get_origin(&sb, sb.final, path);
2512                 if (fill_blob_sha1_and_mode(o))
2513                         die("no such path %s in %s", path, final_commit_name);
2514
2515                 if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) &&
2516                     textconv_object(path, o->mode, o->blob_sha1, (char **) &sb.final_buf,
2517                                     &sb.final_buf_size))
2518                         ;
2519                 else
2520                         sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2521                                                       &sb.final_buf_size);
2522
2523                 if (!sb.final_buf)
2524                         die("Cannot read blob %s for path %s",
2525                             sha1_to_hex(o->blob_sha1),
2526                             path);
2527         }
2528         num_read_blob++;
2529         lno = prepare_lines(&sb);
2530
2531         bottom = top = 0;
2532         if (bottomtop)
2533                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2534         if (bottom && top && top < bottom) {
2535                 long tmp;
2536                 tmp = top; top = bottom; bottom = tmp;
2537         }
2538         if (bottom < 1)
2539                 bottom = 1;
2540         if (top < 1)
2541                 top = lno;
2542         bottom--;
2543         if (lno < top || lno < bottom)
2544                 die("file %s has only %lu lines", path, lno);
2545
2546         ent = xcalloc(1, sizeof(*ent));
2547         ent->lno = bottom;
2548         ent->num_lines = top - bottom;
2549         ent->suspect = o;
2550         ent->s_lno = bottom;
2551
2552         sb.ent = ent;
2553         sb.path = path;
2554
2555         read_mailmap(&mailmap, NULL);
2556
2557         if (!incremental)
2558                 setup_pager();
2559
2560         assign_blame(&sb, opt);
2561
2562         if (incremental)
2563                 return 0;
2564
2565         coalesce(&sb);
2566
2567         if (!(output_option & OUTPUT_PORCELAIN))
2568                 find_alignment(&sb, &output_option);
2569
2570         output(&sb, output_option);
2571         free((void *)sb.final_buf);
2572         for (ent = sb.ent; ent; ) {
2573                 struct blame_entry *e = ent->next;
2574                 free(ent);
2575                 ent = e;
2576         }
2577
2578         if (show_stats) {
2579                 printf("num read blob: %d\n", num_read_blob);
2580                 printf("num get patch: %d\n", num_get_patch);
2581                 printf("num commits: %d\n", num_commits);
2582         }
2583         return 0;
2584 }