Merge branch 'sp/win'
[git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19 #include "path-list.h"
20 #include "mailmap.h"
21 #include "parse-options.h"
22
23 static char blame_usage[] = "git-blame [options] [rev-opts] [rev] [--] file";
24
25 static const char *blame_opt_usage[] = {
26         blame_usage,
27         "",
28         "[rev-opts] are documented in git-rev-list(1)",
29         NULL
30 };
31
32 static int longest_file;
33 static int longest_author;
34 static int max_orig_digits;
35 static int max_digits;
36 static int max_score_digits;
37 static int show_root;
38 static int reverse;
39 static int blank_boundary;
40 static int incremental;
41 static int cmd_is_annotate;
42 static int xdl_opts = XDF_NEED_MINIMAL;
43 static struct path_list mailmap;
44
45 #ifndef DEBUG
46 #define DEBUG 0
47 #endif
48
49 /* stats */
50 static int num_read_blob;
51 static int num_get_patch;
52 static int num_commits;
53
54 #define PICKAXE_BLAME_MOVE              01
55 #define PICKAXE_BLAME_COPY              02
56 #define PICKAXE_BLAME_COPY_HARDER       04
57 #define PICKAXE_BLAME_COPY_HARDEST      010
58
59 /*
60  * blame for a blame_entry with score lower than these thresholds
61  * is not passed to the parent using move/copy logic.
62  */
63 static unsigned blame_move_score;
64 static unsigned blame_copy_score;
65 #define BLAME_DEFAULT_MOVE_SCORE        20
66 #define BLAME_DEFAULT_COPY_SCORE        40
67
68 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
69 #define METAINFO_SHOWN          (1u<<12)
70 #define MORE_THAN_ONE_PATH      (1u<<13)
71
72 /*
73  * One blob in a commit that is being suspected
74  */
75 struct origin {
76         int refcnt;
77         struct commit *commit;
78         mmfile_t file;
79         unsigned char blob_sha1[20];
80         char path[FLEX_ARRAY];
81 };
82
83 /*
84  * Given an origin, prepare mmfile_t structure to be used by the
85  * diff machinery
86  */
87 static void fill_origin_blob(struct origin *o, mmfile_t *file)
88 {
89         if (!o->file.ptr) {
90                 enum object_type type;
91                 num_read_blob++;
92                 file->ptr = read_sha1_file(o->blob_sha1, &type,
93                                            (unsigned long *)(&(file->size)));
94                 if (!file->ptr)
95                         die("Cannot read blob %s for path %s",
96                             sha1_to_hex(o->blob_sha1),
97                             o->path);
98                 o->file = *file;
99         }
100         else
101                 *file = o->file;
102 }
103
104 /*
105  * Origin is refcounted and usually we keep the blob contents to be
106  * reused.
107  */
108 static inline struct origin *origin_incref(struct origin *o)
109 {
110         if (o)
111                 o->refcnt++;
112         return o;
113 }
114
115 static void origin_decref(struct origin *o)
116 {
117         if (o && --o->refcnt <= 0) {
118                 free(o->file.ptr);
119                 free(o);
120         }
121 }
122
123 static void drop_origin_blob(struct origin *o)
124 {
125         if (o->file.ptr) {
126                 free(o->file.ptr);
127                 o->file.ptr = NULL;
128         }
129 }
130
131 /*
132  * Each group of lines is described by a blame_entry; it can be split
133  * as we pass blame to the parents.  They form a linked list in the
134  * scoreboard structure, sorted by the target line number.
135  */
136 struct blame_entry {
137         struct blame_entry *prev;
138         struct blame_entry *next;
139
140         /* the first line of this group in the final image;
141          * internally all line numbers are 0 based.
142          */
143         int lno;
144
145         /* how many lines this group has */
146         int num_lines;
147
148         /* the commit that introduced this group into the final image */
149         struct origin *suspect;
150
151         /* true if the suspect is truly guilty; false while we have not
152          * checked if the group came from one of its parents.
153          */
154         char guilty;
155
156         /* the line number of the first line of this group in the
157          * suspect's file; internally all line numbers are 0 based.
158          */
159         int s_lno;
160
161         /* how significant this entry is -- cached to avoid
162          * scanning the lines over and over.
163          */
164         unsigned score;
165 };
166
167 /*
168  * The current state of the blame assignment.
169  */
170 struct scoreboard {
171         /* the final commit (i.e. where we started digging from) */
172         struct commit *final;
173         struct rev_info *revs;
174         const char *path;
175
176         /*
177          * The contents in the final image.
178          * Used by many functions to obtain contents of the nth line,
179          * indexed with scoreboard.lineno[blame_entry.lno].
180          */
181         const char *final_buf;
182         unsigned long final_buf_size;
183
184         /* linked list of blames */
185         struct blame_entry *ent;
186
187         /* look-up a line in the final buffer */
188         int num_lines;
189         int *lineno;
190 };
191
192 static inline int same_suspect(struct origin *a, struct origin *b)
193 {
194         if (a == b)
195                 return 1;
196         if (a->commit != b->commit)
197                 return 0;
198         return !strcmp(a->path, b->path);
199 }
200
201 static void sanity_check_refcnt(struct scoreboard *);
202
203 /*
204  * If two blame entries that are next to each other came from
205  * contiguous lines in the same origin (i.e. <commit, path> pair),
206  * merge them together.
207  */
208 static void coalesce(struct scoreboard *sb)
209 {
210         struct blame_entry *ent, *next;
211
212         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
213                 if (same_suspect(ent->suspect, next->suspect) &&
214                     ent->guilty == next->guilty &&
215                     ent->s_lno + ent->num_lines == next->s_lno) {
216                         ent->num_lines += next->num_lines;
217                         ent->next = next->next;
218                         if (ent->next)
219                                 ent->next->prev = ent;
220                         origin_decref(next->suspect);
221                         free(next);
222                         ent->score = 0;
223                         next = ent; /* again */
224                 }
225         }
226
227         if (DEBUG) /* sanity */
228                 sanity_check_refcnt(sb);
229 }
230
231 /*
232  * Given a commit and a path in it, create a new origin structure.
233  * The callers that add blame to the scoreboard should use
234  * get_origin() to obtain shared, refcounted copy instead of calling
235  * this function directly.
236  */
237 static struct origin *make_origin(struct commit *commit, const char *path)
238 {
239         struct origin *o;
240         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
241         o->commit = commit;
242         o->refcnt = 1;
243         strcpy(o->path, path);
244         return o;
245 }
246
247 /*
248  * Locate an existing origin or create a new one.
249  */
250 static struct origin *get_origin(struct scoreboard *sb,
251                                  struct commit *commit,
252                                  const char *path)
253 {
254         struct blame_entry *e;
255
256         for (e = sb->ent; e; e = e->next) {
257                 if (e->suspect->commit == commit &&
258                     !strcmp(e->suspect->path, path))
259                         return origin_incref(e->suspect);
260         }
261         return make_origin(commit, path);
262 }
263
264 /*
265  * Fill the blob_sha1 field of an origin if it hasn't, so that later
266  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
267  * for an origin is also used to pass the blame for the entire file to
268  * the parent to detect the case where a child's blob is identical to
269  * that of its parent's.
270  */
271 static int fill_blob_sha1(struct origin *origin)
272 {
273         unsigned mode;
274
275         if (!is_null_sha1(origin->blob_sha1))
276                 return 0;
277         if (get_tree_entry(origin->commit->object.sha1,
278                            origin->path,
279                            origin->blob_sha1, &mode))
280                 goto error_out;
281         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
282                 goto error_out;
283         return 0;
284  error_out:
285         hashclr(origin->blob_sha1);
286         return -1;
287 }
288
289 /*
290  * We have an origin -- check if the same path exists in the
291  * parent and return an origin structure to represent it.
292  */
293 static struct origin *find_origin(struct scoreboard *sb,
294                                   struct commit *parent,
295                                   struct origin *origin)
296 {
297         struct origin *porigin = NULL;
298         struct diff_options diff_opts;
299         const char *paths[2];
300
301         if (parent->util) {
302                 /*
303                  * Each commit object can cache one origin in that
304                  * commit.  This is a freestanding copy of origin and
305                  * not refcounted.
306                  */
307                 struct origin *cached = parent->util;
308                 if (!strcmp(cached->path, origin->path)) {
309                         /*
310                          * The same path between origin and its parent
311                          * without renaming -- the most common case.
312                          */
313                         porigin = get_origin(sb, parent, cached->path);
314
315                         /*
316                          * If the origin was newly created (i.e. get_origin
317                          * would call make_origin if none is found in the
318                          * scoreboard), it does not know the blob_sha1,
319                          * so copy it.  Otherwise porigin was in the
320                          * scoreboard and already knows blob_sha1.
321                          */
322                         if (porigin->refcnt == 1)
323                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
324                         return porigin;
325                 }
326                 /* otherwise it was not very useful; free it */
327                 free(parent->util);
328                 parent->util = NULL;
329         }
330
331         /* See if the origin->path is different between parent
332          * and origin first.  Most of the time they are the
333          * same and diff-tree is fairly efficient about this.
334          */
335         diff_setup(&diff_opts);
336         DIFF_OPT_SET(&diff_opts, RECURSIVE);
337         diff_opts.detect_rename = 0;
338         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
339         paths[0] = origin->path;
340         paths[1] = NULL;
341
342         diff_tree_setup_paths(paths, &diff_opts);
343         if (diff_setup_done(&diff_opts) < 0)
344                 die("diff-setup");
345
346         if (is_null_sha1(origin->commit->object.sha1))
347                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
348         else
349                 diff_tree_sha1(parent->tree->object.sha1,
350                                origin->commit->tree->object.sha1,
351                                "", &diff_opts);
352         diffcore_std(&diff_opts);
353
354         /* It is either one entry that says "modified", or "created",
355          * or nothing.
356          */
357         if (!diff_queued_diff.nr) {
358                 /* The path is the same as parent */
359                 porigin = get_origin(sb, parent, origin->path);
360                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
361         }
362         else if (diff_queued_diff.nr != 1)
363                 die("internal error in blame::find_origin");
364         else {
365                 struct diff_filepair *p = diff_queued_diff.queue[0];
366                 switch (p->status) {
367                 default:
368                         die("internal error in blame::find_origin (%c)",
369                             p->status);
370                 case 'M':
371                         porigin = get_origin(sb, parent, origin->path);
372                         hashcpy(porigin->blob_sha1, p->one->sha1);
373                         break;
374                 case 'A':
375                 case 'T':
376                         /* Did not exist in parent, or type changed */
377                         break;
378                 }
379         }
380         diff_flush(&diff_opts);
381         diff_tree_release_paths(&diff_opts);
382         if (porigin) {
383                 /*
384                  * Create a freestanding copy that is not part of
385                  * the refcounted origin found in the scoreboard, and
386                  * cache it in the commit.
387                  */
388                 struct origin *cached;
389
390                 cached = make_origin(porigin->commit, porigin->path);
391                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
392                 parent->util = cached;
393         }
394         return porigin;
395 }
396
397 /*
398  * We have an origin -- find the path that corresponds to it in its
399  * parent and return an origin structure to represent it.
400  */
401 static struct origin *find_rename(struct scoreboard *sb,
402                                   struct commit *parent,
403                                   struct origin *origin)
404 {
405         struct origin *porigin = NULL;
406         struct diff_options diff_opts;
407         int i;
408         const char *paths[2];
409
410         diff_setup(&diff_opts);
411         DIFF_OPT_SET(&diff_opts, RECURSIVE);
412         diff_opts.detect_rename = DIFF_DETECT_RENAME;
413         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
414         diff_opts.single_follow = origin->path;
415         paths[0] = NULL;
416         diff_tree_setup_paths(paths, &diff_opts);
417         if (diff_setup_done(&diff_opts) < 0)
418                 die("diff-setup");
419
420         if (is_null_sha1(origin->commit->object.sha1))
421                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
422         else
423                 diff_tree_sha1(parent->tree->object.sha1,
424                                origin->commit->tree->object.sha1,
425                                "", &diff_opts);
426         diffcore_std(&diff_opts);
427
428         for (i = 0; i < diff_queued_diff.nr; i++) {
429                 struct diff_filepair *p = diff_queued_diff.queue[i];
430                 if ((p->status == 'R' || p->status == 'C') &&
431                     !strcmp(p->two->path, origin->path)) {
432                         porigin = get_origin(sb, parent, p->one->path);
433                         hashcpy(porigin->blob_sha1, p->one->sha1);
434                         break;
435                 }
436         }
437         diff_flush(&diff_opts);
438         diff_tree_release_paths(&diff_opts);
439         return porigin;
440 }
441
442 /*
443  * Parsing of patch chunks...
444  */
445 struct chunk {
446         /* line number in postimage; up to but not including this
447          * line is the same as preimage
448          */
449         int same;
450
451         /* preimage line number after this chunk */
452         int p_next;
453
454         /* postimage line number after this chunk */
455         int t_next;
456 };
457
458 struct patch {
459         struct chunk *chunks;
460         int num;
461 };
462
463 struct blame_diff_state {
464         struct xdiff_emit_state xm;
465         struct patch *ret;
466         unsigned hunk_post_context;
467         unsigned hunk_in_pre_context : 1;
468 };
469
470 static void process_u_diff(void *state_, char *line, unsigned long len)
471 {
472         struct blame_diff_state *state = state_;
473         struct chunk *chunk;
474         int off1, off2, len1, len2, num;
475
476         num = state->ret->num;
477         if (len < 4 || line[0] != '@' || line[1] != '@') {
478                 if (state->hunk_in_pre_context && line[0] == ' ')
479                         state->ret->chunks[num - 1].same++;
480                 else {
481                         state->hunk_in_pre_context = 0;
482                         if (line[0] == ' ')
483                                 state->hunk_post_context++;
484                         else
485                                 state->hunk_post_context = 0;
486                 }
487                 return;
488         }
489
490         if (num && state->hunk_post_context) {
491                 chunk = &state->ret->chunks[num - 1];
492                 chunk->p_next -= state->hunk_post_context;
493                 chunk->t_next -= state->hunk_post_context;
494         }
495         state->ret->num = ++num;
496         state->ret->chunks = xrealloc(state->ret->chunks,
497                                       sizeof(struct chunk) * num);
498         chunk = &state->ret->chunks[num - 1];
499         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
500                 state->ret->num--;
501                 return;
502         }
503
504         /* Line numbers in patch output are one based. */
505         off1--;
506         off2--;
507
508         chunk->same = len2 ? off2 : (off2 + 1);
509
510         chunk->p_next = off1 + (len1 ? len1 : 1);
511         chunk->t_next = chunk->same + len2;
512         state->hunk_in_pre_context = 1;
513         state->hunk_post_context = 0;
514 }
515
516 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
517                                     int context)
518 {
519         struct blame_diff_state state;
520         xpparam_t xpp;
521         xdemitconf_t xecfg;
522         xdemitcb_t ecb;
523
524         xpp.flags = xdl_opts;
525         memset(&xecfg, 0, sizeof(xecfg));
526         xecfg.ctxlen = context;
527         ecb.outf = xdiff_outf;
528         ecb.priv = &state;
529         memset(&state, 0, sizeof(state));
530         state.xm.consume = process_u_diff;
531         state.ret = xmalloc(sizeof(struct patch));
532         state.ret->chunks = NULL;
533         state.ret->num = 0;
534
535         xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
536
537         if (state.ret->num) {
538                 struct chunk *chunk;
539                 chunk = &state.ret->chunks[state.ret->num - 1];
540                 chunk->p_next -= state.hunk_post_context;
541                 chunk->t_next -= state.hunk_post_context;
542         }
543         return state.ret;
544 }
545
546 /*
547  * Run diff between two origins and grab the patch output, so that
548  * we can pass blame for lines origin is currently suspected for
549  * to its parent.
550  */
551 static struct patch *get_patch(struct origin *parent, struct origin *origin)
552 {
553         mmfile_t file_p, file_o;
554         struct patch *patch;
555
556         fill_origin_blob(parent, &file_p);
557         fill_origin_blob(origin, &file_o);
558         if (!file_p.ptr || !file_o.ptr)
559                 return NULL;
560         patch = compare_buffer(&file_p, &file_o, 0);
561         num_get_patch++;
562         return patch;
563 }
564
565 static void free_patch(struct patch *p)
566 {
567         free(p->chunks);
568         free(p);
569 }
570
571 /*
572  * Link in a new blame entry to the scoreboard.  Entries that cover the
573  * same line range have been removed from the scoreboard previously.
574  */
575 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
576 {
577         struct blame_entry *ent, *prev = NULL;
578
579         origin_incref(e->suspect);
580
581         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
582                 prev = ent;
583
584         /* prev, if not NULL, is the last one that is below e */
585         e->prev = prev;
586         if (prev) {
587                 e->next = prev->next;
588                 prev->next = e;
589         }
590         else {
591                 e->next = sb->ent;
592                 sb->ent = e;
593         }
594         if (e->next)
595                 e->next->prev = e;
596 }
597
598 /*
599  * src typically is on-stack; we want to copy the information in it to
600  * a malloced blame_entry that is already on the linked list of the
601  * scoreboard.  The origin of dst loses a refcnt while the origin of src
602  * gains one.
603  */
604 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
605 {
606         struct blame_entry *p, *n;
607
608         p = dst->prev;
609         n = dst->next;
610         origin_incref(src->suspect);
611         origin_decref(dst->suspect);
612         memcpy(dst, src, sizeof(*src));
613         dst->prev = p;
614         dst->next = n;
615         dst->score = 0;
616 }
617
618 static const char *nth_line(struct scoreboard *sb, int lno)
619 {
620         return sb->final_buf + sb->lineno[lno];
621 }
622
623 /*
624  * It is known that lines between tlno to same came from parent, and e
625  * has an overlap with that range.  it also is known that parent's
626  * line plno corresponds to e's line tlno.
627  *
628  *                <---- e ----->
629  *                   <------>
630  *                   <------------>
631  *             <------------>
632  *             <------------------>
633  *
634  * Split e into potentially three parts; before this chunk, the chunk
635  * to be blamed for the parent, and after that portion.
636  */
637 static void split_overlap(struct blame_entry *split,
638                           struct blame_entry *e,
639                           int tlno, int plno, int same,
640                           struct origin *parent)
641 {
642         int chunk_end_lno;
643         memset(split, 0, sizeof(struct blame_entry [3]));
644
645         if (e->s_lno < tlno) {
646                 /* there is a pre-chunk part not blamed on parent */
647                 split[0].suspect = origin_incref(e->suspect);
648                 split[0].lno = e->lno;
649                 split[0].s_lno = e->s_lno;
650                 split[0].num_lines = tlno - e->s_lno;
651                 split[1].lno = e->lno + tlno - e->s_lno;
652                 split[1].s_lno = plno;
653         }
654         else {
655                 split[1].lno = e->lno;
656                 split[1].s_lno = plno + (e->s_lno - tlno);
657         }
658
659         if (same < e->s_lno + e->num_lines) {
660                 /* there is a post-chunk part not blamed on parent */
661                 split[2].suspect = origin_incref(e->suspect);
662                 split[2].lno = e->lno + (same - e->s_lno);
663                 split[2].s_lno = e->s_lno + (same - e->s_lno);
664                 split[2].num_lines = e->s_lno + e->num_lines - same;
665                 chunk_end_lno = split[2].lno;
666         }
667         else
668                 chunk_end_lno = e->lno + e->num_lines;
669         split[1].num_lines = chunk_end_lno - split[1].lno;
670
671         /*
672          * if it turns out there is nothing to blame the parent for,
673          * forget about the splitting.  !split[1].suspect signals this.
674          */
675         if (split[1].num_lines < 1)
676                 return;
677         split[1].suspect = origin_incref(parent);
678 }
679
680 /*
681  * split_overlap() divided an existing blame e into up to three parts
682  * in split.  Adjust the linked list of blames in the scoreboard to
683  * reflect the split.
684  */
685 static void split_blame(struct scoreboard *sb,
686                         struct blame_entry *split,
687                         struct blame_entry *e)
688 {
689         struct blame_entry *new_entry;
690
691         if (split[0].suspect && split[2].suspect) {
692                 /* The first part (reuse storage for the existing entry e) */
693                 dup_entry(e, &split[0]);
694
695                 /* The last part -- me */
696                 new_entry = xmalloc(sizeof(*new_entry));
697                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
698                 add_blame_entry(sb, new_entry);
699
700                 /* ... and the middle part -- parent */
701                 new_entry = xmalloc(sizeof(*new_entry));
702                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
703                 add_blame_entry(sb, new_entry);
704         }
705         else if (!split[0].suspect && !split[2].suspect)
706                 /*
707                  * The parent covers the entire area; reuse storage for
708                  * e and replace it with the parent.
709                  */
710                 dup_entry(e, &split[1]);
711         else if (split[0].suspect) {
712                 /* me and then parent */
713                 dup_entry(e, &split[0]);
714
715                 new_entry = xmalloc(sizeof(*new_entry));
716                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
717                 add_blame_entry(sb, new_entry);
718         }
719         else {
720                 /* parent and then me */
721                 dup_entry(e, &split[1]);
722
723                 new_entry = xmalloc(sizeof(*new_entry));
724                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
725                 add_blame_entry(sb, new_entry);
726         }
727
728         if (DEBUG) { /* sanity */
729                 struct blame_entry *ent;
730                 int lno = sb->ent->lno, corrupt = 0;
731
732                 for (ent = sb->ent; ent; ent = ent->next) {
733                         if (lno != ent->lno)
734                                 corrupt = 1;
735                         if (ent->s_lno < 0)
736                                 corrupt = 1;
737                         lno += ent->num_lines;
738                 }
739                 if (corrupt) {
740                         lno = sb->ent->lno;
741                         for (ent = sb->ent; ent; ent = ent->next) {
742                                 printf("L %8d l %8d n %8d\n",
743                                        lno, ent->lno, ent->num_lines);
744                                 lno = ent->lno + ent->num_lines;
745                         }
746                         die("oops");
747                 }
748         }
749 }
750
751 /*
752  * After splitting the blame, the origins used by the
753  * on-stack blame_entry should lose one refcnt each.
754  */
755 static void decref_split(struct blame_entry *split)
756 {
757         int i;
758
759         for (i = 0; i < 3; i++)
760                 origin_decref(split[i].suspect);
761 }
762
763 /*
764  * Helper for blame_chunk().  blame_entry e is known to overlap with
765  * the patch hunk; split it and pass blame to the parent.
766  */
767 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
768                           int tlno, int plno, int same,
769                           struct origin *parent)
770 {
771         struct blame_entry split[3];
772
773         split_overlap(split, e, tlno, plno, same, parent);
774         if (split[1].suspect)
775                 split_blame(sb, split, e);
776         decref_split(split);
777 }
778
779 /*
780  * Find the line number of the last line the target is suspected for.
781  */
782 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
783 {
784         struct blame_entry *e;
785         int last_in_target = -1;
786
787         for (e = sb->ent; e; e = e->next) {
788                 if (e->guilty || !same_suspect(e->suspect, target))
789                         continue;
790                 if (last_in_target < e->s_lno + e->num_lines)
791                         last_in_target = e->s_lno + e->num_lines;
792         }
793         return last_in_target;
794 }
795
796 /*
797  * Process one hunk from the patch between the current suspect for
798  * blame_entry e and its parent.  Find and split the overlap, and
799  * pass blame to the overlapping part to the parent.
800  */
801 static void blame_chunk(struct scoreboard *sb,
802                         int tlno, int plno, int same,
803                         struct origin *target, struct origin *parent)
804 {
805         struct blame_entry *e;
806
807         for (e = sb->ent; e; e = e->next) {
808                 if (e->guilty || !same_suspect(e->suspect, target))
809                         continue;
810                 if (same <= e->s_lno)
811                         continue;
812                 if (tlno < e->s_lno + e->num_lines)
813                         blame_overlap(sb, e, tlno, plno, same, parent);
814         }
815 }
816
817 /*
818  * We are looking at the origin 'target' and aiming to pass blame
819  * for the lines it is suspected to its parent.  Run diff to find
820  * which lines came from parent and pass blame for them.
821  */
822 static int pass_blame_to_parent(struct scoreboard *sb,
823                                 struct origin *target,
824                                 struct origin *parent)
825 {
826         int i, last_in_target, plno, tlno;
827         struct patch *patch;
828
829         last_in_target = find_last_in_target(sb, target);
830         if (last_in_target < 0)
831                 return 1; /* nothing remains for this target */
832
833         patch = get_patch(parent, target);
834         plno = tlno = 0;
835         for (i = 0; i < patch->num; i++) {
836                 struct chunk *chunk = &patch->chunks[i];
837
838                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
839                 plno = chunk->p_next;
840                 tlno = chunk->t_next;
841         }
842         /* The rest (i.e. anything after tlno) are the same as the parent */
843         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
844
845         free_patch(patch);
846         return 0;
847 }
848
849 /*
850  * The lines in blame_entry after splitting blames many times can become
851  * very small and trivial, and at some point it becomes pointless to
852  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
853  * ordinary C program, and it is not worth to say it was copied from
854  * totally unrelated file in the parent.
855  *
856  * Compute how trivial the lines in the blame_entry are.
857  */
858 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
859 {
860         unsigned score;
861         const char *cp, *ep;
862
863         if (e->score)
864                 return e->score;
865
866         score = 1;
867         cp = nth_line(sb, e->lno);
868         ep = nth_line(sb, e->lno + e->num_lines);
869         while (cp < ep) {
870                 unsigned ch = *((unsigned char *)cp);
871                 if (isalnum(ch))
872                         score++;
873                 cp++;
874         }
875         e->score = score;
876         return score;
877 }
878
879 /*
880  * best_so_far[] and this[] are both a split of an existing blame_entry
881  * that passes blame to the parent.  Maintain best_so_far the best split
882  * so far, by comparing this and best_so_far and copying this into
883  * bst_so_far as needed.
884  */
885 static void copy_split_if_better(struct scoreboard *sb,
886                                  struct blame_entry *best_so_far,
887                                  struct blame_entry *this)
888 {
889         int i;
890
891         if (!this[1].suspect)
892                 return;
893         if (best_so_far[1].suspect) {
894                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
895                         return;
896         }
897
898         for (i = 0; i < 3; i++)
899                 origin_incref(this[i].suspect);
900         decref_split(best_so_far);
901         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
902 }
903
904 /*
905  * We are looking at a part of the final image represented by
906  * ent (tlno and same are offset by ent->s_lno).
907  * tlno is where we are looking at in the final image.
908  * up to (but not including) same match preimage.
909  * plno is where we are looking at in the preimage.
910  *
911  * <-------------- final image ---------------------->
912  *       <------ent------>
913  *         ^tlno ^same
914  *    <---------preimage----->
915  *         ^plno
916  *
917  * All line numbers are 0-based.
918  */
919 static void handle_split(struct scoreboard *sb,
920                          struct blame_entry *ent,
921                          int tlno, int plno, int same,
922                          struct origin *parent,
923                          struct blame_entry *split)
924 {
925         if (ent->num_lines <= tlno)
926                 return;
927         if (tlno < same) {
928                 struct blame_entry this[3];
929                 tlno += ent->s_lno;
930                 same += ent->s_lno;
931                 split_overlap(this, ent, tlno, plno, same, parent);
932                 copy_split_if_better(sb, split, this);
933                 decref_split(this);
934         }
935 }
936
937 /*
938  * Find the lines from parent that are the same as ent so that
939  * we can pass blames to it.  file_p has the blob contents for
940  * the parent.
941  */
942 static void find_copy_in_blob(struct scoreboard *sb,
943                               struct blame_entry *ent,
944                               struct origin *parent,
945                               struct blame_entry *split,
946                               mmfile_t *file_p)
947 {
948         const char *cp;
949         int cnt;
950         mmfile_t file_o;
951         struct patch *patch;
952         int i, plno, tlno;
953
954         /*
955          * Prepare mmfile that contains only the lines in ent.
956          */
957         cp = nth_line(sb, ent->lno);
958         file_o.ptr = (char*) cp;
959         cnt = ent->num_lines;
960
961         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
962                 if (*cp++ == '\n')
963                         cnt--;
964         }
965         file_o.size = cp - file_o.ptr;
966
967         patch = compare_buffer(file_p, &file_o, 1);
968
969         /*
970          * file_o is a part of final image we are annotating.
971          * file_p partially may match that image.
972          */
973         memset(split, 0, sizeof(struct blame_entry [3]));
974         plno = tlno = 0;
975         for (i = 0; i < patch->num; i++) {
976                 struct chunk *chunk = &patch->chunks[i];
977
978                 handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
979                 plno = chunk->p_next;
980                 tlno = chunk->t_next;
981         }
982         /* remainder, if any, all match the preimage */
983         handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
984         free_patch(patch);
985 }
986
987 /*
988  * See if lines currently target is suspected for can be attributed to
989  * parent.
990  */
991 static int find_move_in_parent(struct scoreboard *sb,
992                                struct origin *target,
993                                struct origin *parent)
994 {
995         int last_in_target, made_progress;
996         struct blame_entry *e, split[3];
997         mmfile_t file_p;
998
999         last_in_target = find_last_in_target(sb, target);
1000         if (last_in_target < 0)
1001                 return 1; /* nothing remains for this target */
1002
1003         fill_origin_blob(parent, &file_p);
1004         if (!file_p.ptr)
1005                 return 0;
1006
1007         made_progress = 1;
1008         while (made_progress) {
1009                 made_progress = 0;
1010                 for (e = sb->ent; e; e = e->next) {
1011                         if (e->guilty || !same_suspect(e->suspect, target))
1012                                 continue;
1013                         find_copy_in_blob(sb, e, parent, split, &file_p);
1014                         if (split[1].suspect &&
1015                             blame_move_score < ent_score(sb, &split[1])) {
1016                                 split_blame(sb, split, e);
1017                                 made_progress = 1;
1018                         }
1019                         decref_split(split);
1020                 }
1021         }
1022         return 0;
1023 }
1024
1025 struct blame_list {
1026         struct blame_entry *ent;
1027         struct blame_entry split[3];
1028 };
1029
1030 /*
1031  * Count the number of entries the target is suspected for,
1032  * and prepare a list of entry and the best split.
1033  */
1034 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1035                                            struct origin *target,
1036                                            int *num_ents_p)
1037 {
1038         struct blame_entry *e;
1039         int num_ents, i;
1040         struct blame_list *blame_list = NULL;
1041
1042         for (e = sb->ent, num_ents = 0; e; e = e->next)
1043                 if (!e->guilty && same_suspect(e->suspect, target))
1044                         num_ents++;
1045         if (num_ents) {
1046                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1047                 for (e = sb->ent, i = 0; e; e = e->next)
1048                         if (!e->guilty && same_suspect(e->suspect, target))
1049                                 blame_list[i++].ent = e;
1050         }
1051         *num_ents_p = num_ents;
1052         return blame_list;
1053 }
1054
1055 /*
1056  * For lines target is suspected for, see if we can find code movement
1057  * across file boundary from the parent commit.  porigin is the path
1058  * in the parent we already tried.
1059  */
1060 static int find_copy_in_parent(struct scoreboard *sb,
1061                                struct origin *target,
1062                                struct commit *parent,
1063                                struct origin *porigin,
1064                                int opt)
1065 {
1066         struct diff_options diff_opts;
1067         const char *paths[1];
1068         int i, j;
1069         int retval;
1070         struct blame_list *blame_list;
1071         int num_ents;
1072
1073         blame_list = setup_blame_list(sb, target, &num_ents);
1074         if (!blame_list)
1075                 return 1; /* nothing remains for this target */
1076
1077         diff_setup(&diff_opts);
1078         DIFF_OPT_SET(&diff_opts, RECURSIVE);
1079         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1080
1081         paths[0] = NULL;
1082         diff_tree_setup_paths(paths, &diff_opts);
1083         if (diff_setup_done(&diff_opts) < 0)
1084                 die("diff-setup");
1085
1086         /* Try "find copies harder" on new path if requested;
1087          * we do not want to use diffcore_rename() actually to
1088          * match things up; find_copies_harder is set only to
1089          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1090          * and this code needs to be after diff_setup_done(), which
1091          * usually makes find-copies-harder imply copy detection.
1092          */
1093         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1094             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1095                 && (!porigin || strcmp(target->path, porigin->path))))
1096                 DIFF_OPT_SET(&diff_opts, FIND_COPIES_HARDER);
1097
1098         if (is_null_sha1(target->commit->object.sha1))
1099                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1100         else
1101                 diff_tree_sha1(parent->tree->object.sha1,
1102                                target->commit->tree->object.sha1,
1103                                "", &diff_opts);
1104
1105         if (!DIFF_OPT_TST(&diff_opts, FIND_COPIES_HARDER))
1106                 diffcore_std(&diff_opts);
1107
1108         retval = 0;
1109         while (1) {
1110                 int made_progress = 0;
1111
1112                 for (i = 0; i < diff_queued_diff.nr; i++) {
1113                         struct diff_filepair *p = diff_queued_diff.queue[i];
1114                         struct origin *norigin;
1115                         mmfile_t file_p;
1116                         struct blame_entry this[3];
1117
1118                         if (!DIFF_FILE_VALID(p->one))
1119                                 continue; /* does not exist in parent */
1120                         if (porigin && !strcmp(p->one->path, porigin->path))
1121                                 /* find_move already dealt with this path */
1122                                 continue;
1123
1124                         norigin = get_origin(sb, parent, p->one->path);
1125                         hashcpy(norigin->blob_sha1, p->one->sha1);
1126                         fill_origin_blob(norigin, &file_p);
1127                         if (!file_p.ptr)
1128                                 continue;
1129
1130                         for (j = 0; j < num_ents; j++) {
1131                                 find_copy_in_blob(sb, blame_list[j].ent,
1132                                                   norigin, this, &file_p);
1133                                 copy_split_if_better(sb, blame_list[j].split,
1134                                                      this);
1135                                 decref_split(this);
1136                         }
1137                         origin_decref(norigin);
1138                 }
1139
1140                 for (j = 0; j < num_ents; j++) {
1141                         struct blame_entry *split = blame_list[j].split;
1142                         if (split[1].suspect &&
1143                             blame_copy_score < ent_score(sb, &split[1])) {
1144                                 split_blame(sb, split, blame_list[j].ent);
1145                                 made_progress = 1;
1146                         }
1147                         decref_split(split);
1148                 }
1149                 free(blame_list);
1150
1151                 if (!made_progress)
1152                         break;
1153                 blame_list = setup_blame_list(sb, target, &num_ents);
1154                 if (!blame_list) {
1155                         retval = 1;
1156                         break;
1157                 }
1158         }
1159         diff_flush(&diff_opts);
1160         diff_tree_release_paths(&diff_opts);
1161         return retval;
1162 }
1163
1164 /*
1165  * The blobs of origin and porigin exactly match, so everything
1166  * origin is suspected for can be blamed on the parent.
1167  */
1168 static void pass_whole_blame(struct scoreboard *sb,
1169                              struct origin *origin, struct origin *porigin)
1170 {
1171         struct blame_entry *e;
1172
1173         if (!porigin->file.ptr && origin->file.ptr) {
1174                 /* Steal its file */
1175                 porigin->file = origin->file;
1176                 origin->file.ptr = NULL;
1177         }
1178         for (e = sb->ent; e; e = e->next) {
1179                 if (!same_suspect(e->suspect, origin))
1180                         continue;
1181                 origin_incref(porigin);
1182                 origin_decref(e->suspect);
1183                 e->suspect = porigin;
1184         }
1185 }
1186
1187 /*
1188  * We pass blame from the current commit to its parents.  We keep saying
1189  * "parent" (and "porigin"), but what we mean is to find scapegoat to
1190  * exonerate ourselves.
1191  */
1192 static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit *commit)
1193 {
1194         if (!reverse)
1195                 return commit->parents;
1196         return lookup_decoration(&revs->children, &commit->object);
1197 }
1198
1199 static int num_scapegoats(struct rev_info *revs, struct commit *commit)
1200 {
1201         int cnt;
1202         struct commit_list *l = first_scapegoat(revs, commit);
1203         for (cnt = 0; l; l = l->next)
1204                 cnt++;
1205         return cnt;
1206 }
1207
1208 #define MAXSG 16
1209
1210 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1211 {
1212         struct rev_info *revs = sb->revs;
1213         int i, pass, num_sg;
1214         struct commit *commit = origin->commit;
1215         struct commit_list *sg;
1216         struct origin *sg_buf[MAXSG];
1217         struct origin *porigin, **sg_origin = sg_buf;
1218
1219         num_sg = num_scapegoats(revs, commit);
1220         if (!num_sg)
1221                 goto finish;
1222         else if (num_sg < ARRAY_SIZE(sg_buf))
1223                 memset(sg_buf, 0, sizeof(sg_buf));
1224         else
1225                 sg_origin = xcalloc(num_sg, sizeof(*sg_origin));
1226
1227         /*
1228          * The first pass looks for unrenamed path to optimize for
1229          * common cases, then we look for renames in the second pass.
1230          */
1231         for (pass = 0; pass < 2; pass++) {
1232                 struct origin *(*find)(struct scoreboard *,
1233                                        struct commit *, struct origin *);
1234                 find = pass ? find_rename : find_origin;
1235
1236                 for (i = 0, sg = first_scapegoat(revs, commit);
1237                      i < num_sg && sg;
1238                      sg = sg->next, i++) {
1239                         struct commit *p = sg->item;
1240                         int j, same;
1241
1242                         if (sg_origin[i])
1243                                 continue;
1244                         if (parse_commit(p))
1245                                 continue;
1246                         porigin = find(sb, p, origin);
1247                         if (!porigin)
1248                                 continue;
1249                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1250                                 pass_whole_blame(sb, origin, porigin);
1251                                 origin_decref(porigin);
1252                                 goto finish;
1253                         }
1254                         for (j = same = 0; j < i; j++)
1255                                 if (sg_origin[j] &&
1256                                     !hashcmp(sg_origin[j]->blob_sha1,
1257                                              porigin->blob_sha1)) {
1258                                         same = 1;
1259                                         break;
1260                                 }
1261                         if (!same)
1262                                 sg_origin[i] = porigin;
1263                         else
1264                                 origin_decref(porigin);
1265                 }
1266         }
1267
1268         num_commits++;
1269         for (i = 0, sg = first_scapegoat(revs, commit);
1270              i < num_sg && sg;
1271              sg = sg->next, i++) {
1272                 struct origin *porigin = sg_origin[i];
1273                 if (!porigin)
1274                         continue;
1275                 if (pass_blame_to_parent(sb, origin, porigin))
1276                         goto finish;
1277         }
1278
1279         /*
1280          * Optionally find moves in parents' files.
1281          */
1282         if (opt & PICKAXE_BLAME_MOVE)
1283                 for (i = 0, sg = first_scapegoat(revs, commit);
1284                      i < num_sg && sg;
1285                      sg = sg->next, i++) {
1286                         struct origin *porigin = sg_origin[i];
1287                         if (!porigin)
1288                                 continue;
1289                         if (find_move_in_parent(sb, origin, porigin))
1290                                 goto finish;
1291                 }
1292
1293         /*
1294          * Optionally find copies from parents' files.
1295          */
1296         if (opt & PICKAXE_BLAME_COPY)
1297                 for (i = 0, sg = first_scapegoat(revs, commit);
1298                      i < num_sg && sg;
1299                      sg = sg->next, i++) {
1300                         struct origin *porigin = sg_origin[i];
1301                         if (find_copy_in_parent(sb, origin, sg->item,
1302                                                 porigin, opt))
1303                                 goto finish;
1304                 }
1305
1306  finish:
1307         for (i = 0; i < num_sg; i++) {
1308                 if (sg_origin[i]) {
1309                         drop_origin_blob(sg_origin[i]);
1310                         origin_decref(sg_origin[i]);
1311                 }
1312         }
1313         drop_origin_blob(origin);
1314         if (sg_buf != sg_origin)
1315                 free(sg_origin);
1316 }
1317
1318 /*
1319  * Information on commits, used for output.
1320  */
1321 struct commit_info
1322 {
1323         const char *author;
1324         const char *author_mail;
1325         unsigned long author_time;
1326         const char *author_tz;
1327
1328         /* filled only when asked for details */
1329         const char *committer;
1330         const char *committer_mail;
1331         unsigned long committer_time;
1332         const char *committer_tz;
1333
1334         const char *summary;
1335 };
1336
1337 /*
1338  * Parse author/committer line in the commit object buffer
1339  */
1340 static void get_ac_line(const char *inbuf, const char *what,
1341                         int bufsz, char *person, const char **mail,
1342                         unsigned long *time, const char **tz)
1343 {
1344         int len, tzlen, maillen;
1345         char *tmp, *endp, *timepos;
1346
1347         tmp = strstr(inbuf, what);
1348         if (!tmp)
1349                 goto error_out;
1350         tmp += strlen(what);
1351         endp = strchr(tmp, '\n');
1352         if (!endp)
1353                 len = strlen(tmp);
1354         else
1355                 len = endp - tmp;
1356         if (bufsz <= len) {
1357         error_out:
1358                 /* Ugh */
1359                 *mail = *tz = "(unknown)";
1360                 *time = 0;
1361                 return;
1362         }
1363         memcpy(person, tmp, len);
1364
1365         tmp = person;
1366         tmp += len;
1367         *tmp = 0;
1368         while (*tmp != ' ')
1369                 tmp--;
1370         *tz = tmp+1;
1371         tzlen = (person+len)-(tmp+1);
1372
1373         *tmp = 0;
1374         while (*tmp != ' ')
1375                 tmp--;
1376         *time = strtoul(tmp, NULL, 10);
1377         timepos = tmp;
1378
1379         *tmp = 0;
1380         while (*tmp != ' ')
1381                 tmp--;
1382         *mail = tmp + 1;
1383         *tmp = 0;
1384         maillen = timepos - tmp;
1385
1386         if (!mailmap.nr)
1387                 return;
1388
1389         /*
1390          * mailmap expansion may make the name longer.
1391          * make room by pushing stuff down.
1392          */
1393         tmp = person + bufsz - (tzlen + 1);
1394         memmove(tmp, *tz, tzlen);
1395         tmp[tzlen] = 0;
1396         *tz = tmp;
1397
1398         tmp = tmp - (maillen + 1);
1399         memmove(tmp, *mail, maillen);
1400         tmp[maillen] = 0;
1401         *mail = tmp;
1402
1403         /*
1404          * Now, convert e-mail using mailmap
1405          */
1406         map_email(&mailmap, tmp + 1, person, tmp-person-1);
1407 }
1408
1409 static void get_commit_info(struct commit *commit,
1410                             struct commit_info *ret,
1411                             int detailed)
1412 {
1413         int len;
1414         char *tmp, *endp;
1415         static char author_buf[1024];
1416         static char committer_buf[1024];
1417         static char summary_buf[1024];
1418
1419         /*
1420          * We've operated without save_commit_buffer, so
1421          * we now need to populate them for output.
1422          */
1423         if (!commit->buffer) {
1424                 enum object_type type;
1425                 unsigned long size;
1426                 commit->buffer =
1427                         read_sha1_file(commit->object.sha1, &type, &size);
1428                 if (!commit->buffer)
1429                         die("Cannot read commit %s",
1430                             sha1_to_hex(commit->object.sha1));
1431         }
1432         ret->author = author_buf;
1433         get_ac_line(commit->buffer, "\nauthor ",
1434                     sizeof(author_buf), author_buf, &ret->author_mail,
1435                     &ret->author_time, &ret->author_tz);
1436
1437         if (!detailed)
1438                 return;
1439
1440         ret->committer = committer_buf;
1441         get_ac_line(commit->buffer, "\ncommitter ",
1442                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1443                     &ret->committer_time, &ret->committer_tz);
1444
1445         ret->summary = summary_buf;
1446         tmp = strstr(commit->buffer, "\n\n");
1447         if (!tmp) {
1448         error_out:
1449                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1450                 return;
1451         }
1452         tmp += 2;
1453         endp = strchr(tmp, '\n');
1454         if (!endp)
1455                 endp = tmp + strlen(tmp);
1456         len = endp - tmp;
1457         if (len >= sizeof(summary_buf) || len == 0)
1458                 goto error_out;
1459         memcpy(summary_buf, tmp, len);
1460         summary_buf[len] = 0;
1461 }
1462
1463 /*
1464  * To allow LF and other nonportable characters in pathnames,
1465  * they are c-style quoted as needed.
1466  */
1467 static void write_filename_info(const char *path)
1468 {
1469         printf("filename ");
1470         write_name_quoted(path, stdout, '\n');
1471 }
1472
1473 /*
1474  * The blame_entry is found to be guilty for the range.  Mark it
1475  * as such, and show it in incremental output.
1476  */
1477 static void found_guilty_entry(struct blame_entry *ent)
1478 {
1479         if (ent->guilty)
1480                 return;
1481         ent->guilty = 1;
1482         if (incremental) {
1483                 struct origin *suspect = ent->suspect;
1484
1485                 printf("%s %d %d %d\n",
1486                        sha1_to_hex(suspect->commit->object.sha1),
1487                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1488                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1489                         struct commit_info ci;
1490                         suspect->commit->object.flags |= METAINFO_SHOWN;
1491                         get_commit_info(suspect->commit, &ci, 1);
1492                         printf("author %s\n", ci.author);
1493                         printf("author-mail %s\n", ci.author_mail);
1494                         printf("author-time %lu\n", ci.author_time);
1495                         printf("author-tz %s\n", ci.author_tz);
1496                         printf("committer %s\n", ci.committer);
1497                         printf("committer-mail %s\n", ci.committer_mail);
1498                         printf("committer-time %lu\n", ci.committer_time);
1499                         printf("committer-tz %s\n", ci.committer_tz);
1500                         printf("summary %s\n", ci.summary);
1501                         if (suspect->commit->object.flags & UNINTERESTING)
1502                                 printf("boundary\n");
1503                 }
1504                 write_filename_info(suspect->path);
1505                 maybe_flush_or_die(stdout, "stdout");
1506         }
1507 }
1508
1509 /*
1510  * The main loop -- while the scoreboard has lines whose true origin
1511  * is still unknown, pick one blame_entry, and allow its current
1512  * suspect to pass blames to its parents.
1513  */
1514 static void assign_blame(struct scoreboard *sb, int opt)
1515 {
1516         struct rev_info *revs = sb->revs;
1517
1518         while (1) {
1519                 struct blame_entry *ent;
1520                 struct commit *commit;
1521                 struct origin *suspect = NULL;
1522
1523                 /* find one suspect to break down */
1524                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1525                         if (!ent->guilty)
1526                                 suspect = ent->suspect;
1527                 if (!suspect)
1528                         return; /* all done */
1529
1530                 /*
1531                  * We will use this suspect later in the loop,
1532                  * so hold onto it in the meantime.
1533                  */
1534                 origin_incref(suspect);
1535                 commit = suspect->commit;
1536                 if (!commit->object.parsed)
1537                         parse_commit(commit);
1538                 if (reverse ||
1539                     (!(commit->object.flags & UNINTERESTING) &&
1540                      !(revs->max_age != -1 && commit->date < revs->max_age)))
1541                         pass_blame(sb, suspect, opt);
1542                 else {
1543                         commit->object.flags |= UNINTERESTING;
1544                         if (commit->object.parsed)
1545                                 mark_parents_uninteresting(commit);
1546                 }
1547                 /* treat root commit as boundary */
1548                 if (!commit->parents && !show_root)
1549                         commit->object.flags |= UNINTERESTING;
1550
1551                 /* Take responsibility for the remaining entries */
1552                 for (ent = sb->ent; ent; ent = ent->next)
1553                         if (same_suspect(ent->suspect, suspect))
1554                                 found_guilty_entry(ent);
1555                 origin_decref(suspect);
1556
1557                 if (DEBUG) /* sanity */
1558                         sanity_check_refcnt(sb);
1559         }
1560 }
1561
1562 static const char *format_time(unsigned long time, const char *tz_str,
1563                                int show_raw_time)
1564 {
1565         static char time_buf[128];
1566         time_t t = time;
1567         int minutes, tz;
1568         struct tm *tm;
1569
1570         if (show_raw_time) {
1571                 sprintf(time_buf, "%lu %s", time, tz_str);
1572                 return time_buf;
1573         }
1574
1575         tz = atoi(tz_str);
1576         minutes = tz < 0 ? -tz : tz;
1577         minutes = (minutes / 100)*60 + (minutes % 100);
1578         minutes = tz < 0 ? -minutes : minutes;
1579         t = time + minutes * 60;
1580         tm = gmtime(&t);
1581
1582         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1583         strcat(time_buf, tz_str);
1584         return time_buf;
1585 }
1586
1587 #define OUTPUT_ANNOTATE_COMPAT  001
1588 #define OUTPUT_LONG_OBJECT_NAME 002
1589 #define OUTPUT_RAW_TIMESTAMP    004
1590 #define OUTPUT_PORCELAIN        010
1591 #define OUTPUT_SHOW_NAME        020
1592 #define OUTPUT_SHOW_NUMBER      040
1593 #define OUTPUT_SHOW_SCORE      0100
1594 #define OUTPUT_NO_AUTHOR       0200
1595
1596 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1597 {
1598         int cnt;
1599         const char *cp;
1600         struct origin *suspect = ent->suspect;
1601         char hex[41];
1602
1603         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1604         printf("%s%c%d %d %d\n",
1605                hex,
1606                ent->guilty ? ' ' : '*', // purely for debugging
1607                ent->s_lno + 1,
1608                ent->lno + 1,
1609                ent->num_lines);
1610         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1611                 struct commit_info ci;
1612                 suspect->commit->object.flags |= METAINFO_SHOWN;
1613                 get_commit_info(suspect->commit, &ci, 1);
1614                 printf("author %s\n", ci.author);
1615                 printf("author-mail %s\n", ci.author_mail);
1616                 printf("author-time %lu\n", ci.author_time);
1617                 printf("author-tz %s\n", ci.author_tz);
1618                 printf("committer %s\n", ci.committer);
1619                 printf("committer-mail %s\n", ci.committer_mail);
1620                 printf("committer-time %lu\n", ci.committer_time);
1621                 printf("committer-tz %s\n", ci.committer_tz);
1622                 write_filename_info(suspect->path);
1623                 printf("summary %s\n", ci.summary);
1624                 if (suspect->commit->object.flags & UNINTERESTING)
1625                         printf("boundary\n");
1626         }
1627         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1628                 write_filename_info(suspect->path);
1629
1630         cp = nth_line(sb, ent->lno);
1631         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1632                 char ch;
1633                 if (cnt)
1634                         printf("%s %d %d\n", hex,
1635                                ent->s_lno + 1 + cnt,
1636                                ent->lno + 1 + cnt);
1637                 putchar('\t');
1638                 do {
1639                         ch = *cp++;
1640                         putchar(ch);
1641                 } while (ch != '\n' &&
1642                          cp < sb->final_buf + sb->final_buf_size);
1643         }
1644 }
1645
1646 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1647 {
1648         int cnt;
1649         const char *cp;
1650         struct origin *suspect = ent->suspect;
1651         struct commit_info ci;
1652         char hex[41];
1653         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1654
1655         get_commit_info(suspect->commit, &ci, 1);
1656         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1657
1658         cp = nth_line(sb, ent->lno);
1659         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1660                 char ch;
1661                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1662
1663                 if (suspect->commit->object.flags & UNINTERESTING) {
1664                         if (blank_boundary)
1665                                 memset(hex, ' ', length);
1666                         else if (!cmd_is_annotate) {
1667                                 length--;
1668                                 putchar('^');
1669                         }
1670                 }
1671
1672                 printf("%.*s", length, hex);
1673                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1674                         printf("\t(%10s\t%10s\t%d)", ci.author,
1675                                format_time(ci.author_time, ci.author_tz,
1676                                            show_raw_time),
1677                                ent->lno + 1 + cnt);
1678                 else {
1679                         if (opt & OUTPUT_SHOW_SCORE)
1680                                 printf(" %*d %02d",
1681                                        max_score_digits, ent->score,
1682                                        ent->suspect->refcnt);
1683                         if (opt & OUTPUT_SHOW_NAME)
1684                                 printf(" %-*.*s", longest_file, longest_file,
1685                                        suspect->path);
1686                         if (opt & OUTPUT_SHOW_NUMBER)
1687                                 printf(" %*d", max_orig_digits,
1688                                        ent->s_lno + 1 + cnt);
1689
1690                         if (!(opt & OUTPUT_NO_AUTHOR))
1691                                 printf(" (%-*.*s %10s",
1692                                        longest_author, longest_author,
1693                                        ci.author,
1694                                        format_time(ci.author_time,
1695                                                    ci.author_tz,
1696                                                    show_raw_time));
1697                         printf(" %*d) ",
1698                                max_digits, ent->lno + 1 + cnt);
1699                 }
1700                 do {
1701                         ch = *cp++;
1702                         putchar(ch);
1703                 } while (ch != '\n' &&
1704                          cp < sb->final_buf + sb->final_buf_size);
1705         }
1706 }
1707
1708 static void output(struct scoreboard *sb, int option)
1709 {
1710         struct blame_entry *ent;
1711
1712         if (option & OUTPUT_PORCELAIN) {
1713                 for (ent = sb->ent; ent; ent = ent->next) {
1714                         struct blame_entry *oth;
1715                         struct origin *suspect = ent->suspect;
1716                         struct commit *commit = suspect->commit;
1717                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1718                                 continue;
1719                         for (oth = ent->next; oth; oth = oth->next) {
1720                                 if ((oth->suspect->commit != commit) ||
1721                                     !strcmp(oth->suspect->path, suspect->path))
1722                                         continue;
1723                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1724                                 break;
1725                         }
1726                 }
1727         }
1728
1729         for (ent = sb->ent; ent; ent = ent->next) {
1730                 if (option & OUTPUT_PORCELAIN)
1731                         emit_porcelain(sb, ent);
1732                 else {
1733                         emit_other(sb, ent, option);
1734                 }
1735         }
1736 }
1737
1738 /*
1739  * To allow quick access to the contents of nth line in the
1740  * final image, prepare an index in the scoreboard.
1741  */
1742 static int prepare_lines(struct scoreboard *sb)
1743 {
1744         const char *buf = sb->final_buf;
1745         unsigned long len = sb->final_buf_size;
1746         int num = 0, incomplete = 0, bol = 1;
1747
1748         if (len && buf[len-1] != '\n')
1749                 incomplete++; /* incomplete line at the end */
1750         while (len--) {
1751                 if (bol) {
1752                         sb->lineno = xrealloc(sb->lineno,
1753                                               sizeof(int* ) * (num + 1));
1754                         sb->lineno[num] = buf - sb->final_buf;
1755                         bol = 0;
1756                 }
1757                 if (*buf++ == '\n') {
1758                         num++;
1759                         bol = 1;
1760                 }
1761         }
1762         sb->lineno = xrealloc(sb->lineno,
1763                               sizeof(int* ) * (num + incomplete + 1));
1764         sb->lineno[num + incomplete] = buf - sb->final_buf;
1765         sb->num_lines = num + incomplete;
1766         return sb->num_lines;
1767 }
1768
1769 /*
1770  * Add phony grafts for use with -S; this is primarily to
1771  * support git-cvsserver that wants to give a linear history
1772  * to its clients.
1773  */
1774 static int read_ancestry(const char *graft_file)
1775 {
1776         FILE *fp = fopen(graft_file, "r");
1777         char buf[1024];
1778         if (!fp)
1779                 return -1;
1780         while (fgets(buf, sizeof(buf), fp)) {
1781                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1782                 int len = strlen(buf);
1783                 struct commit_graft *graft = read_graft_line(buf, len);
1784                 if (graft)
1785                         register_commit_graft(graft, 0);
1786         }
1787         fclose(fp);
1788         return 0;
1789 }
1790
1791 /*
1792  * How many columns do we need to show line numbers in decimal?
1793  */
1794 static int lineno_width(int lines)
1795 {
1796         int i, width;
1797
1798         for (width = 1, i = 10; i <= lines + 1; width++)
1799                 i *= 10;
1800         return width;
1801 }
1802
1803 /*
1804  * How many columns do we need to show line numbers, authors,
1805  * and filenames?
1806  */
1807 static void find_alignment(struct scoreboard *sb, int *option)
1808 {
1809         int longest_src_lines = 0;
1810         int longest_dst_lines = 0;
1811         unsigned largest_score = 0;
1812         struct blame_entry *e;
1813
1814         for (e = sb->ent; e; e = e->next) {
1815                 struct origin *suspect = e->suspect;
1816                 struct commit_info ci;
1817                 int num;
1818
1819                 if (strcmp(suspect->path, sb->path))
1820                         *option |= OUTPUT_SHOW_NAME;
1821                 num = strlen(suspect->path);
1822                 if (longest_file < num)
1823                         longest_file = num;
1824                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1825                         suspect->commit->object.flags |= METAINFO_SHOWN;
1826                         get_commit_info(suspect->commit, &ci, 1);
1827                         num = strlen(ci.author);
1828                         if (longest_author < num)
1829                                 longest_author = num;
1830                 }
1831                 num = e->s_lno + e->num_lines;
1832                 if (longest_src_lines < num)
1833                         longest_src_lines = num;
1834                 num = e->lno + e->num_lines;
1835                 if (longest_dst_lines < num)
1836                         longest_dst_lines = num;
1837                 if (largest_score < ent_score(sb, e))
1838                         largest_score = ent_score(sb, e);
1839         }
1840         max_orig_digits = lineno_width(longest_src_lines);
1841         max_digits = lineno_width(longest_dst_lines);
1842         max_score_digits = lineno_width(largest_score);
1843 }
1844
1845 /*
1846  * For debugging -- origin is refcounted, and this asserts that
1847  * we do not underflow.
1848  */
1849 static void sanity_check_refcnt(struct scoreboard *sb)
1850 {
1851         int baa = 0;
1852         struct blame_entry *ent;
1853
1854         for (ent = sb->ent; ent; ent = ent->next) {
1855                 /* Nobody should have zero or negative refcnt */
1856                 if (ent->suspect->refcnt <= 0) {
1857                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1858                                 ent->suspect->path,
1859                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1860                                 ent->suspect->refcnt);
1861                         baa = 1;
1862                 }
1863         }
1864         for (ent = sb->ent; ent; ent = ent->next) {
1865                 /* Mark the ones that haven't been checked */
1866                 if (0 < ent->suspect->refcnt)
1867                         ent->suspect->refcnt = -ent->suspect->refcnt;
1868         }
1869         for (ent = sb->ent; ent; ent = ent->next) {
1870                 /*
1871                  * ... then pick each and see if they have the the
1872                  * correct refcnt.
1873                  */
1874                 int found;
1875                 struct blame_entry *e;
1876                 struct origin *suspect = ent->suspect;
1877
1878                 if (0 < suspect->refcnt)
1879                         continue;
1880                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1881                 for (found = 0, e = sb->ent; e; e = e->next) {
1882                         if (e->suspect != suspect)
1883                                 continue;
1884                         found++;
1885                 }
1886                 if (suspect->refcnt != found) {
1887                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1888                                 ent->suspect->path,
1889                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1890                                 ent->suspect->refcnt, found);
1891                         baa = 2;
1892                 }
1893         }
1894         if (baa) {
1895                 int opt = 0160;
1896                 find_alignment(sb, &opt);
1897                 output(sb, opt);
1898                 die("Baa %d!", baa);
1899         }
1900 }
1901
1902 /*
1903  * Used for the command line parsing; check if the path exists
1904  * in the working tree.
1905  */
1906 static int has_path_in_work_tree(const char *path)
1907 {
1908         struct stat st;
1909         return !lstat(path, &st);
1910 }
1911
1912 static unsigned parse_score(const char *arg)
1913 {
1914         char *end;
1915         unsigned long score = strtoul(arg, &end, 10);
1916         if (*end)
1917                 return 0;
1918         return score;
1919 }
1920
1921 static const char *add_prefix(const char *prefix, const char *path)
1922 {
1923         return prefix_path(prefix, prefix ? strlen(prefix) : 0, path);
1924 }
1925
1926 /*
1927  * Parsing of (comma separated) one item in the -L option
1928  */
1929 static const char *parse_loc(const char *spec,
1930                              struct scoreboard *sb, long lno,
1931                              long begin, long *ret)
1932 {
1933         char *term;
1934         const char *line;
1935         long num;
1936         int reg_error;
1937         regex_t regexp;
1938         regmatch_t match[1];
1939
1940         /* Allow "-L <something>,+20" to mean starting at <something>
1941          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1942          * <something>.
1943          */
1944         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1945                 num = strtol(spec + 1, &term, 10);
1946                 if (term != spec + 1) {
1947                         if (spec[0] == '-')
1948                                 num = 0 - num;
1949                         if (0 < num)
1950                                 *ret = begin + num - 2;
1951                         else if (!num)
1952                                 *ret = begin;
1953                         else
1954                                 *ret = begin + num;
1955                         return term;
1956                 }
1957                 return spec;
1958         }
1959         num = strtol(spec, &term, 10);
1960         if (term != spec) {
1961                 *ret = num;
1962                 return term;
1963         }
1964         if (spec[0] != '/')
1965                 return spec;
1966
1967         /* it could be a regexp of form /.../ */
1968         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1969                 if (*term == '\\')
1970                         term++;
1971         }
1972         if (*term != '/')
1973                 return spec;
1974
1975         /* try [spec+1 .. term-1] as regexp */
1976         *term = 0;
1977         begin--; /* input is in human terms */
1978         line = nth_line(sb, begin);
1979
1980         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1981             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1982                 const char *cp = line + match[0].rm_so;
1983                 const char *nline;
1984
1985                 while (begin++ < lno) {
1986                         nline = nth_line(sb, begin);
1987                         if (line <= cp && cp < nline)
1988                                 break;
1989                         line = nline;
1990                 }
1991                 *ret = begin;
1992                 regfree(&regexp);
1993                 *term++ = '/';
1994                 return term;
1995         }
1996         else {
1997                 char errbuf[1024];
1998                 regerror(reg_error, &regexp, errbuf, 1024);
1999                 die("-L parameter '%s': %s", spec + 1, errbuf);
2000         }
2001 }
2002
2003 /*
2004  * Parsing of -L option
2005  */
2006 static void prepare_blame_range(struct scoreboard *sb,
2007                                 const char *bottomtop,
2008                                 long lno,
2009                                 long *bottom, long *top)
2010 {
2011         const char *term;
2012
2013         term = parse_loc(bottomtop, sb, lno, 1, bottom);
2014         if (*term == ',') {
2015                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
2016                 if (*term)
2017                         usage(blame_usage);
2018         }
2019         if (*term)
2020                 usage(blame_usage);
2021 }
2022
2023 static int git_blame_config(const char *var, const char *value, void *cb)
2024 {
2025         if (!strcmp(var, "blame.showroot")) {
2026                 show_root = git_config_bool(var, value);
2027                 return 0;
2028         }
2029         if (!strcmp(var, "blame.blankboundary")) {
2030                 blank_boundary = git_config_bool(var, value);
2031                 return 0;
2032         }
2033         return git_default_config(var, value, cb);
2034 }
2035
2036 /*
2037  * Prepare a dummy commit that represents the work tree (or staged) item.
2038  * Note that annotating work tree item never works in the reverse.
2039  */
2040 static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
2041 {
2042         struct commit *commit;
2043         struct origin *origin;
2044         unsigned char head_sha1[20];
2045         struct strbuf buf;
2046         const char *ident;
2047         time_t now;
2048         int size, len;
2049         struct cache_entry *ce;
2050         unsigned mode;
2051
2052         if (get_sha1("HEAD", head_sha1))
2053                 die("No such ref: HEAD");
2054
2055         time(&now);
2056         commit = xcalloc(1, sizeof(*commit));
2057         commit->parents = xcalloc(1, sizeof(*commit->parents));
2058         commit->parents->item = lookup_commit_reference(head_sha1);
2059         commit->object.parsed = 1;
2060         commit->date = now;
2061         commit->object.type = OBJ_COMMIT;
2062
2063         origin = make_origin(commit, path);
2064
2065         strbuf_init(&buf, 0);
2066         if (!contents_from || strcmp("-", contents_from)) {
2067                 struct stat st;
2068                 const char *read_from;
2069                 unsigned long fin_size;
2070
2071                 if (contents_from) {
2072                         if (stat(contents_from, &st) < 0)
2073                                 die("Cannot stat %s", contents_from);
2074                         read_from = contents_from;
2075                 }
2076                 else {
2077                         if (lstat(path, &st) < 0)
2078                                 die("Cannot lstat %s", path);
2079                         read_from = path;
2080                 }
2081                 fin_size = xsize_t(st.st_size);
2082                 mode = canon_mode(st.st_mode);
2083                 switch (st.st_mode & S_IFMT) {
2084                 case S_IFREG:
2085                         if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
2086                                 die("cannot open or read %s", read_from);
2087                         break;
2088                 case S_IFLNK:
2089                         if (readlink(read_from, buf.buf, buf.alloc) != fin_size)
2090                                 die("cannot readlink %s", read_from);
2091                         buf.len = fin_size;
2092                         break;
2093                 default:
2094                         die("unsupported file type %s", read_from);
2095                 }
2096         }
2097         else {
2098                 /* Reading from stdin */
2099                 contents_from = "standard input";
2100                 mode = 0;
2101                 if (strbuf_read(&buf, 0, 0) < 0)
2102                         die("read error %s from stdin", strerror(errno));
2103         }
2104         convert_to_git(path, buf.buf, buf.len, &buf, 0);
2105         origin->file.ptr = buf.buf;
2106         origin->file.size = buf.len;
2107         pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1);
2108         commit->util = origin;
2109
2110         /*
2111          * Read the current index, replace the path entry with
2112          * origin->blob_sha1 without mucking with its mode or type
2113          * bits; we are not going to write this index out -- we just
2114          * want to run "diff-index --cached".
2115          */
2116         discard_cache();
2117         read_cache();
2118
2119         len = strlen(path);
2120         if (!mode) {
2121                 int pos = cache_name_pos(path, len);
2122                 if (0 <= pos)
2123                         mode = active_cache[pos]->ce_mode;
2124                 else
2125                         /* Let's not bother reading from HEAD tree */
2126                         mode = S_IFREG | 0644;
2127         }
2128         size = cache_entry_size(len);
2129         ce = xcalloc(1, size);
2130         hashcpy(ce->sha1, origin->blob_sha1);
2131         memcpy(ce->name, path, len);
2132         ce->ce_flags = create_ce_flags(len, 0);
2133         ce->ce_mode = create_ce_mode(mode);
2134         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2135
2136         /*
2137          * We are not going to write this out, so this does not matter
2138          * right now, but someday we might optimize diff-index --cached
2139          * with cache-tree information.
2140          */
2141         cache_tree_invalidate_path(active_cache_tree, path);
2142
2143         commit->buffer = xmalloc(400);
2144         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2145         snprintf(commit->buffer, 400,
2146                 "tree 0000000000000000000000000000000000000000\n"
2147                 "parent %s\n"
2148                 "author %s\n"
2149                 "committer %s\n\n"
2150                 "Version of %s from %s\n",
2151                 sha1_to_hex(head_sha1),
2152                 ident, ident, path, contents_from ? contents_from : path);
2153         return commit;
2154 }
2155
2156 static const char *prepare_final(struct scoreboard *sb)
2157 {
2158         int i;
2159         const char *final_commit_name = NULL;
2160         struct rev_info *revs = sb->revs;
2161
2162         /*
2163          * There must be one and only one positive commit in the
2164          * revs->pending array.
2165          */
2166         for (i = 0; i < revs->pending.nr; i++) {
2167                 struct object *obj = revs->pending.objects[i].item;
2168                 if (obj->flags & UNINTERESTING)
2169                         continue;
2170                 while (obj->type == OBJ_TAG)
2171                         obj = deref_tag(obj, NULL, 0);
2172                 if (obj->type != OBJ_COMMIT)
2173                         die("Non commit %s?", revs->pending.objects[i].name);
2174                 if (sb->final)
2175                         die("More than one commit to dig from %s and %s?",
2176                             revs->pending.objects[i].name,
2177                             final_commit_name);
2178                 sb->final = (struct commit *) obj;
2179                 final_commit_name = revs->pending.objects[i].name;
2180         }
2181         return final_commit_name;
2182 }
2183
2184 static const char *prepare_initial(struct scoreboard *sb)
2185 {
2186         int i;
2187         const char *final_commit_name = NULL;
2188         struct rev_info *revs = sb->revs;
2189
2190         /*
2191          * There must be one and only one negative commit, and it must be
2192          * the boundary.
2193          */
2194         for (i = 0; i < revs->pending.nr; i++) {
2195                 struct object *obj = revs->pending.objects[i].item;
2196                 if (!(obj->flags & UNINTERESTING))
2197                         continue;
2198                 while (obj->type == OBJ_TAG)
2199                         obj = deref_tag(obj, NULL, 0);
2200                 if (obj->type != OBJ_COMMIT)
2201                         die("Non commit %s?", revs->pending.objects[i].name);
2202                 if (sb->final)
2203                         die("More than one commit to dig down to %s and %s?",
2204                             revs->pending.objects[i].name,
2205                             final_commit_name);
2206                 sb->final = (struct commit *) obj;
2207                 final_commit_name = revs->pending.objects[i].name;
2208         }
2209         if (!final_commit_name)
2210                 die("No commit to dig down to?");
2211         return final_commit_name;
2212 }
2213
2214 static int blame_copy_callback(const struct option *option, const char *arg, int unset)
2215 {
2216         int *opt = option->value;
2217
2218         /*
2219          * -C enables copy from removed files;
2220          * -C -C enables copy from existing files, but only
2221          *       when blaming a new file;
2222          * -C -C -C enables copy from existing files for
2223          *          everybody
2224          */
2225         if (*opt & PICKAXE_BLAME_COPY_HARDER)
2226                 *opt |= PICKAXE_BLAME_COPY_HARDEST;
2227         if (*opt & PICKAXE_BLAME_COPY)
2228                 *opt |= PICKAXE_BLAME_COPY_HARDER;
2229         *opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2230
2231         if (arg)
2232                 blame_copy_score = parse_score(arg);
2233         return 0;
2234 }
2235
2236 static int blame_move_callback(const struct option *option, const char *arg, int unset)
2237 {
2238         int *opt = option->value;
2239
2240         *opt |= PICKAXE_BLAME_MOVE;
2241
2242         if (arg)
2243                 blame_move_score = parse_score(arg);
2244         return 0;
2245 }
2246
2247 static int blame_bottomtop_callback(const struct option *option, const char *arg, int unset)
2248 {
2249         const char **bottomtop = option->value;
2250         if (!arg)
2251                 return -1;
2252         if (*bottomtop)
2253                 die("More than one '-L n,m' option given");
2254         *bottomtop = arg;
2255         return 0;
2256 }
2257
2258 int cmd_blame(int argc, const char **argv, const char *prefix)
2259 {
2260         struct rev_info revs;
2261         const char *path;
2262         struct scoreboard sb;
2263         struct origin *o;
2264         struct blame_entry *ent;
2265         long dashdash_pos, bottom, top, lno;
2266         const char *final_commit_name = NULL;
2267         enum object_type type;
2268
2269         static const char *bottomtop = NULL;
2270         static int output_option = 0, opt = 0;
2271         static int show_stats = 0;
2272         static const char *revs_file = NULL;
2273         static const char *contents_from = NULL;
2274         static const struct option options[] = {
2275                 OPT_BOOLEAN(0, "incremental", &incremental, "Show blame entries as we find them, incrementally"),
2276                 OPT_BOOLEAN('b', NULL, &blank_boundary, "Show blank SHA-1 for boundary commits (Default: off)"),
2277                 OPT_BOOLEAN(0, "root", &show_root, "Do not treat root commits as boundaries (Default: off)"),
2278                 OPT_BOOLEAN(0, "show-stats", &show_stats, "Show work cost statistics"),
2279                 OPT_BIT(0, "score-debug", &output_option, "Show output score for blame entries", OUTPUT_SHOW_SCORE),
2280                 OPT_BIT('f', "show-name", &output_option, "Show original filename (Default: auto)", OUTPUT_SHOW_NAME),
2281                 OPT_BIT('n', "show-number", &output_option, "Show original linenumber (Default: off)", OUTPUT_SHOW_NUMBER),
2282                 OPT_BIT('p', "porcelain", &output_option, "Show in a format designed for machine consumption", OUTPUT_PORCELAIN),
2283                 OPT_BIT('c', NULL, &output_option, "Use the same output mode as git-annotate (Default: off)", OUTPUT_ANNOTATE_COMPAT),
2284                 OPT_BIT('t', NULL, &output_option, "Show raw timestamp (Default: off)", OUTPUT_RAW_TIMESTAMP),
2285                 OPT_BIT('l', NULL, &output_option, "Show long commit SHA1 (Default: off)", OUTPUT_LONG_OBJECT_NAME),
2286                 OPT_BIT('s', NULL, &output_option, "Suppress author name and timestamp (Default: off)", OUTPUT_NO_AUTHOR),
2287                 OPT_BIT('w', NULL, &xdl_opts, "Ignore whitespace differences", XDF_IGNORE_WHITESPACE),
2288                 OPT_STRING('S', NULL, &revs_file, "file", "Use revisions from <file> instead of calling git-rev-list"),
2289                 OPT_STRING(0, "contents", &contents_from, "file", "Use <file>'s contents as the final image"),
2290                 { OPTION_CALLBACK, 'C', NULL, &opt, "score", "Find line copies within and across files", PARSE_OPT_OPTARG, blame_copy_callback },
2291                 { OPTION_CALLBACK, 'M', NULL, &opt, "score", "Find line movements within and across files", PARSE_OPT_OPTARG, blame_move_callback },
2292                 OPT_CALLBACK('L', NULL, &bottomtop, "n,m", "Process only line range n,m, counting from 1", blame_bottomtop_callback),
2293                 OPT_END()
2294         };
2295
2296         struct parse_opt_ctx_t ctx;
2297
2298         cmd_is_annotate = !strcmp(argv[0], "annotate");
2299
2300         git_config(git_blame_config, NULL);
2301         init_revisions(&revs, NULL);
2302         save_commit_buffer = 0;
2303         dashdash_pos = 0;
2304
2305         parse_options_start(&ctx, argc, argv, PARSE_OPT_KEEP_DASHDASH |
2306                             PARSE_OPT_KEEP_ARGV0);
2307         for (;;) {
2308                 switch (parse_options_step(&ctx, options, blame_opt_usage)) {
2309                 case PARSE_OPT_HELP:
2310                         exit(129);
2311                 case PARSE_OPT_DONE:
2312                         if (ctx.argv[0])
2313                                 dashdash_pos = ctx.cpidx;
2314                         goto parse_done;
2315                 }
2316
2317                 if (!strcmp(ctx.argv[0], "--reverse")) {
2318                         ctx.argv[0] = "--children";
2319                         reverse = 1;
2320                 }
2321                 parse_revision_opt(&revs, &ctx, options, blame_opt_usage);
2322         }
2323 parse_done:
2324         argc = parse_options_end(&ctx);
2325
2326         if (!blame_move_score)
2327                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2328         if (!blame_copy_score)
2329                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2330
2331         /*
2332          * We have collected options unknown to us in argv[1..unk]
2333          * which are to be passed to revision machinery if we are
2334          * going to do the "bottom" processing.
2335          *
2336          * The remaining are:
2337          *
2338          * (1) if dashdash_pos != 0, its either
2339          *     "blame [revisions] -- <path>" or
2340          *     "blame -- <path> <rev>"
2341          *
2342          * (2) otherwise, its one of the two:
2343          *     "blame [revisions] <path>"
2344          *     "blame <path> <rev>"
2345          *
2346          * Note that we must strip out <path> from the arguments: we do not
2347          * want the path pruning but we may want "bottom" processing.
2348          */
2349         if (dashdash_pos) {
2350                 switch (argc - dashdash_pos - 1) {
2351                 case 2: /* (1b) */
2352                         if (argc != 4)
2353                                 usage_with_options(blame_opt_usage, options);
2354                         /* reorder for the new way: <rev> -- <path> */
2355                         argv[1] = argv[3];
2356                         argv[3] = argv[2];
2357                         argv[2] = "--";
2358                         /* FALLTHROUGH */
2359                 case 1: /* (1a) */
2360                         path = add_prefix(prefix, argv[--argc]);
2361                         argv[argc] = NULL;
2362                         break;
2363                 default:
2364                         usage_with_options(blame_opt_usage, options);
2365                 }
2366         } else {
2367                 if (argc < 2)
2368                         usage_with_options(blame_opt_usage, options);
2369                 path = add_prefix(prefix, argv[argc - 1]);
2370                 if (argc == 3 && !has_path_in_work_tree(path)) { /* (2b) */
2371                         path = add_prefix(prefix, argv[1]);
2372                         argv[1] = argv[2];
2373                 }
2374                 argv[argc - 1] = "--";
2375
2376                 setup_work_tree();
2377                 if (!has_path_in_work_tree(path))
2378                         die("cannot stat path %s: %s", path, strerror(errno));
2379         }
2380
2381         setup_revisions(argc, argv, &revs, NULL);
2382         memset(&sb, 0, sizeof(sb));
2383
2384         sb.revs = &revs;
2385         if (!reverse)
2386                 final_commit_name = prepare_final(&sb);
2387         else if (contents_from)
2388                 die("--contents and --children do not blend well.");
2389         else
2390                 final_commit_name = prepare_initial(&sb);
2391
2392         if (!sb.final) {
2393                 /*
2394                  * "--not A B -- path" without anything positive;
2395                  * do not default to HEAD, but use the working tree
2396                  * or "--contents".
2397                  */
2398                 setup_work_tree();
2399                 sb.final = fake_working_tree_commit(path, contents_from);
2400                 add_pending_object(&revs, &(sb.final->object), ":");
2401         }
2402         else if (contents_from)
2403                 die("Cannot use --contents with final commit object name");
2404
2405         /*
2406          * If we have bottom, this will mark the ancestors of the
2407          * bottom commits we would reach while traversing as
2408          * uninteresting.
2409          */
2410         if (prepare_revision_walk(&revs))
2411                 die("revision walk setup failed");
2412
2413         if (is_null_sha1(sb.final->object.sha1)) {
2414                 char *buf;
2415                 o = sb.final->util;
2416                 buf = xmalloc(o->file.size + 1);
2417                 memcpy(buf, o->file.ptr, o->file.size + 1);
2418                 sb.final_buf = buf;
2419                 sb.final_buf_size = o->file.size;
2420         }
2421         else {
2422                 o = get_origin(&sb, sb.final, path);
2423                 if (fill_blob_sha1(o))
2424                         die("no such path %s in %s", path, final_commit_name);
2425
2426                 sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2427                                               &sb.final_buf_size);
2428                 if (!sb.final_buf)
2429                         die("Cannot read blob %s for path %s",
2430                             sha1_to_hex(o->blob_sha1),
2431                             path);
2432         }
2433         num_read_blob++;
2434         lno = prepare_lines(&sb);
2435
2436         bottom = top = 0;
2437         if (bottomtop)
2438                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2439         if (bottom && top && top < bottom) {
2440                 long tmp;
2441                 tmp = top; top = bottom; bottom = tmp;
2442         }
2443         if (bottom < 1)
2444                 bottom = 1;
2445         if (top < 1)
2446                 top = lno;
2447         bottom--;
2448         if (lno < top)
2449                 die("file %s has only %lu lines", path, lno);
2450
2451         ent = xcalloc(1, sizeof(*ent));
2452         ent->lno = bottom;
2453         ent->num_lines = top - bottom;
2454         ent->suspect = o;
2455         ent->s_lno = bottom;
2456
2457         sb.ent = ent;
2458         sb.path = path;
2459
2460         if (revs_file && read_ancestry(revs_file))
2461                 die("reading graft file %s failed: %s",
2462                     revs_file, strerror(errno));
2463
2464         read_mailmap(&mailmap, ".mailmap", NULL);
2465
2466         if (!incremental)
2467                 setup_pager();
2468
2469         assign_blame(&sb, opt);
2470
2471         if (incremental)
2472                 return 0;
2473
2474         coalesce(&sb);
2475
2476         if (!(output_option & OUTPUT_PORCELAIN))
2477                 find_alignment(&sb, &output_option);
2478
2479         output(&sb, output_option);
2480         free((void *)sb.final_buf);
2481         for (ent = sb.ent; ent; ) {
2482                 struct blame_entry *e = ent->next;
2483                 free(ent);
2484                 ent = e;
2485         }
2486
2487         if (show_stats) {
2488                 printf("num read blob: %d\n", num_read_blob);
2489                 printf("num get patch: %d\n", num_get_patch);
2490                 printf("num commits: %d\n", num_commits);
2491         }
2492         return 0;
2493 }