War on whitespace
[git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19 #include "path-list.h"
20 #include "mailmap.h"
21
22 static char blame_usage[] =
23 "git-blame [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
24 "  -c                  Use the same output mode as git-annotate (Default: off)\n"
25 "  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
26 "  -l                  Show long commit SHA1 (Default: off)\n"
27 "  --root              Do not treat root commits as boundaries (Default: off)\n"
28 "  -t                  Show raw timestamp (Default: off)\n"
29 "  -f, --show-name     Show original filename (Default: auto)\n"
30 "  -n, --show-number   Show original linenumber (Default: off)\n"
31 "  -s                  Suppress author name and timestamp (Default: off)\n"
32 "  -p, --porcelain     Show in a format designed for machine consumption\n"
33 "  -L n,m              Process only line range n,m, counting from 1\n"
34 "  -M, -C              Find line movements within and across files\n"
35 "  --incremental       Show blame entries as we find them, incrementally\n"
36 "  --contents file     Use <file>'s contents as the final image\n"
37 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
38
39 static int longest_file;
40 static int longest_author;
41 static int max_orig_digits;
42 static int max_digits;
43 static int max_score_digits;
44 static int show_root;
45 static int blank_boundary;
46 static int incremental;
47 static int cmd_is_annotate;
48 static struct path_list mailmap;
49
50 #ifndef DEBUG
51 #define DEBUG 0
52 #endif
53
54 /* stats */
55 static int num_read_blob;
56 static int num_get_patch;
57 static int num_commits;
58
59 #define PICKAXE_BLAME_MOVE              01
60 #define PICKAXE_BLAME_COPY              02
61 #define PICKAXE_BLAME_COPY_HARDER       04
62 #define PICKAXE_BLAME_COPY_HARDEST      010
63
64 /*
65  * blame for a blame_entry with score lower than these thresholds
66  * is not passed to the parent using move/copy logic.
67  */
68 static unsigned blame_move_score;
69 static unsigned blame_copy_score;
70 #define BLAME_DEFAULT_MOVE_SCORE        20
71 #define BLAME_DEFAULT_COPY_SCORE        40
72
73 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
74 #define METAINFO_SHOWN          (1u<<12)
75 #define MORE_THAN_ONE_PATH      (1u<<13)
76
77 /*
78  * One blob in a commit that is being suspected
79  */
80 struct origin {
81         int refcnt;
82         struct commit *commit;
83         mmfile_t file;
84         unsigned char blob_sha1[20];
85         char path[FLEX_ARRAY];
86 };
87
88 /*
89  * Given an origin, prepare mmfile_t structure to be used by the
90  * diff machinery
91  */
92 static char *fill_origin_blob(struct origin *o, mmfile_t *file)
93 {
94         if (!o->file.ptr) {
95                 enum object_type type;
96                 num_read_blob++;
97                 file->ptr = read_sha1_file(o->blob_sha1, &type,
98                                            (unsigned long *)(&(file->size)));
99                 o->file = *file;
100         }
101         else
102                 *file = o->file;
103         return file->ptr;
104 }
105
106 /*
107  * Origin is refcounted and usually we keep the blob contents to be
108  * reused.
109  */
110 static inline struct origin *origin_incref(struct origin *o)
111 {
112         if (o)
113                 o->refcnt++;
114         return o;
115 }
116
117 static void origin_decref(struct origin *o)
118 {
119         if (o && --o->refcnt <= 0) {
120                 if (o->file.ptr)
121                         free(o->file.ptr);
122                 memset(o, 0, sizeof(*o));
123                 free(o);
124         }
125 }
126
127 /*
128  * Each group of lines is described by a blame_entry; it can be split
129  * as we pass blame to the parents.  They form a linked list in the
130  * scoreboard structure, sorted by the target line number.
131  */
132 struct blame_entry {
133         struct blame_entry *prev;
134         struct blame_entry *next;
135
136         /* the first line of this group in the final image;
137          * internally all line numbers are 0 based.
138          */
139         int lno;
140
141         /* how many lines this group has */
142         int num_lines;
143
144         /* the commit that introduced this group into the final image */
145         struct origin *suspect;
146
147         /* true if the suspect is truly guilty; false while we have not
148          * checked if the group came from one of its parents.
149          */
150         char guilty;
151
152         /* the line number of the first line of this group in the
153          * suspect's file; internally all line numbers are 0 based.
154          */
155         int s_lno;
156
157         /* how significant this entry is -- cached to avoid
158          * scanning the lines over and over.
159          */
160         unsigned score;
161 };
162
163 /*
164  * The current state of the blame assignment.
165  */
166 struct scoreboard {
167         /* the final commit (i.e. where we started digging from) */
168         struct commit *final;
169
170         const char *path;
171
172         /*
173          * The contents in the final image.
174          * Used by many functions to obtain contents of the nth line,
175          * indexed with scoreboard.lineno[blame_entry.lno].
176          */
177         const char *final_buf;
178         unsigned long final_buf_size;
179
180         /* linked list of blames */
181         struct blame_entry *ent;
182
183         /* look-up a line in the final buffer */
184         int num_lines;
185         int *lineno;
186 };
187
188 static inline int same_suspect(struct origin *a, struct origin *b)
189 {
190         if (a == b)
191                 return 1;
192         if (a->commit != b->commit)
193                 return 0;
194         return !strcmp(a->path, b->path);
195 }
196
197 static void sanity_check_refcnt(struct scoreboard *);
198
199 /*
200  * If two blame entries that are next to each other came from
201  * contiguous lines in the same origin (i.e. <commit, path> pair),
202  * merge them together.
203  */
204 static void coalesce(struct scoreboard *sb)
205 {
206         struct blame_entry *ent, *next;
207
208         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
209                 if (same_suspect(ent->suspect, next->suspect) &&
210                     ent->guilty == next->guilty &&
211                     ent->s_lno + ent->num_lines == next->s_lno) {
212                         ent->num_lines += next->num_lines;
213                         ent->next = next->next;
214                         if (ent->next)
215                                 ent->next->prev = ent;
216                         origin_decref(next->suspect);
217                         free(next);
218                         ent->score = 0;
219                         next = ent; /* again */
220                 }
221         }
222
223         if (DEBUG) /* sanity */
224                 sanity_check_refcnt(sb);
225 }
226
227 /*
228  * Given a commit and a path in it, create a new origin structure.
229  * The callers that add blame to the scoreboard should use
230  * get_origin() to obtain shared, refcounted copy instead of calling
231  * this function directly.
232  */
233 static struct origin *make_origin(struct commit *commit, const char *path)
234 {
235         struct origin *o;
236         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
237         o->commit = commit;
238         o->refcnt = 1;
239         strcpy(o->path, path);
240         return o;
241 }
242
243 /*
244  * Locate an existing origin or create a new one.
245  */
246 static struct origin *get_origin(struct scoreboard *sb,
247                                  struct commit *commit,
248                                  const char *path)
249 {
250         struct blame_entry *e;
251
252         for (e = sb->ent; e; e = e->next) {
253                 if (e->suspect->commit == commit &&
254                     !strcmp(e->suspect->path, path))
255                         return origin_incref(e->suspect);
256         }
257         return make_origin(commit, path);
258 }
259
260 /*
261  * Fill the blob_sha1 field of an origin if it hasn't, so that later
262  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
263  * for an origin is also used to pass the blame for the entire file to
264  * the parent to detect the case where a child's blob is identical to
265  * that of its parent's.
266  */
267 static int fill_blob_sha1(struct origin *origin)
268 {
269         unsigned mode;
270
271         if (!is_null_sha1(origin->blob_sha1))
272                 return 0;
273         if (get_tree_entry(origin->commit->object.sha1,
274                            origin->path,
275                            origin->blob_sha1, &mode))
276                 goto error_out;
277         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
278                 goto error_out;
279         return 0;
280  error_out:
281         hashclr(origin->blob_sha1);
282         return -1;
283 }
284
285 /*
286  * We have an origin -- check if the same path exists in the
287  * parent and return an origin structure to represent it.
288  */
289 static struct origin *find_origin(struct scoreboard *sb,
290                                   struct commit *parent,
291                                   struct origin *origin)
292 {
293         struct origin *porigin = NULL;
294         struct diff_options diff_opts;
295         const char *paths[2];
296
297         if (parent->util) {
298                 /*
299                  * Each commit object can cache one origin in that
300                  * commit.  This is a freestanding copy of origin and
301                  * not refcounted.
302                  */
303                 struct origin *cached = parent->util;
304                 if (!strcmp(cached->path, origin->path)) {
305                         /*
306                          * The same path between origin and its parent
307                          * without renaming -- the most common case.
308                          */
309                         porigin = get_origin(sb, parent, cached->path);
310
311                         /*
312                          * If the origin was newly created (i.e. get_origin
313                          * would call make_origin if none is found in the
314                          * scoreboard), it does not know the blob_sha1,
315                          * so copy it.  Otherwise porigin was in the
316                          * scoreboard and already knows blob_sha1.
317                          */
318                         if (porigin->refcnt == 1)
319                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
320                         return porigin;
321                 }
322                 /* otherwise it was not very useful; free it */
323                 free(parent->util);
324                 parent->util = NULL;
325         }
326
327         /* See if the origin->path is different between parent
328          * and origin first.  Most of the time they are the
329          * same and diff-tree is fairly efficient about this.
330          */
331         diff_setup(&diff_opts);
332         diff_opts.recursive = 1;
333         diff_opts.detect_rename = 0;
334         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
335         paths[0] = origin->path;
336         paths[1] = NULL;
337
338         diff_tree_setup_paths(paths, &diff_opts);
339         if (diff_setup_done(&diff_opts) < 0)
340                 die("diff-setup");
341
342         if (is_null_sha1(origin->commit->object.sha1))
343                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
344         else
345                 diff_tree_sha1(parent->tree->object.sha1,
346                                origin->commit->tree->object.sha1,
347                                "", &diff_opts);
348         diffcore_std(&diff_opts);
349
350         /* It is either one entry that says "modified", or "created",
351          * or nothing.
352          */
353         if (!diff_queued_diff.nr) {
354                 /* The path is the same as parent */
355                 porigin = get_origin(sb, parent, origin->path);
356                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
357         }
358         else if (diff_queued_diff.nr != 1)
359                 die("internal error in blame::find_origin");
360         else {
361                 struct diff_filepair *p = diff_queued_diff.queue[0];
362                 switch (p->status) {
363                 default:
364                         die("internal error in blame::find_origin (%c)",
365                             p->status);
366                 case 'M':
367                         porigin = get_origin(sb, parent, origin->path);
368                         hashcpy(porigin->blob_sha1, p->one->sha1);
369                         break;
370                 case 'A':
371                 case 'T':
372                         /* Did not exist in parent, or type changed */
373                         break;
374                 }
375         }
376         diff_flush(&diff_opts);
377         if (porigin) {
378                 /*
379                  * Create a freestanding copy that is not part of
380                  * the refcounted origin found in the scoreboard, and
381                  * cache it in the commit.
382                  */
383                 struct origin *cached;
384
385                 cached = make_origin(porigin->commit, porigin->path);
386                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
387                 parent->util = cached;
388         }
389         return porigin;
390 }
391
392 /*
393  * We have an origin -- find the path that corresponds to it in its
394  * parent and return an origin structure to represent it.
395  */
396 static struct origin *find_rename(struct scoreboard *sb,
397                                   struct commit *parent,
398                                   struct origin *origin)
399 {
400         struct origin *porigin = NULL;
401         struct diff_options diff_opts;
402         int i;
403         const char *paths[2];
404
405         diff_setup(&diff_opts);
406         diff_opts.recursive = 1;
407         diff_opts.detect_rename = DIFF_DETECT_RENAME;
408         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
409         diff_opts.single_follow = origin->path;
410         paths[0] = NULL;
411         diff_tree_setup_paths(paths, &diff_opts);
412         if (diff_setup_done(&diff_opts) < 0)
413                 die("diff-setup");
414
415         if (is_null_sha1(origin->commit->object.sha1))
416                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
417         else
418                 diff_tree_sha1(parent->tree->object.sha1,
419                                origin->commit->tree->object.sha1,
420                                "", &diff_opts);
421         diffcore_std(&diff_opts);
422
423         for (i = 0; i < diff_queued_diff.nr; i++) {
424                 struct diff_filepair *p = diff_queued_diff.queue[i];
425                 if ((p->status == 'R' || p->status == 'C') &&
426                     !strcmp(p->two->path, origin->path)) {
427                         porigin = get_origin(sb, parent, p->one->path);
428                         hashcpy(porigin->blob_sha1, p->one->sha1);
429                         break;
430                 }
431         }
432         diff_flush(&diff_opts);
433         return porigin;
434 }
435
436 /*
437  * Parsing of patch chunks...
438  */
439 struct chunk {
440         /* line number in postimage; up to but not including this
441          * line is the same as preimage
442          */
443         int same;
444
445         /* preimage line number after this chunk */
446         int p_next;
447
448         /* postimage line number after this chunk */
449         int t_next;
450 };
451
452 struct patch {
453         struct chunk *chunks;
454         int num;
455 };
456
457 struct blame_diff_state {
458         struct xdiff_emit_state xm;
459         struct patch *ret;
460         unsigned hunk_post_context;
461         unsigned hunk_in_pre_context : 1;
462 };
463
464 static void process_u_diff(void *state_, char *line, unsigned long len)
465 {
466         struct blame_diff_state *state = state_;
467         struct chunk *chunk;
468         int off1, off2, len1, len2, num;
469
470         num = state->ret->num;
471         if (len < 4 || line[0] != '@' || line[1] != '@') {
472                 if (state->hunk_in_pre_context && line[0] == ' ')
473                         state->ret->chunks[num - 1].same++;
474                 else {
475                         state->hunk_in_pre_context = 0;
476                         if (line[0] == ' ')
477                                 state->hunk_post_context++;
478                         else
479                                 state->hunk_post_context = 0;
480                 }
481                 return;
482         }
483
484         if (num && state->hunk_post_context) {
485                 chunk = &state->ret->chunks[num - 1];
486                 chunk->p_next -= state->hunk_post_context;
487                 chunk->t_next -= state->hunk_post_context;
488         }
489         state->ret->num = ++num;
490         state->ret->chunks = xrealloc(state->ret->chunks,
491                                       sizeof(struct chunk) * num);
492         chunk = &state->ret->chunks[num - 1];
493         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
494                 state->ret->num--;
495                 return;
496         }
497
498         /* Line numbers in patch output are one based. */
499         off1--;
500         off2--;
501
502         chunk->same = len2 ? off2 : (off2 + 1);
503
504         chunk->p_next = off1 + (len1 ? len1 : 1);
505         chunk->t_next = chunk->same + len2;
506         state->hunk_in_pre_context = 1;
507         state->hunk_post_context = 0;
508 }
509
510 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
511                                     int context)
512 {
513         struct blame_diff_state state;
514         xpparam_t xpp;
515         xdemitconf_t xecfg;
516         xdemitcb_t ecb;
517
518         xpp.flags = XDF_NEED_MINIMAL;
519         xecfg.ctxlen = context;
520         xecfg.flags = 0;
521         ecb.outf = xdiff_outf;
522         ecb.priv = &state;
523         memset(&state, 0, sizeof(state));
524         state.xm.consume = process_u_diff;
525         state.ret = xmalloc(sizeof(struct patch));
526         state.ret->chunks = NULL;
527         state.ret->num = 0;
528
529         xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
530
531         if (state.ret->num) {
532                 struct chunk *chunk;
533                 chunk = &state.ret->chunks[state.ret->num - 1];
534                 chunk->p_next -= state.hunk_post_context;
535                 chunk->t_next -= state.hunk_post_context;
536         }
537         return state.ret;
538 }
539
540 /*
541  * Run diff between two origins and grab the patch output, so that
542  * we can pass blame for lines origin is currently suspected for
543  * to its parent.
544  */
545 static struct patch *get_patch(struct origin *parent, struct origin *origin)
546 {
547         mmfile_t file_p, file_o;
548         struct patch *patch;
549
550         fill_origin_blob(parent, &file_p);
551         fill_origin_blob(origin, &file_o);
552         if (!file_p.ptr || !file_o.ptr)
553                 return NULL;
554         patch = compare_buffer(&file_p, &file_o, 0);
555         num_get_patch++;
556         return patch;
557 }
558
559 static void free_patch(struct patch *p)
560 {
561         free(p->chunks);
562         free(p);
563 }
564
565 /*
566  * Link in a new blame entry to the scoreboard.  Entries that cover the
567  * same line range have been removed from the scoreboard previously.
568  */
569 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
570 {
571         struct blame_entry *ent, *prev = NULL;
572
573         origin_incref(e->suspect);
574
575         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
576                 prev = ent;
577
578         /* prev, if not NULL, is the last one that is below e */
579         e->prev = prev;
580         if (prev) {
581                 e->next = prev->next;
582                 prev->next = e;
583         }
584         else {
585                 e->next = sb->ent;
586                 sb->ent = e;
587         }
588         if (e->next)
589                 e->next->prev = e;
590 }
591
592 /*
593  * src typically is on-stack; we want to copy the information in it to
594  * an malloced blame_entry that is already on the linked list of the
595  * scoreboard.  The origin of dst loses a refcnt while the origin of src
596  * gains one.
597  */
598 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
599 {
600         struct blame_entry *p, *n;
601
602         p = dst->prev;
603         n = dst->next;
604         origin_incref(src->suspect);
605         origin_decref(dst->suspect);
606         memcpy(dst, src, sizeof(*src));
607         dst->prev = p;
608         dst->next = n;
609         dst->score = 0;
610 }
611
612 static const char *nth_line(struct scoreboard *sb, int lno)
613 {
614         return sb->final_buf + sb->lineno[lno];
615 }
616
617 /*
618  * It is known that lines between tlno to same came from parent, and e
619  * has an overlap with that range.  it also is known that parent's
620  * line plno corresponds to e's line tlno.
621  *
622  *                <---- e ----->
623  *                   <------>
624  *                   <------------>
625  *             <------------>
626  *             <------------------>
627  *
628  * Split e into potentially three parts; before this chunk, the chunk
629  * to be blamed for the parent, and after that portion.
630  */
631 static void split_overlap(struct blame_entry *split,
632                           struct blame_entry *e,
633                           int tlno, int plno, int same,
634                           struct origin *parent)
635 {
636         int chunk_end_lno;
637         memset(split, 0, sizeof(struct blame_entry [3]));
638
639         if (e->s_lno < tlno) {
640                 /* there is a pre-chunk part not blamed on parent */
641                 split[0].suspect = origin_incref(e->suspect);
642                 split[0].lno = e->lno;
643                 split[0].s_lno = e->s_lno;
644                 split[0].num_lines = tlno - e->s_lno;
645                 split[1].lno = e->lno + tlno - e->s_lno;
646                 split[1].s_lno = plno;
647         }
648         else {
649                 split[1].lno = e->lno;
650                 split[1].s_lno = plno + (e->s_lno - tlno);
651         }
652
653         if (same < e->s_lno + e->num_lines) {
654                 /* there is a post-chunk part not blamed on parent */
655                 split[2].suspect = origin_incref(e->suspect);
656                 split[2].lno = e->lno + (same - e->s_lno);
657                 split[2].s_lno = e->s_lno + (same - e->s_lno);
658                 split[2].num_lines = e->s_lno + e->num_lines - same;
659                 chunk_end_lno = split[2].lno;
660         }
661         else
662                 chunk_end_lno = e->lno + e->num_lines;
663         split[1].num_lines = chunk_end_lno - split[1].lno;
664
665         /*
666          * if it turns out there is nothing to blame the parent for,
667          * forget about the splitting.  !split[1].suspect signals this.
668          */
669         if (split[1].num_lines < 1)
670                 return;
671         split[1].suspect = origin_incref(parent);
672 }
673
674 /*
675  * split_overlap() divided an existing blame e into up to three parts
676  * in split.  Adjust the linked list of blames in the scoreboard to
677  * reflect the split.
678  */
679 static void split_blame(struct scoreboard *sb,
680                         struct blame_entry *split,
681                         struct blame_entry *e)
682 {
683         struct blame_entry *new_entry;
684
685         if (split[0].suspect && split[2].suspect) {
686                 /* The first part (reuse storage for the existing entry e) */
687                 dup_entry(e, &split[0]);
688
689                 /* The last part -- me */
690                 new_entry = xmalloc(sizeof(*new_entry));
691                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
692                 add_blame_entry(sb, new_entry);
693
694                 /* ... and the middle part -- parent */
695                 new_entry = xmalloc(sizeof(*new_entry));
696                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
697                 add_blame_entry(sb, new_entry);
698         }
699         else if (!split[0].suspect && !split[2].suspect)
700                 /*
701                  * The parent covers the entire area; reuse storage for
702                  * e and replace it with the parent.
703                  */
704                 dup_entry(e, &split[1]);
705         else if (split[0].suspect) {
706                 /* me and then parent */
707                 dup_entry(e, &split[0]);
708
709                 new_entry = xmalloc(sizeof(*new_entry));
710                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
711                 add_blame_entry(sb, new_entry);
712         }
713         else {
714                 /* parent and then me */
715                 dup_entry(e, &split[1]);
716
717                 new_entry = xmalloc(sizeof(*new_entry));
718                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
719                 add_blame_entry(sb, new_entry);
720         }
721
722         if (DEBUG) { /* sanity */
723                 struct blame_entry *ent;
724                 int lno = sb->ent->lno, corrupt = 0;
725
726                 for (ent = sb->ent; ent; ent = ent->next) {
727                         if (lno != ent->lno)
728                                 corrupt = 1;
729                         if (ent->s_lno < 0)
730                                 corrupt = 1;
731                         lno += ent->num_lines;
732                 }
733                 if (corrupt) {
734                         lno = sb->ent->lno;
735                         for (ent = sb->ent; ent; ent = ent->next) {
736                                 printf("L %8d l %8d n %8d\n",
737                                        lno, ent->lno, ent->num_lines);
738                                 lno = ent->lno + ent->num_lines;
739                         }
740                         die("oops");
741                 }
742         }
743 }
744
745 /*
746  * After splitting the blame, the origins used by the
747  * on-stack blame_entry should lose one refcnt each.
748  */
749 static void decref_split(struct blame_entry *split)
750 {
751         int i;
752
753         for (i = 0; i < 3; i++)
754                 origin_decref(split[i].suspect);
755 }
756
757 /*
758  * Helper for blame_chunk().  blame_entry e is known to overlap with
759  * the patch hunk; split it and pass blame to the parent.
760  */
761 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
762                           int tlno, int plno, int same,
763                           struct origin *parent)
764 {
765         struct blame_entry split[3];
766
767         split_overlap(split, e, tlno, plno, same, parent);
768         if (split[1].suspect)
769                 split_blame(sb, split, e);
770         decref_split(split);
771 }
772
773 /*
774  * Find the line number of the last line the target is suspected for.
775  */
776 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
777 {
778         struct blame_entry *e;
779         int last_in_target = -1;
780
781         for (e = sb->ent; e; e = e->next) {
782                 if (e->guilty || !same_suspect(e->suspect, target))
783                         continue;
784                 if (last_in_target < e->s_lno + e->num_lines)
785                         last_in_target = e->s_lno + e->num_lines;
786         }
787         return last_in_target;
788 }
789
790 /*
791  * Process one hunk from the patch between the current suspect for
792  * blame_entry e and its parent.  Find and split the overlap, and
793  * pass blame to the overlapping part to the parent.
794  */
795 static void blame_chunk(struct scoreboard *sb,
796                         int tlno, int plno, int same,
797                         struct origin *target, struct origin *parent)
798 {
799         struct blame_entry *e;
800
801         for (e = sb->ent; e; e = e->next) {
802                 if (e->guilty || !same_suspect(e->suspect, target))
803                         continue;
804                 if (same <= e->s_lno)
805                         continue;
806                 if (tlno < e->s_lno + e->num_lines)
807                         blame_overlap(sb, e, tlno, plno, same, parent);
808         }
809 }
810
811 /*
812  * We are looking at the origin 'target' and aiming to pass blame
813  * for the lines it is suspected to its parent.  Run diff to find
814  * which lines came from parent and pass blame for them.
815  */
816 static int pass_blame_to_parent(struct scoreboard *sb,
817                                 struct origin *target,
818                                 struct origin *parent)
819 {
820         int i, last_in_target, plno, tlno;
821         struct patch *patch;
822
823         last_in_target = find_last_in_target(sb, target);
824         if (last_in_target < 0)
825                 return 1; /* nothing remains for this target */
826
827         patch = get_patch(parent, target);
828         plno = tlno = 0;
829         for (i = 0; i < patch->num; i++) {
830                 struct chunk *chunk = &patch->chunks[i];
831
832                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
833                 plno = chunk->p_next;
834                 tlno = chunk->t_next;
835         }
836         /* The rest (i.e. anything after tlno) are the same as the parent */
837         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
838
839         free_patch(patch);
840         return 0;
841 }
842
843 /*
844  * The lines in blame_entry after splitting blames many times can become
845  * very small and trivial, and at some point it becomes pointless to
846  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
847  * ordinary C program, and it is not worth to say it was copied from
848  * totally unrelated file in the parent.
849  *
850  * Compute how trivial the lines in the blame_entry are.
851  */
852 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
853 {
854         unsigned score;
855         const char *cp, *ep;
856
857         if (e->score)
858                 return e->score;
859
860         score = 1;
861         cp = nth_line(sb, e->lno);
862         ep = nth_line(sb, e->lno + e->num_lines);
863         while (cp < ep) {
864                 unsigned ch = *((unsigned char *)cp);
865                 if (isalnum(ch))
866                         score++;
867                 cp++;
868         }
869         e->score = score;
870         return score;
871 }
872
873 /*
874  * best_so_far[] and this[] are both a split of an existing blame_entry
875  * that passes blame to the parent.  Maintain best_so_far the best split
876  * so far, by comparing this and best_so_far and copying this into
877  * bst_so_far as needed.
878  */
879 static void copy_split_if_better(struct scoreboard *sb,
880                                  struct blame_entry *best_so_far,
881                                  struct blame_entry *this)
882 {
883         int i;
884
885         if (!this[1].suspect)
886                 return;
887         if (best_so_far[1].suspect) {
888                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
889                         return;
890         }
891
892         for (i = 0; i < 3; i++)
893                 origin_incref(this[i].suspect);
894         decref_split(best_so_far);
895         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
896 }
897
898 /*
899  * We are looking at a part of the final image represented by
900  * ent (tlno and same are offset by ent->s_lno).
901  * tlno is where we are looking at in the final image.
902  * up to (but not including) same match preimage.
903  * plno is where we are looking at in the preimage.
904  *
905  * <-------------- final image ---------------------->
906  *       <------ent------>
907  *         ^tlno ^same
908  *    <---------preimage----->
909  *         ^plno
910  *
911  * All line numbers are 0-based.
912  */
913 static void handle_split(struct scoreboard *sb,
914                          struct blame_entry *ent,
915                          int tlno, int plno, int same,
916                          struct origin *parent,
917                          struct blame_entry *split)
918 {
919         if (ent->num_lines <= tlno)
920                 return;
921         if (tlno < same) {
922                 struct blame_entry this[3];
923                 tlno += ent->s_lno;
924                 same += ent->s_lno;
925                 split_overlap(this, ent, tlno, plno, same, parent);
926                 copy_split_if_better(sb, split, this);
927                 decref_split(this);
928         }
929 }
930
931 /*
932  * Find the lines from parent that are the same as ent so that
933  * we can pass blames to it.  file_p has the blob contents for
934  * the parent.
935  */
936 static void find_copy_in_blob(struct scoreboard *sb,
937                               struct blame_entry *ent,
938                               struct origin *parent,
939                               struct blame_entry *split,
940                               mmfile_t *file_p)
941 {
942         const char *cp;
943         int cnt;
944         mmfile_t file_o;
945         struct patch *patch;
946         int i, plno, tlno;
947
948         /*
949          * Prepare mmfile that contains only the lines in ent.
950          */
951         cp = nth_line(sb, ent->lno);
952         file_o.ptr = (char*) cp;
953         cnt = ent->num_lines;
954
955         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
956                 if (*cp++ == '\n')
957                         cnt--;
958         }
959         file_o.size = cp - file_o.ptr;
960
961         patch = compare_buffer(file_p, &file_o, 1);
962
963         /*
964          * file_o is a part of final image we are annotating.
965          * file_p partially may match that image.
966          */
967         memset(split, 0, sizeof(struct blame_entry [3]));
968         plno = tlno = 0;
969         for (i = 0; i < patch->num; i++) {
970                 struct chunk *chunk = &patch->chunks[i];
971
972                 handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
973                 plno = chunk->p_next;
974                 tlno = chunk->t_next;
975         }
976         /* remainder, if any, all match the preimage */
977         handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
978         free_patch(patch);
979 }
980
981 /*
982  * See if lines currently target is suspected for can be attributed to
983  * parent.
984  */
985 static int find_move_in_parent(struct scoreboard *sb,
986                                struct origin *target,
987                                struct origin *parent)
988 {
989         int last_in_target, made_progress;
990         struct blame_entry *e, split[3];
991         mmfile_t file_p;
992
993         last_in_target = find_last_in_target(sb, target);
994         if (last_in_target < 0)
995                 return 1; /* nothing remains for this target */
996
997         fill_origin_blob(parent, &file_p);
998         if (!file_p.ptr)
999                 return 0;
1000
1001         made_progress = 1;
1002         while (made_progress) {
1003                 made_progress = 0;
1004                 for (e = sb->ent; e; e = e->next) {
1005                         if (e->guilty || !same_suspect(e->suspect, target))
1006                                 continue;
1007                         find_copy_in_blob(sb, e, parent, split, &file_p);
1008                         if (split[1].suspect &&
1009                             blame_move_score < ent_score(sb, &split[1])) {
1010                                 split_blame(sb, split, e);
1011                                 made_progress = 1;
1012                         }
1013                         decref_split(split);
1014                 }
1015         }
1016         return 0;
1017 }
1018
1019 struct blame_list {
1020         struct blame_entry *ent;
1021         struct blame_entry split[3];
1022 };
1023
1024 /*
1025  * Count the number of entries the target is suspected for,
1026  * and prepare a list of entry and the best split.
1027  */
1028 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1029                                            struct origin *target,
1030                                            int *num_ents_p)
1031 {
1032         struct blame_entry *e;
1033         int num_ents, i;
1034         struct blame_list *blame_list = NULL;
1035
1036         for (e = sb->ent, num_ents = 0; e; e = e->next)
1037                 if (!e->guilty && same_suspect(e->suspect, target))
1038                         num_ents++;
1039         if (num_ents) {
1040                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1041                 for (e = sb->ent, i = 0; e; e = e->next)
1042                         if (!e->guilty && same_suspect(e->suspect, target))
1043                                 blame_list[i++].ent = e;
1044         }
1045         *num_ents_p = num_ents;
1046         return blame_list;
1047 }
1048
1049 /*
1050  * For lines target is suspected for, see if we can find code movement
1051  * across file boundary from the parent commit.  porigin is the path
1052  * in the parent we already tried.
1053  */
1054 static int find_copy_in_parent(struct scoreboard *sb,
1055                                struct origin *target,
1056                                struct commit *parent,
1057                                struct origin *porigin,
1058                                int opt)
1059 {
1060         struct diff_options diff_opts;
1061         const char *paths[1];
1062         int i, j;
1063         int retval;
1064         struct blame_list *blame_list;
1065         int num_ents;
1066
1067         blame_list = setup_blame_list(sb, target, &num_ents);
1068         if (!blame_list)
1069                 return 1; /* nothing remains for this target */
1070
1071         diff_setup(&diff_opts);
1072         diff_opts.recursive = 1;
1073         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1074
1075         paths[0] = NULL;
1076         diff_tree_setup_paths(paths, &diff_opts);
1077         if (diff_setup_done(&diff_opts) < 0)
1078                 die("diff-setup");
1079
1080         /* Try "find copies harder" on new path if requested;
1081          * we do not want to use diffcore_rename() actually to
1082          * match things up; find_copies_harder is set only to
1083          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1084          * and this code needs to be after diff_setup_done(), which
1085          * usually makes find-copies-harder imply copy detection.
1086          */
1087         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1088             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1089                 && (!porigin || strcmp(target->path, porigin->path))))
1090                 diff_opts.find_copies_harder = 1;
1091
1092         if (is_null_sha1(target->commit->object.sha1))
1093                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1094         else
1095                 diff_tree_sha1(parent->tree->object.sha1,
1096                                target->commit->tree->object.sha1,
1097                                "", &diff_opts);
1098
1099         if (!diff_opts.find_copies_harder)
1100                 diffcore_std(&diff_opts);
1101
1102         retval = 0;
1103         while (1) {
1104                 int made_progress = 0;
1105
1106                 for (i = 0; i < diff_queued_diff.nr; i++) {
1107                         struct diff_filepair *p = diff_queued_diff.queue[i];
1108                         struct origin *norigin;
1109                         mmfile_t file_p;
1110                         struct blame_entry this[3];
1111
1112                         if (!DIFF_FILE_VALID(p->one))
1113                                 continue; /* does not exist in parent */
1114                         if (porigin && !strcmp(p->one->path, porigin->path))
1115                                 /* find_move already dealt with this path */
1116                                 continue;
1117
1118                         norigin = get_origin(sb, parent, p->one->path);
1119                         hashcpy(norigin->blob_sha1, p->one->sha1);
1120                         fill_origin_blob(norigin, &file_p);
1121                         if (!file_p.ptr)
1122                                 continue;
1123
1124                         for (j = 0; j < num_ents; j++) {
1125                                 find_copy_in_blob(sb, blame_list[j].ent,
1126                                                   norigin, this, &file_p);
1127                                 copy_split_if_better(sb, blame_list[j].split,
1128                                                      this);
1129                                 decref_split(this);
1130                         }
1131                         origin_decref(norigin);
1132                 }
1133
1134                 for (j = 0; j < num_ents; j++) {
1135                         struct blame_entry *split = blame_list[j].split;
1136                         if (split[1].suspect &&
1137                             blame_copy_score < ent_score(sb, &split[1])) {
1138                                 split_blame(sb, split, blame_list[j].ent);
1139                                 made_progress = 1;
1140                         }
1141                         decref_split(split);
1142                 }
1143                 free(blame_list);
1144
1145                 if (!made_progress)
1146                         break;
1147                 blame_list = setup_blame_list(sb, target, &num_ents);
1148                 if (!blame_list) {
1149                         retval = 1;
1150                         break;
1151                 }
1152         }
1153         diff_flush(&diff_opts);
1154
1155         return retval;
1156 }
1157
1158 /*
1159  * The blobs of origin and porigin exactly match, so everything
1160  * origin is suspected for can be blamed on the parent.
1161  */
1162 static void pass_whole_blame(struct scoreboard *sb,
1163                              struct origin *origin, struct origin *porigin)
1164 {
1165         struct blame_entry *e;
1166
1167         if (!porigin->file.ptr && origin->file.ptr) {
1168                 /* Steal its file */
1169                 porigin->file = origin->file;
1170                 origin->file.ptr = NULL;
1171         }
1172         for (e = sb->ent; e; e = e->next) {
1173                 if (!same_suspect(e->suspect, origin))
1174                         continue;
1175                 origin_incref(porigin);
1176                 origin_decref(e->suspect);
1177                 e->suspect = porigin;
1178         }
1179 }
1180
1181 #define MAXPARENT 16
1182
1183 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1184 {
1185         int i, pass;
1186         struct commit *commit = origin->commit;
1187         struct commit_list *parent;
1188         struct origin *parent_origin[MAXPARENT], *porigin;
1189
1190         memset(parent_origin, 0, sizeof(parent_origin));
1191
1192         /* The first pass looks for unrenamed path to optimize for
1193          * common cases, then we look for renames in the second pass.
1194          */
1195         for (pass = 0; pass < 2; pass++) {
1196                 struct origin *(*find)(struct scoreboard *,
1197                                        struct commit *, struct origin *);
1198                 find = pass ? find_rename : find_origin;
1199
1200                 for (i = 0, parent = commit->parents;
1201                      i < MAXPARENT && parent;
1202                      parent = parent->next, i++) {
1203                         struct commit *p = parent->item;
1204                         int j, same;
1205
1206                         if (parent_origin[i])
1207                                 continue;
1208                         if (parse_commit(p))
1209                                 continue;
1210                         porigin = find(sb, p, origin);
1211                         if (!porigin)
1212                                 continue;
1213                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1214                                 pass_whole_blame(sb, origin, porigin);
1215                                 origin_decref(porigin);
1216                                 goto finish;
1217                         }
1218                         for (j = same = 0; j < i; j++)
1219                                 if (parent_origin[j] &&
1220                                     !hashcmp(parent_origin[j]->blob_sha1,
1221                                              porigin->blob_sha1)) {
1222                                         same = 1;
1223                                         break;
1224                                 }
1225                         if (!same)
1226                                 parent_origin[i] = porigin;
1227                         else
1228                                 origin_decref(porigin);
1229                 }
1230         }
1231
1232         num_commits++;
1233         for (i = 0, parent = commit->parents;
1234              i < MAXPARENT && parent;
1235              parent = parent->next, i++) {
1236                 struct origin *porigin = parent_origin[i];
1237                 if (!porigin)
1238                         continue;
1239                 if (pass_blame_to_parent(sb, origin, porigin))
1240                         goto finish;
1241         }
1242
1243         /*
1244          * Optionally find moves in parents' files.
1245          */
1246         if (opt & PICKAXE_BLAME_MOVE)
1247                 for (i = 0, parent = commit->parents;
1248                      i < MAXPARENT && parent;
1249                      parent = parent->next, i++) {
1250                         struct origin *porigin = parent_origin[i];
1251                         if (!porigin)
1252                                 continue;
1253                         if (find_move_in_parent(sb, origin, porigin))
1254                                 goto finish;
1255                 }
1256
1257         /*
1258          * Optionally find copies from parents' files.
1259          */
1260         if (opt & PICKAXE_BLAME_COPY)
1261                 for (i = 0, parent = commit->parents;
1262                      i < MAXPARENT && parent;
1263                      parent = parent->next, i++) {
1264                         struct origin *porigin = parent_origin[i];
1265                         if (find_copy_in_parent(sb, origin, parent->item,
1266                                                 porigin, opt))
1267                                 goto finish;
1268                 }
1269
1270  finish:
1271         for (i = 0; i < MAXPARENT; i++)
1272                 origin_decref(parent_origin[i]);
1273 }
1274
1275 /*
1276  * Information on commits, used for output.
1277  */
1278 struct commit_info
1279 {
1280         const char *author;
1281         const char *author_mail;
1282         unsigned long author_time;
1283         const char *author_tz;
1284
1285         /* filled only when asked for details */
1286         const char *committer;
1287         const char *committer_mail;
1288         unsigned long committer_time;
1289         const char *committer_tz;
1290
1291         const char *summary;
1292 };
1293
1294 /*
1295  * Parse author/committer line in the commit object buffer
1296  */
1297 static void get_ac_line(const char *inbuf, const char *what,
1298                         int bufsz, char *person, const char **mail,
1299                         unsigned long *time, const char **tz)
1300 {
1301         int len, tzlen, maillen;
1302         char *tmp, *endp, *timepos;
1303
1304         tmp = strstr(inbuf, what);
1305         if (!tmp)
1306                 goto error_out;
1307         tmp += strlen(what);
1308         endp = strchr(tmp, '\n');
1309         if (!endp)
1310                 len = strlen(tmp);
1311         else
1312                 len = endp - tmp;
1313         if (bufsz <= len) {
1314         error_out:
1315                 /* Ugh */
1316                 *mail = *tz = "(unknown)";
1317                 *time = 0;
1318                 return;
1319         }
1320         memcpy(person, tmp, len);
1321
1322         tmp = person;
1323         tmp += len;
1324         *tmp = 0;
1325         while (*tmp != ' ')
1326                 tmp--;
1327         *tz = tmp+1;
1328         tzlen = (person+len)-(tmp+1);
1329
1330         *tmp = 0;
1331         while (*tmp != ' ')
1332                 tmp--;
1333         *time = strtoul(tmp, NULL, 10);
1334         timepos = tmp;
1335
1336         *tmp = 0;
1337         while (*tmp != ' ')
1338                 tmp--;
1339         *mail = tmp + 1;
1340         *tmp = 0;
1341         maillen = timepos - tmp;
1342
1343         if (!mailmap.nr)
1344                 return;
1345
1346         /*
1347          * mailmap expansion may make the name longer.
1348          * make room by pushing stuff down.
1349          */
1350         tmp = person + bufsz - (tzlen + 1);
1351         memmove(tmp, *tz, tzlen);
1352         tmp[tzlen] = 0;
1353         *tz = tmp;
1354
1355         tmp = tmp - (maillen + 1);
1356         memmove(tmp, *mail, maillen);
1357         tmp[maillen] = 0;
1358         *mail = tmp;
1359
1360         /*
1361          * Now, convert e-mail using mailmap
1362          */
1363         map_email(&mailmap, tmp + 1, person, tmp-person-1);
1364 }
1365
1366 static void get_commit_info(struct commit *commit,
1367                             struct commit_info *ret,
1368                             int detailed)
1369 {
1370         int len;
1371         char *tmp, *endp;
1372         static char author_buf[1024];
1373         static char committer_buf[1024];
1374         static char summary_buf[1024];
1375
1376         /*
1377          * We've operated without save_commit_buffer, so
1378          * we now need to populate them for output.
1379          */
1380         if (!commit->buffer) {
1381                 enum object_type type;
1382                 unsigned long size;
1383                 commit->buffer =
1384                         read_sha1_file(commit->object.sha1, &type, &size);
1385         }
1386         ret->author = author_buf;
1387         get_ac_line(commit->buffer, "\nauthor ",
1388                     sizeof(author_buf), author_buf, &ret->author_mail,
1389                     &ret->author_time, &ret->author_tz);
1390
1391         if (!detailed)
1392                 return;
1393
1394         ret->committer = committer_buf;
1395         get_ac_line(commit->buffer, "\ncommitter ",
1396                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1397                     &ret->committer_time, &ret->committer_tz);
1398
1399         ret->summary = summary_buf;
1400         tmp = strstr(commit->buffer, "\n\n");
1401         if (!tmp) {
1402         error_out:
1403                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1404                 return;
1405         }
1406         tmp += 2;
1407         endp = strchr(tmp, '\n');
1408         if (!endp)
1409                 endp = tmp + strlen(tmp);
1410         len = endp - tmp;
1411         if (len >= sizeof(summary_buf) || len == 0)
1412                 goto error_out;
1413         memcpy(summary_buf, tmp, len);
1414         summary_buf[len] = 0;
1415 }
1416
1417 /*
1418  * To allow LF and other nonportable characters in pathnames,
1419  * they are c-style quoted as needed.
1420  */
1421 static void write_filename_info(const char *path)
1422 {
1423         printf("filename ");
1424         write_name_quoted(NULL, 0, path, 1, stdout);
1425         putchar('\n');
1426 }
1427
1428 /*
1429  * The blame_entry is found to be guilty for the range.  Mark it
1430  * as such, and show it in incremental output.
1431  */
1432 static void found_guilty_entry(struct blame_entry *ent)
1433 {
1434         if (ent->guilty)
1435                 return;
1436         ent->guilty = 1;
1437         if (incremental) {
1438                 struct origin *suspect = ent->suspect;
1439
1440                 printf("%s %d %d %d\n",
1441                        sha1_to_hex(suspect->commit->object.sha1),
1442                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1443                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1444                         struct commit_info ci;
1445                         suspect->commit->object.flags |= METAINFO_SHOWN;
1446                         get_commit_info(suspect->commit, &ci, 1);
1447                         printf("author %s\n", ci.author);
1448                         printf("author-mail %s\n", ci.author_mail);
1449                         printf("author-time %lu\n", ci.author_time);
1450                         printf("author-tz %s\n", ci.author_tz);
1451                         printf("committer %s\n", ci.committer);
1452                         printf("committer-mail %s\n", ci.committer_mail);
1453                         printf("committer-time %lu\n", ci.committer_time);
1454                         printf("committer-tz %s\n", ci.committer_tz);
1455                         printf("summary %s\n", ci.summary);
1456                         if (suspect->commit->object.flags & UNINTERESTING)
1457                                 printf("boundary\n");
1458                 }
1459                 write_filename_info(suspect->path);
1460         }
1461 }
1462
1463 /*
1464  * The main loop -- while the scoreboard has lines whose true origin
1465  * is still unknown, pick one blame_entry, and allow its current
1466  * suspect to pass blames to its parents.
1467  */
1468 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1469 {
1470         while (1) {
1471                 struct blame_entry *ent;
1472                 struct commit *commit;
1473                 struct origin *suspect = NULL;
1474
1475                 /* find one suspect to break down */
1476                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1477                         if (!ent->guilty)
1478                                 suspect = ent->suspect;
1479                 if (!suspect)
1480                         return; /* all done */
1481
1482                 /*
1483                  * We will use this suspect later in the loop,
1484                  * so hold onto it in the meantime.
1485                  */
1486                 origin_incref(suspect);
1487                 commit = suspect->commit;
1488                 if (!commit->object.parsed)
1489                         parse_commit(commit);
1490                 if (!(commit->object.flags & UNINTERESTING) &&
1491                     !(revs->max_age != -1 && commit->date < revs->max_age))
1492                         pass_blame(sb, suspect, opt);
1493                 else {
1494                         commit->object.flags |= UNINTERESTING;
1495                         if (commit->object.parsed)
1496                                 mark_parents_uninteresting(commit);
1497                 }
1498                 /* treat root commit as boundary */
1499                 if (!commit->parents && !show_root)
1500                         commit->object.flags |= UNINTERESTING;
1501
1502                 /* Take responsibility for the remaining entries */
1503                 for (ent = sb->ent; ent; ent = ent->next)
1504                         if (same_suspect(ent->suspect, suspect))
1505                                 found_guilty_entry(ent);
1506                 origin_decref(suspect);
1507
1508                 if (DEBUG) /* sanity */
1509                         sanity_check_refcnt(sb);
1510         }
1511 }
1512
1513 static const char *format_time(unsigned long time, const char *tz_str,
1514                                int show_raw_time)
1515 {
1516         static char time_buf[128];
1517         time_t t = time;
1518         int minutes, tz;
1519         struct tm *tm;
1520
1521         if (show_raw_time) {
1522                 sprintf(time_buf, "%lu %s", time, tz_str);
1523                 return time_buf;
1524         }
1525
1526         tz = atoi(tz_str);
1527         minutes = tz < 0 ? -tz : tz;
1528         minutes = (minutes / 100)*60 + (minutes % 100);
1529         minutes = tz < 0 ? -minutes : minutes;
1530         t = time + minutes * 60;
1531         tm = gmtime(&t);
1532
1533         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1534         strcat(time_buf, tz_str);
1535         return time_buf;
1536 }
1537
1538 #define OUTPUT_ANNOTATE_COMPAT  001
1539 #define OUTPUT_LONG_OBJECT_NAME 002
1540 #define OUTPUT_RAW_TIMESTAMP    004
1541 #define OUTPUT_PORCELAIN        010
1542 #define OUTPUT_SHOW_NAME        020
1543 #define OUTPUT_SHOW_NUMBER      040
1544 #define OUTPUT_SHOW_SCORE      0100
1545 #define OUTPUT_NO_AUTHOR       0200
1546
1547 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1548 {
1549         int cnt;
1550         const char *cp;
1551         struct origin *suspect = ent->suspect;
1552         char hex[41];
1553
1554         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1555         printf("%s%c%d %d %d\n",
1556                hex,
1557                ent->guilty ? ' ' : '*', // purely for debugging
1558                ent->s_lno + 1,
1559                ent->lno + 1,
1560                ent->num_lines);
1561         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1562                 struct commit_info ci;
1563                 suspect->commit->object.flags |= METAINFO_SHOWN;
1564                 get_commit_info(suspect->commit, &ci, 1);
1565                 printf("author %s\n", ci.author);
1566                 printf("author-mail %s\n", ci.author_mail);
1567                 printf("author-time %lu\n", ci.author_time);
1568                 printf("author-tz %s\n", ci.author_tz);
1569                 printf("committer %s\n", ci.committer);
1570                 printf("committer-mail %s\n", ci.committer_mail);
1571                 printf("committer-time %lu\n", ci.committer_time);
1572                 printf("committer-tz %s\n", ci.committer_tz);
1573                 write_filename_info(suspect->path);
1574                 printf("summary %s\n", ci.summary);
1575                 if (suspect->commit->object.flags & UNINTERESTING)
1576                         printf("boundary\n");
1577         }
1578         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1579                 write_filename_info(suspect->path);
1580
1581         cp = nth_line(sb, ent->lno);
1582         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1583                 char ch;
1584                 if (cnt)
1585                         printf("%s %d %d\n", hex,
1586                                ent->s_lno + 1 + cnt,
1587                                ent->lno + 1 + cnt);
1588                 putchar('\t');
1589                 do {
1590                         ch = *cp++;
1591                         putchar(ch);
1592                 } while (ch != '\n' &&
1593                          cp < sb->final_buf + sb->final_buf_size);
1594         }
1595 }
1596
1597 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1598 {
1599         int cnt;
1600         const char *cp;
1601         struct origin *suspect = ent->suspect;
1602         struct commit_info ci;
1603         char hex[41];
1604         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1605
1606         get_commit_info(suspect->commit, &ci, 1);
1607         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1608
1609         cp = nth_line(sb, ent->lno);
1610         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1611                 char ch;
1612                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1613
1614                 if (suspect->commit->object.flags & UNINTERESTING) {
1615                         if (blank_boundary)
1616                                 memset(hex, ' ', length);
1617                         else if (!cmd_is_annotate) {
1618                                 length--;
1619                                 putchar('^');
1620                         }
1621                 }
1622
1623                 printf("%.*s", length, hex);
1624                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1625                         printf("\t(%10s\t%10s\t%d)", ci.author,
1626                                format_time(ci.author_time, ci.author_tz,
1627                                            show_raw_time),
1628                                ent->lno + 1 + cnt);
1629                 else {
1630                         if (opt & OUTPUT_SHOW_SCORE)
1631                                 printf(" %*d %02d",
1632                                        max_score_digits, ent->score,
1633                                        ent->suspect->refcnt);
1634                         if (opt & OUTPUT_SHOW_NAME)
1635                                 printf(" %-*.*s", longest_file, longest_file,
1636                                        suspect->path);
1637                         if (opt & OUTPUT_SHOW_NUMBER)
1638                                 printf(" %*d", max_orig_digits,
1639                                        ent->s_lno + 1 + cnt);
1640
1641                         if (!(opt & OUTPUT_NO_AUTHOR))
1642                                 printf(" (%-*.*s %10s",
1643                                        longest_author, longest_author,
1644                                        ci.author,
1645                                        format_time(ci.author_time,
1646                                                    ci.author_tz,
1647                                                    show_raw_time));
1648                         printf(" %*d) ",
1649                                max_digits, ent->lno + 1 + cnt);
1650                 }
1651                 do {
1652                         ch = *cp++;
1653                         putchar(ch);
1654                 } while (ch != '\n' &&
1655                          cp < sb->final_buf + sb->final_buf_size);
1656         }
1657 }
1658
1659 static void output(struct scoreboard *sb, int option)
1660 {
1661         struct blame_entry *ent;
1662
1663         if (option & OUTPUT_PORCELAIN) {
1664                 for (ent = sb->ent; ent; ent = ent->next) {
1665                         struct blame_entry *oth;
1666                         struct origin *suspect = ent->suspect;
1667                         struct commit *commit = suspect->commit;
1668                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1669                                 continue;
1670                         for (oth = ent->next; oth; oth = oth->next) {
1671                                 if ((oth->suspect->commit != commit) ||
1672                                     !strcmp(oth->suspect->path, suspect->path))
1673                                         continue;
1674                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1675                                 break;
1676                         }
1677                 }
1678         }
1679
1680         for (ent = sb->ent; ent; ent = ent->next) {
1681                 if (option & OUTPUT_PORCELAIN)
1682                         emit_porcelain(sb, ent);
1683                 else {
1684                         emit_other(sb, ent, option);
1685                 }
1686         }
1687 }
1688
1689 /*
1690  * To allow quick access to the contents of nth line in the
1691  * final image, prepare an index in the scoreboard.
1692  */
1693 static int prepare_lines(struct scoreboard *sb)
1694 {
1695         const char *buf = sb->final_buf;
1696         unsigned long len = sb->final_buf_size;
1697         int num = 0, incomplete = 0, bol = 1;
1698
1699         if (len && buf[len-1] != '\n')
1700                 incomplete++; /* incomplete line at the end */
1701         while (len--) {
1702                 if (bol) {
1703                         sb->lineno = xrealloc(sb->lineno,
1704                                               sizeof(int* ) * (num + 1));
1705                         sb->lineno[num] = buf - sb->final_buf;
1706                         bol = 0;
1707                 }
1708                 if (*buf++ == '\n') {
1709                         num++;
1710                         bol = 1;
1711                 }
1712         }
1713         sb->lineno = xrealloc(sb->lineno,
1714                               sizeof(int* ) * (num + incomplete + 1));
1715         sb->lineno[num + incomplete] = buf - sb->final_buf;
1716         sb->num_lines = num + incomplete;
1717         return sb->num_lines;
1718 }
1719
1720 /*
1721  * Add phony grafts for use with -S; this is primarily to
1722  * support git-cvsserver that wants to give a linear history
1723  * to its clients.
1724  */
1725 static int read_ancestry(const char *graft_file)
1726 {
1727         FILE *fp = fopen(graft_file, "r");
1728         char buf[1024];
1729         if (!fp)
1730                 return -1;
1731         while (fgets(buf, sizeof(buf), fp)) {
1732                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1733                 int len = strlen(buf);
1734                 struct commit_graft *graft = read_graft_line(buf, len);
1735                 if (graft)
1736                         register_commit_graft(graft, 0);
1737         }
1738         fclose(fp);
1739         return 0;
1740 }
1741
1742 /*
1743  * How many columns do we need to show line numbers in decimal?
1744  */
1745 static int lineno_width(int lines)
1746 {
1747         int i, width;
1748
1749         for (width = 1, i = 10; i <= lines + 1; width++)
1750                 i *= 10;
1751         return width;
1752 }
1753
1754 /*
1755  * How many columns do we need to show line numbers, authors,
1756  * and filenames?
1757  */
1758 static void find_alignment(struct scoreboard *sb, int *option)
1759 {
1760         int longest_src_lines = 0;
1761         int longest_dst_lines = 0;
1762         unsigned largest_score = 0;
1763         struct blame_entry *e;
1764
1765         for (e = sb->ent; e; e = e->next) {
1766                 struct origin *suspect = e->suspect;
1767                 struct commit_info ci;
1768                 int num;
1769
1770                 if (strcmp(suspect->path, sb->path))
1771                         *option |= OUTPUT_SHOW_NAME;
1772                 num = strlen(suspect->path);
1773                 if (longest_file < num)
1774                         longest_file = num;
1775                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1776                         suspect->commit->object.flags |= METAINFO_SHOWN;
1777                         get_commit_info(suspect->commit, &ci, 1);
1778                         num = strlen(ci.author);
1779                         if (longest_author < num)
1780                                 longest_author = num;
1781                 }
1782                 num = e->s_lno + e->num_lines;
1783                 if (longest_src_lines < num)
1784                         longest_src_lines = num;
1785                 num = e->lno + e->num_lines;
1786                 if (longest_dst_lines < num)
1787                         longest_dst_lines = num;
1788                 if (largest_score < ent_score(sb, e))
1789                         largest_score = ent_score(sb, e);
1790         }
1791         max_orig_digits = lineno_width(longest_src_lines);
1792         max_digits = lineno_width(longest_dst_lines);
1793         max_score_digits = lineno_width(largest_score);
1794 }
1795
1796 /*
1797  * For debugging -- origin is refcounted, and this asserts that
1798  * we do not underflow.
1799  */
1800 static void sanity_check_refcnt(struct scoreboard *sb)
1801 {
1802         int baa = 0;
1803         struct blame_entry *ent;
1804
1805         for (ent = sb->ent; ent; ent = ent->next) {
1806                 /* Nobody should have zero or negative refcnt */
1807                 if (ent->suspect->refcnt <= 0) {
1808                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1809                                 ent->suspect->path,
1810                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1811                                 ent->suspect->refcnt);
1812                         baa = 1;
1813                 }
1814         }
1815         for (ent = sb->ent; ent; ent = ent->next) {
1816                 /* Mark the ones that haven't been checked */
1817                 if (0 < ent->suspect->refcnt)
1818                         ent->suspect->refcnt = -ent->suspect->refcnt;
1819         }
1820         for (ent = sb->ent; ent; ent = ent->next) {
1821                 /*
1822                  * ... then pick each and see if they have the the
1823                  * correct refcnt.
1824                  */
1825                 int found;
1826                 struct blame_entry *e;
1827                 struct origin *suspect = ent->suspect;
1828
1829                 if (0 < suspect->refcnt)
1830                         continue;
1831                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1832                 for (found = 0, e = sb->ent; e; e = e->next) {
1833                         if (e->suspect != suspect)
1834                                 continue;
1835                         found++;
1836                 }
1837                 if (suspect->refcnt != found) {
1838                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1839                                 ent->suspect->path,
1840                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1841                                 ent->suspect->refcnt, found);
1842                         baa = 2;
1843                 }
1844         }
1845         if (baa) {
1846                 int opt = 0160;
1847                 find_alignment(sb, &opt);
1848                 output(sb, opt);
1849                 die("Baa %d!", baa);
1850         }
1851 }
1852
1853 /*
1854  * Used for the command line parsing; check if the path exists
1855  * in the working tree.
1856  */
1857 static int has_path_in_work_tree(const char *path)
1858 {
1859         struct stat st;
1860         return !lstat(path, &st);
1861 }
1862
1863 static unsigned parse_score(const char *arg)
1864 {
1865         char *end;
1866         unsigned long score = strtoul(arg, &end, 10);
1867         if (*end)
1868                 return 0;
1869         return score;
1870 }
1871
1872 static const char *add_prefix(const char *prefix, const char *path)
1873 {
1874         if (!prefix || !prefix[0])
1875                 return path;
1876         return prefix_path(prefix, strlen(prefix), path);
1877 }
1878
1879 /*
1880  * Parsing of (comma separated) one item in the -L option
1881  */
1882 static const char *parse_loc(const char *spec,
1883                              struct scoreboard *sb, long lno,
1884                              long begin, long *ret)
1885 {
1886         char *term;
1887         const char *line;
1888         long num;
1889         int reg_error;
1890         regex_t regexp;
1891         regmatch_t match[1];
1892
1893         /* Allow "-L <something>,+20" to mean starting at <something>
1894          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1895          * <something>.
1896          */
1897         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1898                 num = strtol(spec + 1, &term, 10);
1899                 if (term != spec + 1) {
1900                         if (spec[0] == '-')
1901                                 num = 0 - num;
1902                         if (0 < num)
1903                                 *ret = begin + num - 2;
1904                         else if (!num)
1905                                 *ret = begin;
1906                         else
1907                                 *ret = begin + num;
1908                         return term;
1909                 }
1910                 return spec;
1911         }
1912         num = strtol(spec, &term, 10);
1913         if (term != spec) {
1914                 *ret = num;
1915                 return term;
1916         }
1917         if (spec[0] != '/')
1918                 return spec;
1919
1920         /* it could be a regexp of form /.../ */
1921         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1922                 if (*term == '\\')
1923                         term++;
1924         }
1925         if (*term != '/')
1926                 return spec;
1927
1928         /* try [spec+1 .. term-1] as regexp */
1929         *term = 0;
1930         begin--; /* input is in human terms */
1931         line = nth_line(sb, begin);
1932
1933         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1934             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1935                 const char *cp = line + match[0].rm_so;
1936                 const char *nline;
1937
1938                 while (begin++ < lno) {
1939                         nline = nth_line(sb, begin);
1940                         if (line <= cp && cp < nline)
1941                                 break;
1942                         line = nline;
1943                 }
1944                 *ret = begin;
1945                 regfree(&regexp);
1946                 *term++ = '/';
1947                 return term;
1948         }
1949         else {
1950                 char errbuf[1024];
1951                 regerror(reg_error, &regexp, errbuf, 1024);
1952                 die("-L parameter '%s': %s", spec + 1, errbuf);
1953         }
1954 }
1955
1956 /*
1957  * Parsing of -L option
1958  */
1959 static void prepare_blame_range(struct scoreboard *sb,
1960                                 const char *bottomtop,
1961                                 long lno,
1962                                 long *bottom, long *top)
1963 {
1964         const char *term;
1965
1966         term = parse_loc(bottomtop, sb, lno, 1, bottom);
1967         if (*term == ',') {
1968                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1969                 if (*term)
1970                         usage(blame_usage);
1971         }
1972         if (*term)
1973                 usage(blame_usage);
1974 }
1975
1976 static int git_blame_config(const char *var, const char *value)
1977 {
1978         if (!strcmp(var, "blame.showroot")) {
1979                 show_root = git_config_bool(var, value);
1980                 return 0;
1981         }
1982         if (!strcmp(var, "blame.blankboundary")) {
1983                 blank_boundary = git_config_bool(var, value);
1984                 return 0;
1985         }
1986         return git_default_config(var, value);
1987 }
1988
1989 static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
1990 {
1991         struct commit *commit;
1992         struct origin *origin;
1993         unsigned char head_sha1[20];
1994         char *buf;
1995         const char *ident;
1996         int fd;
1997         time_t now;
1998         unsigned long fin_size;
1999         int size, len;
2000         struct cache_entry *ce;
2001         unsigned mode;
2002
2003         if (get_sha1("HEAD", head_sha1))
2004                 die("No such ref: HEAD");
2005
2006         time(&now);
2007         commit = xcalloc(1, sizeof(*commit));
2008         commit->parents = xcalloc(1, sizeof(*commit->parents));
2009         commit->parents->item = lookup_commit_reference(head_sha1);
2010         commit->object.parsed = 1;
2011         commit->date = now;
2012         commit->object.type = OBJ_COMMIT;
2013
2014         origin = make_origin(commit, path);
2015
2016         if (!contents_from || strcmp("-", contents_from)) {
2017                 struct stat st;
2018                 const char *read_from;
2019
2020                 if (contents_from) {
2021                         if (stat(contents_from, &st) < 0)
2022                                 die("Cannot stat %s", contents_from);
2023                         read_from = contents_from;
2024                 }
2025                 else {
2026                         if (lstat(path, &st) < 0)
2027                                 die("Cannot lstat %s", path);
2028                         read_from = path;
2029                 }
2030                 fin_size = xsize_t(st.st_size);
2031                 buf = xmalloc(fin_size+1);
2032                 mode = canon_mode(st.st_mode);
2033                 switch (st.st_mode & S_IFMT) {
2034                 case S_IFREG:
2035                         fd = open(read_from, O_RDONLY);
2036                         if (fd < 0)
2037                                 die("cannot open %s", read_from);
2038                         if (read_in_full(fd, buf, fin_size) != fin_size)
2039                                 die("cannot read %s", read_from);
2040                         break;
2041                 case S_IFLNK:
2042                         if (readlink(read_from, buf, fin_size+1) != fin_size)
2043                                 die("cannot readlink %s", read_from);
2044                         break;
2045                 default:
2046                         die("unsupported file type %s", read_from);
2047                 }
2048         }
2049         else {
2050                 /* Reading from stdin */
2051                 contents_from = "standard input";
2052                 buf = NULL;
2053                 fin_size = 0;
2054                 mode = 0;
2055                 while (1) {
2056                         ssize_t cnt = 8192;
2057                         buf = xrealloc(buf, fin_size + cnt);
2058                         cnt = xread(0, buf + fin_size, cnt);
2059                         if (cnt < 0)
2060                                 die("read error %s from stdin",
2061                                     strerror(errno));
2062                         if (!cnt)
2063                                 break;
2064                         fin_size += cnt;
2065                 }
2066                 buf = xrealloc(buf, fin_size + 1);
2067         }
2068         buf[fin_size] = 0;
2069         origin->file.ptr = buf;
2070         origin->file.size = fin_size;
2071         pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1);
2072         commit->util = origin;
2073
2074         /*
2075          * Read the current index, replace the path entry with
2076          * origin->blob_sha1 without mucking with its mode or type
2077          * bits; we are not going to write this index out -- we just
2078          * want to run "diff-index --cached".
2079          */
2080         discard_cache();
2081         read_cache();
2082
2083         len = strlen(path);
2084         if (!mode) {
2085                 int pos = cache_name_pos(path, len);
2086                 if (0 <= pos)
2087                         mode = ntohl(active_cache[pos]->ce_mode);
2088                 else
2089                         /* Let's not bother reading from HEAD tree */
2090                         mode = S_IFREG | 0644;
2091         }
2092         size = cache_entry_size(len);
2093         ce = xcalloc(1, size);
2094         hashcpy(ce->sha1, origin->blob_sha1);
2095         memcpy(ce->name, path, len);
2096         ce->ce_flags = create_ce_flags(len, 0);
2097         ce->ce_mode = create_ce_mode(mode);
2098         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2099
2100         /*
2101          * We are not going to write this out, so this does not matter
2102          * right now, but someday we might optimize diff-index --cached
2103          * with cache-tree information.
2104          */
2105         cache_tree_invalidate_path(active_cache_tree, path);
2106
2107         commit->buffer = xmalloc(400);
2108         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2109         snprintf(commit->buffer, 400,
2110                 "tree 0000000000000000000000000000000000000000\n"
2111                 "parent %s\n"
2112                 "author %s\n"
2113                 "committer %s\n\n"
2114                 "Version of %s from %s\n",
2115                 sha1_to_hex(head_sha1),
2116                 ident, ident, path, contents_from ? contents_from : path);
2117         return commit;
2118 }
2119
2120 int cmd_blame(int argc, const char **argv, const char *prefix)
2121 {
2122         struct rev_info revs;
2123         const char *path;
2124         struct scoreboard sb;
2125         struct origin *o;
2126         struct blame_entry *ent;
2127         int i, seen_dashdash, unk, opt;
2128         long bottom, top, lno;
2129         int output_option = 0;
2130         int show_stats = 0;
2131         const char *revs_file = NULL;
2132         const char *final_commit_name = NULL;
2133         enum object_type type;
2134         const char *bottomtop = NULL;
2135         const char *contents_from = NULL;
2136
2137         cmd_is_annotate = !strcmp(argv[0], "annotate");
2138
2139         git_config(git_blame_config);
2140         save_commit_buffer = 0;
2141
2142         opt = 0;
2143         seen_dashdash = 0;
2144         for (unk = i = 1; i < argc; i++) {
2145                 const char *arg = argv[i];
2146                 if (*arg != '-')
2147                         break;
2148                 else if (!strcmp("-b", arg))
2149                         blank_boundary = 1;
2150                 else if (!strcmp("--root", arg))
2151                         show_root = 1;
2152                 else if (!strcmp(arg, "--show-stats"))
2153                         show_stats = 1;
2154                 else if (!strcmp("-c", arg))
2155                         output_option |= OUTPUT_ANNOTATE_COMPAT;
2156                 else if (!strcmp("-t", arg))
2157                         output_option |= OUTPUT_RAW_TIMESTAMP;
2158                 else if (!strcmp("-l", arg))
2159                         output_option |= OUTPUT_LONG_OBJECT_NAME;
2160                 else if (!strcmp("-s", arg))
2161                         output_option |= OUTPUT_NO_AUTHOR;
2162                 else if (!strcmp("-S", arg) && ++i < argc)
2163                         revs_file = argv[i];
2164                 else if (!prefixcmp(arg, "-M")) {
2165                         opt |= PICKAXE_BLAME_MOVE;
2166                         blame_move_score = parse_score(arg+2);
2167                 }
2168                 else if (!prefixcmp(arg, "-C")) {
2169                         /*
2170                          * -C enables copy from removed files;
2171                          * -C -C enables copy from existing files, but only
2172                          *       when blaming a new file;
2173                          * -C -C -C enables copy from existing files for
2174                          *          everybody
2175                          */
2176                         if (opt & PICKAXE_BLAME_COPY_HARDER)
2177                                 opt |= PICKAXE_BLAME_COPY_HARDEST;
2178                         if (opt & PICKAXE_BLAME_COPY)
2179                                 opt |= PICKAXE_BLAME_COPY_HARDER;
2180                         opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2181                         blame_copy_score = parse_score(arg+2);
2182                 }
2183                 else if (!prefixcmp(arg, "-L")) {
2184                         if (!arg[2]) {
2185                                 if (++i >= argc)
2186                                         usage(blame_usage);
2187                                 arg = argv[i];
2188                         }
2189                         else
2190                                 arg += 2;
2191                         if (bottomtop)
2192                                 die("More than one '-L n,m' option given");
2193                         bottomtop = arg;
2194                 }
2195                 else if (!strcmp("--contents", arg)) {
2196                         if (++i >= argc)
2197                                 usage(blame_usage);
2198                         contents_from = argv[i];
2199                 }
2200                 else if (!strcmp("--incremental", arg))
2201                         incremental = 1;
2202                 else if (!strcmp("--score-debug", arg))
2203                         output_option |= OUTPUT_SHOW_SCORE;
2204                 else if (!strcmp("-f", arg) ||
2205                          !strcmp("--show-name", arg))
2206                         output_option |= OUTPUT_SHOW_NAME;
2207                 else if (!strcmp("-n", arg) ||
2208                          !strcmp("--show-number", arg))
2209                         output_option |= OUTPUT_SHOW_NUMBER;
2210                 else if (!strcmp("-p", arg) ||
2211                          !strcmp("--porcelain", arg))
2212                         output_option |= OUTPUT_PORCELAIN;
2213                 else if (!strcmp("--", arg)) {
2214                         seen_dashdash = 1;
2215                         i++;
2216                         break;
2217                 }
2218                 else
2219                         argv[unk++] = arg;
2220         }
2221
2222         if (!incremental)
2223                 setup_pager();
2224
2225         if (!blame_move_score)
2226                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2227         if (!blame_copy_score)
2228                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2229
2230         /*
2231          * We have collected options unknown to us in argv[1..unk]
2232          * which are to be passed to revision machinery if we are
2233          * going to do the "bottom" processing.
2234          *
2235          * The remaining are:
2236          *
2237          * (1) if seen_dashdash, its either
2238          *     "-options -- <path>" or
2239          *     "-options -- <path> <rev>".
2240          *     but the latter is allowed only if there is no
2241          *     options that we passed to revision machinery.
2242          *
2243          * (2) otherwise, we may have "--" somewhere later and
2244          *     might be looking at the first one of multiple 'rev'
2245          *     parameters (e.g. " master ^next ^maint -- path").
2246          *     See if there is a dashdash first, and give the
2247          *     arguments before that to revision machinery.
2248          *     After that there must be one 'path'.
2249          *
2250          * (3) otherwise, its one of the three:
2251          *     "-options <path> <rev>"
2252          *     "-options <rev> <path>"
2253          *     "-options <path>"
2254          *     but again the first one is allowed only if
2255          *     there is no options that we passed to revision
2256          *     machinery.
2257          */
2258
2259         if (seen_dashdash) {
2260                 /* (1) */
2261                 if (argc <= i)
2262                         usage(blame_usage);
2263                 path = add_prefix(prefix, argv[i]);
2264                 if (i + 1 == argc - 1) {
2265                         if (unk != 1)
2266                                 usage(blame_usage);
2267                         argv[unk++] = argv[i + 1];
2268                 }
2269                 else if (i + 1 != argc)
2270                         /* garbage at end */
2271                         usage(blame_usage);
2272         }
2273         else {
2274                 int j;
2275                 for (j = i; !seen_dashdash && j < argc; j++)
2276                         if (!strcmp(argv[j], "--"))
2277                                 seen_dashdash = j;
2278                 if (seen_dashdash) {
2279                         /* (2) */
2280                         if (seen_dashdash + 1 != argc - 1)
2281                                 usage(blame_usage);
2282                         path = add_prefix(prefix, argv[seen_dashdash + 1]);
2283                         for (j = i; j < seen_dashdash; j++)
2284                                 argv[unk++] = argv[j];
2285                 }
2286                 else {
2287                         /* (3) */
2288                         if (argc <= i)
2289                                 usage(blame_usage);
2290                         path = add_prefix(prefix, argv[i]);
2291                         if (i + 1 == argc - 1) {
2292                                 final_commit_name = argv[i + 1];
2293
2294                                 /* if (unk == 1) we could be getting
2295                                  * old-style
2296                                  */
2297                                 if (unk == 1 && !has_path_in_work_tree(path)) {
2298                                         path = add_prefix(prefix, argv[i + 1]);
2299                                         final_commit_name = argv[i];
2300                                 }
2301                         }
2302                         else if (i != argc - 1)
2303                                 usage(blame_usage); /* garbage at end */
2304
2305                         if (!has_path_in_work_tree(path))
2306                                 die("cannot stat path %s: %s",
2307                                     path, strerror(errno));
2308                 }
2309         }
2310
2311         if (final_commit_name)
2312                 argv[unk++] = final_commit_name;
2313
2314         /*
2315          * Now we got rev and path.  We do not want the path pruning
2316          * but we may want "bottom" processing.
2317          */
2318         argv[unk++] = "--"; /* terminate the rev name */
2319         argv[unk] = NULL;
2320
2321         init_revisions(&revs, NULL);
2322         setup_revisions(unk, argv, &revs, NULL);
2323         memset(&sb, 0, sizeof(sb));
2324
2325         /*
2326          * There must be one and only one positive commit in the
2327          * revs->pending array.
2328          */
2329         for (i = 0; i < revs.pending.nr; i++) {
2330                 struct object *obj = revs.pending.objects[i].item;
2331                 if (obj->flags & UNINTERESTING)
2332                         continue;
2333                 while (obj->type == OBJ_TAG)
2334                         obj = deref_tag(obj, NULL, 0);
2335                 if (obj->type != OBJ_COMMIT)
2336                         die("Non commit %s?",
2337                             revs.pending.objects[i].name);
2338                 if (sb.final)
2339                         die("More than one commit to dig from %s and %s?",
2340                             revs.pending.objects[i].name,
2341                             final_commit_name);
2342                 sb.final = (struct commit *) obj;
2343                 final_commit_name = revs.pending.objects[i].name;
2344         }
2345
2346         if (!sb.final) {
2347                 /*
2348                  * "--not A B -- path" without anything positive;
2349                  * do not default to HEAD, but use the working tree
2350                  * or "--contents".
2351                  */
2352                 sb.final = fake_working_tree_commit(path, contents_from);
2353                 add_pending_object(&revs, &(sb.final->object), ":");
2354         }
2355         else if (contents_from)
2356                 die("Cannot use --contents with final commit object name");
2357
2358         /*
2359          * If we have bottom, this will mark the ancestors of the
2360          * bottom commits we would reach while traversing as
2361          * uninteresting.
2362          */
2363         prepare_revision_walk(&revs);
2364
2365         if (is_null_sha1(sb.final->object.sha1)) {
2366                 char *buf;
2367                 o = sb.final->util;
2368                 buf = xmalloc(o->file.size + 1);
2369                 memcpy(buf, o->file.ptr, o->file.size + 1);
2370                 sb.final_buf = buf;
2371                 sb.final_buf_size = o->file.size;
2372         }
2373         else {
2374                 o = get_origin(&sb, sb.final, path);
2375                 if (fill_blob_sha1(o))
2376                         die("no such path %s in %s", path, final_commit_name);
2377
2378                 sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2379                                               &sb.final_buf_size);
2380         }
2381         num_read_blob++;
2382         lno = prepare_lines(&sb);
2383
2384         bottom = top = 0;
2385         if (bottomtop)
2386                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2387         if (bottom && top && top < bottom) {
2388                 long tmp;
2389                 tmp = top; top = bottom; bottom = tmp;
2390         }
2391         if (bottom < 1)
2392                 bottom = 1;
2393         if (top < 1)
2394                 top = lno;
2395         bottom--;
2396         if (lno < top)
2397                 die("file %s has only %lu lines", path, lno);
2398
2399         ent = xcalloc(1, sizeof(*ent));
2400         ent->lno = bottom;
2401         ent->num_lines = top - bottom;
2402         ent->suspect = o;
2403         ent->s_lno = bottom;
2404
2405         sb.ent = ent;
2406         sb.path = path;
2407
2408         if (revs_file && read_ancestry(revs_file))
2409                 die("reading graft file %s failed: %s",
2410                     revs_file, strerror(errno));
2411
2412         read_mailmap(&mailmap, ".mailmap", NULL);
2413
2414         assign_blame(&sb, &revs, opt);
2415
2416         if (incremental)
2417                 return 0;
2418
2419         coalesce(&sb);
2420
2421         if (!(output_option & OUTPUT_PORCELAIN))
2422                 find_alignment(&sb, &output_option);
2423
2424         output(&sb, output_option);
2425         free((void *)sb.final_buf);
2426         for (ent = sb.ent; ent; ) {
2427                 struct blame_entry *e = ent->next;
2428                 free(ent);
2429                 ent = e;
2430         }
2431
2432         if (show_stats) {
2433                 printf("num read blob: %d\n", num_read_blob);
2434                 printf("num get patch: %d\n", num_get_patch);
2435                 printf("num commits: %d\n", num_commits);
2436         }
2437         return 0;
2438 }