user-manual: clean up fast-forward and dangling-objects sections
[git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19
20 static char blame_usage[] =
21 "git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
22 "  -c                  Use the same output mode as git-annotate (Default: off)\n"
23 "  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
24 "  -l                  Show long commit SHA1 (Default: off)\n"
25 "  --root              Do not treat root commits as boundaries (Default: off)\n"
26 "  -t                  Show raw timestamp (Default: off)\n"
27 "  -f, --show-name     Show original filename (Default: auto)\n"
28 "  -n, --show-number   Show original linenumber (Default: off)\n"
29 "  -p, --porcelain     Show in a format designed for machine consumption\n"
30 "  -L n,m              Process only line range n,m, counting from 1\n"
31 "  -M, -C              Find line movements within and across files\n"
32 "  --incremental       Show blame entries as we find them, incrementally\n"
33 "  --contents file     Use <file>'s contents as the final image\n"
34 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
35
36 static int longest_file;
37 static int longest_author;
38 static int max_orig_digits;
39 static int max_digits;
40 static int max_score_digits;
41 static int show_root;
42 static int blank_boundary;
43 static int incremental;
44 static int cmd_is_annotate;
45
46 #ifndef DEBUG
47 #define DEBUG 0
48 #endif
49
50 /* stats */
51 static int num_read_blob;
52 static int num_get_patch;
53 static int num_commits;
54
55 #define PICKAXE_BLAME_MOVE              01
56 #define PICKAXE_BLAME_COPY              02
57 #define PICKAXE_BLAME_COPY_HARDER       04
58 #define PICKAXE_BLAME_COPY_HARDEST      010
59
60 /*
61  * blame for a blame_entry with score lower than these thresholds
62  * is not passed to the parent using move/copy logic.
63  */
64 static unsigned blame_move_score;
65 static unsigned blame_copy_score;
66 #define BLAME_DEFAULT_MOVE_SCORE        20
67 #define BLAME_DEFAULT_COPY_SCORE        40
68
69 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
70 #define METAINFO_SHOWN          (1u<<12)
71 #define MORE_THAN_ONE_PATH      (1u<<13)
72
73 /*
74  * One blob in a commit that is being suspected
75  */
76 struct origin {
77         int refcnt;
78         struct commit *commit;
79         mmfile_t file;
80         unsigned char blob_sha1[20];
81         char path[FLEX_ARRAY];
82 };
83
84 /*
85  * Given an origin, prepare mmfile_t structure to be used by the
86  * diff machinery
87  */
88 static char *fill_origin_blob(struct origin *o, mmfile_t *file)
89 {
90         if (!o->file.ptr) {
91                 enum object_type type;
92                 num_read_blob++;
93                 file->ptr = read_sha1_file(o->blob_sha1, &type,
94                                            (unsigned long *)(&(file->size)));
95                 o->file = *file;
96         }
97         else
98                 *file = o->file;
99         return file->ptr;
100 }
101
102 /*
103  * Origin is refcounted and usually we keep the blob contents to be
104  * reused.
105  */
106 static inline struct origin *origin_incref(struct origin *o)
107 {
108         if (o)
109                 o->refcnt++;
110         return o;
111 }
112
113 static void origin_decref(struct origin *o)
114 {
115         if (o && --o->refcnt <= 0) {
116                 if (o->file.ptr)
117                         free(o->file.ptr);
118                 memset(o, 0, sizeof(*o));
119                 free(o);
120         }
121 }
122
123 /*
124  * Each group of lines is described by a blame_entry; it can be split
125  * as we pass blame to the parents.  They form a linked list in the
126  * scoreboard structure, sorted by the target line number.
127  */
128 struct blame_entry {
129         struct blame_entry *prev;
130         struct blame_entry *next;
131
132         /* the first line of this group in the final image;
133          * internally all line numbers are 0 based.
134          */
135         int lno;
136
137         /* how many lines this group has */
138         int num_lines;
139
140         /* the commit that introduced this group into the final image */
141         struct origin *suspect;
142
143         /* true if the suspect is truly guilty; false while we have not
144          * checked if the group came from one of its parents.
145          */
146         char guilty;
147
148         /* the line number of the first line of this group in the
149          * suspect's file; internally all line numbers are 0 based.
150          */
151         int s_lno;
152
153         /* how significant this entry is -- cached to avoid
154          * scanning the lines over and over.
155          */
156         unsigned score;
157 };
158
159 /*
160  * The current state of the blame assignment.
161  */
162 struct scoreboard {
163         /* the final commit (i.e. where we started digging from) */
164         struct commit *final;
165
166         const char *path;
167
168         /*
169          * The contents in the final image.
170          * Used by many functions to obtain contents of the nth line,
171          * indexed with scoreboard.lineno[blame_entry.lno].
172          */
173         const char *final_buf;
174         unsigned long final_buf_size;
175
176         /* linked list of blames */
177         struct blame_entry *ent;
178
179         /* look-up a line in the final buffer */
180         int num_lines;
181         int *lineno;
182 };
183
184 static inline int same_suspect(struct origin *a, struct origin *b)
185 {
186         if (a == b)
187                 return 1;
188         if (a->commit != b->commit)
189                 return 0;
190         return !strcmp(a->path, b->path);
191 }
192
193 static void sanity_check_refcnt(struct scoreboard *);
194
195 /*
196  * If two blame entries that are next to each other came from
197  * contiguous lines in the same origin (i.e. <commit, path> pair),
198  * merge them together.
199  */
200 static void coalesce(struct scoreboard *sb)
201 {
202         struct blame_entry *ent, *next;
203
204         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
205                 if (same_suspect(ent->suspect, next->suspect) &&
206                     ent->guilty == next->guilty &&
207                     ent->s_lno + ent->num_lines == next->s_lno) {
208                         ent->num_lines += next->num_lines;
209                         ent->next = next->next;
210                         if (ent->next)
211                                 ent->next->prev = ent;
212                         origin_decref(next->suspect);
213                         free(next);
214                         ent->score = 0;
215                         next = ent; /* again */
216                 }
217         }
218
219         if (DEBUG) /* sanity */
220                 sanity_check_refcnt(sb);
221 }
222
223 /*
224  * Given a commit and a path in it, create a new origin structure.
225  * The callers that add blame to the scoreboard should use
226  * get_origin() to obtain shared, refcounted copy instead of calling
227  * this function directly.
228  */
229 static struct origin *make_origin(struct commit *commit, const char *path)
230 {
231         struct origin *o;
232         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
233         o->commit = commit;
234         o->refcnt = 1;
235         strcpy(o->path, path);
236         return o;
237 }
238
239 /*
240  * Locate an existing origin or create a new one.
241  */
242 static struct origin *get_origin(struct scoreboard *sb,
243                                  struct commit *commit,
244                                  const char *path)
245 {
246         struct blame_entry *e;
247
248         for (e = sb->ent; e; e = e->next) {
249                 if (e->suspect->commit == commit &&
250                     !strcmp(e->suspect->path, path))
251                         return origin_incref(e->suspect);
252         }
253         return make_origin(commit, path);
254 }
255
256 /*
257  * Fill the blob_sha1 field of an origin if it hasn't, so that later
258  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
259  * for an origin is also used to pass the blame for the entire file to
260  * the parent to detect the case where a child's blob is identical to
261  * that of its parent's.
262  */
263 static int fill_blob_sha1(struct origin *origin)
264 {
265         unsigned mode;
266
267         if (!is_null_sha1(origin->blob_sha1))
268                 return 0;
269         if (get_tree_entry(origin->commit->object.sha1,
270                            origin->path,
271                            origin->blob_sha1, &mode))
272                 goto error_out;
273         if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB)
274                 goto error_out;
275         return 0;
276  error_out:
277         hashclr(origin->blob_sha1);
278         return -1;
279 }
280
281 /*
282  * We have an origin -- check if the same path exists in the
283  * parent and return an origin structure to represent it.
284  */
285 static struct origin *find_origin(struct scoreboard *sb,
286                                   struct commit *parent,
287                                   struct origin *origin)
288 {
289         struct origin *porigin = NULL;
290         struct diff_options diff_opts;
291         const char *paths[2];
292
293         if (parent->util) {
294                 /*
295                  * Each commit object can cache one origin in that
296                  * commit.  This is a freestanding copy of origin and
297                  * not refcounted.
298                  */
299                 struct origin *cached = parent->util;
300                 if (!strcmp(cached->path, origin->path)) {
301                         /*
302                          * The same path between origin and its parent
303                          * without renaming -- the most common case.
304                          */
305                         porigin = get_origin(sb, parent, cached->path);
306
307                         /*
308                          * If the origin was newly created (i.e. get_origin
309                          * would call make_origin if none is found in the
310                          * scoreboard), it does not know the blob_sha1,
311                          * so copy it.  Otherwise porigin was in the
312                          * scoreboard and already knows blob_sha1.
313                          */
314                         if (porigin->refcnt == 1)
315                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
316                         return porigin;
317                 }
318                 /* otherwise it was not very useful; free it */
319                 free(parent->util);
320                 parent->util = NULL;
321         }
322
323         /* See if the origin->path is different between parent
324          * and origin first.  Most of the time they are the
325          * same and diff-tree is fairly efficient about this.
326          */
327         diff_setup(&diff_opts);
328         diff_opts.recursive = 1;
329         diff_opts.detect_rename = 0;
330         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
331         paths[0] = origin->path;
332         paths[1] = NULL;
333
334         diff_tree_setup_paths(paths, &diff_opts);
335         if (diff_setup_done(&diff_opts) < 0)
336                 die("diff-setup");
337
338         if (is_null_sha1(origin->commit->object.sha1))
339                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
340         else
341                 diff_tree_sha1(parent->tree->object.sha1,
342                                origin->commit->tree->object.sha1,
343                                "", &diff_opts);
344         diffcore_std(&diff_opts);
345
346         /* It is either one entry that says "modified", or "created",
347          * or nothing.
348          */
349         if (!diff_queued_diff.nr) {
350                 /* The path is the same as parent */
351                 porigin = get_origin(sb, parent, origin->path);
352                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
353         }
354         else if (diff_queued_diff.nr != 1)
355                 die("internal error in blame::find_origin");
356         else {
357                 struct diff_filepair *p = diff_queued_diff.queue[0];
358                 switch (p->status) {
359                 default:
360                         die("internal error in blame::find_origin (%c)",
361                             p->status);
362                 case 'M':
363                         porigin = get_origin(sb, parent, origin->path);
364                         hashcpy(porigin->blob_sha1, p->one->sha1);
365                         break;
366                 case 'A':
367                 case 'T':
368                         /* Did not exist in parent, or type changed */
369                         break;
370                 }
371         }
372         diff_flush(&diff_opts);
373         if (porigin) {
374                 /*
375                  * Create a freestanding copy that is not part of
376                  * the refcounted origin found in the scoreboard, and
377                  * cache it in the commit.
378                  */
379                 struct origin *cached;
380
381                 cached = make_origin(porigin->commit, porigin->path);
382                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
383                 parent->util = cached;
384         }
385         return porigin;
386 }
387
388 /*
389  * We have an origin -- find the path that corresponds to it in its
390  * parent and return an origin structure to represent it.
391  */
392 static struct origin *find_rename(struct scoreboard *sb,
393                                   struct commit *parent,
394                                   struct origin *origin)
395 {
396         struct origin *porigin = NULL;
397         struct diff_options diff_opts;
398         int i;
399         const char *paths[2];
400
401         diff_setup(&diff_opts);
402         diff_opts.recursive = 1;
403         diff_opts.detect_rename = DIFF_DETECT_RENAME;
404         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
405         diff_opts.single_follow = origin->path;
406         paths[0] = NULL;
407         diff_tree_setup_paths(paths, &diff_opts);
408         if (diff_setup_done(&diff_opts) < 0)
409                 die("diff-setup");
410
411         if (is_null_sha1(origin->commit->object.sha1))
412                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
413         else
414                 diff_tree_sha1(parent->tree->object.sha1,
415                                origin->commit->tree->object.sha1,
416                                "", &diff_opts);
417         diffcore_std(&diff_opts);
418
419         for (i = 0; i < diff_queued_diff.nr; i++) {
420                 struct diff_filepair *p = diff_queued_diff.queue[i];
421                 if ((p->status == 'R' || p->status == 'C') &&
422                     !strcmp(p->two->path, origin->path)) {
423                         porigin = get_origin(sb, parent, p->one->path);
424                         hashcpy(porigin->blob_sha1, p->one->sha1);
425                         break;
426                 }
427         }
428         diff_flush(&diff_opts);
429         return porigin;
430 }
431
432 /*
433  * Parsing of patch chunks...
434  */
435 struct chunk {
436         /* line number in postimage; up to but not including this
437          * line is the same as preimage
438          */
439         int same;
440
441         /* preimage line number after this chunk */
442         int p_next;
443
444         /* postimage line number after this chunk */
445         int t_next;
446 };
447
448 struct patch {
449         struct chunk *chunks;
450         int num;
451 };
452
453 struct blame_diff_state {
454         struct xdiff_emit_state xm;
455         struct patch *ret;
456         unsigned hunk_post_context;
457         unsigned hunk_in_pre_context : 1;
458 };
459
460 static void process_u_diff(void *state_, char *line, unsigned long len)
461 {
462         struct blame_diff_state *state = state_;
463         struct chunk *chunk;
464         int off1, off2, len1, len2, num;
465
466         num = state->ret->num;
467         if (len < 4 || line[0] != '@' || line[1] != '@') {
468                 if (state->hunk_in_pre_context && line[0] == ' ')
469                         state->ret->chunks[num - 1].same++;
470                 else {
471                         state->hunk_in_pre_context = 0;
472                         if (line[0] == ' ')
473                                 state->hunk_post_context++;
474                         else
475                                 state->hunk_post_context = 0;
476                 }
477                 return;
478         }
479
480         if (num && state->hunk_post_context) {
481                 chunk = &state->ret->chunks[num - 1];
482                 chunk->p_next -= state->hunk_post_context;
483                 chunk->t_next -= state->hunk_post_context;
484         }
485         state->ret->num = ++num;
486         state->ret->chunks = xrealloc(state->ret->chunks,
487                                       sizeof(struct chunk) * num);
488         chunk = &state->ret->chunks[num - 1];
489         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
490                 state->ret->num--;
491                 return;
492         }
493
494         /* Line numbers in patch output are one based. */
495         off1--;
496         off2--;
497
498         chunk->same = len2 ? off2 : (off2 + 1);
499
500         chunk->p_next = off1 + (len1 ? len1 : 1);
501         chunk->t_next = chunk->same + len2;
502         state->hunk_in_pre_context = 1;
503         state->hunk_post_context = 0;
504 }
505
506 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
507                                     int context)
508 {
509         struct blame_diff_state state;
510         xpparam_t xpp;
511         xdemitconf_t xecfg;
512         xdemitcb_t ecb;
513
514         xpp.flags = XDF_NEED_MINIMAL;
515         xecfg.ctxlen = context;
516         xecfg.flags = 0;
517         ecb.outf = xdiff_outf;
518         ecb.priv = &state;
519         memset(&state, 0, sizeof(state));
520         state.xm.consume = process_u_diff;
521         state.ret = xmalloc(sizeof(struct patch));
522         state.ret->chunks = NULL;
523         state.ret->num = 0;
524
525         xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
526
527         if (state.ret->num) {
528                 struct chunk *chunk;
529                 chunk = &state.ret->chunks[state.ret->num - 1];
530                 chunk->p_next -= state.hunk_post_context;
531                 chunk->t_next -= state.hunk_post_context;
532         }
533         return state.ret;
534 }
535
536 /*
537  * Run diff between two origins and grab the patch output, so that
538  * we can pass blame for lines origin is currently suspected for
539  * to its parent.
540  */
541 static struct patch *get_patch(struct origin *parent, struct origin *origin)
542 {
543         mmfile_t file_p, file_o;
544         struct patch *patch;
545
546         fill_origin_blob(parent, &file_p);
547         fill_origin_blob(origin, &file_o);
548         if (!file_p.ptr || !file_o.ptr)
549                 return NULL;
550         patch = compare_buffer(&file_p, &file_o, 0);
551         num_get_patch++;
552         return patch;
553 }
554
555 static void free_patch(struct patch *p)
556 {
557         free(p->chunks);
558         free(p);
559 }
560
561 /*
562  * Link in a new blame entry to the scoreboard.  Entries that cover the
563  * same line range have been removed from the scoreboard previously.
564  */
565 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
566 {
567         struct blame_entry *ent, *prev = NULL;
568
569         origin_incref(e->suspect);
570
571         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
572                 prev = ent;
573
574         /* prev, if not NULL, is the last one that is below e */
575         e->prev = prev;
576         if (prev) {
577                 e->next = prev->next;
578                 prev->next = e;
579         }
580         else {
581                 e->next = sb->ent;
582                 sb->ent = e;
583         }
584         if (e->next)
585                 e->next->prev = e;
586 }
587
588 /*
589  * src typically is on-stack; we want to copy the information in it to
590  * an malloced blame_entry that is already on the linked list of the
591  * scoreboard.  The origin of dst loses a refcnt while the origin of src
592  * gains one.
593  */
594 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
595 {
596         struct blame_entry *p, *n;
597
598         p = dst->prev;
599         n = dst->next;
600         origin_incref(src->suspect);
601         origin_decref(dst->suspect);
602         memcpy(dst, src, sizeof(*src));
603         dst->prev = p;
604         dst->next = n;
605         dst->score = 0;
606 }
607
608 static const char *nth_line(struct scoreboard *sb, int lno)
609 {
610         return sb->final_buf + sb->lineno[lno];
611 }
612
613 /*
614  * It is known that lines between tlno to same came from parent, and e
615  * has an overlap with that range.  it also is known that parent's
616  * line plno corresponds to e's line tlno.
617  *
618  *                <---- e ----->
619  *                   <------>
620  *                   <------------>
621  *             <------------>
622  *             <------------------>
623  *
624  * Split e into potentially three parts; before this chunk, the chunk
625  * to be blamed for the parent, and after that portion.
626  */
627 static void split_overlap(struct blame_entry *split,
628                           struct blame_entry *e,
629                           int tlno, int plno, int same,
630                           struct origin *parent)
631 {
632         int chunk_end_lno;
633         memset(split, 0, sizeof(struct blame_entry [3]));
634
635         if (e->s_lno < tlno) {
636                 /* there is a pre-chunk part not blamed on parent */
637                 split[0].suspect = origin_incref(e->suspect);
638                 split[0].lno = e->lno;
639                 split[0].s_lno = e->s_lno;
640                 split[0].num_lines = tlno - e->s_lno;
641                 split[1].lno = e->lno + tlno - e->s_lno;
642                 split[1].s_lno = plno;
643         }
644         else {
645                 split[1].lno = e->lno;
646                 split[1].s_lno = plno + (e->s_lno - tlno);
647         }
648
649         if (same < e->s_lno + e->num_lines) {
650                 /* there is a post-chunk part not blamed on parent */
651                 split[2].suspect = origin_incref(e->suspect);
652                 split[2].lno = e->lno + (same - e->s_lno);
653                 split[2].s_lno = e->s_lno + (same - e->s_lno);
654                 split[2].num_lines = e->s_lno + e->num_lines - same;
655                 chunk_end_lno = split[2].lno;
656         }
657         else
658                 chunk_end_lno = e->lno + e->num_lines;
659         split[1].num_lines = chunk_end_lno - split[1].lno;
660
661         /*
662          * if it turns out there is nothing to blame the parent for,
663          * forget about the splitting.  !split[1].suspect signals this.
664          */
665         if (split[1].num_lines < 1)
666                 return;
667         split[1].suspect = origin_incref(parent);
668 }
669
670 /*
671  * split_overlap() divided an existing blame e into up to three parts
672  * in split.  Adjust the linked list of blames in the scoreboard to
673  * reflect the split.
674  */
675 static void split_blame(struct scoreboard *sb,
676                         struct blame_entry *split,
677                         struct blame_entry *e)
678 {
679         struct blame_entry *new_entry;
680
681         if (split[0].suspect && split[2].suspect) {
682                 /* The first part (reuse storage for the existing entry e) */
683                 dup_entry(e, &split[0]);
684
685                 /* The last part -- me */
686                 new_entry = xmalloc(sizeof(*new_entry));
687                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
688                 add_blame_entry(sb, new_entry);
689
690                 /* ... and the middle part -- parent */
691                 new_entry = xmalloc(sizeof(*new_entry));
692                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
693                 add_blame_entry(sb, new_entry);
694         }
695         else if (!split[0].suspect && !split[2].suspect)
696                 /*
697                  * The parent covers the entire area; reuse storage for
698                  * e and replace it with the parent.
699                  */
700                 dup_entry(e, &split[1]);
701         else if (split[0].suspect) {
702                 /* me and then parent */
703                 dup_entry(e, &split[0]);
704
705                 new_entry = xmalloc(sizeof(*new_entry));
706                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
707                 add_blame_entry(sb, new_entry);
708         }
709         else {
710                 /* parent and then me */
711                 dup_entry(e, &split[1]);
712
713                 new_entry = xmalloc(sizeof(*new_entry));
714                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
715                 add_blame_entry(sb, new_entry);
716         }
717
718         if (DEBUG) { /* sanity */
719                 struct blame_entry *ent;
720                 int lno = sb->ent->lno, corrupt = 0;
721
722                 for (ent = sb->ent; ent; ent = ent->next) {
723                         if (lno != ent->lno)
724                                 corrupt = 1;
725                         if (ent->s_lno < 0)
726                                 corrupt = 1;
727                         lno += ent->num_lines;
728                 }
729                 if (corrupt) {
730                         lno = sb->ent->lno;
731                         for (ent = sb->ent; ent; ent = ent->next) {
732                                 printf("L %8d l %8d n %8d\n",
733                                        lno, ent->lno, ent->num_lines);
734                                 lno = ent->lno + ent->num_lines;
735                         }
736                         die("oops");
737                 }
738         }
739 }
740
741 /*
742  * After splitting the blame, the origins used by the
743  * on-stack blame_entry should lose one refcnt each.
744  */
745 static void decref_split(struct blame_entry *split)
746 {
747         int i;
748
749         for (i = 0; i < 3; i++)
750                 origin_decref(split[i].suspect);
751 }
752
753 /*
754  * Helper for blame_chunk().  blame_entry e is known to overlap with
755  * the patch hunk; split it and pass blame to the parent.
756  */
757 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
758                           int tlno, int plno, int same,
759                           struct origin *parent)
760 {
761         struct blame_entry split[3];
762
763         split_overlap(split, e, tlno, plno, same, parent);
764         if (split[1].suspect)
765                 split_blame(sb, split, e);
766         decref_split(split);
767 }
768
769 /*
770  * Find the line number of the last line the target is suspected for.
771  */
772 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
773 {
774         struct blame_entry *e;
775         int last_in_target = -1;
776
777         for (e = sb->ent; e; e = e->next) {
778                 if (e->guilty || !same_suspect(e->suspect, target))
779                         continue;
780                 if (last_in_target < e->s_lno + e->num_lines)
781                         last_in_target = e->s_lno + e->num_lines;
782         }
783         return last_in_target;
784 }
785
786 /*
787  * Process one hunk from the patch between the current suspect for
788  * blame_entry e and its parent.  Find and split the overlap, and
789  * pass blame to the overlapping part to the parent.
790  */
791 static void blame_chunk(struct scoreboard *sb,
792                         int tlno, int plno, int same,
793                         struct origin *target, struct origin *parent)
794 {
795         struct blame_entry *e;
796
797         for (e = sb->ent; e; e = e->next) {
798                 if (e->guilty || !same_suspect(e->suspect, target))
799                         continue;
800                 if (same <= e->s_lno)
801                         continue;
802                 if (tlno < e->s_lno + e->num_lines)
803                         blame_overlap(sb, e, tlno, plno, same, parent);
804         }
805 }
806
807 /*
808  * We are looking at the origin 'target' and aiming to pass blame
809  * for the lines it is suspected to its parent.  Run diff to find
810  * which lines came from parent and pass blame for them.
811  */
812 static int pass_blame_to_parent(struct scoreboard *sb,
813                                 struct origin *target,
814                                 struct origin *parent)
815 {
816         int i, last_in_target, plno, tlno;
817         struct patch *patch;
818
819         last_in_target = find_last_in_target(sb, target);
820         if (last_in_target < 0)
821                 return 1; /* nothing remains for this target */
822
823         patch = get_patch(parent, target);
824         plno = tlno = 0;
825         for (i = 0; i < patch->num; i++) {
826                 struct chunk *chunk = &patch->chunks[i];
827
828                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
829                 plno = chunk->p_next;
830                 tlno = chunk->t_next;
831         }
832         /* The rest (i.e. anything after tlno) are the same as the parent */
833         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
834
835         free_patch(patch);
836         return 0;
837 }
838
839 /*
840  * The lines in blame_entry after splitting blames many times can become
841  * very small and trivial, and at some point it becomes pointless to
842  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
843  * ordinary C program, and it is not worth to say it was copied from
844  * totally unrelated file in the parent.
845  *
846  * Compute how trivial the lines in the blame_entry are.
847  */
848 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
849 {
850         unsigned score;
851         const char *cp, *ep;
852
853         if (e->score)
854                 return e->score;
855
856         score = 1;
857         cp = nth_line(sb, e->lno);
858         ep = nth_line(sb, e->lno + e->num_lines);
859         while (cp < ep) {
860                 unsigned ch = *((unsigned char *)cp);
861                 if (isalnum(ch))
862                         score++;
863                 cp++;
864         }
865         e->score = score;
866         return score;
867 }
868
869 /*
870  * best_so_far[] and this[] are both a split of an existing blame_entry
871  * that passes blame to the parent.  Maintain best_so_far the best split
872  * so far, by comparing this and best_so_far and copying this into
873  * bst_so_far as needed.
874  */
875 static void copy_split_if_better(struct scoreboard *sb,
876                                  struct blame_entry *best_so_far,
877                                  struct blame_entry *this)
878 {
879         int i;
880
881         if (!this[1].suspect)
882                 return;
883         if (best_so_far[1].suspect) {
884                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
885                         return;
886         }
887
888         for (i = 0; i < 3; i++)
889                 origin_incref(this[i].suspect);
890         decref_split(best_so_far);
891         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
892 }
893
894 /*
895  * We are looking at a part of the final image represented by
896  * ent (tlno and same are offset by ent->s_lno).
897  * tlno is where we are looking at in the final image.
898  * up to (but not including) same match preimage.
899  * plno is where we are looking at in the preimage.
900  *
901  * <-------------- final image ---------------------->
902  *       <------ent------>
903  *         ^tlno ^same
904  *    <---------preimage----->
905  *         ^plno
906  *
907  * All line numbers are 0-based.
908  */
909 static void handle_split(struct scoreboard *sb,
910                          struct blame_entry *ent,
911                          int tlno, int plno, int same,
912                          struct origin *parent,
913                          struct blame_entry *split)
914 {
915         if (ent->num_lines <= tlno)
916                 return;
917         if (tlno < same) {
918                 struct blame_entry this[3];
919                 tlno += ent->s_lno;
920                 same += ent->s_lno;
921                 split_overlap(this, ent, tlno, plno, same, parent);
922                 copy_split_if_better(sb, split, this);
923                 decref_split(this);
924         }
925 }
926
927 /*
928  * Find the lines from parent that are the same as ent so that
929  * we can pass blames to it.  file_p has the blob contents for
930  * the parent.
931  */
932 static void find_copy_in_blob(struct scoreboard *sb,
933                               struct blame_entry *ent,
934                               struct origin *parent,
935                               struct blame_entry *split,
936                               mmfile_t *file_p)
937 {
938         const char *cp;
939         int cnt;
940         mmfile_t file_o;
941         struct patch *patch;
942         int i, plno, tlno;
943
944         /*
945          * Prepare mmfile that contains only the lines in ent.
946          */
947         cp = nth_line(sb, ent->lno);
948         file_o.ptr = (char*) cp;
949         cnt = ent->num_lines;
950
951         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
952                 if (*cp++ == '\n')
953                         cnt--;
954         }
955         file_o.size = cp - file_o.ptr;
956
957         patch = compare_buffer(file_p, &file_o, 1);
958
959         /*
960          * file_o is a part of final image we are annotating.
961          * file_p partially may match that image.
962          */
963         memset(split, 0, sizeof(struct blame_entry [3]));
964         plno = tlno = 0;
965         for (i = 0; i < patch->num; i++) {
966                 struct chunk *chunk = &patch->chunks[i];
967
968                 handle_split(sb, ent, tlno, plno, chunk->same, parent, split);
969                 plno = chunk->p_next;
970                 tlno = chunk->t_next;
971         }
972         /* remainder, if any, all match the preimage */
973         handle_split(sb, ent, tlno, plno, ent->num_lines, parent, split);
974         free_patch(patch);
975 }
976
977 /*
978  * See if lines currently target is suspected for can be attributed to
979  * parent.
980  */
981 static int find_move_in_parent(struct scoreboard *sb,
982                                struct origin *target,
983                                struct origin *parent)
984 {
985         int last_in_target, made_progress;
986         struct blame_entry *e, split[3];
987         mmfile_t file_p;
988
989         last_in_target = find_last_in_target(sb, target);
990         if (last_in_target < 0)
991                 return 1; /* nothing remains for this target */
992
993         fill_origin_blob(parent, &file_p);
994         if (!file_p.ptr)
995                 return 0;
996
997         made_progress = 1;
998         while (made_progress) {
999                 made_progress = 0;
1000                 for (e = sb->ent; e; e = e->next) {
1001                         if (e->guilty || !same_suspect(e->suspect, target))
1002                                 continue;
1003                         find_copy_in_blob(sb, e, parent, split, &file_p);
1004                         if (split[1].suspect &&
1005                             blame_move_score < ent_score(sb, &split[1])) {
1006                                 split_blame(sb, split, e);
1007                                 made_progress = 1;
1008                         }
1009                         decref_split(split);
1010                 }
1011         }
1012         return 0;
1013 }
1014
1015 struct blame_list {
1016         struct blame_entry *ent;
1017         struct blame_entry split[3];
1018 };
1019
1020 /*
1021  * Count the number of entries the target is suspected for,
1022  * and prepare a list of entry and the best split.
1023  */
1024 static struct blame_list *setup_blame_list(struct scoreboard *sb,
1025                                            struct origin *target,
1026                                            int *num_ents_p)
1027 {
1028         struct blame_entry *e;
1029         int num_ents, i;
1030         struct blame_list *blame_list = NULL;
1031
1032         for (e = sb->ent, num_ents = 0; e; e = e->next)
1033                 if (!e->guilty && same_suspect(e->suspect, target))
1034                         num_ents++;
1035         if (num_ents) {
1036                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1037                 for (e = sb->ent, i = 0; e; e = e->next)
1038                         if (!e->guilty && same_suspect(e->suspect, target))
1039                                 blame_list[i++].ent = e;
1040         }
1041         *num_ents_p = num_ents;
1042         return blame_list;
1043 }
1044
1045 /*
1046  * For lines target is suspected for, see if we can find code movement
1047  * across file boundary from the parent commit.  porigin is the path
1048  * in the parent we already tried.
1049  */
1050 static int find_copy_in_parent(struct scoreboard *sb,
1051                                struct origin *target,
1052                                struct commit *parent,
1053                                struct origin *porigin,
1054                                int opt)
1055 {
1056         struct diff_options diff_opts;
1057         const char *paths[1];
1058         int i, j;
1059         int retval;
1060         struct blame_list *blame_list;
1061         int num_ents;
1062
1063         blame_list = setup_blame_list(sb, target, &num_ents);
1064         if (!blame_list)
1065                 return 1; /* nothing remains for this target */
1066
1067         diff_setup(&diff_opts);
1068         diff_opts.recursive = 1;
1069         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1070
1071         paths[0] = NULL;
1072         diff_tree_setup_paths(paths, &diff_opts);
1073         if (diff_setup_done(&diff_opts) < 0)
1074                 die("diff-setup");
1075
1076         /* Try "find copies harder" on new path if requested;
1077          * we do not want to use diffcore_rename() actually to
1078          * match things up; find_copies_harder is set only to
1079          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1080          * and this code needs to be after diff_setup_done(), which
1081          * usually makes find-copies-harder imply copy detection.
1082          */
1083         if ((opt & PICKAXE_BLAME_COPY_HARDEST)
1084             || ((opt & PICKAXE_BLAME_COPY_HARDER)
1085                 && (!porigin || strcmp(target->path, porigin->path))))
1086                 diff_opts.find_copies_harder = 1;
1087
1088         if (is_null_sha1(target->commit->object.sha1))
1089                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1090         else
1091                 diff_tree_sha1(parent->tree->object.sha1,
1092                                target->commit->tree->object.sha1,
1093                                "", &diff_opts);
1094
1095         if (!diff_opts.find_copies_harder)
1096                 diffcore_std(&diff_opts);
1097
1098         retval = 0;
1099         while (1) {
1100                 int made_progress = 0;
1101
1102                 for (i = 0; i < diff_queued_diff.nr; i++) {
1103                         struct diff_filepair *p = diff_queued_diff.queue[i];
1104                         struct origin *norigin;
1105                         mmfile_t file_p;
1106                         struct blame_entry this[3];
1107
1108                         if (!DIFF_FILE_VALID(p->one))
1109                                 continue; /* does not exist in parent */
1110                         if (porigin && !strcmp(p->one->path, porigin->path))
1111                                 /* find_move already dealt with this path */
1112                                 continue;
1113
1114                         norigin = get_origin(sb, parent, p->one->path);
1115                         hashcpy(norigin->blob_sha1, p->one->sha1);
1116                         fill_origin_blob(norigin, &file_p);
1117                         if (!file_p.ptr)
1118                                 continue;
1119
1120                         for (j = 0; j < num_ents; j++) {
1121                                 find_copy_in_blob(sb, blame_list[j].ent,
1122                                                   norigin, this, &file_p);
1123                                 copy_split_if_better(sb, blame_list[j].split,
1124                                                      this);
1125                                 decref_split(this);
1126                         }
1127                         origin_decref(norigin);
1128                 }
1129
1130                 for (j = 0; j < num_ents; j++) {
1131                         struct blame_entry *split = blame_list[j].split;
1132                         if (split[1].suspect &&
1133                             blame_copy_score < ent_score(sb, &split[1])) {
1134                                 split_blame(sb, split, blame_list[j].ent);
1135                                 made_progress = 1;
1136                         }
1137                         decref_split(split);
1138                 }
1139                 free(blame_list);
1140
1141                 if (!made_progress)
1142                         break;
1143                 blame_list = setup_blame_list(sb, target, &num_ents);
1144                 if (!blame_list) {
1145                         retval = 1;
1146                         break;
1147                 }
1148         }
1149         diff_flush(&diff_opts);
1150
1151         return retval;
1152 }
1153
1154 /*
1155  * The blobs of origin and porigin exactly match, so everything
1156  * origin is suspected for can be blamed on the parent.
1157  */
1158 static void pass_whole_blame(struct scoreboard *sb,
1159                              struct origin *origin, struct origin *porigin)
1160 {
1161         struct blame_entry *e;
1162
1163         if (!porigin->file.ptr && origin->file.ptr) {
1164                 /* Steal its file */
1165                 porigin->file = origin->file;
1166                 origin->file.ptr = NULL;
1167         }
1168         for (e = sb->ent; e; e = e->next) {
1169                 if (!same_suspect(e->suspect, origin))
1170                         continue;
1171                 origin_incref(porigin);
1172                 origin_decref(e->suspect);
1173                 e->suspect = porigin;
1174         }
1175 }
1176
1177 #define MAXPARENT 16
1178
1179 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1180 {
1181         int i, pass;
1182         struct commit *commit = origin->commit;
1183         struct commit_list *parent;
1184         struct origin *parent_origin[MAXPARENT], *porigin;
1185
1186         memset(parent_origin, 0, sizeof(parent_origin));
1187
1188         /* The first pass looks for unrenamed path to optimize for
1189          * common cases, then we look for renames in the second pass.
1190          */
1191         for (pass = 0; pass < 2; pass++) {
1192                 struct origin *(*find)(struct scoreboard *,
1193                                        struct commit *, struct origin *);
1194                 find = pass ? find_rename : find_origin;
1195
1196                 for (i = 0, parent = commit->parents;
1197                      i < MAXPARENT && parent;
1198                      parent = parent->next, i++) {
1199                         struct commit *p = parent->item;
1200                         int j, same;
1201
1202                         if (parent_origin[i])
1203                                 continue;
1204                         if (parse_commit(p))
1205                                 continue;
1206                         porigin = find(sb, p, origin);
1207                         if (!porigin)
1208                                 continue;
1209                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1210                                 pass_whole_blame(sb, origin, porigin);
1211                                 origin_decref(porigin);
1212                                 goto finish;
1213                         }
1214                         for (j = same = 0; j < i; j++)
1215                                 if (parent_origin[j] &&
1216                                     !hashcmp(parent_origin[j]->blob_sha1,
1217                                              porigin->blob_sha1)) {
1218                                         same = 1;
1219                                         break;
1220                                 }
1221                         if (!same)
1222                                 parent_origin[i] = porigin;
1223                         else
1224                                 origin_decref(porigin);
1225                 }
1226         }
1227
1228         num_commits++;
1229         for (i = 0, parent = commit->parents;
1230              i < MAXPARENT && parent;
1231              parent = parent->next, i++) {
1232                 struct origin *porigin = parent_origin[i];
1233                 if (!porigin)
1234                         continue;
1235                 if (pass_blame_to_parent(sb, origin, porigin))
1236                         goto finish;
1237         }
1238
1239         /*
1240          * Optionally find moves in parents' files.
1241          */
1242         if (opt & PICKAXE_BLAME_MOVE)
1243                 for (i = 0, parent = commit->parents;
1244                      i < MAXPARENT && parent;
1245                      parent = parent->next, i++) {
1246                         struct origin *porigin = parent_origin[i];
1247                         if (!porigin)
1248                                 continue;
1249                         if (find_move_in_parent(sb, origin, porigin))
1250                                 goto finish;
1251                 }
1252
1253         /*
1254          * Optionally find copies from parents' files.
1255          */
1256         if (opt & PICKAXE_BLAME_COPY)
1257                 for (i = 0, parent = commit->parents;
1258                      i < MAXPARENT && parent;
1259                      parent = parent->next, i++) {
1260                         struct origin *porigin = parent_origin[i];
1261                         if (find_copy_in_parent(sb, origin, parent->item,
1262                                                 porigin, opt))
1263                                 goto finish;
1264                 }
1265
1266  finish:
1267         for (i = 0; i < MAXPARENT; i++)
1268                 origin_decref(parent_origin[i]);
1269 }
1270
1271 /*
1272  * Information on commits, used for output.
1273  */
1274 struct commit_info
1275 {
1276         const char *author;
1277         const char *author_mail;
1278         unsigned long author_time;
1279         const char *author_tz;
1280
1281         /* filled only when asked for details */
1282         const char *committer;
1283         const char *committer_mail;
1284         unsigned long committer_time;
1285         const char *committer_tz;
1286
1287         const char *summary;
1288 };
1289
1290 /*
1291  * Parse author/committer line in the commit object buffer
1292  */
1293 static void get_ac_line(const char *inbuf, const char *what,
1294                         int bufsz, char *person, const char **mail,
1295                         unsigned long *time, const char **tz)
1296 {
1297         int len;
1298         char *tmp, *endp;
1299
1300         tmp = strstr(inbuf, what);
1301         if (!tmp)
1302                 goto error_out;
1303         tmp += strlen(what);
1304         endp = strchr(tmp, '\n');
1305         if (!endp)
1306                 len = strlen(tmp);
1307         else
1308                 len = endp - tmp;
1309         if (bufsz <= len) {
1310         error_out:
1311                 /* Ugh */
1312                 *mail = *tz = "(unknown)";
1313                 *time = 0;
1314                 return;
1315         }
1316         memcpy(person, tmp, len);
1317
1318         tmp = person;
1319         tmp += len;
1320         *tmp = 0;
1321         while (*tmp != ' ')
1322                 tmp--;
1323         *tz = tmp+1;
1324
1325         *tmp = 0;
1326         while (*tmp != ' ')
1327                 tmp--;
1328         *time = strtoul(tmp, NULL, 10);
1329
1330         *tmp = 0;
1331         while (*tmp != ' ')
1332                 tmp--;
1333         *mail = tmp + 1;
1334         *tmp = 0;
1335 }
1336
1337 static void get_commit_info(struct commit *commit,
1338                             struct commit_info *ret,
1339                             int detailed)
1340 {
1341         int len;
1342         char *tmp, *endp;
1343         static char author_buf[1024];
1344         static char committer_buf[1024];
1345         static char summary_buf[1024];
1346
1347         /*
1348          * We've operated without save_commit_buffer, so
1349          * we now need to populate them for output.
1350          */
1351         if (!commit->buffer) {
1352                 enum object_type type;
1353                 unsigned long size;
1354                 commit->buffer =
1355                         read_sha1_file(commit->object.sha1, &type, &size);
1356         }
1357         ret->author = author_buf;
1358         get_ac_line(commit->buffer, "\nauthor ",
1359                     sizeof(author_buf), author_buf, &ret->author_mail,
1360                     &ret->author_time, &ret->author_tz);
1361
1362         if (!detailed)
1363                 return;
1364
1365         ret->committer = committer_buf;
1366         get_ac_line(commit->buffer, "\ncommitter ",
1367                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1368                     &ret->committer_time, &ret->committer_tz);
1369
1370         ret->summary = summary_buf;
1371         tmp = strstr(commit->buffer, "\n\n");
1372         if (!tmp) {
1373         error_out:
1374                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1375                 return;
1376         }
1377         tmp += 2;
1378         endp = strchr(tmp, '\n');
1379         if (!endp)
1380                 endp = tmp + strlen(tmp);
1381         len = endp - tmp;
1382         if (len >= sizeof(summary_buf) || len == 0)
1383                 goto error_out;
1384         memcpy(summary_buf, tmp, len);
1385         summary_buf[len] = 0;
1386 }
1387
1388 /*
1389  * To allow LF and other nonportable characters in pathnames,
1390  * they are c-style quoted as needed.
1391  */
1392 static void write_filename_info(const char *path)
1393 {
1394         printf("filename ");
1395         write_name_quoted(NULL, 0, path, 1, stdout);
1396         putchar('\n');
1397 }
1398
1399 /*
1400  * The blame_entry is found to be guilty for the range.  Mark it
1401  * as such, and show it in incremental output.
1402  */
1403 static void found_guilty_entry(struct blame_entry *ent)
1404 {
1405         if (ent->guilty)
1406                 return;
1407         ent->guilty = 1;
1408         if (incremental) {
1409                 struct origin *suspect = ent->suspect;
1410
1411                 printf("%s %d %d %d\n",
1412                        sha1_to_hex(suspect->commit->object.sha1),
1413                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1414                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1415                         struct commit_info ci;
1416                         suspect->commit->object.flags |= METAINFO_SHOWN;
1417                         get_commit_info(suspect->commit, &ci, 1);
1418                         printf("author %s\n", ci.author);
1419                         printf("author-mail %s\n", ci.author_mail);
1420                         printf("author-time %lu\n", ci.author_time);
1421                         printf("author-tz %s\n", ci.author_tz);
1422                         printf("committer %s\n", ci.committer);
1423                         printf("committer-mail %s\n", ci.committer_mail);
1424                         printf("committer-time %lu\n", ci.committer_time);
1425                         printf("committer-tz %s\n", ci.committer_tz);
1426                         printf("summary %s\n", ci.summary);
1427                         if (suspect->commit->object.flags & UNINTERESTING)
1428                                 printf("boundary\n");
1429                 }
1430                 write_filename_info(suspect->path);
1431         }
1432 }
1433
1434 /*
1435  * The main loop -- while the scoreboard has lines whose true origin
1436  * is still unknown, pick one blame_entry, and allow its current
1437  * suspect to pass blames to its parents.
1438  */
1439 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1440 {
1441         while (1) {
1442                 struct blame_entry *ent;
1443                 struct commit *commit;
1444                 struct origin *suspect = NULL;
1445
1446                 /* find one suspect to break down */
1447                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1448                         if (!ent->guilty)
1449                                 suspect = ent->suspect;
1450                 if (!suspect)
1451                         return; /* all done */
1452
1453                 /*
1454                  * We will use this suspect later in the loop,
1455                  * so hold onto it in the meantime.
1456                  */
1457                 origin_incref(suspect);
1458                 commit = suspect->commit;
1459                 if (!commit->object.parsed)
1460                         parse_commit(commit);
1461                 if (!(commit->object.flags & UNINTERESTING) &&
1462                     !(revs->max_age != -1 && commit->date < revs->max_age))
1463                         pass_blame(sb, suspect, opt);
1464                 else {
1465                         commit->object.flags |= UNINTERESTING;
1466                         if (commit->object.parsed)
1467                                 mark_parents_uninteresting(commit);
1468                 }
1469                 /* treat root commit as boundary */
1470                 if (!commit->parents && !show_root)
1471                         commit->object.flags |= UNINTERESTING;
1472
1473                 /* Take responsibility for the remaining entries */
1474                 for (ent = sb->ent; ent; ent = ent->next)
1475                         if (same_suspect(ent->suspect, suspect))
1476                                 found_guilty_entry(ent);
1477                 origin_decref(suspect);
1478
1479                 if (DEBUG) /* sanity */
1480                         sanity_check_refcnt(sb);
1481         }
1482 }
1483
1484 static const char *format_time(unsigned long time, const char *tz_str,
1485                                int show_raw_time)
1486 {
1487         static char time_buf[128];
1488         time_t t = time;
1489         int minutes, tz;
1490         struct tm *tm;
1491
1492         if (show_raw_time) {
1493                 sprintf(time_buf, "%lu %s", time, tz_str);
1494                 return time_buf;
1495         }
1496
1497         tz = atoi(tz_str);
1498         minutes = tz < 0 ? -tz : tz;
1499         minutes = (minutes / 100)*60 + (minutes % 100);
1500         minutes = tz < 0 ? -minutes : minutes;
1501         t = time + minutes * 60;
1502         tm = gmtime(&t);
1503
1504         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1505         strcat(time_buf, tz_str);
1506         return time_buf;
1507 }
1508
1509 #define OUTPUT_ANNOTATE_COMPAT  001
1510 #define OUTPUT_LONG_OBJECT_NAME 002
1511 #define OUTPUT_RAW_TIMESTAMP    004
1512 #define OUTPUT_PORCELAIN        010
1513 #define OUTPUT_SHOW_NAME        020
1514 #define OUTPUT_SHOW_NUMBER      040
1515 #define OUTPUT_SHOW_SCORE      0100
1516
1517 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1518 {
1519         int cnt;
1520         const char *cp;
1521         struct origin *suspect = ent->suspect;
1522         char hex[41];
1523
1524         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1525         printf("%s%c%d %d %d\n",
1526                hex,
1527                ent->guilty ? ' ' : '*', // purely for debugging
1528                ent->s_lno + 1,
1529                ent->lno + 1,
1530                ent->num_lines);
1531         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1532                 struct commit_info ci;
1533                 suspect->commit->object.flags |= METAINFO_SHOWN;
1534                 get_commit_info(suspect->commit, &ci, 1);
1535                 printf("author %s\n", ci.author);
1536                 printf("author-mail %s\n", ci.author_mail);
1537                 printf("author-time %lu\n", ci.author_time);
1538                 printf("author-tz %s\n", ci.author_tz);
1539                 printf("committer %s\n", ci.committer);
1540                 printf("committer-mail %s\n", ci.committer_mail);
1541                 printf("committer-time %lu\n", ci.committer_time);
1542                 printf("committer-tz %s\n", ci.committer_tz);
1543                 write_filename_info(suspect->path);
1544                 printf("summary %s\n", ci.summary);
1545                 if (suspect->commit->object.flags & UNINTERESTING)
1546                         printf("boundary\n");
1547         }
1548         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1549                 write_filename_info(suspect->path);
1550
1551         cp = nth_line(sb, ent->lno);
1552         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1553                 char ch;
1554                 if (cnt)
1555                         printf("%s %d %d\n", hex,
1556                                ent->s_lno + 1 + cnt,
1557                                ent->lno + 1 + cnt);
1558                 putchar('\t');
1559                 do {
1560                         ch = *cp++;
1561                         putchar(ch);
1562                 } while (ch != '\n' &&
1563                          cp < sb->final_buf + sb->final_buf_size);
1564         }
1565 }
1566
1567 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1568 {
1569         int cnt;
1570         const char *cp;
1571         struct origin *suspect = ent->suspect;
1572         struct commit_info ci;
1573         char hex[41];
1574         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1575
1576         get_commit_info(suspect->commit, &ci, 1);
1577         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1578
1579         cp = nth_line(sb, ent->lno);
1580         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1581                 char ch;
1582                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1583
1584                 if (suspect->commit->object.flags & UNINTERESTING) {
1585                         if (blank_boundary)
1586                                 memset(hex, ' ', length);
1587                         else if (!cmd_is_annotate) {
1588                                 length--;
1589                                 putchar('^');
1590                         }
1591                 }
1592
1593                 printf("%.*s", length, hex);
1594                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1595                         printf("\t(%10s\t%10s\t%d)", ci.author,
1596                                format_time(ci.author_time, ci.author_tz,
1597                                            show_raw_time),
1598                                ent->lno + 1 + cnt);
1599                 else {
1600                         if (opt & OUTPUT_SHOW_SCORE)
1601                                 printf(" %*d %02d",
1602                                        max_score_digits, ent->score,
1603                                        ent->suspect->refcnt);
1604                         if (opt & OUTPUT_SHOW_NAME)
1605                                 printf(" %-*.*s", longest_file, longest_file,
1606                                        suspect->path);
1607                         if (opt & OUTPUT_SHOW_NUMBER)
1608                                 printf(" %*d", max_orig_digits,
1609                                        ent->s_lno + 1 + cnt);
1610                         printf(" (%-*.*s %10s %*d) ",
1611                                longest_author, longest_author, ci.author,
1612                                format_time(ci.author_time, ci.author_tz,
1613                                            show_raw_time),
1614                                max_digits, ent->lno + 1 + cnt);
1615                 }
1616                 do {
1617                         ch = *cp++;
1618                         putchar(ch);
1619                 } while (ch != '\n' &&
1620                          cp < sb->final_buf + sb->final_buf_size);
1621         }
1622 }
1623
1624 static void output(struct scoreboard *sb, int option)
1625 {
1626         struct blame_entry *ent;
1627
1628         if (option & OUTPUT_PORCELAIN) {
1629                 for (ent = sb->ent; ent; ent = ent->next) {
1630                         struct blame_entry *oth;
1631                         struct origin *suspect = ent->suspect;
1632                         struct commit *commit = suspect->commit;
1633                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1634                                 continue;
1635                         for (oth = ent->next; oth; oth = oth->next) {
1636                                 if ((oth->suspect->commit != commit) ||
1637                                     !strcmp(oth->suspect->path, suspect->path))
1638                                         continue;
1639                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1640                                 break;
1641                         }
1642                 }
1643         }
1644
1645         for (ent = sb->ent; ent; ent = ent->next) {
1646                 if (option & OUTPUT_PORCELAIN)
1647                         emit_porcelain(sb, ent);
1648                 else {
1649                         emit_other(sb, ent, option);
1650                 }
1651         }
1652 }
1653
1654 /*
1655  * To allow quick access to the contents of nth line in the
1656  * final image, prepare an index in the scoreboard.
1657  */
1658 static int prepare_lines(struct scoreboard *sb)
1659 {
1660         const char *buf = sb->final_buf;
1661         unsigned long len = sb->final_buf_size;
1662         int num = 0, incomplete = 0, bol = 1;
1663
1664         if (len && buf[len-1] != '\n')
1665                 incomplete++; /* incomplete line at the end */
1666         while (len--) {
1667                 if (bol) {
1668                         sb->lineno = xrealloc(sb->lineno,
1669                                               sizeof(int* ) * (num + 1));
1670                         sb->lineno[num] = buf - sb->final_buf;
1671                         bol = 0;
1672                 }
1673                 if (*buf++ == '\n') {
1674                         num++;
1675                         bol = 1;
1676                 }
1677         }
1678         sb->lineno = xrealloc(sb->lineno,
1679                               sizeof(int* ) * (num + incomplete + 1));
1680         sb->lineno[num + incomplete] = buf - sb->final_buf;
1681         sb->num_lines = num + incomplete;
1682         return sb->num_lines;
1683 }
1684
1685 /*
1686  * Add phony grafts for use with -S; this is primarily to
1687  * support git-cvsserver that wants to give a linear history
1688  * to its clients.
1689  */
1690 static int read_ancestry(const char *graft_file)
1691 {
1692         FILE *fp = fopen(graft_file, "r");
1693         char buf[1024];
1694         if (!fp)
1695                 return -1;
1696         while (fgets(buf, sizeof(buf), fp)) {
1697                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1698                 int len = strlen(buf);
1699                 struct commit_graft *graft = read_graft_line(buf, len);
1700                 if (graft)
1701                         register_commit_graft(graft, 0);
1702         }
1703         fclose(fp);
1704         return 0;
1705 }
1706
1707 /*
1708  * How many columns do we need to show line numbers in decimal?
1709  */
1710 static int lineno_width(int lines)
1711 {
1712         int i, width;
1713
1714         for (width = 1, i = 10; i <= lines + 1; width++)
1715                 i *= 10;
1716         return width;
1717 }
1718
1719 /*
1720  * How many columns do we need to show line numbers, authors,
1721  * and filenames?
1722  */
1723 static void find_alignment(struct scoreboard *sb, int *option)
1724 {
1725         int longest_src_lines = 0;
1726         int longest_dst_lines = 0;
1727         unsigned largest_score = 0;
1728         struct blame_entry *e;
1729
1730         for (e = sb->ent; e; e = e->next) {
1731                 struct origin *suspect = e->suspect;
1732                 struct commit_info ci;
1733                 int num;
1734
1735                 if (strcmp(suspect->path, sb->path))
1736                         *option |= OUTPUT_SHOW_NAME;
1737                 num = strlen(suspect->path);
1738                 if (longest_file < num)
1739                         longest_file = num;
1740                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1741                         suspect->commit->object.flags |= METAINFO_SHOWN;
1742                         get_commit_info(suspect->commit, &ci, 1);
1743                         num = strlen(ci.author);
1744                         if (longest_author < num)
1745                                 longest_author = num;
1746                 }
1747                 num = e->s_lno + e->num_lines;
1748                 if (longest_src_lines < num)
1749                         longest_src_lines = num;
1750                 num = e->lno + e->num_lines;
1751                 if (longest_dst_lines < num)
1752                         longest_dst_lines = num;
1753                 if (largest_score < ent_score(sb, e))
1754                         largest_score = ent_score(sb, e);
1755         }
1756         max_orig_digits = lineno_width(longest_src_lines);
1757         max_digits = lineno_width(longest_dst_lines);
1758         max_score_digits = lineno_width(largest_score);
1759 }
1760
1761 /*
1762  * For debugging -- origin is refcounted, and this asserts that
1763  * we do not underflow.
1764  */
1765 static void sanity_check_refcnt(struct scoreboard *sb)
1766 {
1767         int baa = 0;
1768         struct blame_entry *ent;
1769
1770         for (ent = sb->ent; ent; ent = ent->next) {
1771                 /* Nobody should have zero or negative refcnt */
1772                 if (ent->suspect->refcnt <= 0) {
1773                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1774                                 ent->suspect->path,
1775                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1776                                 ent->suspect->refcnt);
1777                         baa = 1;
1778                 }
1779         }
1780         for (ent = sb->ent; ent; ent = ent->next) {
1781                 /* Mark the ones that haven't been checked */
1782                 if (0 < ent->suspect->refcnt)
1783                         ent->suspect->refcnt = -ent->suspect->refcnt;
1784         }
1785         for (ent = sb->ent; ent; ent = ent->next) {
1786                 /*
1787                  * ... then pick each and see if they have the the
1788                  * correct refcnt.
1789                  */
1790                 int found;
1791                 struct blame_entry *e;
1792                 struct origin *suspect = ent->suspect;
1793
1794                 if (0 < suspect->refcnt)
1795                         continue;
1796                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1797                 for (found = 0, e = sb->ent; e; e = e->next) {
1798                         if (e->suspect != suspect)
1799                                 continue;
1800                         found++;
1801                 }
1802                 if (suspect->refcnt != found) {
1803                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1804                                 ent->suspect->path,
1805                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1806                                 ent->suspect->refcnt, found);
1807                         baa = 2;
1808                 }
1809         }
1810         if (baa) {
1811                 int opt = 0160;
1812                 find_alignment(sb, &opt);
1813                 output(sb, opt);
1814                 die("Baa %d!", baa);
1815         }
1816 }
1817
1818 /*
1819  * Used for the command line parsing; check if the path exists
1820  * in the working tree.
1821  */
1822 static int has_path_in_work_tree(const char *path)
1823 {
1824         struct stat st;
1825         return !lstat(path, &st);
1826 }
1827
1828 static unsigned parse_score(const char *arg)
1829 {
1830         char *end;
1831         unsigned long score = strtoul(arg, &end, 10);
1832         if (*end)
1833                 return 0;
1834         return score;
1835 }
1836
1837 static const char *add_prefix(const char *prefix, const char *path)
1838 {
1839         if (!prefix || !prefix[0])
1840                 return path;
1841         return prefix_path(prefix, strlen(prefix), path);
1842 }
1843
1844 /*
1845  * Parsing of (comma separated) one item in the -L option
1846  */
1847 static const char *parse_loc(const char *spec,
1848                              struct scoreboard *sb, long lno,
1849                              long begin, long *ret)
1850 {
1851         char *term;
1852         const char *line;
1853         long num;
1854         int reg_error;
1855         regex_t regexp;
1856         regmatch_t match[1];
1857
1858         /* Allow "-L <something>,+20" to mean starting at <something>
1859          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1860          * <something>.
1861          */
1862         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1863                 num = strtol(spec + 1, &term, 10);
1864                 if (term != spec + 1) {
1865                         if (spec[0] == '-')
1866                                 num = 0 - num;
1867                         if (0 < num)
1868                                 *ret = begin + num - 2;
1869                         else if (!num)
1870                                 *ret = begin;
1871                         else
1872                                 *ret = begin + num;
1873                         return term;
1874                 }
1875                 return spec;
1876         }
1877         num = strtol(spec, &term, 10);
1878         if (term != spec) {
1879                 *ret = num;
1880                 return term;
1881         }
1882         if (spec[0] != '/')
1883                 return spec;
1884
1885         /* it could be a regexp of form /.../ */
1886         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1887                 if (*term == '\\')
1888                         term++;
1889         }
1890         if (*term != '/')
1891                 return spec;
1892
1893         /* try [spec+1 .. term-1] as regexp */
1894         *term = 0;
1895         begin--; /* input is in human terms */
1896         line = nth_line(sb, begin);
1897
1898         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1899             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1900                 const char *cp = line + match[0].rm_so;
1901                 const char *nline;
1902
1903                 while (begin++ < lno) {
1904                         nline = nth_line(sb, begin);
1905                         if (line <= cp && cp < nline)
1906                                 break;
1907                         line = nline;
1908                 }
1909                 *ret = begin;
1910                 regfree(&regexp);
1911                 *term++ = '/';
1912                 return term;
1913         }
1914         else {
1915                 char errbuf[1024];
1916                 regerror(reg_error, &regexp, errbuf, 1024);
1917                 die("-L parameter '%s': %s", spec + 1, errbuf);
1918         }
1919 }
1920
1921 /*
1922  * Parsing of -L option
1923  */
1924 static void prepare_blame_range(struct scoreboard *sb,
1925                                 const char *bottomtop,
1926                                 long lno,
1927                                 long *bottom, long *top)
1928 {
1929         const char *term;
1930
1931         term = parse_loc(bottomtop, sb, lno, 1, bottom);
1932         if (*term == ',') {
1933                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1934                 if (*term)
1935                         usage(blame_usage);
1936         }
1937         if (*term)
1938                 usage(blame_usage);
1939 }
1940
1941 static int git_blame_config(const char *var, const char *value)
1942 {
1943         if (!strcmp(var, "blame.showroot")) {
1944                 show_root = git_config_bool(var, value);
1945                 return 0;
1946         }
1947         if (!strcmp(var, "blame.blankboundary")) {
1948                 blank_boundary = git_config_bool(var, value);
1949                 return 0;
1950         }
1951         return git_default_config(var, value);
1952 }
1953
1954 static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
1955 {
1956         struct commit *commit;
1957         struct origin *origin;
1958         unsigned char head_sha1[20];
1959         char *buf;
1960         const char *ident;
1961         int fd;
1962         time_t now;
1963         unsigned long fin_size;
1964         int size, len;
1965         struct cache_entry *ce;
1966         unsigned mode;
1967
1968         if (get_sha1("HEAD", head_sha1))
1969                 die("No such ref: HEAD");
1970
1971         time(&now);
1972         commit = xcalloc(1, sizeof(*commit));
1973         commit->parents = xcalloc(1, sizeof(*commit->parents));
1974         commit->parents->item = lookup_commit_reference(head_sha1);
1975         commit->object.parsed = 1;
1976         commit->date = now;
1977         commit->object.type = OBJ_COMMIT;
1978
1979         origin = make_origin(commit, path);
1980
1981         if (!contents_from || strcmp("-", contents_from)) {
1982                 struct stat st;
1983                 const char *read_from;
1984
1985                 if (contents_from) {
1986                         if (stat(contents_from, &st) < 0)
1987                                 die("Cannot stat %s", contents_from);
1988                         read_from = contents_from;
1989                 }
1990                 else {
1991                         if (lstat(path, &st) < 0)
1992                                 die("Cannot lstat %s", path);
1993                         read_from = path;
1994                 }
1995                 fin_size = xsize_t(st.st_size);
1996                 buf = xmalloc(fin_size+1);
1997                 mode = canon_mode(st.st_mode);
1998                 switch (st.st_mode & S_IFMT) {
1999                 case S_IFREG:
2000                         fd = open(read_from, O_RDONLY);
2001                         if (fd < 0)
2002                                 die("cannot open %s", read_from);
2003                         if (read_in_full(fd, buf, fin_size) != fin_size)
2004                                 die("cannot read %s", read_from);
2005                         break;
2006                 case S_IFLNK:
2007                         if (readlink(read_from, buf, fin_size+1) != fin_size)
2008                                 die("cannot readlink %s", read_from);
2009                         break;
2010                 default:
2011                         die("unsupported file type %s", read_from);
2012                 }
2013         }
2014         else {
2015                 /* Reading from stdin */
2016                 contents_from = "standard input";
2017                 buf = NULL;
2018                 fin_size = 0;
2019                 mode = 0;
2020                 while (1) {
2021                         ssize_t cnt = 8192;
2022                         buf = xrealloc(buf, fin_size + cnt);
2023                         cnt = xread(0, buf + fin_size, cnt);
2024                         if (cnt < 0)
2025                                 die("read error %s from stdin",
2026                                     strerror(errno));
2027                         if (!cnt)
2028                                 break;
2029                         fin_size += cnt;
2030                 }
2031                 buf = xrealloc(buf, fin_size + 1);
2032         }
2033         buf[fin_size] = 0;
2034         origin->file.ptr = buf;
2035         origin->file.size = fin_size;
2036         pretend_sha1_file(buf, fin_size, OBJ_BLOB, origin->blob_sha1);
2037         commit->util = origin;
2038
2039         /*
2040          * Read the current index, replace the path entry with
2041          * origin->blob_sha1 without mucking with its mode or type
2042          * bits; we are not going to write this index out -- we just
2043          * want to run "diff-index --cached".
2044          */
2045         discard_cache();
2046         read_cache();
2047
2048         len = strlen(path);
2049         if (!mode) {
2050                 int pos = cache_name_pos(path, len);
2051                 if (0 <= pos)
2052                         mode = ntohl(active_cache[pos]->ce_mode);
2053                 else
2054                         /* Let's not bother reading from HEAD tree */
2055                         mode = S_IFREG | 0644;
2056         }
2057         size = cache_entry_size(len);
2058         ce = xcalloc(1, size);
2059         hashcpy(ce->sha1, origin->blob_sha1);
2060         memcpy(ce->name, path, len);
2061         ce->ce_flags = create_ce_flags(len, 0);
2062         ce->ce_mode = create_ce_mode(mode);
2063         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2064
2065         /*
2066          * We are not going to write this out, so this does not matter
2067          * right now, but someday we might optimize diff-index --cached
2068          * with cache-tree information.
2069          */
2070         cache_tree_invalidate_path(active_cache_tree, path);
2071
2072         commit->buffer = xmalloc(400);
2073         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2074         snprintf(commit->buffer, 400,
2075                 "tree 0000000000000000000000000000000000000000\n"
2076                 "parent %s\n"
2077                 "author %s\n"
2078                 "committer %s\n\n"
2079                 "Version of %s from %s\n",
2080                 sha1_to_hex(head_sha1),
2081                 ident, ident, path, contents_from ? contents_from : path);
2082         return commit;
2083 }
2084
2085 int cmd_blame(int argc, const char **argv, const char *prefix)
2086 {
2087         struct rev_info revs;
2088         const char *path;
2089         struct scoreboard sb;
2090         struct origin *o;
2091         struct blame_entry *ent;
2092         int i, seen_dashdash, unk, opt;
2093         long bottom, top, lno;
2094         int output_option = 0;
2095         int show_stats = 0;
2096         const char *revs_file = NULL;
2097         const char *final_commit_name = NULL;
2098         enum object_type type;
2099         const char *bottomtop = NULL;
2100         const char *contents_from = NULL;
2101
2102         cmd_is_annotate = !strcmp(argv[0], "annotate");
2103
2104         git_config(git_blame_config);
2105         save_commit_buffer = 0;
2106
2107         opt = 0;
2108         seen_dashdash = 0;
2109         for (unk = i = 1; i < argc; i++) {
2110                 const char *arg = argv[i];
2111                 if (*arg != '-')
2112                         break;
2113                 else if (!strcmp("-b", arg))
2114                         blank_boundary = 1;
2115                 else if (!strcmp("--root", arg))
2116                         show_root = 1;
2117                 else if (!strcmp(arg, "--show-stats"))
2118                         show_stats = 1;
2119                 else if (!strcmp("-c", arg))
2120                         output_option |= OUTPUT_ANNOTATE_COMPAT;
2121                 else if (!strcmp("-t", arg))
2122                         output_option |= OUTPUT_RAW_TIMESTAMP;
2123                 else if (!strcmp("-l", arg))
2124                         output_option |= OUTPUT_LONG_OBJECT_NAME;
2125                 else if (!strcmp("-S", arg) && ++i < argc)
2126                         revs_file = argv[i];
2127                 else if (!prefixcmp(arg, "-M")) {
2128                         opt |= PICKAXE_BLAME_MOVE;
2129                         blame_move_score = parse_score(arg+2);
2130                 }
2131                 else if (!prefixcmp(arg, "-C")) {
2132                         /*
2133                          * -C enables copy from removed files;
2134                          * -C -C enables copy from existing files, but only
2135                          *       when blaming a new file;
2136                          * -C -C -C enables copy from existing files for
2137                          *          everybody
2138                          */
2139                         if (opt & PICKAXE_BLAME_COPY_HARDER)
2140                                 opt |= PICKAXE_BLAME_COPY_HARDEST;
2141                         if (opt & PICKAXE_BLAME_COPY)
2142                                 opt |= PICKAXE_BLAME_COPY_HARDER;
2143                         opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2144                         blame_copy_score = parse_score(arg+2);
2145                 }
2146                 else if (!prefixcmp(arg, "-L")) {
2147                         if (!arg[2]) {
2148                                 if (++i >= argc)
2149                                         usage(blame_usage);
2150                                 arg = argv[i];
2151                         }
2152                         else
2153                                 arg += 2;
2154                         if (bottomtop)
2155                                 die("More than one '-L n,m' option given");
2156                         bottomtop = arg;
2157                 }
2158                 else if (!strcmp("--contents", arg)) {
2159                         if (++i >= argc)
2160                                 usage(blame_usage);
2161                         contents_from = argv[i];
2162                 }
2163                 else if (!strcmp("--incremental", arg))
2164                         incremental = 1;
2165                 else if (!strcmp("--score-debug", arg))
2166                         output_option |= OUTPUT_SHOW_SCORE;
2167                 else if (!strcmp("-f", arg) ||
2168                          !strcmp("--show-name", arg))
2169                         output_option |= OUTPUT_SHOW_NAME;
2170                 else if (!strcmp("-n", arg) ||
2171                          !strcmp("--show-number", arg))
2172                         output_option |= OUTPUT_SHOW_NUMBER;
2173                 else if (!strcmp("-p", arg) ||
2174                          !strcmp("--porcelain", arg))
2175                         output_option |= OUTPUT_PORCELAIN;
2176                 else if (!strcmp("--", arg)) {
2177                         seen_dashdash = 1;
2178                         i++;
2179                         break;
2180                 }
2181                 else
2182                         argv[unk++] = arg;
2183         }
2184
2185         if (!incremental)
2186                 setup_pager();
2187
2188         if (!blame_move_score)
2189                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2190         if (!blame_copy_score)
2191                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2192
2193         /*
2194          * We have collected options unknown to us in argv[1..unk]
2195          * which are to be passed to revision machinery if we are
2196          * going to do the "bottom" processing.
2197          *
2198          * The remaining are:
2199          *
2200          * (1) if seen_dashdash, its either
2201          *     "-options -- <path>" or
2202          *     "-options -- <path> <rev>".
2203          *     but the latter is allowed only if there is no
2204          *     options that we passed to revision machinery.
2205          *
2206          * (2) otherwise, we may have "--" somewhere later and
2207          *     might be looking at the first one of multiple 'rev'
2208          *     parameters (e.g. " master ^next ^maint -- path").
2209          *     See if there is a dashdash first, and give the
2210          *     arguments before that to revision machinery.
2211          *     After that there must be one 'path'.
2212          *
2213          * (3) otherwise, its one of the three:
2214          *     "-options <path> <rev>"
2215          *     "-options <rev> <path>"
2216          *     "-options <path>"
2217          *     but again the first one is allowed only if
2218          *     there is no options that we passed to revision
2219          *     machinery.
2220          */
2221
2222         if (seen_dashdash) {
2223                 /* (1) */
2224                 if (argc <= i)
2225                         usage(blame_usage);
2226                 path = add_prefix(prefix, argv[i]);
2227                 if (i + 1 == argc - 1) {
2228                         if (unk != 1)
2229                                 usage(blame_usage);
2230                         argv[unk++] = argv[i + 1];
2231                 }
2232                 else if (i + 1 != argc)
2233                         /* garbage at end */
2234                         usage(blame_usage);
2235         }
2236         else {
2237                 int j;
2238                 for (j = i; !seen_dashdash && j < argc; j++)
2239                         if (!strcmp(argv[j], "--"))
2240                                 seen_dashdash = j;
2241                 if (seen_dashdash) {
2242                         /* (2) */
2243                         if (seen_dashdash + 1 != argc - 1)
2244                                 usage(blame_usage);
2245                         path = add_prefix(prefix, argv[seen_dashdash + 1]);
2246                         for (j = i; j < seen_dashdash; j++)
2247                                 argv[unk++] = argv[j];
2248                 }
2249                 else {
2250                         /* (3) */
2251                         if (argc <= i)
2252                                 usage(blame_usage);
2253                         path = add_prefix(prefix, argv[i]);
2254                         if (i + 1 == argc - 1) {
2255                                 final_commit_name = argv[i + 1];
2256
2257                                 /* if (unk == 1) we could be getting
2258                                  * old-style
2259                                  */
2260                                 if (unk == 1 && !has_path_in_work_tree(path)) {
2261                                         path = add_prefix(prefix, argv[i + 1]);
2262                                         final_commit_name = argv[i];
2263                                 }
2264                         }
2265                         else if (i != argc - 1)
2266                                 usage(blame_usage); /* garbage at end */
2267
2268                         if (!has_path_in_work_tree(path))
2269                                 die("cannot stat path %s: %s",
2270                                     path, strerror(errno));
2271                 }
2272         }
2273
2274         if (final_commit_name)
2275                 argv[unk++] = final_commit_name;
2276
2277         /*
2278          * Now we got rev and path.  We do not want the path pruning
2279          * but we may want "bottom" processing.
2280          */
2281         argv[unk++] = "--"; /* terminate the rev name */
2282         argv[unk] = NULL;
2283
2284         init_revisions(&revs, NULL);
2285         setup_revisions(unk, argv, &revs, NULL);
2286         memset(&sb, 0, sizeof(sb));
2287
2288         /*
2289          * There must be one and only one positive commit in the
2290          * revs->pending array.
2291          */
2292         for (i = 0; i < revs.pending.nr; i++) {
2293                 struct object *obj = revs.pending.objects[i].item;
2294                 if (obj->flags & UNINTERESTING)
2295                         continue;
2296                 while (obj->type == OBJ_TAG)
2297                         obj = deref_tag(obj, NULL, 0);
2298                 if (obj->type != OBJ_COMMIT)
2299                         die("Non commit %s?",
2300                             revs.pending.objects[i].name);
2301                 if (sb.final)
2302                         die("More than one commit to dig from %s and %s?",
2303                             revs.pending.objects[i].name,
2304                             final_commit_name);
2305                 sb.final = (struct commit *) obj;
2306                 final_commit_name = revs.pending.objects[i].name;
2307         }
2308
2309         if (!sb.final) {
2310                 /*
2311                  * "--not A B -- path" without anything positive;
2312                  * do not default to HEAD, but use the working tree
2313                  * or "--contents".
2314                  */
2315                 sb.final = fake_working_tree_commit(path, contents_from);
2316                 add_pending_object(&revs, &(sb.final->object), ":");
2317         }
2318         else if (contents_from)
2319                 die("Cannot use --contents with final commit object name");
2320
2321         /*
2322          * If we have bottom, this will mark the ancestors of the
2323          * bottom commits we would reach while traversing as
2324          * uninteresting.
2325          */
2326         prepare_revision_walk(&revs);
2327
2328         if (is_null_sha1(sb.final->object.sha1)) {
2329                 char *buf;
2330                 o = sb.final->util;
2331                 buf = xmalloc(o->file.size + 1);
2332                 memcpy(buf, o->file.ptr, o->file.size + 1);
2333                 sb.final_buf = buf;
2334                 sb.final_buf_size = o->file.size;
2335         }
2336         else {
2337                 o = get_origin(&sb, sb.final, path);
2338                 if (fill_blob_sha1(o))
2339                         die("no such path %s in %s", path, final_commit_name);
2340
2341                 sb.final_buf = read_sha1_file(o->blob_sha1, &type,
2342                                               &sb.final_buf_size);
2343         }
2344         num_read_blob++;
2345         lno = prepare_lines(&sb);
2346
2347         bottom = top = 0;
2348         if (bottomtop)
2349                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2350         if (bottom && top && top < bottom) {
2351                 long tmp;
2352                 tmp = top; top = bottom; bottom = tmp;
2353         }
2354         if (bottom < 1)
2355                 bottom = 1;
2356         if (top < 1)
2357                 top = lno;
2358         bottom--;
2359         if (lno < top)
2360                 die("file %s has only %lu lines", path, lno);
2361
2362         ent = xcalloc(1, sizeof(*ent));
2363         ent->lno = bottom;
2364         ent->num_lines = top - bottom;
2365         ent->suspect = o;
2366         ent->s_lno = bottom;
2367
2368         sb.ent = ent;
2369         sb.path = path;
2370
2371         if (revs_file && read_ancestry(revs_file))
2372                 die("reading graft file %s failed: %s",
2373                     revs_file, strerror(errno));
2374
2375         assign_blame(&sb, &revs, opt);
2376
2377         if (incremental)
2378                 return 0;
2379
2380         coalesce(&sb);
2381
2382         if (!(output_option & OUTPUT_PORCELAIN))
2383                 find_alignment(&sb, &output_option);
2384
2385         output(&sb, output_option);
2386         free((void *)sb.final_buf);
2387         for (ent = sb.ent; ent; ) {
2388                 struct blame_entry *e = ent->next;
2389                 free(ent);
2390                 ent = e;
2391         }
2392
2393         if (show_stats) {
2394                 printf("num read blob: %d\n", num_read_blob);
2395                 printf("num get patch: %d\n", num_get_patch);
2396                 printf("num commits: %d\n", num_commits);
2397         }
2398         return 0;
2399 }