add replay and log to the usage string of git-bisect
[git] / builtin-blame.c
1 /*
2  * Pickaxe
3  *
4  * Copyright (c) 2006, Junio C Hamano
5  */
6
7 #include "cache.h"
8 #include "builtin.h"
9 #include "blob.h"
10 #include "commit.h"
11 #include "tag.h"
12 #include "tree-walk.h"
13 #include "diff.h"
14 #include "diffcore.h"
15 #include "revision.h"
16 #include "quote.h"
17 #include "xdiff-interface.h"
18 #include "cache-tree.h"
19
20 static char blame_usage[] =
21 "git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [--incremental] [commit] [--] file\n"
22 "  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
23 "  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
24 "  -l, --long          Show long commit SHA1 (Default: off)\n"
25 "  --root              Do not treat root commits as boundaries (Default: off)\n"
26 "  -t, --time          Show raw timestamp (Default: off)\n"
27 "  -f, --show-name     Show original filename (Default: auto)\n"
28 "  -n, --show-number   Show original linenumber (Default: off)\n"
29 "  -p, --porcelain     Show in a format designed for machine consumption\n"
30 "  -L n,m              Process only line range n,m, counting from 1\n"
31 "  -M, -C              Find line movements within and across files\n"
32 "  --incremental       Show blame entries as we find them, incrementally\n"
33 "  --contents file     Use <file>'s contents as the final image\n"
34 "  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";
35
36 static int longest_file;
37 static int longest_author;
38 static int max_orig_digits;
39 static int max_digits;
40 static int max_score_digits;
41 static int show_root;
42 static int blank_boundary;
43 static int incremental;
44 static int cmd_is_annotate;
45
46 #ifndef DEBUG
47 #define DEBUG 0
48 #endif
49
50 /* stats */
51 static int num_read_blob;
52 static int num_get_patch;
53 static int num_commits;
54
55 #define PICKAXE_BLAME_MOVE              01
56 #define PICKAXE_BLAME_COPY              02
57 #define PICKAXE_BLAME_COPY_HARDER       04
58
59 /*
60  * blame for a blame_entry with score lower than these thresholds
61  * is not passed to the parent using move/copy logic.
62  */
63 static unsigned blame_move_score;
64 static unsigned blame_copy_score;
65 #define BLAME_DEFAULT_MOVE_SCORE        20
66 #define BLAME_DEFAULT_COPY_SCORE        40
67
68 /* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
69 #define METAINFO_SHOWN          (1u<<12)
70 #define MORE_THAN_ONE_PATH      (1u<<13)
71
72 /*
73  * One blob in a commit that is being suspected
74  */
75 struct origin {
76         int refcnt;
77         struct commit *commit;
78         mmfile_t file;
79         unsigned char blob_sha1[20];
80         char path[FLEX_ARRAY];
81 };
82
83 /*
84  * Given an origin, prepare mmfile_t structure to be used by the
85  * diff machinery
86  */
87 static char *fill_origin_blob(struct origin *o, mmfile_t *file)
88 {
89         if (!o->file.ptr) {
90                 char type[10];
91                 num_read_blob++;
92                 file->ptr = read_sha1_file(o->blob_sha1, type,
93                                            (unsigned long *)(&(file->size)));
94                 o->file = *file;
95         }
96         else
97                 *file = o->file;
98         return file->ptr;
99 }
100
101 /*
102  * Origin is refcounted and usually we keep the blob contents to be
103  * reused.
104  */
105 static inline struct origin *origin_incref(struct origin *o)
106 {
107         if (o)
108                 o->refcnt++;
109         return o;
110 }
111
112 static void origin_decref(struct origin *o)
113 {
114         if (o && --o->refcnt <= 0) {
115                 if (o->file.ptr)
116                         free(o->file.ptr);
117                 memset(o, 0, sizeof(*o));
118                 free(o);
119         }
120 }
121
122 /*
123  * Each group of lines is described by a blame_entry; it can be split
124  * as we pass blame to the parents.  They form a linked list in the
125  * scoreboard structure, sorted by the target line number.
126  */
127 struct blame_entry {
128         struct blame_entry *prev;
129         struct blame_entry *next;
130
131         /* the first line of this group in the final image;
132          * internally all line numbers are 0 based.
133          */
134         int lno;
135
136         /* how many lines this group has */
137         int num_lines;
138
139         /* the commit that introduced this group into the final image */
140         struct origin *suspect;
141
142         /* true if the suspect is truly guilty; false while we have not
143          * checked if the group came from one of its parents.
144          */
145         char guilty;
146
147         /* the line number of the first line of this group in the
148          * suspect's file; internally all line numbers are 0 based.
149          */
150         int s_lno;
151
152         /* how significant this entry is -- cached to avoid
153          * scanning the lines over and over.
154          */
155         unsigned score;
156 };
157
158 /*
159  * The current state of the blame assignment.
160  */
161 struct scoreboard {
162         /* the final commit (i.e. where we started digging from) */
163         struct commit *final;
164
165         const char *path;
166
167         /*
168          * The contents in the final image.
169          * Used by many functions to obtain contents of the nth line,
170          * indexed with scoreboard.lineno[blame_entry.lno].
171          */
172         const char *final_buf;
173         unsigned long final_buf_size;
174
175         /* linked list of blames */
176         struct blame_entry *ent;
177
178         /* look-up a line in the final buffer */
179         int num_lines;
180         int *lineno;
181 };
182
183 static int cmp_suspect(struct origin *a, struct origin *b)
184 {
185         int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
186         if (cmp)
187                 return cmp;
188         return strcmp(a->path, b->path);
189 }
190
191 #define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )
192
193 static void sanity_check_refcnt(struct scoreboard *);
194
195 /*
196  * If two blame entries that are next to each other came from
197  * contiguous lines in the same origin (i.e. <commit, path> pair),
198  * merge them together.
199  */
200 static void coalesce(struct scoreboard *sb)
201 {
202         struct blame_entry *ent, *next;
203
204         for (ent = sb->ent; ent && (next = ent->next); ent = next) {
205                 if (!cmp_suspect(ent->suspect, next->suspect) &&
206                     ent->guilty == next->guilty &&
207                     ent->s_lno + ent->num_lines == next->s_lno) {
208                         ent->num_lines += next->num_lines;
209                         ent->next = next->next;
210                         if (ent->next)
211                                 ent->next->prev = ent;
212                         origin_decref(next->suspect);
213                         free(next);
214                         ent->score = 0;
215                         next = ent; /* again */
216                 }
217         }
218
219         if (DEBUG) /* sanity */
220                 sanity_check_refcnt(sb);
221 }
222
223 /*
224  * Given a commit and a path in it, create a new origin structure.
225  * The callers that add blame to the scoreboard should use
226  * get_origin() to obtain shared, refcounted copy instead of calling
227  * this function directly.
228  */
229 static struct origin *make_origin(struct commit *commit, const char *path)
230 {
231         struct origin *o;
232         o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
233         o->commit = commit;
234         o->refcnt = 1;
235         strcpy(o->path, path);
236         return o;
237 }
238
239 /*
240  * Locate an existing origin or create a new one.
241  */
242 static struct origin *get_origin(struct scoreboard *sb,
243                                  struct commit *commit,
244                                  const char *path)
245 {
246         struct blame_entry *e;
247
248         for (e = sb->ent; e; e = e->next) {
249                 if (e->suspect->commit == commit &&
250                     !strcmp(e->suspect->path, path))
251                         return origin_incref(e->suspect);
252         }
253         return make_origin(commit, path);
254 }
255
256 /*
257  * Fill the blob_sha1 field of an origin if it hasn't, so that later
258  * call to fill_origin_blob() can use it to locate the data.  blob_sha1
259  * for an origin is also used to pass the blame for the entire file to
260  * the parent to detect the case where a child's blob is identical to
261  * that of its parent's.
262  */
263 static int fill_blob_sha1(struct origin *origin)
264 {
265         unsigned mode;
266         char type[10];
267
268         if (!is_null_sha1(origin->blob_sha1))
269                 return 0;
270         if (get_tree_entry(origin->commit->object.sha1,
271                            origin->path,
272                            origin->blob_sha1, &mode))
273                 goto error_out;
274         if (sha1_object_info(origin->blob_sha1, type, NULL) ||
275             strcmp(type, blob_type))
276                 goto error_out;
277         return 0;
278  error_out:
279         hashclr(origin->blob_sha1);
280         return -1;
281 }
282
283 /*
284  * We have an origin -- check if the same path exists in the
285  * parent and return an origin structure to represent it.
286  */
287 static struct origin *find_origin(struct scoreboard *sb,
288                                   struct commit *parent,
289                                   struct origin *origin)
290 {
291         struct origin *porigin = NULL;
292         struct diff_options diff_opts;
293         const char *paths[2];
294
295         if (parent->util) {
296                 /*
297                  * Each commit object can cache one origin in that
298                  * commit.  This is a freestanding copy of origin and
299                  * not refcounted.
300                  */
301                 struct origin *cached = parent->util;
302                 if (!strcmp(cached->path, origin->path)) {
303                         /*
304                          * The same path between origin and its parent
305                          * without renaming -- the most common case.
306                          */
307                         porigin = get_origin(sb, parent, cached->path);
308
309                         /*
310                          * If the origin was newly created (i.e. get_origin
311                          * would call make_origin if none is found in the
312                          * scoreboard), it does not know the blob_sha1,
313                          * so copy it.  Otherwise porigin was in the
314                          * scoreboard and already knows blob_sha1.
315                          */
316                         if (porigin->refcnt == 1)
317                                 hashcpy(porigin->blob_sha1, cached->blob_sha1);
318                         return porigin;
319                 }
320                 /* otherwise it was not very useful; free it */
321                 free(parent->util);
322                 parent->util = NULL;
323         }
324
325         /* See if the origin->path is different between parent
326          * and origin first.  Most of the time they are the
327          * same and diff-tree is fairly efficient about this.
328          */
329         diff_setup(&diff_opts);
330         diff_opts.recursive = 1;
331         diff_opts.detect_rename = 0;
332         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
333         paths[0] = origin->path;
334         paths[1] = NULL;
335
336         diff_tree_setup_paths(paths, &diff_opts);
337         if (diff_setup_done(&diff_opts) < 0)
338                 die("diff-setup");
339
340         if (is_null_sha1(origin->commit->object.sha1))
341                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
342         else
343                 diff_tree_sha1(parent->tree->object.sha1,
344                                origin->commit->tree->object.sha1,
345                                "", &diff_opts);
346         diffcore_std(&diff_opts);
347
348         /* It is either one entry that says "modified", or "created",
349          * or nothing.
350          */
351         if (!diff_queued_diff.nr) {
352                 /* The path is the same as parent */
353                 porigin = get_origin(sb, parent, origin->path);
354                 hashcpy(porigin->blob_sha1, origin->blob_sha1);
355         }
356         else if (diff_queued_diff.nr != 1)
357                 die("internal error in blame::find_origin");
358         else {
359                 struct diff_filepair *p = diff_queued_diff.queue[0];
360                 switch (p->status) {
361                 default:
362                         die("internal error in blame::find_origin (%c)",
363                             p->status);
364                 case 'M':
365                         porigin = get_origin(sb, parent, origin->path);
366                         hashcpy(porigin->blob_sha1, p->one->sha1);
367                         break;
368                 case 'A':
369                 case 'T':
370                         /* Did not exist in parent, or type changed */
371                         break;
372                 }
373         }
374         diff_flush(&diff_opts);
375         if (porigin) {
376                 /*
377                  * Create a freestanding copy that is not part of
378                  * the refcounted origin found in the scoreboard, and
379                  * cache it in the commit.
380                  */
381                 struct origin *cached;
382
383                 cached = make_origin(porigin->commit, porigin->path);
384                 hashcpy(cached->blob_sha1, porigin->blob_sha1);
385                 parent->util = cached;
386         }
387         return porigin;
388 }
389
390 /*
391  * We have an origin -- find the path that corresponds to it in its
392  * parent and return an origin structure to represent it.
393  */
394 static struct origin *find_rename(struct scoreboard *sb,
395                                   struct commit *parent,
396                                   struct origin *origin)
397 {
398         struct origin *porigin = NULL;
399         struct diff_options diff_opts;
400         int i;
401         const char *paths[2];
402
403         diff_setup(&diff_opts);
404         diff_opts.recursive = 1;
405         diff_opts.detect_rename = DIFF_DETECT_RENAME;
406         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
407         diff_opts.single_follow = origin->path;
408         paths[0] = NULL;
409         diff_tree_setup_paths(paths, &diff_opts);
410         if (diff_setup_done(&diff_opts) < 0)
411                 die("diff-setup");
412
413         if (is_null_sha1(origin->commit->object.sha1))
414                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
415         else
416                 diff_tree_sha1(parent->tree->object.sha1,
417                                origin->commit->tree->object.sha1,
418                                "", &diff_opts);
419         diffcore_std(&diff_opts);
420
421         for (i = 0; i < diff_queued_diff.nr; i++) {
422                 struct diff_filepair *p = diff_queued_diff.queue[i];
423                 if ((p->status == 'R' || p->status == 'C') &&
424                     !strcmp(p->two->path, origin->path)) {
425                         porigin = get_origin(sb, parent, p->one->path);
426                         hashcpy(porigin->blob_sha1, p->one->sha1);
427                         break;
428                 }
429         }
430         diff_flush(&diff_opts);
431         return porigin;
432 }
433
434 /*
435  * Parsing of patch chunks...
436  */
437 struct chunk {
438         /* line number in postimage; up to but not including this
439          * line is the same as preimage
440          */
441         int same;
442
443         /* preimage line number after this chunk */
444         int p_next;
445
446         /* postimage line number after this chunk */
447         int t_next;
448 };
449
450 struct patch {
451         struct chunk *chunks;
452         int num;
453 };
454
455 struct blame_diff_state {
456         struct xdiff_emit_state xm;
457         struct patch *ret;
458         unsigned hunk_post_context;
459         unsigned hunk_in_pre_context : 1;
460 };
461
462 static void process_u_diff(void *state_, char *line, unsigned long len)
463 {
464         struct blame_diff_state *state = state_;
465         struct chunk *chunk;
466         int off1, off2, len1, len2, num;
467
468         num = state->ret->num;
469         if (len < 4 || line[0] != '@' || line[1] != '@') {
470                 if (state->hunk_in_pre_context && line[0] == ' ')
471                         state->ret->chunks[num - 1].same++;
472                 else {
473                         state->hunk_in_pre_context = 0;
474                         if (line[0] == ' ')
475                                 state->hunk_post_context++;
476                         else
477                                 state->hunk_post_context = 0;
478                 }
479                 return;
480         }
481
482         if (num && state->hunk_post_context) {
483                 chunk = &state->ret->chunks[num - 1];
484                 chunk->p_next -= state->hunk_post_context;
485                 chunk->t_next -= state->hunk_post_context;
486         }
487         state->ret->num = ++num;
488         state->ret->chunks = xrealloc(state->ret->chunks,
489                                       sizeof(struct chunk) * num);
490         chunk = &state->ret->chunks[num - 1];
491         if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
492                 state->ret->num--;
493                 return;
494         }
495
496         /* Line numbers in patch output are one based. */
497         off1--;
498         off2--;
499
500         chunk->same = len2 ? off2 : (off2 + 1);
501
502         chunk->p_next = off1 + (len1 ? len1 : 1);
503         chunk->t_next = chunk->same + len2;
504         state->hunk_in_pre_context = 1;
505         state->hunk_post_context = 0;
506 }
507
508 static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
509                                     int context)
510 {
511         struct blame_diff_state state;
512         xpparam_t xpp;
513         xdemitconf_t xecfg;
514         xdemitcb_t ecb;
515
516         xpp.flags = XDF_NEED_MINIMAL;
517         xecfg.ctxlen = context;
518         xecfg.flags = 0;
519         ecb.outf = xdiff_outf;
520         ecb.priv = &state;
521         memset(&state, 0, sizeof(state));
522         state.xm.consume = process_u_diff;
523         state.ret = xmalloc(sizeof(struct patch));
524         state.ret->chunks = NULL;
525         state.ret->num = 0;
526
527         xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);
528
529         if (state.ret->num) {
530                 struct chunk *chunk;
531                 chunk = &state.ret->chunks[state.ret->num - 1];
532                 chunk->p_next -= state.hunk_post_context;
533                 chunk->t_next -= state.hunk_post_context;
534         }
535         return state.ret;
536 }
537
538 /*
539  * Run diff between two origins and grab the patch output, so that
540  * we can pass blame for lines origin is currently suspected for
541  * to its parent.
542  */
543 static struct patch *get_patch(struct origin *parent, struct origin *origin)
544 {
545         mmfile_t file_p, file_o;
546         struct patch *patch;
547
548         fill_origin_blob(parent, &file_p);
549         fill_origin_blob(origin, &file_o);
550         if (!file_p.ptr || !file_o.ptr)
551                 return NULL;
552         patch = compare_buffer(&file_p, &file_o, 0);
553         num_get_patch++;
554         return patch;
555 }
556
557 static void free_patch(struct patch *p)
558 {
559         free(p->chunks);
560         free(p);
561 }
562
563 /*
564  * Link in a new blame entry to the scoreboard.  Entries that cover the
565  * same line range have been removed from the scoreboard previously.
566  */
567 static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
568 {
569         struct blame_entry *ent, *prev = NULL;
570
571         origin_incref(e->suspect);
572
573         for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
574                 prev = ent;
575
576         /* prev, if not NULL, is the last one that is below e */
577         e->prev = prev;
578         if (prev) {
579                 e->next = prev->next;
580                 prev->next = e;
581         }
582         else {
583                 e->next = sb->ent;
584                 sb->ent = e;
585         }
586         if (e->next)
587                 e->next->prev = e;
588 }
589
590 /*
591  * src typically is on-stack; we want to copy the information in it to
592  * an malloced blame_entry that is already on the linked list of the
593  * scoreboard.  The origin of dst loses a refcnt while the origin of src
594  * gains one.
595  */
596 static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
597 {
598         struct blame_entry *p, *n;
599
600         p = dst->prev;
601         n = dst->next;
602         origin_incref(src->suspect);
603         origin_decref(dst->suspect);
604         memcpy(dst, src, sizeof(*src));
605         dst->prev = p;
606         dst->next = n;
607         dst->score = 0;
608 }
609
610 static const char *nth_line(struct scoreboard *sb, int lno)
611 {
612         return sb->final_buf + sb->lineno[lno];
613 }
614
615 /*
616  * It is known that lines between tlno to same came from parent, and e
617  * has an overlap with that range.  it also is known that parent's
618  * line plno corresponds to e's line tlno.
619  *
620  *                <---- e ----->
621  *                   <------>
622  *                   <------------>
623  *             <------------>
624  *             <------------------>
625  *
626  * Split e into potentially three parts; before this chunk, the chunk
627  * to be blamed for the parent, and after that portion.
628  */
629 static void split_overlap(struct blame_entry *split,
630                           struct blame_entry *e,
631                           int tlno, int plno, int same,
632                           struct origin *parent)
633 {
634         int chunk_end_lno;
635         memset(split, 0, sizeof(struct blame_entry [3]));
636
637         if (e->s_lno < tlno) {
638                 /* there is a pre-chunk part not blamed on parent */
639                 split[0].suspect = origin_incref(e->suspect);
640                 split[0].lno = e->lno;
641                 split[0].s_lno = e->s_lno;
642                 split[0].num_lines = tlno - e->s_lno;
643                 split[1].lno = e->lno + tlno - e->s_lno;
644                 split[1].s_lno = plno;
645         }
646         else {
647                 split[1].lno = e->lno;
648                 split[1].s_lno = plno + (e->s_lno - tlno);
649         }
650
651         if (same < e->s_lno + e->num_lines) {
652                 /* there is a post-chunk part not blamed on parent */
653                 split[2].suspect = origin_incref(e->suspect);
654                 split[2].lno = e->lno + (same - e->s_lno);
655                 split[2].s_lno = e->s_lno + (same - e->s_lno);
656                 split[2].num_lines = e->s_lno + e->num_lines - same;
657                 chunk_end_lno = split[2].lno;
658         }
659         else
660                 chunk_end_lno = e->lno + e->num_lines;
661         split[1].num_lines = chunk_end_lno - split[1].lno;
662
663         /*
664          * if it turns out there is nothing to blame the parent for,
665          * forget about the splitting.  !split[1].suspect signals this.
666          */
667         if (split[1].num_lines < 1)
668                 return;
669         split[1].suspect = origin_incref(parent);
670 }
671
672 /*
673  * split_overlap() divided an existing blame e into up to three parts
674  * in split.  Adjust the linked list of blames in the scoreboard to
675  * reflect the split.
676  */
677 static void split_blame(struct scoreboard *sb,
678                         struct blame_entry *split,
679                         struct blame_entry *e)
680 {
681         struct blame_entry *new_entry;
682
683         if (split[0].suspect && split[2].suspect) {
684                 /* The first part (reuse storage for the existing entry e) */
685                 dup_entry(e, &split[0]);
686
687                 /* The last part -- me */
688                 new_entry = xmalloc(sizeof(*new_entry));
689                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
690                 add_blame_entry(sb, new_entry);
691
692                 /* ... and the middle part -- parent */
693                 new_entry = xmalloc(sizeof(*new_entry));
694                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
695                 add_blame_entry(sb, new_entry);
696         }
697         else if (!split[0].suspect && !split[2].suspect)
698                 /*
699                  * The parent covers the entire area; reuse storage for
700                  * e and replace it with the parent.
701                  */
702                 dup_entry(e, &split[1]);
703         else if (split[0].suspect) {
704                 /* me and then parent */
705                 dup_entry(e, &split[0]);
706
707                 new_entry = xmalloc(sizeof(*new_entry));
708                 memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
709                 add_blame_entry(sb, new_entry);
710         }
711         else {
712                 /* parent and then me */
713                 dup_entry(e, &split[1]);
714
715                 new_entry = xmalloc(sizeof(*new_entry));
716                 memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
717                 add_blame_entry(sb, new_entry);
718         }
719
720         if (DEBUG) { /* sanity */
721                 struct blame_entry *ent;
722                 int lno = sb->ent->lno, corrupt = 0;
723
724                 for (ent = sb->ent; ent; ent = ent->next) {
725                         if (lno != ent->lno)
726                                 corrupt = 1;
727                         if (ent->s_lno < 0)
728                                 corrupt = 1;
729                         lno += ent->num_lines;
730                 }
731                 if (corrupt) {
732                         lno = sb->ent->lno;
733                         for (ent = sb->ent; ent; ent = ent->next) {
734                                 printf("L %8d l %8d n %8d\n",
735                                        lno, ent->lno, ent->num_lines);
736                                 lno = ent->lno + ent->num_lines;
737                         }
738                         die("oops");
739                 }
740         }
741 }
742
743 /*
744  * After splitting the blame, the origins used by the
745  * on-stack blame_entry should lose one refcnt each.
746  */
747 static void decref_split(struct blame_entry *split)
748 {
749         int i;
750
751         for (i = 0; i < 3; i++)
752                 origin_decref(split[i].suspect);
753 }
754
755 /*
756  * Helper for blame_chunk().  blame_entry e is known to overlap with
757  * the patch hunk; split it and pass blame to the parent.
758  */
759 static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
760                           int tlno, int plno, int same,
761                           struct origin *parent)
762 {
763         struct blame_entry split[3];
764
765         split_overlap(split, e, tlno, plno, same, parent);
766         if (split[1].suspect)
767                 split_blame(sb, split, e);
768         decref_split(split);
769 }
770
771 /*
772  * Find the line number of the last line the target is suspected for.
773  */
774 static int find_last_in_target(struct scoreboard *sb, struct origin *target)
775 {
776         struct blame_entry *e;
777         int last_in_target = -1;
778
779         for (e = sb->ent; e; e = e->next) {
780                 if (e->guilty || cmp_suspect(e->suspect, target))
781                         continue;
782                 if (last_in_target < e->s_lno + e->num_lines)
783                         last_in_target = e->s_lno + e->num_lines;
784         }
785         return last_in_target;
786 }
787
788 /*
789  * Process one hunk from the patch between the current suspect for
790  * blame_entry e and its parent.  Find and split the overlap, and
791  * pass blame to the overlapping part to the parent.
792  */
793 static void blame_chunk(struct scoreboard *sb,
794                         int tlno, int plno, int same,
795                         struct origin *target, struct origin *parent)
796 {
797         struct blame_entry *e;
798
799         for (e = sb->ent; e; e = e->next) {
800                 if (e->guilty || cmp_suspect(e->suspect, target))
801                         continue;
802                 if (same <= e->s_lno)
803                         continue;
804                 if (tlno < e->s_lno + e->num_lines)
805                         blame_overlap(sb, e, tlno, plno, same, parent);
806         }
807 }
808
809 /*
810  * We are looking at the origin 'target' and aiming to pass blame
811  * for the lines it is suspected to its parent.  Run diff to find
812  * which lines came from parent and pass blame for them.
813  */
814 static int pass_blame_to_parent(struct scoreboard *sb,
815                                 struct origin *target,
816                                 struct origin *parent)
817 {
818         int i, last_in_target, plno, tlno;
819         struct patch *patch;
820
821         last_in_target = find_last_in_target(sb, target);
822         if (last_in_target < 0)
823                 return 1; /* nothing remains for this target */
824
825         patch = get_patch(parent, target);
826         plno = tlno = 0;
827         for (i = 0; i < patch->num; i++) {
828                 struct chunk *chunk = &patch->chunks[i];
829
830                 blame_chunk(sb, tlno, plno, chunk->same, target, parent);
831                 plno = chunk->p_next;
832                 tlno = chunk->t_next;
833         }
834         /* The rest (i.e. anything after tlno) are the same as the parent */
835         blame_chunk(sb, tlno, plno, last_in_target, target, parent);
836
837         free_patch(patch);
838         return 0;
839 }
840
841 /*
842  * The lines in blame_entry after splitting blames many times can become
843  * very small and trivial, and at some point it becomes pointless to
844  * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
845  * ordinary C program, and it is not worth to say it was copied from
846  * totally unrelated file in the parent.
847  *
848  * Compute how trivial the lines in the blame_entry are.
849  */
850 static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
851 {
852         unsigned score;
853         const char *cp, *ep;
854
855         if (e->score)
856                 return e->score;
857
858         score = 1;
859         cp = nth_line(sb, e->lno);
860         ep = nth_line(sb, e->lno + e->num_lines);
861         while (cp < ep) {
862                 unsigned ch = *((unsigned char *)cp);
863                 if (isalnum(ch))
864                         score++;
865                 cp++;
866         }
867         e->score = score;
868         return score;
869 }
870
871 /*
872  * best_so_far[] and this[] are both a split of an existing blame_entry
873  * that passes blame to the parent.  Maintain best_so_far the best split
874  * so far, by comparing this and best_so_far and copying this into
875  * bst_so_far as needed.
876  */
877 static void copy_split_if_better(struct scoreboard *sb,
878                                  struct blame_entry *best_so_far,
879                                  struct blame_entry *this)
880 {
881         int i;
882
883         if (!this[1].suspect)
884                 return;
885         if (best_so_far[1].suspect) {
886                 if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
887                         return;
888         }
889
890         for (i = 0; i < 3; i++)
891                 origin_incref(this[i].suspect);
892         decref_split(best_so_far);
893         memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
894 }
895
896 /*
897  * Find the lines from parent that are the same as ent so that
898  * we can pass blames to it.  file_p has the blob contents for
899  * the parent.
900  */
901 static void find_copy_in_blob(struct scoreboard *sb,
902                               struct blame_entry *ent,
903                               struct origin *parent,
904                               struct blame_entry *split,
905                               mmfile_t *file_p)
906 {
907         const char *cp;
908         int cnt;
909         mmfile_t file_o;
910         struct patch *patch;
911         int i, plno, tlno;
912
913         /*
914          * Prepare mmfile that contains only the lines in ent.
915          */
916         cp = nth_line(sb, ent->lno);
917         file_o.ptr = (char*) cp;
918         cnt = ent->num_lines;
919
920         while (cnt && cp < sb->final_buf + sb->final_buf_size) {
921                 if (*cp++ == '\n')
922                         cnt--;
923         }
924         file_o.size = cp - file_o.ptr;
925
926         patch = compare_buffer(file_p, &file_o, 1);
927
928         memset(split, 0, sizeof(struct blame_entry [3]));
929         plno = tlno = 0;
930         for (i = 0; i < patch->num; i++) {
931                 struct chunk *chunk = &patch->chunks[i];
932
933                 /* tlno to chunk->same are the same as ent */
934                 if (ent->num_lines <= tlno)
935                         break;
936                 if (tlno < chunk->same) {
937                         struct blame_entry this[3];
938                         split_overlap(this, ent,
939                                       tlno + ent->s_lno, plno,
940                                       chunk->same + ent->s_lno,
941                                       parent);
942                         copy_split_if_better(sb, split, this);
943                         decref_split(this);
944                 }
945                 plno = chunk->p_next;
946                 tlno = chunk->t_next;
947         }
948         free_patch(patch);
949 }
950
951 /*
952  * See if lines currently target is suspected for can be attributed to
953  * parent.
954  */
955 static int find_move_in_parent(struct scoreboard *sb,
956                                struct origin *target,
957                                struct origin *parent)
958 {
959         int last_in_target, made_progress;
960         struct blame_entry *e, split[3];
961         mmfile_t file_p;
962
963         last_in_target = find_last_in_target(sb, target);
964         if (last_in_target < 0)
965                 return 1; /* nothing remains for this target */
966
967         fill_origin_blob(parent, &file_p);
968         if (!file_p.ptr)
969                 return 0;
970
971         made_progress = 1;
972         while (made_progress) {
973                 made_progress = 0;
974                 for (e = sb->ent; e; e = e->next) {
975                         if (e->guilty || cmp_suspect(e->suspect, target))
976                                 continue;
977                         find_copy_in_blob(sb, e, parent, split, &file_p);
978                         if (split[1].suspect &&
979                             blame_move_score < ent_score(sb, &split[1])) {
980                                 split_blame(sb, split, e);
981                                 made_progress = 1;
982                         }
983                         decref_split(split);
984                 }
985         }
986         return 0;
987 }
988
989 struct blame_list {
990         struct blame_entry *ent;
991         struct blame_entry split[3];
992 };
993
994 /*
995  * Count the number of entries the target is suspected for,
996  * and prepare a list of entry and the best split.
997  */
998 static struct blame_list *setup_blame_list(struct scoreboard *sb,
999                                            struct origin *target,
1000                                            int *num_ents_p)
1001 {
1002         struct blame_entry *e;
1003         int num_ents, i;
1004         struct blame_list *blame_list = NULL;
1005
1006         for (e = sb->ent, num_ents = 0; e; e = e->next)
1007                 if (!e->guilty && !cmp_suspect(e->suspect, target))
1008                         num_ents++;
1009         if (num_ents) {
1010                 blame_list = xcalloc(num_ents, sizeof(struct blame_list));
1011                 for (e = sb->ent, i = 0; e; e = e->next)
1012                         if (!e->guilty && !cmp_suspect(e->suspect, target))
1013                                 blame_list[i++].ent = e;
1014         }
1015         *num_ents_p = num_ents;
1016         return blame_list;
1017 }
1018
1019 /*
1020  * For lines target is suspected for, see if we can find code movement
1021  * across file boundary from the parent commit.  porigin is the path
1022  * in the parent we already tried.
1023  */
1024 static int find_copy_in_parent(struct scoreboard *sb,
1025                                struct origin *target,
1026                                struct commit *parent,
1027                                struct origin *porigin,
1028                                int opt)
1029 {
1030         struct diff_options diff_opts;
1031         const char *paths[1];
1032         int i, j;
1033         int retval;
1034         struct blame_list *blame_list;
1035         int num_ents;
1036
1037         blame_list = setup_blame_list(sb, target, &num_ents);
1038         if (!blame_list)
1039                 return 1; /* nothing remains for this target */
1040
1041         diff_setup(&diff_opts);
1042         diff_opts.recursive = 1;
1043         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1044
1045         paths[0] = NULL;
1046         diff_tree_setup_paths(paths, &diff_opts);
1047         if (diff_setup_done(&diff_opts) < 0)
1048                 die("diff-setup");
1049
1050         /* Try "find copies harder" on new path if requested;
1051          * we do not want to use diffcore_rename() actually to
1052          * match things up; find_copies_harder is set only to
1053          * force diff_tree_sha1() to feed all filepairs to diff_queue,
1054          * and this code needs to be after diff_setup_done(), which
1055          * usually makes find-copies-harder imply copy detection.
1056          */
1057         if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
1058             (!porigin || strcmp(target->path, porigin->path)))
1059                 diff_opts.find_copies_harder = 1;
1060
1061         if (is_null_sha1(target->commit->object.sha1))
1062                 do_diff_cache(parent->tree->object.sha1, &diff_opts);
1063         else
1064                 diff_tree_sha1(parent->tree->object.sha1,
1065                                target->commit->tree->object.sha1,
1066                                "", &diff_opts);
1067
1068         if (!diff_opts.find_copies_harder)
1069                 diffcore_std(&diff_opts);
1070
1071         retval = 0;
1072         while (1) {
1073                 int made_progress = 0;
1074
1075                 for (i = 0; i < diff_queued_diff.nr; i++) {
1076                         struct diff_filepair *p = diff_queued_diff.queue[i];
1077                         struct origin *norigin;
1078                         mmfile_t file_p;
1079                         struct blame_entry this[3];
1080
1081                         if (!DIFF_FILE_VALID(p->one))
1082                                 continue; /* does not exist in parent */
1083                         if (porigin && !strcmp(p->one->path, porigin->path))
1084                                 /* find_move already dealt with this path */
1085                                 continue;
1086
1087                         norigin = get_origin(sb, parent, p->one->path);
1088                         hashcpy(norigin->blob_sha1, p->one->sha1);
1089                         fill_origin_blob(norigin, &file_p);
1090                         if (!file_p.ptr)
1091                                 continue;
1092
1093                         for (j = 0; j < num_ents; j++) {
1094                                 find_copy_in_blob(sb, blame_list[j].ent,
1095                                                   norigin, this, &file_p);
1096                                 copy_split_if_better(sb, blame_list[j].split,
1097                                                      this);
1098                                 decref_split(this);
1099                         }
1100                         origin_decref(norigin);
1101                 }
1102
1103                 for (j = 0; j < num_ents; j++) {
1104                         struct blame_entry *split = blame_list[j].split;
1105                         if (split[1].suspect &&
1106                             blame_copy_score < ent_score(sb, &split[1])) {
1107                                 split_blame(sb, split, blame_list[j].ent);
1108                                 made_progress = 1;
1109                         }
1110                         decref_split(split);
1111                 }
1112                 free(blame_list);
1113
1114                 if (!made_progress)
1115                         break;
1116                 blame_list = setup_blame_list(sb, target, &num_ents);
1117                 if (!blame_list) {
1118                         retval = 1;
1119                         break;
1120                 }
1121         }
1122         diff_flush(&diff_opts);
1123
1124         return retval;
1125 }
1126
1127 /*
1128  * The blobs of origin and porigin exactly match, so everything
1129  * origin is suspected for can be blamed on the parent.
1130  */
1131 static void pass_whole_blame(struct scoreboard *sb,
1132                              struct origin *origin, struct origin *porigin)
1133 {
1134         struct blame_entry *e;
1135
1136         if (!porigin->file.ptr && origin->file.ptr) {
1137                 /* Steal its file */
1138                 porigin->file = origin->file;
1139                 origin->file.ptr = NULL;
1140         }
1141         for (e = sb->ent; e; e = e->next) {
1142                 if (cmp_suspect(e->suspect, origin))
1143                         continue;
1144                 origin_incref(porigin);
1145                 origin_decref(e->suspect);
1146                 e->suspect = porigin;
1147         }
1148 }
1149
1150 #define MAXPARENT 16
1151
1152 static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
1153 {
1154         int i, pass;
1155         struct commit *commit = origin->commit;
1156         struct commit_list *parent;
1157         struct origin *parent_origin[MAXPARENT], *porigin;
1158
1159         memset(parent_origin, 0, sizeof(parent_origin));
1160
1161         /* The first pass looks for unrenamed path to optimize for
1162          * common cases, then we look for renames in the second pass.
1163          */
1164         for (pass = 0; pass < 2; pass++) {
1165                 struct origin *(*find)(struct scoreboard *,
1166                                        struct commit *, struct origin *);
1167                 find = pass ? find_rename : find_origin;
1168
1169                 for (i = 0, parent = commit->parents;
1170                      i < MAXPARENT && parent;
1171                      parent = parent->next, i++) {
1172                         struct commit *p = parent->item;
1173                         int j, same;
1174
1175                         if (parent_origin[i])
1176                                 continue;
1177                         if (parse_commit(p))
1178                                 continue;
1179                         porigin = find(sb, p, origin);
1180                         if (!porigin)
1181                                 continue;
1182                         if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1183                                 pass_whole_blame(sb, origin, porigin);
1184                                 origin_decref(porigin);
1185                                 goto finish;
1186                         }
1187                         for (j = same = 0; j < i; j++)
1188                                 if (parent_origin[j] &&
1189                                     !hashcmp(parent_origin[j]->blob_sha1,
1190                                              porigin->blob_sha1)) {
1191                                         same = 1;
1192                                         break;
1193                                 }
1194                         if (!same)
1195                                 parent_origin[i] = porigin;
1196                         else
1197                                 origin_decref(porigin);
1198                 }
1199         }
1200
1201         num_commits++;
1202         for (i = 0, parent = commit->parents;
1203              i < MAXPARENT && parent;
1204              parent = parent->next, i++) {
1205                 struct origin *porigin = parent_origin[i];
1206                 if (!porigin)
1207                         continue;
1208                 if (pass_blame_to_parent(sb, origin, porigin))
1209                         goto finish;
1210         }
1211
1212         /*
1213          * Optionally find moves in parents' files.
1214          */
1215         if (opt & PICKAXE_BLAME_MOVE)
1216                 for (i = 0, parent = commit->parents;
1217                      i < MAXPARENT && parent;
1218                      parent = parent->next, i++) {
1219                         struct origin *porigin = parent_origin[i];
1220                         if (!porigin)
1221                                 continue;
1222                         if (find_move_in_parent(sb, origin, porigin))
1223                                 goto finish;
1224                 }
1225
1226         /*
1227          * Optionally find copies from parents' files.
1228          */
1229         if (opt & PICKAXE_BLAME_COPY)
1230                 for (i = 0, parent = commit->parents;
1231                      i < MAXPARENT && parent;
1232                      parent = parent->next, i++) {
1233                         struct origin *porigin = parent_origin[i];
1234                         if (find_copy_in_parent(sb, origin, parent->item,
1235                                                 porigin, opt))
1236                                 goto finish;
1237                 }
1238
1239  finish:
1240         for (i = 0; i < MAXPARENT; i++)
1241                 origin_decref(parent_origin[i]);
1242 }
1243
1244 /*
1245  * Information on commits, used for output.
1246  */
1247 struct commit_info
1248 {
1249         char *author;
1250         char *author_mail;
1251         unsigned long author_time;
1252         char *author_tz;
1253
1254         /* filled only when asked for details */
1255         char *committer;
1256         char *committer_mail;
1257         unsigned long committer_time;
1258         char *committer_tz;
1259
1260         char *summary;
1261 };
1262
1263 /*
1264  * Parse author/committer line in the commit object buffer
1265  */
1266 static void get_ac_line(const char *inbuf, const char *what,
1267                         int bufsz, char *person, char **mail,
1268                         unsigned long *time, char **tz)
1269 {
1270         int len;
1271         char *tmp, *endp;
1272
1273         tmp = strstr(inbuf, what);
1274         if (!tmp)
1275                 goto error_out;
1276         tmp += strlen(what);
1277         endp = strchr(tmp, '\n');
1278         if (!endp)
1279                 len = strlen(tmp);
1280         else
1281                 len = endp - tmp;
1282         if (bufsz <= len) {
1283         error_out:
1284                 /* Ugh */
1285                 person = *mail = *tz = "(unknown)";
1286                 *time = 0;
1287                 return;
1288         }
1289         memcpy(person, tmp, len);
1290
1291         tmp = person;
1292         tmp += len;
1293         *tmp = 0;
1294         while (*tmp != ' ')
1295                 tmp--;
1296         *tz = tmp+1;
1297
1298         *tmp = 0;
1299         while (*tmp != ' ')
1300                 tmp--;
1301         *time = strtoul(tmp, NULL, 10);
1302
1303         *tmp = 0;
1304         while (*tmp != ' ')
1305                 tmp--;
1306         *mail = tmp + 1;
1307         *tmp = 0;
1308 }
1309
1310 static void get_commit_info(struct commit *commit,
1311                             struct commit_info *ret,
1312                             int detailed)
1313 {
1314         int len;
1315         char *tmp, *endp;
1316         static char author_buf[1024];
1317         static char committer_buf[1024];
1318         static char summary_buf[1024];
1319
1320         /*
1321          * We've operated without save_commit_buffer, so
1322          * we now need to populate them for output.
1323          */
1324         if (!commit->buffer) {
1325                 char type[20];
1326                 unsigned long size;
1327                 commit->buffer =
1328                         read_sha1_file(commit->object.sha1, type, &size);
1329         }
1330         ret->author = author_buf;
1331         get_ac_line(commit->buffer, "\nauthor ",
1332                     sizeof(author_buf), author_buf, &ret->author_mail,
1333                     &ret->author_time, &ret->author_tz);
1334
1335         if (!detailed)
1336                 return;
1337
1338         ret->committer = committer_buf;
1339         get_ac_line(commit->buffer, "\ncommitter ",
1340                     sizeof(committer_buf), committer_buf, &ret->committer_mail,
1341                     &ret->committer_time, &ret->committer_tz);
1342
1343         ret->summary = summary_buf;
1344         tmp = strstr(commit->buffer, "\n\n");
1345         if (!tmp) {
1346         error_out:
1347                 sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
1348                 return;
1349         }
1350         tmp += 2;
1351         endp = strchr(tmp, '\n');
1352         if (!endp)
1353                 endp = tmp + strlen(tmp);
1354         len = endp - tmp;
1355         if (len >= sizeof(summary_buf) || len == 0)
1356                 goto error_out;
1357         memcpy(summary_buf, tmp, len);
1358         summary_buf[len] = 0;
1359 }
1360
1361 /*
1362  * To allow LF and other nonportable characters in pathnames,
1363  * they are c-style quoted as needed.
1364  */
1365 static void write_filename_info(const char *path)
1366 {
1367         printf("filename ");
1368         write_name_quoted(NULL, 0, path, 1, stdout);
1369         putchar('\n');
1370 }
1371
1372 /*
1373  * The blame_entry is found to be guilty for the range.  Mark it
1374  * as such, and show it in incremental output.
1375  */
1376 static void found_guilty_entry(struct blame_entry *ent)
1377 {
1378         if (ent->guilty)
1379                 return;
1380         ent->guilty = 1;
1381         if (incremental) {
1382                 struct origin *suspect = ent->suspect;
1383
1384                 printf("%s %d %d %d\n",
1385                        sha1_to_hex(suspect->commit->object.sha1),
1386                        ent->s_lno + 1, ent->lno + 1, ent->num_lines);
1387                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1388                         struct commit_info ci;
1389                         suspect->commit->object.flags |= METAINFO_SHOWN;
1390                         get_commit_info(suspect->commit, &ci, 1);
1391                         printf("author %s\n", ci.author);
1392                         printf("author-mail %s\n", ci.author_mail);
1393                         printf("author-time %lu\n", ci.author_time);
1394                         printf("author-tz %s\n", ci.author_tz);
1395                         printf("committer %s\n", ci.committer);
1396                         printf("committer-mail %s\n", ci.committer_mail);
1397                         printf("committer-time %lu\n", ci.committer_time);
1398                         printf("committer-tz %s\n", ci.committer_tz);
1399                         printf("summary %s\n", ci.summary);
1400                         if (suspect->commit->object.flags & UNINTERESTING)
1401                                 printf("boundary\n");
1402                 }
1403                 write_filename_info(suspect->path);
1404         }
1405 }
1406
1407 /*
1408  * The main loop -- while the scoreboard has lines whose true origin
1409  * is still unknown, pick one blame_entry, and allow its current
1410  * suspect to pass blames to its parents.
1411  */
1412 static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
1413 {
1414         while (1) {
1415                 struct blame_entry *ent;
1416                 struct commit *commit;
1417                 struct origin *suspect = NULL;
1418
1419                 /* find one suspect to break down */
1420                 for (ent = sb->ent; !suspect && ent; ent = ent->next)
1421                         if (!ent->guilty)
1422                                 suspect = ent->suspect;
1423                 if (!suspect)
1424                         return; /* all done */
1425
1426                 /*
1427                  * We will use this suspect later in the loop,
1428                  * so hold onto it in the meantime.
1429                  */
1430                 origin_incref(suspect);
1431                 commit = suspect->commit;
1432                 if (!commit->object.parsed)
1433                         parse_commit(commit);
1434                 if (!(commit->object.flags & UNINTERESTING) &&
1435                     !(revs->max_age != -1 && commit->date < revs->max_age))
1436                         pass_blame(sb, suspect, opt);
1437                 else {
1438                         commit->object.flags |= UNINTERESTING;
1439                         if (commit->object.parsed)
1440                                 mark_parents_uninteresting(commit);
1441                 }
1442                 /* treat root commit as boundary */
1443                 if (!commit->parents && !show_root)
1444                         commit->object.flags |= UNINTERESTING;
1445
1446                 /* Take responsibility for the remaining entries */
1447                 for (ent = sb->ent; ent; ent = ent->next)
1448                         if (!cmp_suspect(ent->suspect, suspect))
1449                                 found_guilty_entry(ent);
1450                 origin_decref(suspect);
1451
1452                 if (DEBUG) /* sanity */
1453                         sanity_check_refcnt(sb);
1454         }
1455 }
1456
1457 static const char *format_time(unsigned long time, const char *tz_str,
1458                                int show_raw_time)
1459 {
1460         static char time_buf[128];
1461         time_t t = time;
1462         int minutes, tz;
1463         struct tm *tm;
1464
1465         if (show_raw_time) {
1466                 sprintf(time_buf, "%lu %s", time, tz_str);
1467                 return time_buf;
1468         }
1469
1470         tz = atoi(tz_str);
1471         minutes = tz < 0 ? -tz : tz;
1472         minutes = (minutes / 100)*60 + (minutes % 100);
1473         minutes = tz < 0 ? -minutes : minutes;
1474         t = time + minutes * 60;
1475         tm = gmtime(&t);
1476
1477         strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
1478         strcat(time_buf, tz_str);
1479         return time_buf;
1480 }
1481
1482 #define OUTPUT_ANNOTATE_COMPAT  001
1483 #define OUTPUT_LONG_OBJECT_NAME 002
1484 #define OUTPUT_RAW_TIMESTAMP    004
1485 #define OUTPUT_PORCELAIN        010
1486 #define OUTPUT_SHOW_NAME        020
1487 #define OUTPUT_SHOW_NUMBER      040
1488 #define OUTPUT_SHOW_SCORE      0100
1489
1490 static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
1491 {
1492         int cnt;
1493         const char *cp;
1494         struct origin *suspect = ent->suspect;
1495         char hex[41];
1496
1497         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1498         printf("%s%c%d %d %d\n",
1499                hex,
1500                ent->guilty ? ' ' : '*', // purely for debugging
1501                ent->s_lno + 1,
1502                ent->lno + 1,
1503                ent->num_lines);
1504         if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1505                 struct commit_info ci;
1506                 suspect->commit->object.flags |= METAINFO_SHOWN;
1507                 get_commit_info(suspect->commit, &ci, 1);
1508                 printf("author %s\n", ci.author);
1509                 printf("author-mail %s\n", ci.author_mail);
1510                 printf("author-time %lu\n", ci.author_time);
1511                 printf("author-tz %s\n", ci.author_tz);
1512                 printf("committer %s\n", ci.committer);
1513                 printf("committer-mail %s\n", ci.committer_mail);
1514                 printf("committer-time %lu\n", ci.committer_time);
1515                 printf("committer-tz %s\n", ci.committer_tz);
1516                 write_filename_info(suspect->path);
1517                 printf("summary %s\n", ci.summary);
1518                 if (suspect->commit->object.flags & UNINTERESTING)
1519                         printf("boundary\n");
1520         }
1521         else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1522                 write_filename_info(suspect->path);
1523
1524         cp = nth_line(sb, ent->lno);
1525         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1526                 char ch;
1527                 if (cnt)
1528                         printf("%s %d %d\n", hex,
1529                                ent->s_lno + 1 + cnt,
1530                                ent->lno + 1 + cnt);
1531                 putchar('\t');
1532                 do {
1533                         ch = *cp++;
1534                         putchar(ch);
1535                 } while (ch != '\n' &&
1536                          cp < sb->final_buf + sb->final_buf_size);
1537         }
1538 }
1539
1540 static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
1541 {
1542         int cnt;
1543         const char *cp;
1544         struct origin *suspect = ent->suspect;
1545         struct commit_info ci;
1546         char hex[41];
1547         int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
1548
1549         get_commit_info(suspect->commit, &ci, 1);
1550         strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
1551
1552         cp = nth_line(sb, ent->lno);
1553         for (cnt = 0; cnt < ent->num_lines; cnt++) {
1554                 char ch;
1555                 int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;
1556
1557                 if (suspect->commit->object.flags & UNINTERESTING) {
1558                         if (blank_boundary)
1559                                 memset(hex, ' ', length);
1560                         else if (!cmd_is_annotate) {
1561                                 length--;
1562                                 putchar('^');
1563                         }
1564                 }
1565
1566                 printf("%.*s", length, hex);
1567                 if (opt & OUTPUT_ANNOTATE_COMPAT)
1568                         printf("\t(%10s\t%10s\t%d)", ci.author,
1569                                format_time(ci.author_time, ci.author_tz,
1570                                            show_raw_time),
1571                                ent->lno + 1 + cnt);
1572                 else {
1573                         if (opt & OUTPUT_SHOW_SCORE)
1574                                 printf(" %*d %02d",
1575                                        max_score_digits, ent->score,
1576                                        ent->suspect->refcnt);
1577                         if (opt & OUTPUT_SHOW_NAME)
1578                                 printf(" %-*.*s", longest_file, longest_file,
1579                                        suspect->path);
1580                         if (opt & OUTPUT_SHOW_NUMBER)
1581                                 printf(" %*d", max_orig_digits,
1582                                        ent->s_lno + 1 + cnt);
1583                         printf(" (%-*.*s %10s %*d) ",
1584                                longest_author, longest_author, ci.author,
1585                                format_time(ci.author_time, ci.author_tz,
1586                                            show_raw_time),
1587                                max_digits, ent->lno + 1 + cnt);
1588                 }
1589                 do {
1590                         ch = *cp++;
1591                         putchar(ch);
1592                 } while (ch != '\n' &&
1593                          cp < sb->final_buf + sb->final_buf_size);
1594         }
1595 }
1596
1597 static void output(struct scoreboard *sb, int option)
1598 {
1599         struct blame_entry *ent;
1600
1601         if (option & OUTPUT_PORCELAIN) {
1602                 for (ent = sb->ent; ent; ent = ent->next) {
1603                         struct blame_entry *oth;
1604                         struct origin *suspect = ent->suspect;
1605                         struct commit *commit = suspect->commit;
1606                         if (commit->object.flags & MORE_THAN_ONE_PATH)
1607                                 continue;
1608                         for (oth = ent->next; oth; oth = oth->next) {
1609                                 if ((oth->suspect->commit != commit) ||
1610                                     !strcmp(oth->suspect->path, suspect->path))
1611                                         continue;
1612                                 commit->object.flags |= MORE_THAN_ONE_PATH;
1613                                 break;
1614                         }
1615                 }
1616         }
1617
1618         for (ent = sb->ent; ent; ent = ent->next) {
1619                 if (option & OUTPUT_PORCELAIN)
1620                         emit_porcelain(sb, ent);
1621                 else {
1622                         emit_other(sb, ent, option);
1623                 }
1624         }
1625 }
1626
1627 /*
1628  * To allow quick access to the contents of nth line in the
1629  * final image, prepare an index in the scoreboard.
1630  */
1631 static int prepare_lines(struct scoreboard *sb)
1632 {
1633         const char *buf = sb->final_buf;
1634         unsigned long len = sb->final_buf_size;
1635         int num = 0, incomplete = 0, bol = 1;
1636
1637         if (len && buf[len-1] != '\n')
1638                 incomplete++; /* incomplete line at the end */
1639         while (len--) {
1640                 if (bol) {
1641                         sb->lineno = xrealloc(sb->lineno,
1642                                               sizeof(int* ) * (num + 1));
1643                         sb->lineno[num] = buf - sb->final_buf;
1644                         bol = 0;
1645                 }
1646                 if (*buf++ == '\n') {
1647                         num++;
1648                         bol = 1;
1649                 }
1650         }
1651         sb->lineno = xrealloc(sb->lineno,
1652                               sizeof(int* ) * (num + incomplete + 1));
1653         sb->lineno[num + incomplete] = buf - sb->final_buf;
1654         sb->num_lines = num + incomplete;
1655         return sb->num_lines;
1656 }
1657
1658 /*
1659  * Add phony grafts for use with -S; this is primarily to
1660  * support git-cvsserver that wants to give a linear history
1661  * to its clients.
1662  */
1663 static int read_ancestry(const char *graft_file)
1664 {
1665         FILE *fp = fopen(graft_file, "r");
1666         char buf[1024];
1667         if (!fp)
1668                 return -1;
1669         while (fgets(buf, sizeof(buf), fp)) {
1670                 /* The format is just "Commit Parent1 Parent2 ...\n" */
1671                 int len = strlen(buf);
1672                 struct commit_graft *graft = read_graft_line(buf, len);
1673                 if (graft)
1674                         register_commit_graft(graft, 0);
1675         }
1676         fclose(fp);
1677         return 0;
1678 }
1679
1680 /*
1681  * How many columns do we need to show line numbers in decimal?
1682  */
1683 static int lineno_width(int lines)
1684 {
1685         int i, width;
1686
1687         for (width = 1, i = 10; i <= lines + 1; width++)
1688                 i *= 10;
1689         return width;
1690 }
1691
1692 /*
1693  * How many columns do we need to show line numbers, authors,
1694  * and filenames?
1695  */
1696 static void find_alignment(struct scoreboard *sb, int *option)
1697 {
1698         int longest_src_lines = 0;
1699         int longest_dst_lines = 0;
1700         unsigned largest_score = 0;
1701         struct blame_entry *e;
1702
1703         for (e = sb->ent; e; e = e->next) {
1704                 struct origin *suspect = e->suspect;
1705                 struct commit_info ci;
1706                 int num;
1707
1708                 if (strcmp(suspect->path, sb->path))
1709                         *option |= OUTPUT_SHOW_NAME;
1710                 num = strlen(suspect->path);
1711                 if (longest_file < num)
1712                         longest_file = num;
1713                 if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
1714                         suspect->commit->object.flags |= METAINFO_SHOWN;
1715                         get_commit_info(suspect->commit, &ci, 1);
1716                         num = strlen(ci.author);
1717                         if (longest_author < num)
1718                                 longest_author = num;
1719                 }
1720                 num = e->s_lno + e->num_lines;
1721                 if (longest_src_lines < num)
1722                         longest_src_lines = num;
1723                 num = e->lno + e->num_lines;
1724                 if (longest_dst_lines < num)
1725                         longest_dst_lines = num;
1726                 if (largest_score < ent_score(sb, e))
1727                         largest_score = ent_score(sb, e);
1728         }
1729         max_orig_digits = lineno_width(longest_src_lines);
1730         max_digits = lineno_width(longest_dst_lines);
1731         max_score_digits = lineno_width(largest_score);
1732 }
1733
1734 /*
1735  * For debugging -- origin is refcounted, and this asserts that
1736  * we do not underflow.
1737  */
1738 static void sanity_check_refcnt(struct scoreboard *sb)
1739 {
1740         int baa = 0;
1741         struct blame_entry *ent;
1742
1743         for (ent = sb->ent; ent; ent = ent->next) {
1744                 /* Nobody should have zero or negative refcnt */
1745                 if (ent->suspect->refcnt <= 0) {
1746                         fprintf(stderr, "%s in %s has negative refcnt %d\n",
1747                                 ent->suspect->path,
1748                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1749                                 ent->suspect->refcnt);
1750                         baa = 1;
1751                 }
1752         }
1753         for (ent = sb->ent; ent; ent = ent->next) {
1754                 /* Mark the ones that haven't been checked */
1755                 if (0 < ent->suspect->refcnt)
1756                         ent->suspect->refcnt = -ent->suspect->refcnt;
1757         }
1758         for (ent = sb->ent; ent; ent = ent->next) {
1759                 /*
1760                  * ... then pick each and see if they have the the
1761                  * correct refcnt.
1762                  */
1763                 int found;
1764                 struct blame_entry *e;
1765                 struct origin *suspect = ent->suspect;
1766
1767                 if (0 < suspect->refcnt)
1768                         continue;
1769                 suspect->refcnt = -suspect->refcnt; /* Unmark */
1770                 for (found = 0, e = sb->ent; e; e = e->next) {
1771                         if (e->suspect != suspect)
1772                                 continue;
1773                         found++;
1774                 }
1775                 if (suspect->refcnt != found) {
1776                         fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
1777                                 ent->suspect->path,
1778                                 sha1_to_hex(ent->suspect->commit->object.sha1),
1779                                 ent->suspect->refcnt, found);
1780                         baa = 2;
1781                 }
1782         }
1783         if (baa) {
1784                 int opt = 0160;
1785                 find_alignment(sb, &opt);
1786                 output(sb, opt);
1787                 die("Baa %d!", baa);
1788         }
1789 }
1790
1791 /*
1792  * Used for the command line parsing; check if the path exists
1793  * in the working tree.
1794  */
1795 static int has_path_in_work_tree(const char *path)
1796 {
1797         struct stat st;
1798         return !lstat(path, &st);
1799 }
1800
1801 static unsigned parse_score(const char *arg)
1802 {
1803         char *end;
1804         unsigned long score = strtoul(arg, &end, 10);
1805         if (*end)
1806                 return 0;
1807         return score;
1808 }
1809
1810 static const char *add_prefix(const char *prefix, const char *path)
1811 {
1812         if (!prefix || !prefix[0])
1813                 return path;
1814         return prefix_path(prefix, strlen(prefix), path);
1815 }
1816
1817 /*
1818  * Parsing of (comma separated) one item in the -L option
1819  */
1820 static const char *parse_loc(const char *spec,
1821                              struct scoreboard *sb, long lno,
1822                              long begin, long *ret)
1823 {
1824         char *term;
1825         const char *line;
1826         long num;
1827         int reg_error;
1828         regex_t regexp;
1829         regmatch_t match[1];
1830
1831         /* Allow "-L <something>,+20" to mean starting at <something>
1832          * for 20 lines, or "-L <something>,-5" for 5 lines ending at
1833          * <something>.
1834          */
1835         if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
1836                 num = strtol(spec + 1, &term, 10);
1837                 if (term != spec + 1) {
1838                         if (spec[0] == '-')
1839                                 num = 0 - num;
1840                         if (0 < num)
1841                                 *ret = begin + num - 2;
1842                         else if (!num)
1843                                 *ret = begin;
1844                         else
1845                                 *ret = begin + num;
1846                         return term;
1847                 }
1848                 return spec;
1849         }
1850         num = strtol(spec, &term, 10);
1851         if (term != spec) {
1852                 *ret = num;
1853                 return term;
1854         }
1855         if (spec[0] != '/')
1856                 return spec;
1857
1858         /* it could be a regexp of form /.../ */
1859         for (term = (char*) spec + 1; *term && *term != '/'; term++) {
1860                 if (*term == '\\')
1861                         term++;
1862         }
1863         if (*term != '/')
1864                 return spec;
1865
1866         /* try [spec+1 .. term-1] as regexp */
1867         *term = 0;
1868         begin--; /* input is in human terms */
1869         line = nth_line(sb, begin);
1870
1871         if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
1872             !(reg_error = regexec(&regexp, line, 1, match, 0))) {
1873                 const char *cp = line + match[0].rm_so;
1874                 const char *nline;
1875
1876                 while (begin++ < lno) {
1877                         nline = nth_line(sb, begin);
1878                         if (line <= cp && cp < nline)
1879                                 break;
1880                         line = nline;
1881                 }
1882                 *ret = begin;
1883                 regfree(&regexp);
1884                 *term++ = '/';
1885                 return term;
1886         }
1887         else {
1888                 char errbuf[1024];
1889                 regerror(reg_error, &regexp, errbuf, 1024);
1890                 die("-L parameter '%s': %s", spec + 1, errbuf);
1891         }
1892 }
1893
1894 /*
1895  * Parsing of -L option
1896  */
1897 static void prepare_blame_range(struct scoreboard *sb,
1898                                 const char *bottomtop,
1899                                 long lno,
1900                                 long *bottom, long *top)
1901 {
1902         const char *term;
1903
1904         term = parse_loc(bottomtop, sb, lno, 1, bottom);
1905         if (*term == ',') {
1906                 term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
1907                 if (*term)
1908                         usage(blame_usage);
1909         }
1910         if (*term)
1911                 usage(blame_usage);
1912 }
1913
1914 static int git_blame_config(const char *var, const char *value)
1915 {
1916         if (!strcmp(var, "blame.showroot")) {
1917                 show_root = git_config_bool(var, value);
1918                 return 0;
1919         }
1920         if (!strcmp(var, "blame.blankboundary")) {
1921                 blank_boundary = git_config_bool(var, value);
1922                 return 0;
1923         }
1924         return git_default_config(var, value);
1925 }
1926
1927 static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
1928 {
1929         struct commit *commit;
1930         struct origin *origin;
1931         unsigned char head_sha1[20];
1932         char *buf;
1933         const char *ident;
1934         int fd;
1935         time_t now;
1936         unsigned long fin_size;
1937         int size, len;
1938         struct cache_entry *ce;
1939         unsigned mode;
1940
1941         if (get_sha1("HEAD", head_sha1))
1942                 die("No such ref: HEAD");
1943
1944         time(&now);
1945         commit = xcalloc(1, sizeof(*commit));
1946         commit->parents = xcalloc(1, sizeof(*commit->parents));
1947         commit->parents->item = lookup_commit_reference(head_sha1);
1948         commit->object.parsed = 1;
1949         commit->date = now;
1950         commit->object.type = OBJ_COMMIT;
1951
1952         origin = make_origin(commit, path);
1953
1954         if (!contents_from || strcmp("-", contents_from)) {
1955                 struct stat st;
1956                 const char *read_from;
1957
1958                 if (contents_from) {
1959                         if (stat(contents_from, &st) < 0)
1960                                 die("Cannot stat %s", contents_from);
1961                         read_from = contents_from;
1962                 }
1963                 else {
1964                         if (lstat(path, &st) < 0)
1965                                 die("Cannot lstat %s", path);
1966                         read_from = path;
1967                 }
1968                 fin_size = st.st_size;
1969                 buf = xmalloc(fin_size+1);
1970                 mode = canon_mode(st.st_mode);
1971                 switch (st.st_mode & S_IFMT) {
1972                 case S_IFREG:
1973                         fd = open(read_from, O_RDONLY);
1974                         if (fd < 0)
1975                                 die("cannot open %s", read_from);
1976                         if (read_in_full(fd, buf, fin_size) != fin_size)
1977                                 die("cannot read %s", read_from);
1978                         break;
1979                 case S_IFLNK:
1980                         if (readlink(read_from, buf, fin_size+1) != fin_size)
1981                                 die("cannot readlink %s", read_from);
1982                         break;
1983                 default:
1984                         die("unsupported file type %s", read_from);
1985                 }
1986         }
1987         else {
1988                 /* Reading from stdin */
1989                 contents_from = "standard input";
1990                 buf = NULL;
1991                 fin_size = 0;
1992                 mode = 0;
1993                 while (1) {
1994                         ssize_t cnt = 8192;
1995                         buf = xrealloc(buf, fin_size + cnt);
1996                         cnt = xread(0, buf + fin_size, cnt);
1997                         if (cnt < 0)
1998                                 die("read error %s from stdin",
1999                                     strerror(errno));
2000                         if (!cnt)
2001                                 break;
2002                         fin_size += cnt;
2003                 }
2004                 buf = xrealloc(buf, fin_size + 1);
2005         }
2006         buf[fin_size] = 0;
2007         origin->file.ptr = buf;
2008         origin->file.size = fin_size;
2009         pretend_sha1_file(buf, fin_size, blob_type, origin->blob_sha1);
2010         commit->util = origin;
2011
2012         /*
2013          * Read the current index, replace the path entry with
2014          * origin->blob_sha1 without mucking with its mode or type
2015          * bits; we are not going to write this index out -- we just
2016          * want to run "diff-index --cached".
2017          */
2018         discard_cache();
2019         read_cache();
2020
2021         len = strlen(path);
2022         if (!mode) {
2023                 int pos = cache_name_pos(path, len);
2024                 if (0 <= pos)
2025                         mode = ntohl(active_cache[pos]->ce_mode);
2026                 else
2027                         /* Let's not bother reading from HEAD tree */
2028                         mode = S_IFREG | 0644;
2029         }
2030         size = cache_entry_size(len);
2031         ce = xcalloc(1, size);
2032         hashcpy(ce->sha1, origin->blob_sha1);
2033         memcpy(ce->name, path, len);
2034         ce->ce_flags = create_ce_flags(len, 0);
2035         ce->ce_mode = create_ce_mode(mode);
2036         add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
2037
2038         /*
2039          * We are not going to write this out, so this does not matter
2040          * right now, but someday we might optimize diff-index --cached
2041          * with cache-tree information.
2042          */
2043         cache_tree_invalidate_path(active_cache_tree, path);
2044
2045         commit->buffer = xmalloc(400);
2046         ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
2047         sprintf(commit->buffer,
2048                 "tree 0000000000000000000000000000000000000000\n"
2049                 "parent %s\n"
2050                 "author %s\n"
2051                 "committer %s\n\n"
2052                 "Version of %s from %s\n",
2053                 sha1_to_hex(head_sha1),
2054                 ident, ident, path, contents_from ? contents_from : path);
2055         return commit;
2056 }
2057
2058 int cmd_blame(int argc, const char **argv, const char *prefix)
2059 {
2060         struct rev_info revs;
2061         const char *path;
2062         struct scoreboard sb;
2063         struct origin *o;
2064         struct blame_entry *ent;
2065         int i, seen_dashdash, unk, opt;
2066         long bottom, top, lno;
2067         int output_option = 0;
2068         const char *revs_file = NULL;
2069         const char *final_commit_name = NULL;
2070         char type[10];
2071         const char *bottomtop = NULL;
2072         const char *contents_from = NULL;
2073
2074         cmd_is_annotate = !strcmp(argv[0], "annotate");
2075
2076         git_config(git_blame_config);
2077         save_commit_buffer = 0;
2078
2079         opt = 0;
2080         seen_dashdash = 0;
2081         for (unk = i = 1; i < argc; i++) {
2082                 const char *arg = argv[i];
2083                 if (*arg != '-')
2084                         break;
2085                 else if (!strcmp("-b", arg))
2086                         blank_boundary = 1;
2087                 else if (!strcmp("--root", arg))
2088                         show_root = 1;
2089                 else if (!strcmp("-c", arg))
2090                         output_option |= OUTPUT_ANNOTATE_COMPAT;
2091                 else if (!strcmp("-t", arg))
2092                         output_option |= OUTPUT_RAW_TIMESTAMP;
2093                 else if (!strcmp("-l", arg))
2094                         output_option |= OUTPUT_LONG_OBJECT_NAME;
2095                 else if (!strcmp("-S", arg) && ++i < argc)
2096                         revs_file = argv[i];
2097                 else if (!strncmp("-M", arg, 2)) {
2098                         opt |= PICKAXE_BLAME_MOVE;
2099                         blame_move_score = parse_score(arg+2);
2100                 }
2101                 else if (!strncmp("-C", arg, 2)) {
2102                         if (opt & PICKAXE_BLAME_COPY)
2103                                 opt |= PICKAXE_BLAME_COPY_HARDER;
2104                         opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2105                         blame_copy_score = parse_score(arg+2);
2106                 }
2107                 else if (!strncmp("-L", arg, 2)) {
2108                         if (!arg[2]) {
2109                                 if (++i >= argc)
2110                                         usage(blame_usage);
2111                                 arg = argv[i];
2112                         }
2113                         else
2114                                 arg += 2;
2115                         if (bottomtop)
2116                                 die("More than one '-L n,m' option given");
2117                         bottomtop = arg;
2118                 }
2119                 else if (!strcmp("--contents", arg)) {
2120                         if (++i >= argc)
2121                                 usage(blame_usage);
2122                         contents_from = argv[i];
2123                 }
2124                 else if (!strcmp("--incremental", arg))
2125                         incremental = 1;
2126                 else if (!strcmp("--score-debug", arg))
2127                         output_option |= OUTPUT_SHOW_SCORE;
2128                 else if (!strcmp("-f", arg) ||
2129                          !strcmp("--show-name", arg))
2130                         output_option |= OUTPUT_SHOW_NAME;
2131                 else if (!strcmp("-n", arg) ||
2132                          !strcmp("--show-number", arg))
2133                         output_option |= OUTPUT_SHOW_NUMBER;
2134                 else if (!strcmp("-p", arg) ||
2135                          !strcmp("--porcelain", arg))
2136                         output_option |= OUTPUT_PORCELAIN;
2137                 else if (!strcmp("--", arg)) {
2138                         seen_dashdash = 1;
2139                         i++;
2140                         break;
2141                 }
2142                 else
2143                         argv[unk++] = arg;
2144         }
2145
2146         if (!incremental)
2147                 setup_pager();
2148
2149         if (!blame_move_score)
2150                 blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
2151         if (!blame_copy_score)
2152                 blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
2153
2154         /*
2155          * We have collected options unknown to us in argv[1..unk]
2156          * which are to be passed to revision machinery if we are
2157          * going to do the "bottom" processing.
2158          *
2159          * The remaining are:
2160          *
2161          * (1) if seen_dashdash, its either
2162          *     "-options -- <path>" or
2163          *     "-options -- <path> <rev>".
2164          *     but the latter is allowed only if there is no
2165          *     options that we passed to revision machinery.
2166          *
2167          * (2) otherwise, we may have "--" somewhere later and
2168          *     might be looking at the first one of multiple 'rev'
2169          *     parameters (e.g. " master ^next ^maint -- path").
2170          *     See if there is a dashdash first, and give the
2171          *     arguments before that to revision machinery.
2172          *     After that there must be one 'path'.
2173          *
2174          * (3) otherwise, its one of the three:
2175          *     "-options <path> <rev>"
2176          *     "-options <rev> <path>"
2177          *     "-options <path>"
2178          *     but again the first one is allowed only if
2179          *     there is no options that we passed to revision
2180          *     machinery.
2181          */
2182
2183         if (seen_dashdash) {
2184                 /* (1) */
2185                 if (argc <= i)
2186                         usage(blame_usage);
2187                 path = add_prefix(prefix, argv[i]);
2188                 if (i + 1 == argc - 1) {
2189                         if (unk != 1)
2190                                 usage(blame_usage);
2191                         argv[unk++] = argv[i + 1];
2192                 }
2193                 else if (i + 1 != argc)
2194                         /* garbage at end */
2195                         usage(blame_usage);
2196         }
2197         else {
2198                 int j;
2199                 for (j = i; !seen_dashdash && j < argc; j++)
2200                         if (!strcmp(argv[j], "--"))
2201                                 seen_dashdash = j;
2202                 if (seen_dashdash) {
2203                         if (seen_dashdash + 1 != argc - 1)
2204                                 usage(blame_usage);
2205                         path = add_prefix(prefix, argv[seen_dashdash + 1]);
2206                         for (j = i; j < seen_dashdash; j++)
2207                                 argv[unk++] = argv[j];
2208                 }
2209                 else {
2210                         /* (3) */
2211                         path = add_prefix(prefix, argv[i]);
2212                         if (i + 1 == argc - 1) {
2213                                 final_commit_name = argv[i + 1];
2214
2215                                 /* if (unk == 1) we could be getting
2216                                  * old-style
2217                                  */
2218                                 if (unk == 1 && !has_path_in_work_tree(path)) {
2219                                         path = add_prefix(prefix, argv[i + 1]);
2220                                         final_commit_name = argv[i];
2221                                 }
2222                         }
2223                         else if (i != argc - 1)
2224                                 usage(blame_usage); /* garbage at end */
2225
2226                         if (!has_path_in_work_tree(path))
2227                                 die("cannot stat path %s: %s",
2228                                     path, strerror(errno));
2229                 }
2230         }
2231
2232         if (final_commit_name)
2233                 argv[unk++] = final_commit_name;
2234
2235         /*
2236          * Now we got rev and path.  We do not want the path pruning
2237          * but we may want "bottom" processing.
2238          */
2239         argv[unk++] = "--"; /* terminate the rev name */
2240         argv[unk] = NULL;
2241
2242         init_revisions(&revs, NULL);
2243         setup_revisions(unk, argv, &revs, NULL);
2244         memset(&sb, 0, sizeof(sb));
2245
2246         /*
2247          * There must be one and only one positive commit in the
2248          * revs->pending array.
2249          */
2250         for (i = 0; i < revs.pending.nr; i++) {
2251                 struct object *obj = revs.pending.objects[i].item;
2252                 if (obj->flags & UNINTERESTING)
2253                         continue;
2254                 while (obj->type == OBJ_TAG)
2255                         obj = deref_tag(obj, NULL, 0);
2256                 if (obj->type != OBJ_COMMIT)
2257                         die("Non commit %s?",
2258                             revs.pending.objects[i].name);
2259                 if (sb.final)
2260                         die("More than one commit to dig from %s and %s?",
2261                             revs.pending.objects[i].name,
2262                             final_commit_name);
2263                 sb.final = (struct commit *) obj;
2264                 final_commit_name = revs.pending.objects[i].name;
2265         }
2266
2267         if (!sb.final) {
2268                 /*
2269                  * "--not A B -- path" without anything positive;
2270                  * do not default to HEAD, but use the working tree
2271                  * or "--contents".
2272                  */
2273                 sb.final = fake_working_tree_commit(path, contents_from);
2274                 add_pending_object(&revs, &(sb.final->object), ":");
2275         }
2276         else if (contents_from)
2277                 die("Cannot use --contents with final commit object name");
2278
2279         /*
2280          * If we have bottom, this will mark the ancestors of the
2281          * bottom commits we would reach while traversing as
2282          * uninteresting.
2283          */
2284         prepare_revision_walk(&revs);
2285
2286         if (is_null_sha1(sb.final->object.sha1)) {
2287                 char *buf;
2288                 o = sb.final->util;
2289                 buf = xmalloc(o->file.size + 1);
2290                 memcpy(buf, o->file.ptr, o->file.size + 1);
2291                 sb.final_buf = buf;
2292                 sb.final_buf_size = o->file.size;
2293         }
2294         else {
2295                 o = get_origin(&sb, sb.final, path);
2296                 if (fill_blob_sha1(o))
2297                         die("no such path %s in %s", path, final_commit_name);
2298
2299                 sb.final_buf = read_sha1_file(o->blob_sha1, type,
2300                                               &sb.final_buf_size);
2301         }
2302         num_read_blob++;
2303         lno = prepare_lines(&sb);
2304
2305         bottom = top = 0;
2306         if (bottomtop)
2307                 prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
2308         if (bottom && top && top < bottom) {
2309                 long tmp;
2310                 tmp = top; top = bottom; bottom = tmp;
2311         }
2312         if (bottom < 1)
2313                 bottom = 1;
2314         if (top < 1)
2315                 top = lno;
2316         bottom--;
2317         if (lno < top)
2318                 die("file %s has only %lu lines", path, lno);
2319
2320         ent = xcalloc(1, sizeof(*ent));
2321         ent->lno = bottom;
2322         ent->num_lines = top - bottom;
2323         ent->suspect = o;
2324         ent->s_lno = bottom;
2325
2326         sb.ent = ent;
2327         sb.path = path;
2328
2329         if (revs_file && read_ancestry(revs_file))
2330                 die("reading graft file %s failed: %s",
2331                     revs_file, strerror(errno));
2332
2333         assign_blame(&sb, &revs, opt);
2334
2335         if (incremental)
2336                 return 0;
2337
2338         coalesce(&sb);
2339
2340         if (!(output_option & OUTPUT_PORCELAIN))
2341                 find_alignment(&sb, &output_option);
2342
2343         output(&sb, output_option);
2344         free((void *)sb.final_buf);
2345         for (ent = sb.ent; ent; ) {
2346                 struct blame_entry *e = ent->next;
2347                 free(ent);
2348                 ent = e;
2349         }
2350
2351         if (DEBUG) {
2352                 printf("num read blob: %d\n", num_read_blob);
2353                 printf("num get patch: %d\n", num_get_patch);
2354                 printf("num commits: %d\n", num_commits);
2355         }
2356         return 0;
2357 }