git diff: improve range handling
[git] / builtin / diff.c
1 /*
2  * Builtin "git diff"
3  *
4  * Copyright (c) 2006 Junio C Hamano
5  */
6 #define USE_THE_INDEX_COMPATIBILITY_MACROS
7 #include "cache.h"
8 #include "config.h"
9 #include "ewah/ewok.h"
10 #include "lockfile.h"
11 #include "color.h"
12 #include "commit.h"
13 #include "blob.h"
14 #include "tag.h"
15 #include "diff.h"
16 #include "diffcore.h"
17 #include "revision.h"
18 #include "log-tree.h"
19 #include "builtin.h"
20 #include "submodule.h"
21 #include "sha1-array.h"
22
/*
 * Values stored in cmd_diff()'s no_index variable: "--no-index" was
 * spelled out on the command line, or no-index mode was inferred.
 */
#define DIFF_NO_INDEX_EXPLICIT 1
#define DIFF_NO_INDEX_IMPLICIT 2

/* Synopsis handed to usage() whenever the arguments make no sense. */
static const char builtin_diff_usage[] =
        "git diff [<options>] [<commit> [<commit>]] [--] [<path>...]";
28
29 static const char *blob_path(struct object_array_entry *entry)
30 {
31         return entry->path ? entry->path : entry->name;
32 }
33
34 static void stuff_change(struct diff_options *opt,
35                          unsigned old_mode, unsigned new_mode,
36                          const struct object_id *old_oid,
37                          const struct object_id *new_oid,
38                          int old_oid_valid,
39                          int new_oid_valid,
40                          const char *old_path,
41                          const char *new_path)
42 {
43         struct diff_filespec *one, *two;
44
45         if (!is_null_oid(old_oid) && !is_null_oid(new_oid) &&
46             oideq(old_oid, new_oid) && (old_mode == new_mode))
47                 return;
48
49         if (opt->flags.reverse_diff) {
50                 SWAP(old_mode, new_mode);
51                 SWAP(old_oid, new_oid);
52                 SWAP(old_path, new_path);
53         }
54
55         if (opt->prefix &&
56             (strncmp(old_path, opt->prefix, opt->prefix_length) ||
57              strncmp(new_path, opt->prefix, opt->prefix_length)))
58                 return;
59
60         one = alloc_filespec(old_path);
61         two = alloc_filespec(new_path);
62         fill_filespec(one, old_oid, old_oid_valid, old_mode);
63         fill_filespec(two, new_oid, new_oid_valid, new_mode);
64
65         diff_queue(&diff_queued_diff, one, two);
66 }
67
68 static int builtin_diff_b_f(struct rev_info *revs,
69                             int argc, const char **argv,
70                             struct object_array_entry **blob)
71 {
72         /* Blob vs file in the working tree*/
73         struct stat st;
74         const char *path;
75
76         if (argc > 1)
77                 usage(builtin_diff_usage);
78
79         GUARD_PATHSPEC(&revs->prune_data, PATHSPEC_FROMTOP | PATHSPEC_LITERAL);
80         path = revs->prune_data.items[0].match;
81
82         if (lstat(path, &st))
83                 die_errno(_("failed to stat '%s'"), path);
84         if (!(S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)))
85                 die(_("'%s': not a regular file or symlink"), path);
86
87         diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/");
88
89         if (blob[0]->mode == S_IFINVALID)
90                 blob[0]->mode = canon_mode(st.st_mode);
91
92         stuff_change(&revs->diffopt,
93                      blob[0]->mode, canon_mode(st.st_mode),
94                      &blob[0]->item->oid, &null_oid,
95                      1, 0,
96                      blob[0]->path ? blob[0]->path : path,
97                      path);
98         diffcore_std(&revs->diffopt);
99         diff_flush(&revs->diffopt);
100         return 0;
101 }
102
103 static int builtin_diff_blobs(struct rev_info *revs,
104                               int argc, const char **argv,
105                               struct object_array_entry **blob)
106 {
107         const unsigned mode = canon_mode(S_IFREG | 0644);
108
109         if (argc > 1)
110                 usage(builtin_diff_usage);
111
112         if (blob[0]->mode == S_IFINVALID)
113                 blob[0]->mode = mode;
114
115         if (blob[1]->mode == S_IFINVALID)
116                 blob[1]->mode = mode;
117
118         stuff_change(&revs->diffopt,
119                      blob[0]->mode, blob[1]->mode,
120                      &blob[0]->item->oid, &blob[1]->item->oid,
121                      1, 1,
122                      blob_path(blob[0]), blob_path(blob[1]));
123         diffcore_std(&revs->diffopt);
124         diff_flush(&revs->diffopt);
125         return 0;
126 }
127
128 static int builtin_diff_index(struct rev_info *revs,
129                               int argc, const char **argv)
130 {
131         int cached = 0;
132         while (1 < argc) {
133                 const char *arg = argv[1];
134                 if (!strcmp(arg, "--cached") || !strcmp(arg, "--staged"))
135                         cached = 1;
136                 else
137                         usage(builtin_diff_usage);
138                 argv++; argc--;
139         }
140         /*
141          * Make sure there is one revision (i.e. pending object),
142          * and there is no revision filtering parameters.
143          */
144         if (revs->pending.nr != 1 ||
145             revs->max_count != -1 || revs->min_age != -1 ||
146             revs->max_age != -1)
147                 usage(builtin_diff_usage);
148         if (!cached) {
149                 setup_work_tree();
150                 if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
151                         perror("read_cache_preload");
152                         return -1;
153                 }
154         } else if (read_cache() < 0) {
155                 perror("read_cache");
156                 return -1;
157         }
158         return run_diff_index(revs, cached);
159 }
160
161 static int builtin_diff_tree(struct rev_info *revs,
162                              int argc, const char **argv,
163                              struct object_array_entry *ent0,
164                              struct object_array_entry *ent1)
165 {
166         const struct object_id *(oid[2]);
167         int swap = 0;
168
169         if (argc > 1)
170                 usage(builtin_diff_usage);
171
172         /*
173          * We saw two trees, ent0 and ent1.  If ent1 is uninteresting,
174          * swap them.
175          */
176         if (ent1->item->flags & UNINTERESTING)
177                 swap = 1;
178         oid[swap] = &ent0->item->oid;
179         oid[1 - swap] = &ent1->item->oid;
180         diff_tree_oid(oid[0], oid[1], "", &revs->diffopt);
181         log_tree_diff_flush(revs);
182         return 0;
183 }
184
185 static int builtin_diff_combined(struct rev_info *revs,
186                                  int argc, const char **argv,
187                                  struct object_array_entry *ent,
188                                  int ents)
189 {
190         struct oid_array parents = OID_ARRAY_INIT;
191         int i;
192
193         if (argc > 1)
194                 usage(builtin_diff_usage);
195
196         if (!revs->dense_combined_merges && !revs->combine_merges)
197                 revs->dense_combined_merges = revs->combine_merges = 1;
198         for (i = 1; i < ents; i++)
199                 oid_array_append(&parents, &ent[i].item->oid);
200         diff_tree_combined(&ent[0].item->oid, &parents,
201                            revs->dense_combined_merges, revs);
202         oid_array_clear(&parents);
203         return 0;
204 }
205
206 static void refresh_index_quietly(void)
207 {
208         struct lock_file lock_file = LOCK_INIT;
209         int fd;
210
211         fd = hold_locked_index(&lock_file, 0);
212         if (fd < 0)
213                 return;
214         discard_cache();
215         read_cache();
216         refresh_cache(REFRESH_QUIET|REFRESH_UNMERGED);
217         repo_update_index_if_able(the_repository, &lock_file);
218 }
219
220 static int builtin_diff_files(struct rev_info *revs, int argc, const char **argv)
221 {
222         unsigned int options = 0;
223
224         while (1 < argc && argv[1][0] == '-') {
225                 if (!strcmp(argv[1], "--base"))
226                         revs->max_count = 1;
227                 else if (!strcmp(argv[1], "--ours"))
228                         revs->max_count = 2;
229                 else if (!strcmp(argv[1], "--theirs"))
230                         revs->max_count = 3;
231                 else if (!strcmp(argv[1], "-q"))
232                         options |= DIFF_SILENT_ON_REMOVED;
233                 else if (!strcmp(argv[1], "-h"))
234                         usage(builtin_diff_usage);
235                 else
236                         return error(_("invalid option: %s"), argv[1]);
237                 argv++; argc--;
238         }
239
240         /*
241          * "diff --base" should not combine merges because it was not
242          * asked to.  "diff -c" should not densify (if the user wants
243          * dense one, --cc can be explicitly asked for, or just rely
244          * on the default).
245          */
246         if (revs->max_count == -1 && !revs->combine_merges &&
247             (revs->diffopt.output_format & DIFF_FORMAT_PATCH))
248                 revs->combine_merges = revs->dense_combined_merges = 1;
249
250         setup_work_tree();
251         if (read_cache_preload(&revs->diffopt.pathspec) < 0) {
252                 perror("read_cache_preload");
253                 return -1;
254         }
255         return run_diff_files(revs, options);
256 }
257
/* What symdiff_prepare() learned about an "A...B" argument. */
struct symdiff {
        /* indices into the pending objects to ignore; NULL if no A...B */
        struct bitmap *skip;
        /* non-zero when there was more than one merge base */
        int warn;
        /* names used when warning about multiple merge bases */
        const char *base;
        const char *left;
        const char *right;
};
263
264 /*
265  * Check for symmetric-difference arguments, and if present, arrange
266  * everything we need to know to handle them correctly.  As a bonus,
267  * weed out all bogus range-based revision specifications, e.g.,
268  * "git diff A..B C..D" or "git diff A..B C" get rejected.
269  *
270  * For an actual symmetric diff, *symdiff is set this way:
271  *
272  *  - its skip is non-NULL and marks *all* rev->pending.objects[i]
273  *    indices that the caller should ignore (extra merge bases, of
274  *    which there might be many, and A in A...B).  Note that the
275  *    chosen merge base and right side are NOT marked.
276  *  - warn is set if there are multiple merge bases.
277  *  - base, left, and right point to the names to use in a
278  *    warning about multiple merge bases.
279  *
280  * If there is no symmetric diff argument, sym->skip is NULL and
281  * sym->warn is cleared.  The remaining fields are not set.
282  */
283 static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)
284 {
285         int i, is_symdiff = 0, basecount = 0, othercount = 0;
286         int lpos = -1, rpos = -1, basepos = -1;
287         struct bitmap *map = NULL;
288
289         /*
290          * Use the whence fields to find merge bases and left and
291          * right parts of symmetric difference, so that we do not
292          * depend on the order that revisions are parsed.  If there
293          * are any revs that aren't from these sources, we have a
294          * "git diff C A...B" or "git diff A...B C" case.  Or we
295          * could even get "git diff A...B C...E", for instance.
296          *
297          * If we don't have just one merge base, we pick one
298          * at random.
299          *
300          * NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,
301          * so we must check for SYMMETRIC_LEFT too.  The two arrays
302          * rev->pending.objects and rev->cmdline.rev are parallel.
303          */
304         for (i = 0; i < rev->cmdline.nr; i++) {
305                 struct object *obj = rev->pending.objects[i].item;
306                 switch (rev->cmdline.rev[i].whence) {
307                 case REV_CMD_MERGE_BASE:
308                         if (basepos < 0)
309                                 basepos = i;
310                         basecount++;
311                         break;          /* do mark all bases */
312                 case REV_CMD_LEFT:
313                         if (lpos >= 0)
314                                 usage(builtin_diff_usage);
315                         lpos = i;
316                         if (obj->flags & SYMMETRIC_LEFT) {
317                                 is_symdiff = 1;
318                                 break;  /* do mark A */
319                         }
320                         continue;
321                 case REV_CMD_RIGHT:
322                         if (rpos >= 0)
323                                 usage(builtin_diff_usage);
324                         rpos = i;
325                         continue;       /* don't mark B */
326                 case REV_CMD_PARENTS_ONLY:
327                 case REV_CMD_REF:
328                 case REV_CMD_REV:
329                         othercount++;
330                         continue;
331                 }
332                 if (map == NULL)
333                         map = bitmap_new();
334                 bitmap_set(map, i);
335         }
336
337         /*
338          * Forbid any additional revs for both A...B and A..B.
339          */
340         if (lpos >= 0 && othercount > 0)
341                 usage(builtin_diff_usage);
342
343         if (!is_symdiff) {
344                 bitmap_free(map);
345                 sym->warn = 0;
346                 sym->skip = NULL;
347                 return;
348         }
349
350         sym->left = rev->pending.objects[lpos].name;
351         sym->right = rev->pending.objects[rpos].name;
352         sym->base = rev->pending.objects[basepos].name;
353         if (basecount == 0)
354                 die(_("%s...%s: no merge base"), sym->left, sym->right);
355         bitmap_unset(map, basepos);     /* unmark the base we want */
356         sym->warn = basecount > 1;
357         sym->skip = map;
358 }
359
360 int cmd_diff(int argc, const char **argv, const char *prefix)
361 {
362         int i;
363         struct rev_info rev;
364         struct object_array ent = OBJECT_ARRAY_INIT;
365         int blobs = 0, paths = 0;
366         struct object_array_entry *blob[2];
367         int nongit = 0, no_index = 0;
368         int result = 0;
369         struct symdiff sdiff;
370
371         /*
372          * We could get N tree-ish in the rev.pending_objects list.
373          * Also there could be M blobs there, and P pathspecs.
374          *
375          * N=0, M=0:
376          *      cache vs files (diff-files)
377          * N=0, M=2:
378          *      compare two random blobs.  P must be zero.
379          * N=0, M=1, P=1:
380          *      compare a blob with a working tree file.
381          *
382          * N=1, M=0:
383          *      tree vs cache (diff-index --cached)
384          *
385          * N=2, M=0:
386          *      tree vs tree (diff-tree)
387          *
388          * N=0, M=0, P=2:
389          *      compare two filesystem entities (aka --no-index).
390          *
391          * Other cases are errors.
392          */
393
394         /* Were we asked to do --no-index explicitly? */
395         for (i = 1; i < argc; i++) {
396                 if (!strcmp(argv[i], "--")) {
397                         i++;
398                         break;
399                 }
400                 if (!strcmp(argv[i], "--no-index"))
401                         no_index = DIFF_NO_INDEX_EXPLICIT;
402                 if (argv[i][0] != '-')
403                         break;
404         }
405
406         prefix = setup_git_directory_gently(&nongit);
407
408         if (!no_index) {
409                 /*
410                  * Treat git diff with at least one path outside of the
411                  * repo the same as if the command would have been executed
412                  * outside of a git repository.  In this case it behaves
413                  * the same way as "git diff --no-index <a> <b>", which acts
414                  * as a colourful "diff" replacement.
415                  */
416                 if (nongit || ((argc == i + 2) &&
417                                (!path_inside_repo(prefix, argv[i]) ||
418                                 !path_inside_repo(prefix, argv[i + 1]))))
419                         no_index = DIFF_NO_INDEX_IMPLICIT;
420         }
421
422         init_diff_ui_defaults();
423         git_config(git_diff_ui_config, NULL);
424         precompose_argv(argc, argv);
425
426         repo_init_revisions(the_repository, &rev, prefix);
427
428         /* Set up defaults that will apply to both no-index and regular diffs. */
429         rev.diffopt.stat_width = -1;
430         rev.diffopt.stat_graph_width = -1;
431         rev.diffopt.flags.allow_external = 1;
432         rev.diffopt.flags.allow_textconv = 1;
433
434         /* If this is a no-index diff, just run it and exit there. */
435         if (no_index)
436                 exit(diff_no_index(&rev, no_index == DIFF_NO_INDEX_IMPLICIT,
437                                    argc, argv));
438
439
440         /*
441          * Otherwise, we are doing the usual "git" diff; set up any
442          * further defaults that apply to regular diffs.
443          */
444         rev.diffopt.skip_stat_unmatch = !!diff_auto_refresh_index;
445
446         /*
447          * Default to intent-to-add entries invisible in the
448          * index. This makes them show up as new files in diff-files
449          * and not at all in diff-cached.
450          */
451         rev.diffopt.ita_invisible_in_index = 1;
452
453         if (nongit)
454                 die(_("Not a git repository"));
455         argc = setup_revisions(argc, argv, &rev, NULL);
456         if (!rev.diffopt.output_format) {
457                 rev.diffopt.output_format = DIFF_FORMAT_PATCH;
458                 diff_setup_done(&rev.diffopt);
459         }
460
461         rev.diffopt.flags.recursive = 1;
462
463         setup_diff_pager(&rev.diffopt);
464
465         /*
466          * Do we have --cached and not have a pending object, then
467          * default to HEAD by hand.  Eek.
468          */
469         if (!rev.pending.nr) {
470                 int i;
471                 for (i = 1; i < argc; i++) {
472                         const char *arg = argv[i];
473                         if (!strcmp(arg, "--"))
474                                 break;
475                         else if (!strcmp(arg, "--cached") ||
476                                  !strcmp(arg, "--staged")) {
477                                 add_head_to_pending(&rev);
478                                 if (!rev.pending.nr) {
479                                         struct tree *tree;
480                                         tree = lookup_tree(the_repository,
481                                                            the_repository->hash_algo->empty_tree);
482                                         add_pending_object(&rev, &tree->object, "HEAD");
483                                 }
484                                 break;
485                         }
486                 }
487         }
488
489         symdiff_prepare(&rev, &sdiff);
490         for (i = 0; i < rev.pending.nr; i++) {
491                 struct object_array_entry *entry = &rev.pending.objects[i];
492                 struct object *obj = entry->item;
493                 const char *name = entry->name;
494                 int flags = (obj->flags & UNINTERESTING);
495                 if (!obj->parsed)
496                         obj = parse_object(the_repository, &obj->oid);
497                 obj = deref_tag(the_repository, obj, NULL, 0);
498                 if (!obj)
499                         die(_("invalid object '%s' given."), name);
500                 if (obj->type == OBJ_COMMIT)
501                         obj = &get_commit_tree(((struct commit *)obj))->object;
502
503                 if (obj->type == OBJ_TREE) {
504                         if (sdiff.skip && bitmap_get(sdiff.skip, i))
505                                 continue;
506                         obj->flags |= flags;
507                         add_object_array(obj, name, &ent);
508                 } else if (obj->type == OBJ_BLOB) {
509                         if (2 <= blobs)
510                                 die(_("more than two blobs given: '%s'"), name);
511                         blob[blobs] = entry;
512                         blobs++;
513
514                 } else {
515                         die(_("unhandled object '%s' given."), name);
516                 }
517         }
518         if (rev.prune_data.nr)
519                 paths += rev.prune_data.nr;
520
521         /*
522          * Now, do the arguments look reasonable?
523          */
524         if (!ent.nr) {
525                 switch (blobs) {
526                 case 0:
527                         result = builtin_diff_files(&rev, argc, argv);
528                         break;
529                 case 1:
530                         if (paths != 1)
531                                 usage(builtin_diff_usage);
532                         result = builtin_diff_b_f(&rev, argc, argv, blob);
533                         break;
534                 case 2:
535                         if (paths)
536                                 usage(builtin_diff_usage);
537                         result = builtin_diff_blobs(&rev, argc, argv, blob);
538                         break;
539                 default:
540                         usage(builtin_diff_usage);
541                 }
542         }
543         else if (blobs)
544                 usage(builtin_diff_usage);
545         else if (ent.nr == 1)
546                 result = builtin_diff_index(&rev, argc, argv);
547         else if (ent.nr == 2) {
548                 if (sdiff.warn)
549                         warning(_("%s...%s: multiple merge bases, using %s"),
550                                 sdiff.left, sdiff.right, sdiff.base);
551                 result = builtin_diff_tree(&rev, argc, argv,
552                                            &ent.objects[0], &ent.objects[1]);
553         } else
554                 result = builtin_diff_combined(&rev, argc, argv,
555                                                ent.objects, ent.nr);
556         result = diff_result_code(&rev.diffopt, result);
557         if (1 < rev.diffopt.skip_stat_unmatch)
558                 refresh_index_quietly();
559         UNLEAK(rev);
560         UNLEAK(ent);
561         UNLEAK(blob);
562         return result;
563 }