utf8: add utf8_strwidth()
[git] / diff-lib.c
1 /*
2  * Copyright (C) 2005 Junio C Hamano
3  */
4 #include "cache.h"
5 #include "quote.h"
6 #include "commit.h"
7 #include "diff.h"
8 #include "diffcore.h"
9 #include "revision.h"
10 #include "cache-tree.h"
11 #include "unpack-trees.h"
12 #include "refs.h"
13
14 /*
15  * diff-files
16  */
17
18 /*
19  * Has the work tree entity been removed?
20  *
21  * Return 1 if it was removed from the work tree, 0 if an entity to be
22  * compared with the cache entry ce still exists (the latter includes
23  * the case where a directory that is not a submodule repository
24  * exists for ce that is a submodule -- it is a submodule that is not
25  * checked out).  Return negative for an error.
26  */
27 static int check_removed(const struct cache_entry *ce, struct stat *st)
28 {
29         if (lstat(ce->name, st) < 0) {
30                 if (errno != ENOENT && errno != ENOTDIR)
31                         return -1;
32                 return 1;
33         }
34         if (has_symlink_leading_path(ce_namelen(ce), ce->name))
35                 return 1;
36         if (S_ISDIR(st->st_mode)) {
37                 unsigned char sub[20];
38
39                 /*
40                  * If ce is already a gitlink, we can have a plain
41                  * directory (i.e. the submodule is not checked out),
42                  * or a checked out submodule.  Either case this is not
43                  * a case where something was removed from the work tree,
44                  * so we will return 0.
45                  *
46                  * Otherwise, if the directory is not a submodule
47                  * repository, that means ce which was a blob turned into
48                  * a directory --- the blob was removed!
49                  */
50                 if (!S_ISGITLINK(ce->ce_mode) &&
51                     resolve_gitlink_ref(ce->name, "HEAD", sub))
52                         return 1;
53         }
54         return 0;
55 }
56
57 int run_diff_files(struct rev_info *revs, unsigned int option)
58 {
59         int entries, i;
60         int diff_unmerged_stage = revs->max_count;
61         int silent_on_removed = option & DIFF_SILENT_ON_REMOVED;
62         unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED)
63                               ? CE_MATCH_RACY_IS_DIRTY : 0);
64         char symcache[PATH_MAX];
65
66         diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/");
67
68         if (diff_unmerged_stage < 0)
69                 diff_unmerged_stage = 2;
70         entries = active_nr;
71         symcache[0] = '\0';
72         for (i = 0; i < entries; i++) {
73                 struct stat st;
74                 unsigned int oldmode, newmode;
75                 struct cache_entry *ce = active_cache[i];
76                 int changed;
77
78                 if (DIFF_OPT_TST(&revs->diffopt, QUIET) &&
79                         DIFF_OPT_TST(&revs->diffopt, HAS_CHANGES))
80                         break;
81
82                 if (!ce_path_match(ce, revs->prune_data))
83                         continue;
84
85                 if (ce_stage(ce)) {
86                         struct combine_diff_path *dpath;
87                         int num_compare_stages = 0;
88                         size_t path_len;
89
90                         path_len = ce_namelen(ce);
91
92                         dpath = xmalloc(combine_diff_path_size(5, path_len));
93                         dpath->path = (char *) &(dpath->parent[5]);
94
95                         dpath->next = NULL;
96                         dpath->len = path_len;
97                         memcpy(dpath->path, ce->name, path_len);
98                         dpath->path[path_len] = '\0';
99                         hashclr(dpath->sha1);
100                         memset(&(dpath->parent[0]), 0,
101                                sizeof(struct combine_diff_parent)*5);
102
103                         changed = check_removed(ce, &st);
104                         if (!changed)
105                                 dpath->mode = ce_mode_from_stat(ce, st.st_mode);
106                         else {
107                                 if (changed < 0) {
108                                         perror(ce->name);
109                                         continue;
110                                 }
111                                 if (silent_on_removed)
112                                         continue;
113                         }
114
115                         while (i < entries) {
116                                 struct cache_entry *nce = active_cache[i];
117                                 int stage;
118
119                                 if (strcmp(ce->name, nce->name))
120                                         break;
121
122                                 /* Stage #2 (ours) is the first parent,
123                                  * stage #3 (theirs) is the second.
124                                  */
125                                 stage = ce_stage(nce);
126                                 if (2 <= stage) {
127                                         int mode = nce->ce_mode;
128                                         num_compare_stages++;
129                                         hashcpy(dpath->parent[stage-2].sha1, nce->sha1);
130                                         dpath->parent[stage-2].mode = ce_mode_from_stat(nce, mode);
131                                         dpath->parent[stage-2].status =
132                                                 DIFF_STATUS_MODIFIED;
133                                 }
134
135                                 /* diff against the proper unmerged stage */
136                                 if (stage == diff_unmerged_stage)
137                                         ce = nce;
138                                 i++;
139                         }
140                         /*
141                          * Compensate for loop update
142                          */
143                         i--;
144
145                         if (revs->combine_merges && num_compare_stages == 2) {
146                                 show_combined_diff(dpath, 2,
147                                                    revs->dense_combined_merges,
148                                                    revs);
149                                 free(dpath);
150                                 continue;
151                         }
152                         free(dpath);
153                         dpath = NULL;
154
155                         /*
156                          * Show the diff for the 'ce' if we found the one
157                          * from the desired stage.
158                          */
159                         diff_unmerge(&revs->diffopt, ce->name, 0, null_sha1);
160                         if (ce_stage(ce) != diff_unmerged_stage)
161                                 continue;
162                 }
163
164                 if (ce_uptodate(ce))
165                         continue;
166
167                 changed = check_removed(ce, &st);
168                 if (changed) {
169                         if (changed < 0) {
170                                 perror(ce->name);
171                                 continue;
172                         }
173                         if (silent_on_removed)
174                                 continue;
175                         diff_addremove(&revs->diffopt, '-', ce->ce_mode,
176                                        ce->sha1, ce->name);
177                         continue;
178                 }
179                 changed = ce_match_stat(ce, &st, ce_option);
180                 if (!changed) {
181                         ce_mark_uptodate(ce);
182                         if (!DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER))
183                                 continue;
184                 }
185                 oldmode = ce->ce_mode;
186                 newmode = ce_mode_from_stat(ce, st.st_mode);
187                 diff_change(&revs->diffopt, oldmode, newmode,
188                             ce->sha1, (changed ? null_sha1 : ce->sha1),
189                             ce->name);
190
191         }
192         diffcore_std(&revs->diffopt);
193         diff_flush(&revs->diffopt);
194         return 0;
195 }
196
197 /*
198  * diff-index
199  */
200
201 struct oneway_unpack_data {
202         struct rev_info *revs;
203         char symcache[PATH_MAX];
204 };
205
206 /* A file entry went away or appeared */
207 static void diff_index_show_file(struct rev_info *revs,
208                                  const char *prefix,
209                                  struct cache_entry *ce,
210                                  const unsigned char *sha1, unsigned int mode)
211 {
212         diff_addremove(&revs->diffopt, prefix[0], mode,
213                        sha1, ce->name);
214 }
215
216 static int get_stat_data(struct cache_entry *ce,
217                          const unsigned char **sha1p,
218                          unsigned int *modep,
219                          int cached, int match_missing,
220                          struct oneway_unpack_data *cbdata)
221 {
222         const unsigned char *sha1 = ce->sha1;
223         unsigned int mode = ce->ce_mode;
224
225         if (!cached) {
226                 int changed;
227                 struct stat st;
228                 changed = check_removed(ce, &st);
229                 if (changed < 0)
230                         return -1;
231                 else if (changed) {
232                         if (match_missing) {
233                                 *sha1p = sha1;
234                                 *modep = mode;
235                                 return 0;
236                         }
237                         return -1;
238                 }
239                 changed = ce_match_stat(ce, &st, 0);
240                 if (changed) {
241                         mode = ce_mode_from_stat(ce, st.st_mode);
242                         sha1 = null_sha1;
243                 }
244         }
245
246         *sha1p = sha1;
247         *modep = mode;
248         return 0;
249 }
250
251 static void show_new_file(struct oneway_unpack_data *cbdata,
252                           struct cache_entry *new,
253                           int cached, int match_missing)
254 {
255         const unsigned char *sha1;
256         unsigned int mode;
257         struct rev_info *revs = cbdata->revs;
258
259         /*
260          * New file in the index: it might actually be different in
261          * the working copy.
262          */
263         if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0)
264                 return;
265
266         diff_index_show_file(revs, "+", new, sha1, mode);
267 }
268
269 static int show_modified(struct oneway_unpack_data *cbdata,
270                          struct cache_entry *old,
271                          struct cache_entry *new,
272                          int report_missing,
273                          int cached, int match_missing)
274 {
275         unsigned int mode, oldmode;
276         const unsigned char *sha1;
277         struct rev_info *revs = cbdata->revs;
278
279         if (get_stat_data(new, &sha1, &mode, cached, match_missing, cbdata) < 0) {
280                 if (report_missing)
281                         diff_index_show_file(revs, "-", old,
282                                              old->sha1, old->ce_mode);
283                 return -1;
284         }
285
286         if (revs->combine_merges && !cached &&
287             (hashcmp(sha1, old->sha1) || hashcmp(old->sha1, new->sha1))) {
288                 struct combine_diff_path *p;
289                 int pathlen = ce_namelen(new);
290
291                 p = xmalloc(combine_diff_path_size(2, pathlen));
292                 p->path = (char *) &p->parent[2];
293                 p->next = NULL;
294                 p->len = pathlen;
295                 memcpy(p->path, new->name, pathlen);
296                 p->path[pathlen] = 0;
297                 p->mode = mode;
298                 hashclr(p->sha1);
299                 memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent));
300                 p->parent[0].status = DIFF_STATUS_MODIFIED;
301                 p->parent[0].mode = new->ce_mode;
302                 hashcpy(p->parent[0].sha1, new->sha1);
303                 p->parent[1].status = DIFF_STATUS_MODIFIED;
304                 p->parent[1].mode = old->ce_mode;
305                 hashcpy(p->parent[1].sha1, old->sha1);
306                 show_combined_diff(p, 2, revs->dense_combined_merges, revs);
307                 free(p);
308                 return 0;
309         }
310
311         oldmode = old->ce_mode;
312         if (mode == oldmode && !hashcmp(sha1, old->sha1) &&
313             !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER))
314                 return 0;
315
316         diff_change(&revs->diffopt, oldmode, mode,
317                     old->sha1, sha1, old->name);
318         return 0;
319 }
320
321 /*
322  * This turns all merge entries into "stage 3". That guarantees that
323  * when we read in the new tree (into "stage 1"), we won't lose sight
324  * of the fact that we had unmerged entries.
325  */
326 static void mark_merge_entries(void)
327 {
328         int i;
329         for (i = 0; i < active_nr; i++) {
330                 struct cache_entry *ce = active_cache[i];
331                 if (!ce_stage(ce))
332                         continue;
333                 ce->ce_flags |= CE_STAGEMASK;
334         }
335 }
336
337 /*
338  * This gets a mix of an existing index and a tree, one pathname entry
339  * at a time. The index entry may be a single stage-0 one, but it could
340  * also be multiple unmerged entries (in which case idx_pos/idx_nr will
341  * give you the position and number of entries in the index).
342  */
343 static void do_oneway_diff(struct unpack_trees_options *o,
344         struct cache_entry *idx,
345         struct cache_entry *tree)
346 {
347         struct oneway_unpack_data *cbdata = o->unpack_data;
348         struct rev_info *revs = cbdata->revs;
349         int match_missing, cached;
350
351         /*
352          * Backward compatibility wart - "diff-index -m" does
353          * not mean "do not ignore merges", but "match_missing".
354          *
355          * But with the revision flag parsing, that's found in
356          * "!revs->ignore_merges".
357          */
358         cached = o->index_only;
359         match_missing = !revs->ignore_merges;
360
361         if (cached && idx && ce_stage(idx)) {
362                 if (tree)
363                         diff_unmerge(&revs->diffopt, idx->name, idx->ce_mode, idx->sha1);
364                 return;
365         }
366
367         /*
368          * Something added to the tree?
369          */
370         if (!tree) {
371                 show_new_file(cbdata, idx, cached, match_missing);
372                 return;
373         }
374
375         /*
376          * Something removed from the tree?
377          */
378         if (!idx) {
379                 diff_index_show_file(revs, "-", tree, tree->sha1, tree->ce_mode);
380                 return;
381         }
382
383         /* Show difference between old and new */
384         show_modified(cbdata, tree, idx, 1, cached, match_missing);
385 }
386
387 static inline void skip_same_name(struct cache_entry *ce, struct unpack_trees_options *o)
388 {
389         int len = ce_namelen(ce);
390         const struct index_state *index = o->src_index;
391
392         while (o->pos < index->cache_nr) {
393                 struct cache_entry *next = index->cache[o->pos];
394                 if (len != ce_namelen(next))
395                         break;
396                 if (memcmp(ce->name, next->name, len))
397                         break;
398                 o->pos++;
399         }
400 }
401
402 /*
403  * The unpack_trees() interface is designed for merging, so
404  * the different source entries are designed primarily for
405  * the source trees, with the old index being really mainly
406  * used for being replaced by the result.
407  *
408  * For diffing, the index is more important, and we only have a
409  * single tree.
410  *
411  * We're supposed to return how many index entries we want to skip.
412  *
413  * This wrapper makes it all more readable, and takes care of all
414  * the fairly complex unpack_trees() semantic requirements, including
415  * the skipping, the path matching, the type conflict cases etc.
416  */
417 static int oneway_diff(struct cache_entry **src, struct unpack_trees_options *o)
418 {
419         struct cache_entry *idx = src[0];
420         struct cache_entry *tree = src[1];
421         struct oneway_unpack_data *cbdata = o->unpack_data;
422         struct rev_info *revs = cbdata->revs;
423
424         if (idx && ce_stage(idx))
425                 skip_same_name(idx, o);
426
427         /*
428          * Unpack-trees generates a DF/conflict entry if
429          * there was a directory in the index and a tree
430          * in the tree. From a diff standpoint, that's a
431          * delete of the tree and a create of the file.
432          */
433         if (tree == o->df_conflict_entry)
434                 tree = NULL;
435
436         if (ce_path_match(idx ? idx : tree, revs->prune_data))
437                 do_oneway_diff(o, idx, tree);
438
439         return 0;
440 }
441
442 int run_diff_index(struct rev_info *revs, int cached)
443 {
444         struct object *ent;
445         struct tree *tree;
446         const char *tree_name;
447         struct unpack_trees_options opts;
448         struct tree_desc t;
449         struct oneway_unpack_data unpack_cb;
450
451         mark_merge_entries();
452
453         ent = revs->pending.objects[0].item;
454         tree_name = revs->pending.objects[0].name;
455         tree = parse_tree_indirect(ent->sha1);
456         if (!tree)
457                 return error("bad tree object %s", tree_name);
458
459         unpack_cb.revs = revs;
460         unpack_cb.symcache[0] = '\0';
461         memset(&opts, 0, sizeof(opts));
462         opts.head_idx = 1;
463         opts.index_only = cached;
464         opts.merge = 1;
465         opts.fn = oneway_diff;
466         opts.unpack_data = &unpack_cb;
467         opts.src_index = &the_index;
468         opts.dst_index = NULL;
469
470         init_tree_desc(&t, tree->buffer, tree->size);
471         if (unpack_trees(1, &t, &opts))
472                 exit(128);
473
474         diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/");
475         diffcore_std(&revs->diffopt);
476         diff_flush(&revs->diffopt);
477         return 0;
478 }
479
480 int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt)
481 {
482         struct tree *tree;
483         struct rev_info revs;
484         int i;
485         struct cache_entry **dst;
486         struct cache_entry *last = NULL;
487         struct unpack_trees_options opts;
488         struct tree_desc t;
489         struct oneway_unpack_data unpack_cb;
490
491         /*
492          * This is used by git-blame to run diff-cache internally;
493          * it potentially needs to repeatedly run this, so we will
494          * start by removing the higher order entries the last round
495          * left behind.
496          */
497         dst = active_cache;
498         for (i = 0; i < active_nr; i++) {
499                 struct cache_entry *ce = active_cache[i];
500                 if (ce_stage(ce)) {
501                         if (last && !strcmp(ce->name, last->name))
502                                 continue;
503                         cache_tree_invalidate_path(active_cache_tree,
504                                                    ce->name);
505                         last = ce;
506                         ce->ce_flags |= CE_REMOVE;
507                 }
508                 *dst++ = ce;
509         }
510         active_nr = dst - active_cache;
511
512         init_revisions(&revs, NULL);
513         revs.prune_data = opt->paths;
514         tree = parse_tree_indirect(tree_sha1);
515         if (!tree)
516                 die("bad tree object %s", sha1_to_hex(tree_sha1));
517
518         unpack_cb.revs = &revs;
519         unpack_cb.symcache[0] = '\0';
520         memset(&opts, 0, sizeof(opts));
521         opts.head_idx = 1;
522         opts.index_only = 1;
523         opts.merge = 1;
524         opts.fn = oneway_diff;
525         opts.unpack_data = &unpack_cb;
526         opts.src_index = &the_index;
527         opts.dst_index = &the_index;
528
529         init_tree_desc(&t, tree->buffer, tree->size);
530         if (unpack_trees(1, &t, &opts))
531                 exit(128);
532         return 0;
533 }