Merge branch 'ab/progress-cleanup' into seen
[git] / merge-ort.c
1 /*
2  * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
3  * as a drop-in replacement for the "recursive" merge strategy, allowing one
4  * to replace
5  *
6  *   git merge [-s recursive]
7  *
8  * with
9  *
10  *   git merge -s ort
11  *
12  * Note: git's parser allows the space between '-s' and its argument to be
13  * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
14  * "cale", "peedy", or "ins" instead of "ort"?)
15  */
16
17 #include "cache.h"
18 #include "merge-ort.h"
19
20 #include "alloc.h"
21 #include "attr.h"
22 #include "blob.h"
23 #include "cache-tree.h"
24 #include "commit.h"
25 #include "commit-reach.h"
26 #include "diff.h"
27 #include "diffcore.h"
28 #include "dir.h"
29 #include "entry.h"
30 #include "ll-merge.h"
31 #include "object-store.h"
32 #include "revision.h"
33 #include "strmap.h"
34 #include "submodule.h"
35 #include "tree.h"
36 #include "unpack-trees.h"
37 #include "xdiff-interface.h"
38
/*
 * Index names for the three sides of a three-way merge.
 *
 * Arrays of size 3 are everywhere in this file, and each slot always
 * stands for one side of the merge.  The literal values 0, 1 and 2 are
 * frequently used directly (notably in arithmetic that derives the other
 * side's index, or that builds a bit mask), but these names are used where
 * they make the code easier to follow.
 *
 * The "ith side" mentioned for 'filemask' and 'dirmask' in struct
 * conflict_info is one of these three sides.
 */
enum merge_side {
	MERGE_BASE = 0,	/* the common ancestor */
	MERGE_SIDE1,	/* == 1 */
	MERGE_SIDE2	/* == 2 */
};
55
/*
 * Sentinel value (unlikely to occur by accident).  It is a fixed magic
 * number, so it is declared const to keep it from being modified.
 */
static const unsigned RESULT_INITIALIZED = 0x1abe11ed;
57
58 struct traversal_callback_data {
59         unsigned long mask;
60         unsigned long dirmask;
61         struct name_entry names[3];
62 };
63
64 struct rename_info {
65         /*
66          * All variables that are arrays of size 3 correspond to data tracked
67          * for the sides in enum merge_side.  Index 0 is almost always unused
68          * because we often only need to track information for MERGE_SIDE1 and
69          * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
70          * are determined relative to what changed since the MERGE_BASE).
71          */
72
73         /*
74          * pairs: pairing of filenames from diffcore_rename()
75          */
76         struct diff_queue_struct pairs[3];
77
78         /*
79          * dirs_removed: directories removed on a given side of history.
80          *
81          * The keys of dirs_removed[side] are the directories that were removed
82          * on the given side of history.  The value of the strintmap for each
83          * directory is a value from enum dir_rename_relevance.
84          */
85         struct strintmap dirs_removed[3];
86
87         /*
88          * dir_rename_count: tracking where parts of a directory were renamed to
89          *
90          * When files in a directory are renamed, they may not all go to the
91          * same location.  Each strmap here tracks:
92          *      old_dir => {new_dir => int}
93          * That is, dir_rename_count[side] is a strmap to a strintmap.
94          */
95         struct strmap dir_rename_count[3];
96
97         /*
98          * dir_renames: computed directory renames
99          *
100          * This is a map of old_dir => new_dir and is derived in part from
101          * dir_rename_count.
102          */
103         struct strmap dir_renames[3];
104
105         /*
106          * relevant_sources: deleted paths wanted in rename detection, and why
107          *
108          * relevant_sources is a set of deleted paths on each side of
109          * history for which we need rename detection.  If a path is deleted
110          * on one side of history, we need to detect if it is part of a
111          * rename if either
112          *    * the file is modified/deleted on the other side of history
113          *    * we need to detect renames for an ancestor directory
114          * If neither of those are true, we can skip rename detection for
115          * that path.  The reason is stored as a value from enum
116          * file_rename_relevance, as the reason can inform the algorithm in
117          * diffcore_rename_extended().
118          */
119         struct strintmap relevant_sources[3];
120
121         /*
122          * dir_rename_mask:
123          *   0: optimization removing unmodified potential rename source okay
124          *   2 or 4: optimization okay, but must check for files added to dir
125          *   7: optimization forbidden; need rename source in case of dir rename
126          */
127         unsigned dir_rename_mask:3;
128
129         /*
130          * callback_data_*: supporting data structures for alternate traversal
131          *
132          * We sometimes need to be able to traverse through all the files
133          * in a given tree before all immediate subdirectories within that
134          * tree.  Since traverse_trees() doesn't do that naturally, we have
135          * a traverse_trees_wrapper() that stores any immediate
136          * subdirectories while traversing files, then traverses the
137          * immediate subdirectories later.  These callback_data* variables
138          * store the information for the subdirectories so that we can do
139          * that traversal order.
140          */
141         struct traversal_callback_data *callback_data;
142         int callback_data_nr, callback_data_alloc;
143         char *callback_data_traverse_path;
144
145         /*
146          * merge_trees: trees passed to the merge algorithm for the merge
147          *
148          * merge_trees records the trees passed to the merge algorithm.  But,
149          * this data also is stored in merge_result->priv.  If a sequence of
150          * merges are being done (such as when cherry-picking or rebasing),
151          * the next merge can look at this and re-use information from
152          * previous merges under certain circumstances.
153          *
154          * See also all the cached_* variables.
155          */
156         struct tree *merge_trees[3];
157
158         /*
159          * cached_pairs_valid_side: which side's cached info can be reused
160          *
161          * See the description for merge_trees.  For repeated merges, at most
162          * only one side's cached information can be used.  Valid values:
163          *   MERGE_SIDE2: cached data from side2 can be reused
164          *   MERGE_SIDE1: cached data from side1 can be reused
165          *   0:           no cached data can be reused
166          */
167         int cached_pairs_valid_side;
168
169         /*
170          * cached_pairs: Caching of renames and deletions.
171          *
172          * These are mappings recording renames and deletions of individual
173          * files (not directories).  They are thus a map from an old
174          * filename to either NULL (for deletions) or a new filename (for
175          * renames).
176          */
177         struct strmap cached_pairs[3];
178
179         /*
180          * cached_target_names: just the destinations from cached_pairs
181          *
182          * We sometimes want a fast lookup to determine if a given filename
183          * is one of the destinations in cached_pairs.  cached_target_names
184          * is thus duplicative information, but it provides a fast lookup.
185          */
186         struct strset cached_target_names[3];
187
188         /*
189          * cached_irrelevant: Caching of rename_sources that aren't relevant.
190          *
191          * If we try to detect a rename for a source path and succeed, it's
192          * part of a rename.  If we try to detect a rename for a source path
193          * and fail, then it's a delete.  If we do not try to detect a rename
194          * for a path, then we don't know if it's a rename or a delete.  If
195          * merge-ort doesn't think the path is relevant, then we just won't
196          * cache anything for that path.  But there's a slight problem in
197          * that merge-ort can think a path is RELEVANT_LOCATION, but due to
198          * commit 9bd342137e ("diffcore-rename: determine which
199          * relevant_sources are no longer relevant", 2021-03-13),
200          * diffcore-rename can downgrade the path to RELEVANT_NO_MORE.  To
201          * avoid excessive calls to diffcore_rename_extended() we still need
202          * to cache such paths, though we cannot record them as either
203          * renames or deletes.  So we cache them here as a "turned out to be
204          * irrelevant *for this commit*" as they are often also irrelevant
205          * for subsequent commits, though we will have to do some extra
206          * checking to see whether such paths become relevant for rename
207          * detection when cherry-picking/rebasing subsequent commits.
208          */
209         struct strset cached_irrelevant[3];
210
211         /*
212          * needed_limit: value needed for inexact rename detection to run
213          *
214          * If the current rename limit wasn't high enough for inexact
215          * rename detection to run, this records the limit needed.  Otherwise,
216          * this value remains 0.
217          */
218         int needed_limit;
219 };
220
221 struct merge_options_internal {
222         /*
223          * paths: primary data structure in all of merge ort.
224          *
225          * The keys of paths:
226          *   * are full relative paths from the toplevel of the repository
227          *     (e.g. "drivers/firmware/raspberrypi.c").
228          *   * store all relevant paths in the repo, both directories and
229          *     files (e.g. drivers, drivers/firmware would also be included)
230          *   * these keys serve to intern all the path strings, which allows
231          *     us to do pointer comparison on directory names instead of
232          *     strcmp; we just have to be careful to use the interned strings.
233          *     (Technically paths_to_free may track some strings that were
234          *      removed from froms paths.)
235          *
236          * The values of paths:
237          *   * either a pointer to a merged_info, or a conflict_info struct
238          *   * merged_info contains all relevant information for a
239          *     non-conflicted entry.
240          *   * conflict_info contains a merged_info, plus any additional
241          *     information about a conflict such as the higher orders stages
242          *     involved and the names of the paths those came from (handy
243          *     once renames get involved).
244          *   * a path may start "conflicted" (i.e. point to a conflict_info)
245          *     and then a later step (e.g. three-way content merge) determines
246          *     it can be cleanly merged, at which point it'll be marked clean
247          *     and the algorithm will ignore any data outside the contained
248          *     merged_info for that entry
249          *   * If an entry remains conflicted, the merged_info portion of a
250          *     conflict_info will later be filled with whatever version of
251          *     the file should be placed in the working directory (e.g. an
252          *     as-merged-as-possible variation that contains conflict markers).
253          */
254         struct strmap paths;
255
256         /*
257          * conflicted: a subset of keys->values from "paths"
258          *
259          * conflicted is basically an optimization between process_entries()
260          * and record_conflicted_index_entries(); the latter could loop over
261          * ALL the entries in paths AGAIN and look for the ones that are
262          * still conflicted, but since process_entries() has to loop over
263          * all of them, it saves the ones it couldn't resolve in this strmap
264          * so that record_conflicted_index_entries() can iterate just the
265          * relevant entries.
266          */
267         struct strmap conflicted;
268
269         /*
270          * paths_to_free: additional list of strings to free
271          *
272          * If keys are removed from "paths", they are added to paths_to_free
273          * to ensure they are later freed.  We avoid free'ing immediately since
274          * other places (e.g. conflict_info.pathnames[]) may still be
275          * referencing these paths.
276          */
277         struct string_list paths_to_free;
278
279         /*
280          * output: special messages and conflict notices for various paths
281          *
282          * This is a map of pathnames (a subset of the keys in "paths" above)
283          * to strbufs.  It gathers various warning/conflict/notice messages
284          * for later processing.
285          */
286         struct strmap output;
287
288         /*
289          * renames: various data relating to rename detection
290          */
291         struct rename_info renames;
292
293         /*
294          * attr_index: hacky minimal index used for renormalization
295          *
296          * renormalization code _requires_ an index, though it only needs to
297          * find a .gitattributes file within the index.  So, when
298          * renormalization is important, we create a special index with just
299          * that one file.
300          */
301         struct index_state attr_index;
302
303         /*
304          * current_dir_name, toplevel_dir: temporary vars
305          *
306          * These are used in collect_merge_info_callback(), and will set the
307          * various merged_info.directory_name for the various paths we get;
308          * see documentation for that variable and the requirements placed on
309          * that field.
310          */
311         const char *current_dir_name;
312         const char *toplevel_dir;
313
314         /* call_depth: recursion level counter for merging merge bases */
315         int call_depth;
316 };
317
318 struct version_info {
319         struct object_id oid;
320         unsigned short mode;
321 };
322
323 struct merged_info {
324         /* if is_null, ignore result.  otherwise result has oid & mode */
325         struct version_info result;
326         unsigned is_null:1;
327
328         /*
329          * clean: whether the path in question is cleanly merged.
330          *
331          * see conflict_info.merged for more details.
332          */
333         unsigned clean:1;
334
335         /*
336          * basename_offset: offset of basename of path.
337          *
338          * perf optimization to avoid recomputing offset of final '/'
339          * character in pathname (0 if no '/' in pathname).
340          */
341         size_t basename_offset;
342
343          /*
344           * directory_name: containing directory name.
345           *
346           * Note that we assume directory_name is constructed such that
347           *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
348           * i.e. string equality is equivalent to pointer equality.  For this
349           * to hold, we have to be careful setting directory_name.
350           */
351         const char *directory_name;
352 };
353
354 struct conflict_info {
355         /*
356          * merged: the version of the path that will be written to working tree
357          *
358          * WARNING: It is critical to check merged.clean and ensure it is 0
359          * before reading any conflict_info fields outside of merged.
360          * Allocated merge_info structs will always have clean set to 1.
361          * Allocated conflict_info structs will have merged.clean set to 0
362          * initially.  The merged.clean field is how we know if it is safe
363          * to access other parts of conflict_info besides merged; if a
364          * conflict_info's merged.clean is changed to 1, the rest of the
365          * algorithm is not allowed to look at anything outside of the
366          * merged member anymore.
367          */
368         struct merged_info merged;
369
370         /* oids & modes from each of the three trees for this path */
371         struct version_info stages[3];
372
373         /* pathnames for each stage; may differ due to rename detection */
374         const char *pathnames[3];
375
376         /* Whether this path is/was involved in a directory/file conflict */
377         unsigned df_conflict:1;
378
379         /*
380          * Whether this path is/was involved in a non-content conflict other
381          * than a directory/file conflict (e.g. rename/rename, rename/delete,
382          * file location based on possible directory rename).
383          */
384         unsigned path_conflict:1;
385
386         /*
387          * For filemask and dirmask, the ith bit corresponds to whether the
388          * ith entry is a file (filemask) or a directory (dirmask).  Thus,
389          * filemask & dirmask is always zero, and filemask | dirmask is at
390          * most 7 but can be less when a path does not appear as either a
391          * file or a directory on at least one side of history.
392          *
393          * Note that these masks are related to enum merge_side, as the ith
394          * entry corresponds to side i.
395          *
396          * These values come from a traverse_trees() call; more info may be
397          * found looking at tree-walk.h's struct traverse_info,
398          * particularly the documentation above the "fn" member (note that
399          * filemask = mask & ~dirmask from that documentation).
400          */
401         unsigned filemask:3;
402         unsigned dirmask:3;
403
404         /*
405          * Optimization to track which stages match, to avoid the need to
406          * recompute it in multiple steps. Either 0 or at least 2 bits are
407          * set; if at least 2 bits are set, their corresponding stages match.
408          */
409         unsigned match_mask:3;
410 };
411
412 /*** Function Grouping: various utility functions ***/
413
/*
 * For the next three macros, see the warning for conflict_info.merged.
 *
 * In each of the below, mi is a struct merged_info*, and ci was defined
 * as a struct conflict_info* (but we need to verify ci isn't actually
 * pointed at a struct merged_info*).
 *
 * INITIALIZE_CI: Assign mi to ci, but only if that is safe (mi is non-NULL
 *                and not clean); set ci to NULL otherwise.
 * VERIFY_CI: Assert that something we assigned to a conflict_info* is one,
 *            i.e. it is non-NULL and its merged.clean is 0.
 * ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do the assignment first.
 *
 * All macro arguments are parenthesized so that expressions such as
 * "&info" or "base + i" can be passed.  VERIFY_CI keeps its historical
 * trailing semicolon so existing uses keep compiling unchanged.
 */
#define INITIALIZE_CI(ci, mi) do {                                           \
	(ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \
} while (0)
#define VERIFY_CI(ci) assert((ci) && !(ci)->merged.clean);
#define ASSIGN_AND_VERIFY_CI(ci, mi) do {    \
	(ci) = (struct conflict_info *)(mi);  \
	assert((ci) && !(mi)->clean);        \
} while (0)
433
434 static void free_strmap_strings(struct strmap *map)
435 {
436         struct hashmap_iter iter;
437         struct strmap_entry *entry;
438
439         strmap_for_each_entry(map, &iter, entry) {
440                 free((char*)entry->key);
441         }
442 }
443
444 static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
445                                           int reinitialize)
446 {
447         struct rename_info *renames = &opti->renames;
448         int i;
449         void (*strmap_func)(struct strmap *, int) =
450                 reinitialize ? strmap_partial_clear : strmap_clear;
451         void (*strintmap_func)(struct strintmap *) =
452                 reinitialize ? strintmap_partial_clear : strintmap_clear;
453         void (*strset_func)(struct strset *) =
454                 reinitialize ? strset_partial_clear : strset_clear;
455
456         /*
457          * We marked opti->paths with strdup_strings = 0, so that we
458          * wouldn't have to make another copy of the fullpath created by
459          * make_traverse_path from setup_path_info().  But, now that we've
460          * used it and have no other references to these strings, it is time
461          * to deallocate them.
462          */
463         free_strmap_strings(&opti->paths);
464         strmap_func(&opti->paths, 1);
465
466         /*
467          * All keys and values in opti->conflicted are a subset of those in
468          * opti->paths.  We don't want to deallocate anything twice, so we
469          * don't free the keys and we pass 0 for free_values.
470          */
471         strmap_func(&opti->conflicted, 0);
472
473         /*
474          * opti->paths_to_free is similar to opti->paths; we created it with
475          * strdup_strings = 0 to avoid making _another_ copy of the fullpath
476          * but now that we've used it and have no other references to these
477          * strings, it is time to deallocate them.  We do so by temporarily
478          * setting strdup_strings to 1.
479          */
480         opti->paths_to_free.strdup_strings = 1;
481         string_list_clear(&opti->paths_to_free, 0);
482         opti->paths_to_free.strdup_strings = 0;
483
484         if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
485                 discard_index(&opti->attr_index);
486
487         /* Free memory used by various renames maps */
488         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
489                 strintmap_func(&renames->dirs_removed[i]);
490                 strmap_func(&renames->dir_renames[i], 0);
491                 strintmap_func(&renames->relevant_sources[i]);
492                 if (!reinitialize)
493                         assert(renames->cached_pairs_valid_side == 0);
494                 if (i != renames->cached_pairs_valid_side) {
495                         strset_func(&renames->cached_target_names[i]);
496                         strmap_func(&renames->cached_pairs[i], 1);
497                         strset_func(&renames->cached_irrelevant[i]);
498                         partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
499                         if (!reinitialize)
500                                 strmap_clear(&renames->dir_rename_count[i], 1);
501                 }
502         }
503         renames->cached_pairs_valid_side = 0;
504         renames->dir_rename_mask = 0;
505
506         if (!reinitialize) {
507                 struct hashmap_iter iter;
508                 struct strmap_entry *e;
509
510                 /* Release and free each strbuf found in output */
511                 strmap_for_each_entry(&opti->output, &iter, e) {
512                         struct strbuf *sb = e->value;
513                         strbuf_release(sb);
514                         /*
515                          * While strictly speaking we don't need to free(sb)
516                          * here because we could pass free_values=1 when
517                          * calling strmap_clear() on opti->output, that would
518                          * require strmap_clear to do another
519                          * strmap_for_each_entry() loop, so we just free it
520                          * while we're iterating anyway.
521                          */
522                         free(sb);
523                 }
524                 strmap_clear(&opti->output, 0);
525         }
526
527         /* Clean out callback_data as well. */
528         FREE_AND_NULL(renames->callback_data);
529         renames->callback_data_nr = renames->callback_data_alloc = 0;
530 }
531
532 static int err(struct merge_options *opt, const char *err, ...)
533 {
534         va_list params;
535         struct strbuf sb = STRBUF_INIT;
536
537         strbuf_addstr(&sb, "error: ");
538         va_start(params, err);
539         strbuf_vaddf(&sb, err, params);
540         va_end(params);
541
542         error("%s", sb.buf);
543         strbuf_release(&sb);
544
545         return -1;
546 }
547
548 static void format_commit(struct strbuf *sb,
549                           int indent,
550                           struct commit *commit)
551 {
552         struct merge_remote_desc *desc;
553         struct pretty_print_context ctx = {0};
554         ctx.abbrev = DEFAULT_ABBREV;
555
556         strbuf_addchars(sb, ' ', indent);
557         desc = merge_remote_util(commit);
558         if (desc) {
559                 strbuf_addf(sb, "virtual %s\n", desc->name);
560                 return;
561         }
562
563         format_commit_message(commit, "%h %s", sb, &ctx);
564         strbuf_addch(sb, '\n');
565 }
566
567 __attribute__((format (printf, 4, 5)))
568 static void path_msg(struct merge_options *opt,
569                      const char *path,
570                      int omittable_hint, /* skippable under --remerge-diff */
571                      const char *fmt, ...)
572 {
573         va_list ap;
574         struct strbuf *sb = strmap_get(&opt->priv->output, path);
575         if (!sb) {
576                 sb = xmalloc(sizeof(*sb));
577                 strbuf_init(sb, 0);
578                 strmap_put(&opt->priv->output, path, sb);
579         }
580
581         va_start(ap, fmt);
582         strbuf_vaddf(sb, fmt, ap);
583         va_end(ap);
584
585         strbuf_addch(sb, '\n');
586 }
587
588 /* add a string to a strbuf, but converting "/" to "_" */
589 static void add_flattened_path(struct strbuf *out, const char *s)
590 {
591         size_t i = out->len;
592         strbuf_addstr(out, s);
593         for (; i < out->len; i++)
594                 if (out->buf[i] == '/')
595                         out->buf[i] = '_';
596 }
597
598 static char *unique_path(struct strmap *existing_paths,
599                          const char *path,
600                          const char *branch)
601 {
602         struct strbuf newpath = STRBUF_INIT;
603         int suffix = 0;
604         size_t base_len;
605
606         strbuf_addf(&newpath, "%s~", path);
607         add_flattened_path(&newpath, branch);
608
609         base_len = newpath.len;
610         while (strmap_contains(existing_paths, newpath.buf)) {
611                 strbuf_setlen(&newpath, base_len);
612                 strbuf_addf(&newpath, "_%d", suffix++);
613         }
614
615         return strbuf_detach(&newpath, NULL);
616 }
617
618 /*** Function Grouping: functions related to collect_merge_info() ***/
619
620 static int traverse_trees_wrapper_callback(int n,
621                                            unsigned long mask,
622                                            unsigned long dirmask,
623                                            struct name_entry *names,
624                                            struct traverse_info *info)
625 {
626         struct merge_options *opt = info->data;
627         struct rename_info *renames = &opt->priv->renames;
628         unsigned filemask = mask & ~dirmask;
629
630         assert(n==3);
631
632         if (!renames->callback_data_traverse_path)
633                 renames->callback_data_traverse_path = xstrdup(info->traverse_path);
634
635         if (filemask && filemask == renames->dir_rename_mask)
636                 renames->dir_rename_mask = 0x07;
637
638         ALLOC_GROW(renames->callback_data, renames->callback_data_nr + 1,
639                    renames->callback_data_alloc);
640         renames->callback_data[renames->callback_data_nr].mask = mask;
641         renames->callback_data[renames->callback_data_nr].dirmask = dirmask;
642         COPY_ARRAY(renames->callback_data[renames->callback_data_nr].names,
643                    names, 3);
644         renames->callback_data_nr++;
645
646         return mask;
647 }
648
649 /*
650  * Much like traverse_trees(), BUT:
651  *   - read all the tree entries FIRST, saving them
652  *   - note that the above step provides an opportunity to compute necessary
653  *     additional details before the "real" traversal
654  *   - loop through the saved entries and call the original callback on them
655  */
656 static int traverse_trees_wrapper(struct index_state *istate,
657                                   int n,
658                                   struct tree_desc *t,
659                                   struct traverse_info *info)
660 {
661         int ret, i, old_offset;
662         traverse_callback_t old_fn;
663         char *old_callback_data_traverse_path;
664         struct merge_options *opt = info->data;
665         struct rename_info *renames = &opt->priv->renames;
666
667         assert(renames->dir_rename_mask == 2 || renames->dir_rename_mask == 4);
668
669         old_callback_data_traverse_path = renames->callback_data_traverse_path;
670         old_fn = info->fn;
671         old_offset = renames->callback_data_nr;
672
673         renames->callback_data_traverse_path = NULL;
674         info->fn = traverse_trees_wrapper_callback;
675         ret = traverse_trees(istate, n, t, info);
676         if (ret < 0)
677                 return ret;
678
679         info->traverse_path = renames->callback_data_traverse_path;
680         info->fn = old_fn;
681         for (i = old_offset; i < renames->callback_data_nr; ++i) {
682                 info->fn(n,
683                          renames->callback_data[i].mask,
684                          renames->callback_data[i].dirmask,
685                          renames->callback_data[i].names,
686                          info);
687         }
688
689         renames->callback_data_nr = old_offset;
690         free(renames->callback_data_traverse_path);
691         renames->callback_data_traverse_path = old_callback_data_traverse_path;
692         info->traverse_path = NULL;
693         return 0;
694 }
695
696 static void setup_path_info(struct merge_options *opt,
697                             struct string_list_item *result,
698                             const char *current_dir_name,
699                             int current_dir_name_len,
700                             char *fullpath, /* we'll take over ownership */
701                             struct name_entry *names,
702                             struct name_entry *merged_version,
703                             unsigned is_null,     /* boolean */
704                             unsigned df_conflict, /* boolean */
705                             unsigned filemask,
706                             unsigned dirmask,
707                             int resolved          /* boolean */)
708 {
709         /* result->util is void*, so mi is a convenience typed variable */
710         struct merged_info *mi;
711
712         assert(!is_null || resolved);
713         assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
714         assert(resolved == (merged_version != NULL));
715
716         mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
717                                    sizeof(struct conflict_info));
718         mi->directory_name = current_dir_name;
719         mi->basename_offset = current_dir_name_len;
720         mi->clean = !!resolved;
721         if (resolved) {
722                 mi->result.mode = merged_version->mode;
723                 oidcpy(&mi->result.oid, &merged_version->oid);
724                 mi->is_null = !!is_null;
725         } else {
726                 int i;
727                 struct conflict_info *ci;
728
729                 ASSIGN_AND_VERIFY_CI(ci, mi);
730                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
731                         ci->pathnames[i] = fullpath;
732                         ci->stages[i].mode = names[i].mode;
733                         oidcpy(&ci->stages[i].oid, &names[i].oid);
734                 }
735                 ci->filemask = filemask;
736                 ci->dirmask = dirmask;
737                 ci->df_conflict = !!df_conflict;
738                 if (dirmask)
739                         /*
740                          * Assume is_null for now, but if we have entries
741                          * under the directory then when it is complete in
742                          * write_completed_directory() it'll update this.
743                          * Also, for D/F conflicts, we have to handle the
744                          * directory first, then clear this bit and process
745                          * the file to see how it is handled -- that occurs
746                          * near the top of process_entry().
747                          */
748                         mi->is_null = 1;
749         }
750         strmap_put(&opt->priv->paths, fullpath, mi);
751         result->string = fullpath;
752         result->util = mi;
753 }
754
755 static void add_pair(struct merge_options *opt,
756                      struct name_entry *names,
757                      const char *pathname,
758                      unsigned side,
759                      unsigned is_add /* if false, is_delete */,
760                      unsigned match_mask,
761                      unsigned dir_rename_mask)
762 {
763         struct diff_filespec *one, *two;
764         struct rename_info *renames = &opt->priv->renames;
765         int names_idx = is_add ? side : 0;
766
767         if (is_add) {
768                 assert(match_mask == 0 || match_mask == 6);
769                 if (strset_contains(&renames->cached_target_names[side],
770                                     pathname))
771                         return;
772         } else {
773                 unsigned content_relevant = (match_mask == 0);
774                 unsigned location_relevant = (dir_rename_mask == 0x07);
775
776                 assert(match_mask == 0 || match_mask == 3 || match_mask == 5);
777
778                 /*
779                  * If pathname is found in cached_irrelevant[side] due to
780                  * previous pick but for this commit content is relevant,
781                  * then we need to remove it from cached_irrelevant.
782                  */
783                 if (content_relevant)
784                         /* strset_remove is no-op if strset doesn't have key */
785                         strset_remove(&renames->cached_irrelevant[side],
786                                       pathname);
787
788                 /*
789                  * We do not need to re-detect renames for paths that we already
790                  * know the pairing, i.e. for cached_pairs (or
791                  * cached_irrelevant).  However, handle_deferred_entries() needs
792                  * to loop over the union of keys from relevant_sources[side] and
793                  * cached_pairs[side], so for simplicity we set relevant_sources
794                  * for all the cached_pairs too and then strip them back out in
795                  * prune_cached_from_relevant() at the beginning of
796                  * detect_regular_renames().
797                  */
798                 if (content_relevant || location_relevant) {
799                         /* content_relevant trumps location_relevant */
800                         strintmap_set(&renames->relevant_sources[side], pathname,
801                                       content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
802                 }
803
804                 /*
805                  * Avoid creating pair if we've already cached rename results.
806                  * Note that we do this after setting relevant_sources[side]
807                  * as noted in the comment above.
808                  */
809                 if (strmap_contains(&renames->cached_pairs[side], pathname) ||
810                     strset_contains(&renames->cached_irrelevant[side], pathname))
811                         return;
812         }
813
814         one = alloc_filespec(pathname);
815         two = alloc_filespec(pathname);
816         fill_filespec(is_add ? two : one,
817                       &names[names_idx].oid, 1, names[names_idx].mode);
818         diff_queue(&renames->pairs[side], one, two);
819 }
820
821 static void collect_rename_info(struct merge_options *opt,
822                                 struct name_entry *names,
823                                 const char *dirname,
824                                 const char *fullname,
825                                 unsigned filemask,
826                                 unsigned dirmask,
827                                 unsigned match_mask)
828 {
829         struct rename_info *renames = &opt->priv->renames;
830         unsigned side;
831
832         /*
833          * Update dir_rename_mask (determines ignore-rename-source validity)
834          *
835          * dir_rename_mask helps us keep track of when directory rename
836          * detection may be relevant.  Basically, whenver a directory is
837          * removed on one side of history, and a file is added to that
838          * directory on the other side of history, directory rename
839          * detection is relevant (meaning we have to detect renames for all
840          * files within that directory to deduce where the directory
841          * moved).  Also, whenever a directory needs directory rename
842          * detection, due to the "majority rules" choice for where to move
843          * it (see t6423 testcase 1f), we also need to detect renames for
844          * all files within subdirectories of that directory as well.
845          *
846          * Here we haven't looked at files within the directory yet, we are
847          * just looking at the directory itself.  So, if we aren't yet in
848          * a case where a parent directory needed directory rename detection
849          * (i.e. dir_rename_mask != 0x07), and if the directory was removed
850          * on one side of history, record the mask of the other side of
851          * history in dir_rename_mask.
852          */
853         if (renames->dir_rename_mask != 0x07 &&
854             (dirmask == 3 || dirmask == 5)) {
855                 /* simple sanity check */
856                 assert(renames->dir_rename_mask == 0 ||
857                        renames->dir_rename_mask == (dirmask & ~1));
858                 /* update dir_rename_mask; have it record mask of new side */
859                 renames->dir_rename_mask = (dirmask & ~1);
860         }
861
862         /* Update dirs_removed, as needed */
863         if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
864                 /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
865                 unsigned sides = (0x07 - dirmask)/2;
866                 unsigned relevance = (renames->dir_rename_mask == 0x07) ?
867                                         RELEVANT_FOR_ANCESTOR : NOT_RELEVANT;
868                 /*
869                  * Record relevance of this directory.  However, note that
870                  * when collect_merge_info_callback() recurses into this
871                  * directory and calls collect_rename_info() on paths
872                  * within that directory, if we find a path that was added
873                  * to this directory on the other side of history, we will
874                  * upgrade this value to RELEVANT_FOR_SELF; see below.
875                  */
876                 if (sides & 1)
877                         strintmap_set(&renames->dirs_removed[1], fullname,
878                                       relevance);
879                 if (sides & 2)
880                         strintmap_set(&renames->dirs_removed[2], fullname,
881                                       relevance);
882         }
883
884         /*
885          * Here's the block that potentially upgrades to RELEVANT_FOR_SELF.
886          * When we run across a file added to a directory.  In such a case,
887          * find the directory of the file and upgrade its relevance.
888          */
889         if (renames->dir_rename_mask == 0x07 &&
890             (filemask == 2 || filemask == 4)) {
891                 /*
892                  * Need directory rename for parent directory on other side
893                  * of history from added file.  Thus
894                  *    side = (~filemask & 0x06) >> 1
895                  * or
896                  *    side = 3 - (filemask/2).
897                  */
898                 unsigned side = 3 - (filemask >> 1);
899                 strintmap_set(&renames->dirs_removed[side], dirname,
900                               RELEVANT_FOR_SELF);
901         }
902
903         if (filemask == 0 || filemask == 7)
904                 return;
905
906         for (side = MERGE_SIDE1; side <= MERGE_SIDE2; ++side) {
907                 unsigned side_mask = (1 << side);
908
909                 /* Check for deletion on side */
910                 if ((filemask & 1) && !(filemask & side_mask))
911                         add_pair(opt, names, fullname, side, 0 /* delete */,
912                                  match_mask & filemask,
913                                  renames->dir_rename_mask);
914
915                 /* Check for addition on side */
916                 if (!(filemask & 1) && (filemask & side_mask))
917                         add_pair(opt, names, fullname, side, 1 /* add */,
918                                  match_mask & filemask,
919                                  renames->dir_rename_mask);
920         }
921 }
922
923 static int collect_merge_info_callback(int n,
924                                        unsigned long mask,
925                                        unsigned long dirmask,
926                                        struct name_entry *names,
927                                        struct traverse_info *info)
928 {
929         /*
930          * n is 3.  Always.
931          * common ancestor (mbase) has mask 1, and stored in index 0 of names
932          * head of side 1  (side1) has mask 2, and stored in index 1 of names
933          * head of side 2  (side2) has mask 4, and stored in index 2 of names
934          */
935         struct merge_options *opt = info->data;
936         struct merge_options_internal *opti = opt->priv;
937         struct rename_info *renames = &opt->priv->renames;
938         struct string_list_item pi;  /* Path Info */
939         struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
940         struct name_entry *p;
941         size_t len;
942         char *fullpath;
943         const char *dirname = opti->current_dir_name;
944         unsigned prev_dir_rename_mask = renames->dir_rename_mask;
945         unsigned filemask = mask & ~dirmask;
946         unsigned match_mask = 0; /* will be updated below */
947         unsigned mbase_null = !(mask & 1);
948         unsigned side1_null = !(mask & 2);
949         unsigned side2_null = !(mask & 4);
950         unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
951                                         names[0].mode == names[1].mode &&
952                                         oideq(&names[0].oid, &names[1].oid));
953         unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
954                                         names[0].mode == names[2].mode &&
955                                         oideq(&names[0].oid, &names[2].oid));
956         unsigned sides_match = (!side1_null && !side2_null &&
957                                 names[1].mode == names[2].mode &&
958                                 oideq(&names[1].oid, &names[2].oid));
959
960         /*
961          * Note: When a path is a file on one side of history and a directory
962          * in another, we have a directory/file conflict.  In such cases, if
963          * the conflict doesn't resolve from renames and deletions, then we
964          * always leave directories where they are and move files out of the
965          * way.  Thus, while struct conflict_info has a df_conflict field to
966          * track such conflicts, we ignore that field for any directories at
967          * a path and only pay attention to it for files at the given path.
968          * The fact that we leave directories were they are also means that
969          * we do not need to worry about getting additional df_conflict
970          * information propagated from parent directories down to children
971          * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
972          * sets a newinfo.df_conflicts field specifically to propagate it).
973          */
974         unsigned df_conflict = (filemask != 0) && (dirmask != 0);
975
976         /* n = 3 is a fundamental assumption. */
977         if (n != 3)
978                 BUG("Called collect_merge_info_callback wrong");
979
980         /*
981          * A bunch of sanity checks verifying that traverse_trees() calls
982          * us the way I expect.  Could just remove these at some point,
983          * though maybe they are helpful to future code readers.
984          */
985         assert(mbase_null == is_null_oid(&names[0].oid));
986         assert(side1_null == is_null_oid(&names[1].oid));
987         assert(side2_null == is_null_oid(&names[2].oid));
988         assert(!mbase_null || !side1_null || !side2_null);
989         assert(mask > 0 && mask < 8);
990
991         /* Determine match_mask */
992         if (side1_matches_mbase)
993                 match_mask = (side2_matches_mbase ? 7 : 3);
994         else if (side2_matches_mbase)
995                 match_mask = 5;
996         else if (sides_match)
997                 match_mask = 6;
998
999         /*
1000          * Get the name of the relevant filepath, which we'll pass to
1001          * setup_path_info() for tracking.
1002          */
1003         p = names;
1004         while (!p->mode)
1005                 p++;
1006         len = traverse_path_len(info, p->pathlen);
1007
1008         /* +1 in both of the following lines to include the NUL byte */
1009         fullpath = xmalloc(len + 1);
1010         make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
1011
1012         /*
1013          * If mbase, side1, and side2 all match, we can resolve early.  Even
1014          * if these are trees, there will be no renames or anything
1015          * underneath.
1016          */
1017         if (side1_matches_mbase && side2_matches_mbase) {
1018                 /* mbase, side1, & side2 all match; use mbase as resolution */
1019                 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1020                                 names, names+0, mbase_null, 0,
1021                                 filemask, dirmask, 1);
1022                 return mask;
1023         }
1024
1025         /*
1026          * Gather additional information used in rename detection.
1027          */
1028         collect_rename_info(opt, names, dirname, fullpath,
1029                             filemask, dirmask, match_mask);
1030
1031         /*
1032          * Record information about the path so we can resolve later in
1033          * process_entries.
1034          */
1035         setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
1036                         names, NULL, 0, df_conflict, filemask, dirmask, 0);
1037
1038         ci = pi.util;
1039         VERIFY_CI(ci);
1040         ci->match_mask = match_mask;
1041
1042         /* If dirmask, recurse into subdirectories */
1043         if (dirmask) {
1044                 struct traverse_info newinfo;
1045                 struct tree_desc t[3];
1046                 void *buf[3] = {NULL, NULL, NULL};
1047                 const char *original_dir_name;
1048                 int i, ret;
1049
1050                 ci->match_mask &= filemask;
1051                 newinfo = *info;
1052                 newinfo.prev = info;
1053                 newinfo.name = p->path;
1054                 newinfo.namelen = p->pathlen;
1055                 newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
1056                 /*
1057                  * If this directory we are about to recurse into cared about
1058                  * its parent directory (the current directory) having a D/F
1059                  * conflict, then we'd propagate the masks in this way:
1060                  *    newinfo.df_conflicts |= (mask & ~dirmask);
1061                  * But we don't worry about propagating D/F conflicts.  (See
1062                  * comment near setting of local df_conflict variable near
1063                  * the beginning of this function).
1064                  */
1065
1066                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
1067                         if (i == 1 && side1_matches_mbase)
1068                                 t[1] = t[0];
1069                         else if (i == 2 && side2_matches_mbase)
1070                                 t[2] = t[0];
1071                         else if (i == 2 && sides_match)
1072                                 t[2] = t[1];
1073                         else {
1074                                 const struct object_id *oid = NULL;
1075                                 if (dirmask & 1)
1076                                         oid = &names[i].oid;
1077                                 buf[i] = fill_tree_descriptor(opt->repo,
1078                                                               t + i, oid);
1079                         }
1080                         dirmask >>= 1;
1081                 }
1082
1083                 original_dir_name = opti->current_dir_name;
1084                 opti->current_dir_name = pi.string;
1085                 if (renames->dir_rename_mask == 0 ||
1086                     renames->dir_rename_mask == 0x07)
1087                         ret = traverse_trees(NULL, 3, t, &newinfo);
1088                 else
1089                         ret = traverse_trees_wrapper(NULL, 3, t, &newinfo);
1090                 opti->current_dir_name = original_dir_name;
1091                 renames->dir_rename_mask = prev_dir_rename_mask;
1092
1093                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
1094                         free(buf[i]);
1095
1096                 if (ret < 0)
1097                         return -1;
1098         }
1099
1100         return mask;
1101 }
1102
1103 static int collect_merge_info(struct merge_options *opt,
1104                               struct tree *merge_base,
1105                               struct tree *side1,
1106                               struct tree *side2)
1107 {
1108         int ret;
1109         struct tree_desc t[3];
1110         struct traverse_info info;
1111
1112         opt->priv->toplevel_dir = "";
1113         opt->priv->current_dir_name = opt->priv->toplevel_dir;
1114         setup_traverse_info(&info, opt->priv->toplevel_dir);
1115         info.fn = collect_merge_info_callback;
1116         info.data = opt;
1117         info.show_all_errors = 1;
1118
1119         parse_tree(merge_base);
1120         parse_tree(side1);
1121         parse_tree(side2);
1122         init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
1123         init_tree_desc(t + 1, side1->buffer, side1->size);
1124         init_tree_desc(t + 2, side2->buffer, side2->size);
1125
1126         trace2_region_enter("merge", "traverse_trees", opt->repo);
1127         ret = traverse_trees(NULL, 3, t, &info);
1128         trace2_region_leave("merge", "traverse_trees", opt->repo);
1129
1130         return ret;
1131 }
1132
1133 /*** Function Grouping: functions related to threeway content merges ***/
1134
1135 static int find_first_merges(struct repository *repo,
1136                              const char *path,
1137                              struct commit *a,
1138                              struct commit *b,
1139                              struct object_array *result)
1140 {
1141         int i, j;
1142         struct object_array merges = OBJECT_ARRAY_INIT;
1143         struct commit *commit;
1144         int contains_another;
1145
1146         char merged_revision[GIT_MAX_HEXSZ + 2];
1147         const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path",
1148                                    "--all", merged_revision, NULL };
1149         struct rev_info revs;
1150         struct setup_revision_opt rev_opts;
1151
1152         memset(result, 0, sizeof(struct object_array));
1153         memset(&rev_opts, 0, sizeof(rev_opts));
1154
1155         /* get all revisions that merge commit a */
1156         xsnprintf(merged_revision, sizeof(merged_revision), "^%s",
1157                   oid_to_hex(&a->object.oid));
1158         repo_init_revisions(repo, &revs, NULL);
1159         rev_opts.submodule = path;
1160         /* FIXME: can't handle linked worktrees in submodules yet */
1161         revs.single_worktree = path != NULL;
1162         setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts);
1163
1164         /* save all revisions from the above list that contain b */
1165         if (prepare_revision_walk(&revs))
1166                 die("revision walk setup failed");
1167         while ((commit = get_revision(&revs)) != NULL) {
1168                 struct object *o = &(commit->object);
1169                 if (in_merge_bases(b, commit))
1170                         add_object_array(o, NULL, &merges);
1171         }
1172         reset_revision_walk();
1173
1174         /* Now we've got all merges that contain a and b. Prune all
1175          * merges that contain another found merge and save them in
1176          * result.
1177          */
1178         for (i = 0; i < merges.nr; i++) {
1179                 struct commit *m1 = (struct commit *) merges.objects[i].item;
1180
1181                 contains_another = 0;
1182                 for (j = 0; j < merges.nr; j++) {
1183                         struct commit *m2 = (struct commit *) merges.objects[j].item;
1184                         if (i != j && in_merge_bases(m2, m1)) {
1185                                 contains_another = 1;
1186                                 break;
1187                         }
1188                 }
1189
1190                 if (!contains_another)
1191                         add_object_array(merges.objects[i].item, NULL, result);
1192         }
1193
1194         object_array_clear(&merges);
1195         return result->nr;
1196 }
1197
1198 static int merge_submodule(struct merge_options *opt,
1199                            const char *path,
1200                            const struct object_id *o,
1201                            const struct object_id *a,
1202                            const struct object_id *b,
1203                            struct object_id *result)
1204 {
1205         struct commit *commit_o, *commit_a, *commit_b;
1206         int parent_count;
1207         struct object_array merges;
1208         struct strbuf sb = STRBUF_INIT;
1209
1210         int i;
1211         int search = !opt->priv->call_depth;
1212
1213         /* store fallback answer in result in case we fail */
1214         oidcpy(result, opt->priv->call_depth ? o : a);
1215
1216         /* we can not handle deletion conflicts */
1217         if (is_null_oid(o))
1218                 return 0;
1219         if (is_null_oid(a))
1220                 return 0;
1221         if (is_null_oid(b))
1222                 return 0;
1223
1224         if (add_submodule_odb(path)) {
1225                 path_msg(opt, path, 0,
1226                          _("Failed to merge submodule %s (not checked out)"),
1227                          path);
1228                 return 0;
1229         }
1230
1231         if (!(commit_o = lookup_commit_reference(opt->repo, o)) ||
1232             !(commit_a = lookup_commit_reference(opt->repo, a)) ||
1233             !(commit_b = lookup_commit_reference(opt->repo, b))) {
1234                 path_msg(opt, path, 0,
1235                          _("Failed to merge submodule %s (commits not present)"),
1236                          path);
1237                 return 0;
1238         }
1239
1240         /* check whether both changes are forward */
1241         if (!in_merge_bases(commit_o, commit_a) ||
1242             !in_merge_bases(commit_o, commit_b)) {
1243                 path_msg(opt, path, 0,
1244                          _("Failed to merge submodule %s "
1245                            "(commits don't follow merge-base)"),
1246                          path);
1247                 return 0;
1248         }
1249
1250         /* Case #1: a is contained in b or vice versa */
1251         if (in_merge_bases(commit_a, commit_b)) {
1252                 oidcpy(result, b);
1253                 path_msg(opt, path, 1,
1254                          _("Note: Fast-forwarding submodule %s to %s"),
1255                          path, oid_to_hex(b));
1256                 return 1;
1257         }
1258         if (in_merge_bases(commit_b, commit_a)) {
1259                 oidcpy(result, a);
1260                 path_msg(opt, path, 1,
1261                          _("Note: Fast-forwarding submodule %s to %s"),
1262                          path, oid_to_hex(a));
1263                 return 1;
1264         }
1265
1266         /*
1267          * Case #2: There are one or more merges that contain a and b in
1268          * the submodule. If there is only one, then present it as a
1269          * suggestion to the user, but leave it marked unmerged so the
1270          * user needs to confirm the resolution.
1271          */
1272
1273         /* Skip the search if makes no sense to the calling context.  */
1274         if (!search)
1275                 return 0;
1276
1277         /* find commit which merges them */
1278         parent_count = find_first_merges(opt->repo, path, commit_a, commit_b,
1279                                          &merges);
1280         switch (parent_count) {
1281         case 0:
1282                 path_msg(opt, path, 0, _("Failed to merge submodule %s"), path);
1283                 break;
1284
1285         case 1:
1286                 format_commit(&sb, 4,
1287                               (struct commit *)merges.objects[0].item);
1288                 path_msg(opt, path, 0,
1289                          _("Failed to merge submodule %s, but a possible merge "
1290                            "resolution exists:\n%s\n"),
1291                          path, sb.buf);
1292                 path_msg(opt, path, 1,
1293                          _("If this is correct simply add it to the index "
1294                            "for example\n"
1295                            "by using:\n\n"
1296                            "  git update-index --cacheinfo 160000 %s \"%s\"\n\n"
1297                            "which will accept this suggestion.\n"),
1298                          oid_to_hex(&merges.objects[0].item->oid), path);
1299                 strbuf_release(&sb);
1300                 break;
1301         default:
1302                 for (i = 0; i < merges.nr; i++)
1303                         format_commit(&sb, 4,
1304                                       (struct commit *)merges.objects[i].item);
1305                 path_msg(opt, path, 0,
1306                          _("Failed to merge submodule %s, but multiple "
1307                            "possible merges exist:\n%s"), path, sb.buf);
1308                 strbuf_release(&sb);
1309         }
1310
1311         object_array_clear(&merges);
1312         return 0;
1313 }
1314
1315 static void initialize_attr_index(struct merge_options *opt)
1316 {
1317         /*
1318          * The renormalize_buffer() functions require attributes, and
1319          * annoyingly those can only be read from the working tree or from
1320          * an index_state.  merge-ort doesn't have an index_state, so we
1321          * generate a fake one containing only attribute information.
1322          */
1323         struct merged_info *mi;
1324         struct index_state *attr_index = &opt->priv->attr_index;
1325         struct cache_entry *ce;
1326
1327         attr_index->initialized = 1;
1328
1329         if (!opt->renormalize)
1330                 return;
1331
1332         mi = strmap_get(&opt->priv->paths, GITATTRIBUTES_FILE);
1333         if (!mi)
1334                 return;
1335
1336         if (mi->clean) {
1337                 int len = strlen(GITATTRIBUTES_FILE);
1338                 ce = make_empty_cache_entry(attr_index, len);
1339                 ce->ce_mode = create_ce_mode(mi->result.mode);
1340                 ce->ce_flags = create_ce_flags(0);
1341                 ce->ce_namelen = len;
1342                 oidcpy(&ce->oid, &mi->result.oid);
1343                 memcpy(ce->name, GITATTRIBUTES_FILE, len);
1344                 add_index_entry(attr_index, ce,
1345                                 ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
1346                 get_stream_filter(attr_index, GITATTRIBUTES_FILE, &ce->oid);
1347         } else {
1348                 int stage, len;
1349                 struct conflict_info *ci;
1350
1351                 ASSIGN_AND_VERIFY_CI(ci, mi);
1352                 for (stage = 0; stage < 3; stage++) {
1353                         unsigned stage_mask = (1 << stage);
1354
1355                         if (!(ci->filemask & stage_mask))
1356                                 continue;
1357                         len = strlen(GITATTRIBUTES_FILE);
1358                         ce = make_empty_cache_entry(attr_index, len);
1359                         ce->ce_mode = create_ce_mode(ci->stages[stage].mode);
1360                         ce->ce_flags = create_ce_flags(stage);
1361                         ce->ce_namelen = len;
1362                         oidcpy(&ce->oid, &ci->stages[stage].oid);
1363                         memcpy(ce->name, GITATTRIBUTES_FILE, len);
1364                         add_index_entry(attr_index, ce,
1365                                         ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
1366                         get_stream_filter(attr_index, GITATTRIBUTES_FILE,
1367                                           &ce->oid);
1368                 }
1369         }
1370 }
1371
1372 static int merge_3way(struct merge_options *opt,
1373                       const char *path,
1374                       const struct object_id *o,
1375                       const struct object_id *a,
1376                       const struct object_id *b,
1377                       const char *pathnames[3],
1378                       const int extra_marker_size,
1379                       mmbuffer_t *result_buf)
1380 {
1381         mmfile_t orig, src1, src2;
1382         struct ll_merge_options ll_opts = {0};
1383         char *base, *name1, *name2;
1384         int merge_status;
1385
1386         if (!opt->priv->attr_index.initialized)
1387                 initialize_attr_index(opt);
1388
1389         ll_opts.renormalize = opt->renormalize;
1390         ll_opts.extra_marker_size = extra_marker_size;
1391         ll_opts.xdl_opts = opt->xdl_opts;
1392
1393         if (opt->priv->call_depth) {
1394                 ll_opts.virtual_ancestor = 1;
1395                 ll_opts.variant = 0;
1396         } else {
1397                 switch (opt->recursive_variant) {
1398                 case MERGE_VARIANT_OURS:
1399                         ll_opts.variant = XDL_MERGE_FAVOR_OURS;
1400                         break;
1401                 case MERGE_VARIANT_THEIRS:
1402                         ll_opts.variant = XDL_MERGE_FAVOR_THEIRS;
1403                         break;
1404                 default:
1405                         ll_opts.variant = 0;
1406                         break;
1407                 }
1408         }
1409
1410         assert(pathnames[0] && pathnames[1] && pathnames[2] && opt->ancestor);
1411         if (pathnames[0] == pathnames[1] && pathnames[1] == pathnames[2]) {
1412                 base  = mkpathdup("%s", opt->ancestor);
1413                 name1 = mkpathdup("%s", opt->branch1);
1414                 name2 = mkpathdup("%s", opt->branch2);
1415         } else {
1416                 base  = mkpathdup("%s:%s", opt->ancestor, pathnames[0]);
1417                 name1 = mkpathdup("%s:%s", opt->branch1,  pathnames[1]);
1418                 name2 = mkpathdup("%s:%s", opt->branch2,  pathnames[2]);
1419         }
1420
1421         read_mmblob(&orig, o);
1422         read_mmblob(&src1, a);
1423         read_mmblob(&src2, b);
1424
1425         merge_status = ll_merge(result_buf, path, &orig, base,
1426                                 &src1, name1, &src2, name2,
1427                                 &opt->priv->attr_index, &ll_opts);
1428
1429         free(base);
1430         free(name1);
1431         free(name2);
1432         free(orig.ptr);
1433         free(src1.ptr);
1434         free(src2.ptr);
1435         return merge_status;
1436 }
1437
1438 static int handle_content_merge(struct merge_options *opt,
1439                                 const char *path,
1440                                 const struct version_info *o,
1441                                 const struct version_info *a,
1442                                 const struct version_info *b,
1443                                 const char *pathnames[3],
1444                                 const int extra_marker_size,
1445                                 struct version_info *result)
1446 {
1447         /*
1448          * path is the target location where we want to put the file, and
1449          * is used to determine any normalization rules in ll_merge.
1450          *
1451          * The normal case is that path and all entries in pathnames are
1452          * identical, though renames can affect which path we got one of
1453          * the three blobs to merge on various sides of history.
1454          *
1455          * extra_marker_size is the amount to extend conflict markers in
1456          * ll_merge; this is neeed if we have content merges of content
1457          * merges, which happens for example with rename/rename(2to1) and
1458          * rename/add conflicts.
1459          */
1460         unsigned clean = 1;
1461
1462         /*
1463          * handle_content_merge() needs both files to be of the same type, i.e.
1464          * both files OR both submodules OR both symlinks.  Conflicting types
1465          * needs to be handled elsewhere.
1466          */
1467         assert((S_IFMT & a->mode) == (S_IFMT & b->mode));
1468
1469         /* Merge modes */
1470         if (a->mode == b->mode || a->mode == o->mode)
1471                 result->mode = b->mode;
1472         else {
1473                 /* must be the 100644/100755 case */
1474                 assert(S_ISREG(a->mode));
1475                 result->mode = a->mode;
1476                 clean = (b->mode == o->mode);
1477                 /*
1478                  * FIXME: If opt->priv->call_depth && !clean, then we really
1479                  * should not make result->mode match either a->mode or
1480                  * b->mode; that causes t6036 "check conflicting mode for
1481                  * regular file" to fail.  It would be best to use some other
1482                  * mode, but we'll confuse all kinds of stuff if we use one
1483                  * where S_ISREG(result->mode) isn't true, and if we use
1484                  * something like 0100666, then tree-walk.c's calls to
1485                  * canon_mode() will just normalize that to 100644 for us and
1486                  * thus not solve anything.
1487                  *
1488                  * Figure out if there's some kind of way we can work around
1489                  * this...
1490                  */
1491         }
1492
1493         /*
1494          * Trivial oid merge.
1495          *
1496          * Note: While one might assume that the next four lines would
1497          * be unnecessary due to the fact that match_mask is often
1498          * setup and already handled, renames don't always take care
1499          * of that.
1500          */
1501         if (oideq(&a->oid, &b->oid) || oideq(&a->oid, &o->oid))
1502                 oidcpy(&result->oid, &b->oid);
1503         else if (oideq(&b->oid, &o->oid))
1504                 oidcpy(&result->oid, &a->oid);
1505
1506         /* Remaining rules depend on file vs. submodule vs. symlink. */
1507         else if (S_ISREG(a->mode)) {
1508                 mmbuffer_t result_buf;
1509                 int ret = 0, merge_status;
1510                 int two_way;
1511
1512                 /*
1513                  * If 'o' is different type, treat it as null so we do a
1514                  * two-way merge.
1515                  */
1516                 two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
1517
1518                 merge_status = merge_3way(opt, path,
1519                                           two_way ? null_oid() : &o->oid,
1520                                           &a->oid, &b->oid,
1521                                           pathnames, extra_marker_size,
1522                                           &result_buf);
1523
1524                 if ((merge_status < 0) || !result_buf.ptr)
1525                         ret = err(opt, _("Failed to execute internal merge"));
1526
1527                 if (!ret &&
1528                     write_object_file(result_buf.ptr, result_buf.size,
1529                                       blob_type, &result->oid))
1530                         ret = err(opt, _("Unable to add %s to database"),
1531                                   path);
1532
1533                 free(result_buf.ptr);
1534                 if (ret)
1535                         return -1;
1536                 clean &= (merge_status == 0);
1537                 path_msg(opt, path, 1, _("Auto-merging %s"), path);
1538         } else if (S_ISGITLINK(a->mode)) {
1539                 int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
1540                 clean = merge_submodule(opt, pathnames[0],
1541                                         two_way ? null_oid() : &o->oid,
1542                                         &a->oid, &b->oid, &result->oid);
1543                 if (opt->priv->call_depth && two_way && !clean) {
1544                         result->mode = o->mode;
1545                         oidcpy(&result->oid, &o->oid);
1546                 }
1547         } else if (S_ISLNK(a->mode)) {
1548                 if (opt->priv->call_depth) {
1549                         clean = 0;
1550                         result->mode = o->mode;
1551                         oidcpy(&result->oid, &o->oid);
1552                 } else {
1553                         switch (opt->recursive_variant) {
1554                         case MERGE_VARIANT_NORMAL:
1555                                 clean = 0;
1556                                 oidcpy(&result->oid, &a->oid);
1557                                 break;
1558                         case MERGE_VARIANT_OURS:
1559                                 oidcpy(&result->oid, &a->oid);
1560                                 break;
1561                         case MERGE_VARIANT_THEIRS:
1562                                 oidcpy(&result->oid, &b->oid);
1563                                 break;
1564                         }
1565                 }
1566         } else
1567                 BUG("unsupported object type in the tree: %06o for %s",
1568                     a->mode, path);
1569
1570         return clean;
1571 }
1572
1573 /*** Function Grouping: functions related to detect_and_process_renames(), ***
1574  *** which are split into directory and regular rename detection sections. ***/
1575
1576 /*** Function Grouping: functions related to directory rename detection ***/
1577
1578 struct collision_info {
1579         struct string_list source_files;
1580         unsigned reported_already:1;
1581 };
1582
1583 /*
1584  * Return a new string that replaces the beginning portion (which matches
1585  * rename_info->key), with rename_info->util.new_dir.  In perl-speak:
1586  *   new_path_name = (old_path =~ s/rename_info->key/rename_info->value/);
1587  * NOTE:
1588  *   Caller must ensure that old_path starts with rename_info->key + '/'.
1589  */
1590 static char *apply_dir_rename(struct strmap_entry *rename_info,
1591                               const char *old_path)
1592 {
1593         struct strbuf new_path = STRBUF_INIT;
1594         const char *old_dir = rename_info->key;
1595         const char *new_dir = rename_info->value;
1596         int oldlen, newlen, new_dir_len;
1597
1598         oldlen = strlen(old_dir);
1599         if (*new_dir == '\0')
1600                 /*
1601                  * If someone renamed/merged a subdirectory into the root
1602                  * directory (e.g. 'some/subdir' -> ''), then we want to
1603                  * avoid returning
1604                  *     '' + '/filename'
1605                  * as the rename; we need to make old_path + oldlen advance
1606                  * past the '/' character.
1607                  */
1608                 oldlen++;
1609         new_dir_len = strlen(new_dir);
1610         newlen = new_dir_len + (strlen(old_path) - oldlen) + 1;
1611         strbuf_grow(&new_path, newlen);
1612         strbuf_add(&new_path, new_dir, new_dir_len);
1613         strbuf_addstr(&new_path, &old_path[oldlen]);
1614
1615         return strbuf_detach(&new_path, NULL);
1616 }
1617
1618 static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask)
1619 {
1620         struct merged_info *mi = strmap_get(paths, path);
1621         struct conflict_info *ci;
1622         if (!mi)
1623                 return 0;
1624         INITIALIZE_CI(ci, mi);
1625         return mi->clean || (side_mask & (ci->filemask | ci->dirmask));
1626 }
1627
1628 /*
1629  * See if there is a directory rename for path, and if there are any file
1630  * level conflicts on the given side for the renamed location.  If there is
1631  * a rename and there are no conflicts, return the new name.  Otherwise,
1632  * return NULL.
1633  */
1634 static char *handle_path_level_conflicts(struct merge_options *opt,
1635                                          const char *path,
1636                                          unsigned side_index,
1637                                          struct strmap_entry *rename_info,
1638                                          struct strmap *collisions)
1639 {
1640         char *new_path = NULL;
1641         struct collision_info *c_info;
1642         int clean = 1;
1643         struct strbuf collision_paths = STRBUF_INIT;
1644
1645         /*
1646          * entry has the mapping of old directory name to new directory name
1647          * that we want to apply to path.
1648          */
1649         new_path = apply_dir_rename(rename_info, path);
1650         if (!new_path)
1651                 BUG("Failed to apply directory rename!");
1652
1653         /*
1654          * The caller needs to have ensured that it has pre-populated
1655          * collisions with all paths that map to new_path.  Do a quick check
1656          * to ensure that's the case.
1657          */
1658         c_info = strmap_get(collisions, new_path);
1659         if (c_info == NULL)
1660                 BUG("c_info is NULL");
1661
1662         /*
1663          * Check for one-sided add/add/.../add conflicts, i.e.
1664          * where implicit renames from the other side doing
1665          * directory rename(s) can affect this side of history
1666          * to put multiple paths into the same location.  Warn
1667          * and bail on directory renames for such paths.
1668          */
1669         if (c_info->reported_already) {
1670                 clean = 0;
1671         } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) {
1672                 c_info->reported_already = 1;
1673                 strbuf_add_separated_string_list(&collision_paths, ", ",
1674                                                  &c_info->source_files);
1675                 path_msg(opt, new_path, 0,
1676                          _("CONFLICT (implicit dir rename): Existing file/dir "
1677                            "at %s in the way of implicit directory rename(s) "
1678                            "putting the following path(s) there: %s."),
1679                        new_path, collision_paths.buf);
1680                 clean = 0;
1681         } else if (c_info->source_files.nr > 1) {
1682                 c_info->reported_already = 1;
1683                 strbuf_add_separated_string_list(&collision_paths, ", ",
1684                                                  &c_info->source_files);
1685                 path_msg(opt, new_path, 0,
1686                          _("CONFLICT (implicit dir rename): Cannot map more "
1687                            "than one path to %s; implicit directory renames "
1688                            "tried to put these paths there: %s"),
1689                        new_path, collision_paths.buf);
1690                 clean = 0;
1691         }
1692
1693         /* Free memory we no longer need */
1694         strbuf_release(&collision_paths);
1695         if (!clean && new_path) {
1696                 free(new_path);
1697                 return NULL;
1698         }
1699
1700         return new_path;
1701 }
1702
1703 static void get_provisional_directory_renames(struct merge_options *opt,
1704                                               unsigned side,
1705                                               int *clean)
1706 {
1707         struct hashmap_iter iter;
1708         struct strmap_entry *entry;
1709         struct rename_info *renames = &opt->priv->renames;
1710
1711         /*
1712          * Collapse
1713          *    dir_rename_count: old_directory -> {new_directory -> count}
1714          * down to
1715          *    dir_renames: old_directory -> best_new_directory
1716          * where best_new_directory is the one with the unique highest count.
1717          */
1718         strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
1719                 const char *source_dir = entry->key;
1720                 struct strintmap *counts = entry->value;
1721                 struct hashmap_iter count_iter;
1722                 struct strmap_entry *count_entry;
1723                 int max = 0;
1724                 int bad_max = 0;
1725                 const char *best = NULL;
1726
1727                 strintmap_for_each_entry(counts, &count_iter, count_entry) {
1728                         const char *target_dir = count_entry->key;
1729                         intptr_t count = (intptr_t)count_entry->value;
1730
1731                         if (count == max)
1732                                 bad_max = max;
1733                         else if (count > max) {
1734                                 max = count;
1735                                 best = target_dir;
1736                         }
1737                 }
1738
1739                 if (max == 0)
1740                         continue;
1741
1742                 if (bad_max == max) {
1743                         path_msg(opt, source_dir, 0,
1744                                _("CONFLICT (directory rename split): "
1745                                  "Unclear where to rename %s to; it was "
1746                                  "renamed to multiple other directories, with "
1747                                  "no destination getting a majority of the "
1748                                  "files."),
1749                                source_dir);
1750                         *clean = 0;
1751                 } else {
1752                         strmap_put(&renames->dir_renames[side],
1753                                    source_dir, (void*)best);
1754                 }
1755         }
1756 }
1757
1758 static void handle_directory_level_conflicts(struct merge_options *opt)
1759 {
1760         struct hashmap_iter iter;
1761         struct strmap_entry *entry;
1762         struct string_list duplicated = STRING_LIST_INIT_NODUP;
1763         struct rename_info *renames = &opt->priv->renames;
1764         struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1];
1765         struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2];
1766         int i;
1767
1768         strmap_for_each_entry(side1_dir_renames, &iter, entry) {
1769                 if (strmap_contains(side2_dir_renames, entry->key))
1770                         string_list_append(&duplicated, entry->key);
1771         }
1772
1773         for (i = 0; i < duplicated.nr; i++) {
1774                 strmap_remove(side1_dir_renames, duplicated.items[i].string, 0);
1775                 strmap_remove(side2_dir_renames, duplicated.items[i].string, 0);
1776         }
1777         string_list_clear(&duplicated, 0);
1778 }
1779
/*
 * Look for a directory rename covering path: try each leading directory
 * of path, from the deepest one upwards, and return the dir_renames entry
 * of the first one found.  Returns NULL if no leading directory of path
 * has an entry.
 */
static struct strmap_entry *check_dir_renamed(const char *path,
					      struct strmap *dir_renames)
{
	struct strmap_entry *match = NULL;
	char *prefix = xstrdup(path);
	char *slash;

	/* Chop off one trailing path component per iteration. */
	while (!match && (slash = strrchr(prefix, '/'))) {
		*slash = '\0';
		match = strmap_get_entry(dir_renames, prefix);
	}

	free(prefix);
	return match;
}
1796
1797 static void compute_collisions(struct strmap *collisions,
1798                                struct strmap *dir_renames,
1799                                struct diff_queue_struct *pairs)
1800 {
1801         int i;
1802
1803         strmap_init_with_options(collisions, NULL, 0);
1804         if (strmap_empty(dir_renames))
1805                 return;
1806
1807         /*
1808          * Multiple files can be mapped to the same path due to directory
1809          * renames done by the other side of history.  Since that other
1810          * side of history could have merged multiple directories into one,
1811          * if our side of history added the same file basename to each of
1812          * those directories, then all N of them would get implicitly
1813          * renamed by the directory rename detection into the same path,
1814          * and we'd get an add/add/.../add conflict, and all those adds
1815          * from *this* side of history.  This is not representable in the
1816          * index, and users aren't going to easily be able to make sense of
1817          * it.  So we need to provide a good warning about what's
1818          * happening, and fall back to no-directory-rename detection
1819          * behavior for those paths.
1820          *
1821          * See testcases 9e and all of section 5 from t6043 for examples.
1822          */
1823         for (i = 0; i < pairs->nr; ++i) {
1824                 struct strmap_entry *rename_info;
1825                 struct collision_info *collision_info;
1826                 char *new_path;
1827                 struct diff_filepair *pair = pairs->queue[i];
1828
1829                 if (pair->status != 'A' && pair->status != 'R')
1830                         continue;
1831                 rename_info = check_dir_renamed(pair->two->path, dir_renames);
1832                 if (!rename_info)
1833                         continue;
1834
1835                 new_path = apply_dir_rename(rename_info, pair->two->path);
1836                 assert(new_path);
1837                 collision_info = strmap_get(collisions, new_path);
1838                 if (collision_info) {
1839                         free(new_path);
1840                 } else {
1841                         CALLOC_ARRAY(collision_info, 1);
1842                         string_list_init(&collision_info->source_files, 0);
1843                         strmap_put(collisions, new_path, collision_info);
1844                 }
1845                 string_list_insert(&collision_info->source_files,
1846                                    pair->two->path);
1847         }
1848 }
1849
1850 static char *check_for_directory_rename(struct merge_options *opt,
1851                                         const char *path,
1852                                         unsigned side_index,
1853                                         struct strmap *dir_renames,
1854                                         struct strmap *dir_rename_exclusions,
1855                                         struct strmap *collisions,
1856                                         int *clean_merge)
1857 {
1858         char *new_path = NULL;
1859         struct strmap_entry *rename_info;
1860         struct strmap_entry *otherinfo = NULL;
1861         const char *new_dir;
1862
1863         if (strmap_empty(dir_renames))
1864                 return new_path;
1865         rename_info = check_dir_renamed(path, dir_renames);
1866         if (!rename_info)
1867                 return new_path;
1868         /* old_dir = rename_info->key; */
1869         new_dir = rename_info->value;
1870
1871         /*
1872          * This next part is a little weird.  We do not want to do an
1873          * implicit rename into a directory we renamed on our side, because
1874          * that will result in a spurious rename/rename(1to2) conflict.  An
1875          * example:
1876          *   Base commit: dumbdir/afile, otherdir/bfile
1877          *   Side 1:      smrtdir/afile, otherdir/bfile
1878          *   Side 2:      dumbdir/afile, dumbdir/bfile
1879          * Here, while working on Side 1, we could notice that otherdir was
1880          * renamed/merged to dumbdir, and change the diff_filepair for
1881          * otherdir/bfile into a rename into dumbdir/bfile.  However, Side
1882          * 2 will notice the rename from dumbdir to smrtdir, and do the
1883          * transitive rename to move it from dumbdir/bfile to
1884          * smrtdir/bfile.  That gives us bfile in dumbdir vs being in
1885          * smrtdir, a rename/rename(1to2) conflict.  We really just want
1886          * the file to end up in smrtdir.  And the way to achieve that is
1887          * to not let Side1 do the rename to dumbdir, since we know that is
1888          * the source of one of our directory renames.
1889          *
1890          * That's why otherinfo and dir_rename_exclusions is here.
1891          *
1892          * As it turns out, this also prevents N-way transient rename
1893          * confusion; See testcases 9c and 9d of t6043.
1894          */
1895         otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir);
1896         if (otherinfo) {
1897                 path_msg(opt, rename_info->key, 1,
1898                          _("WARNING: Avoiding applying %s -> %s rename "
1899                            "to %s, because %s itself was renamed."),
1900                          rename_info->key, new_dir, path, new_dir);
1901                 return NULL;
1902         }
1903
1904         new_path = handle_path_level_conflicts(opt, path, side_index,
1905                                                rename_info, collisions);
1906         *clean_merge &= (new_path != NULL);
1907
1908         return new_path;
1909 }
1910
1911 static void apply_directory_rename_modifications(struct merge_options *opt,
1912                                                  struct diff_filepair *pair,
1913                                                  char *new_path)
1914 {
1915         /*
1916          * The basic idea is to get the conflict_info from opt->priv->paths
1917          * at old path, and insert it into new_path; basically just this:
1918          *     ci = strmap_get(&opt->priv->paths, old_path);
1919          *     strmap_remove(&opt->priv->paths, old_path, 0);
1920          *     strmap_put(&opt->priv->paths, new_path, ci);
1921          * However, there are some factors complicating this:
1922          *     - opt->priv->paths may already have an entry at new_path
1923          *     - Each ci tracks its containing directory, so we need to
1924          *       update that
1925          *     - If another ci has the same containing directory, then
1926          *       the two char*'s MUST point to the same location.  See the
1927          *       comment in struct merged_info.  strcmp equality is not
1928          *       enough; we need pointer equality.
1929          *     - opt->priv->paths must hold the parent directories of any
1930          *       entries that are added.  So, if this directory rename
1931          *       causes entirely new directories, we must recursively add
1932          *       parent directories.
1933          *     - For each parent directory added to opt->priv->paths, we
1934          *       also need to get its parent directory stored in its
1935          *       conflict_info->merged.directory_name with all the same
1936          *       requirements about pointer equality.
1937          */
1938         struct string_list dirs_to_insert = STRING_LIST_INIT_NODUP;
1939         struct conflict_info *ci, *new_ci;
1940         struct strmap_entry *entry;
1941         const char *branch_with_new_path, *branch_with_dir_rename;
1942         const char *old_path = pair->two->path;
1943         const char *parent_name;
1944         const char *cur_path;
1945         int i, len;
1946
1947         entry = strmap_get_entry(&opt->priv->paths, old_path);
1948         old_path = entry->key;
1949         ci = entry->value;
1950         VERIFY_CI(ci);
1951
1952         /* Find parent directories missing from opt->priv->paths */
1953         cur_path = new_path;
1954         while (1) {
1955                 /* Find the parent directory of cur_path */
1956                 char *last_slash = strrchr(cur_path, '/');
1957                 if (last_slash) {
1958                         parent_name = xstrndup(cur_path, last_slash - cur_path);
1959                 } else {
1960                         parent_name = opt->priv->toplevel_dir;
1961                         break;
1962                 }
1963
1964                 /* Look it up in opt->priv->paths */
1965                 entry = strmap_get_entry(&opt->priv->paths, parent_name);
1966                 if (entry) {
1967                         free((char*)parent_name);
1968                         parent_name = entry->key; /* reuse known pointer */
1969                         break;
1970                 }
1971
1972                 /* Record this is one of the directories we need to insert */
1973                 string_list_append(&dirs_to_insert, parent_name);
1974                 cur_path = parent_name;
1975         }
1976
1977         /* Traverse dirs_to_insert and insert them into opt->priv->paths */
1978         for (i = dirs_to_insert.nr-1; i >= 0; --i) {
1979                 struct conflict_info *dir_ci;
1980                 char *cur_dir = dirs_to_insert.items[i].string;
1981
1982                 CALLOC_ARRAY(dir_ci, 1);
1983
1984                 dir_ci->merged.directory_name = parent_name;
1985                 len = strlen(parent_name);
1986                 /* len+1 because of trailing '/' character */
1987                 dir_ci->merged.basename_offset = (len > 0 ? len+1 : len);
1988                 dir_ci->dirmask = ci->filemask;
1989                 strmap_put(&opt->priv->paths, cur_dir, dir_ci);
1990
1991                 parent_name = cur_dir;
1992         }
1993
1994         /*
1995          * We are removing old_path from opt->priv->paths.  old_path also will
1996          * eventually need to be freed, but it may still be used by e.g.
1997          * ci->pathnames.  So, store it in another string-list for now.
1998          */
1999         string_list_append(&opt->priv->paths_to_free, old_path);
2000
2001         assert(ci->filemask == 2 || ci->filemask == 4);
2002         assert(ci->dirmask == 0);
2003         strmap_remove(&opt->priv->paths, old_path, 0);
2004
2005         branch_with_new_path   = (ci->filemask == 2) ? opt->branch1 : opt->branch2;
2006         branch_with_dir_rename = (ci->filemask == 2) ? opt->branch2 : opt->branch1;
2007
2008         /* Now, finally update ci and stick it into opt->priv->paths */
2009         ci->merged.directory_name = parent_name;
2010         len = strlen(parent_name);
2011         ci->merged.basename_offset = (len > 0 ? len+1 : len);
2012         new_ci = strmap_get(&opt->priv->paths, new_path);
2013         if (!new_ci) {
2014                 /* Place ci back into opt->priv->paths, but at new_path */
2015                 strmap_put(&opt->priv->paths, new_path, ci);
2016         } else {
2017                 int index;
2018
2019                 /* A few sanity checks */
2020                 VERIFY_CI(new_ci);
2021                 assert(ci->filemask == 2 || ci->filemask == 4);
2022                 assert((new_ci->filemask & ci->filemask) == 0);
2023                 assert(!new_ci->merged.clean);
2024
2025                 /* Copy stuff from ci into new_ci */
2026                 new_ci->filemask |= ci->filemask;
2027                 if (new_ci->dirmask)
2028                         new_ci->df_conflict = 1;
2029                 index = (ci->filemask >> 1);
2030                 new_ci->pathnames[index] = ci->pathnames[index];
2031                 new_ci->stages[index].mode = ci->stages[index].mode;
2032                 oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
2033
2034                 free(ci);
2035                 ci = new_ci;
2036         }
2037
2038         if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE) {
2039                 /* Notify user of updated path */
2040                 if (pair->status == 'A')
2041                         path_msg(opt, new_path, 1,
2042                                  _("Path updated: %s added in %s inside a "
2043                                    "directory that was renamed in %s; moving "
2044                                    "it to %s."),
2045                                  old_path, branch_with_new_path,
2046                                  branch_with_dir_rename, new_path);
2047                 else
2048                         path_msg(opt, new_path, 1,
2049                                  _("Path updated: %s renamed to %s in %s, "
2050                                    "inside a directory that was renamed in %s; "
2051                                    "moving it to %s."),
2052                                  pair->one->path, old_path, branch_with_new_path,
2053                                  branch_with_dir_rename, new_path);
2054         } else {
2055                 /*
2056                  * opt->detect_directory_renames has the value
2057                  * MERGE_DIRECTORY_RENAMES_CONFLICT, so mark these as conflicts.
2058                  */
2059                 ci->path_conflict = 1;
2060                 if (pair->status == 'A')
2061                         path_msg(opt, new_path, 0,
2062                                  _("CONFLICT (file location): %s added in %s "
2063                                    "inside a directory that was renamed in %s, "
2064                                    "suggesting it should perhaps be moved to "
2065                                    "%s."),
2066                                  old_path, branch_with_new_path,
2067                                  branch_with_dir_rename, new_path);
2068                 else
2069                         path_msg(opt, new_path, 0,
2070                                  _("CONFLICT (file location): %s renamed to %s "
2071                                    "in %s, inside a directory that was renamed "
2072                                    "in %s, suggesting it should perhaps be "
2073                                    "moved to %s."),
2074                                  pair->one->path, old_path, branch_with_new_path,
2075                                  branch_with_dir_rename, new_path);
2076         }
2077
2078         /*
2079          * Finally, record the new location.
2080          */
2081         pair->two->path = new_path;
2082 }
2083
2084 /*** Function Grouping: functions related to regular rename detection ***/
2085
/*
 * Walk the queue of rename pairs and fold each rename into the
 * conflict_info entries stored in opt->priv->paths.
 *
 * Two consecutive queue entries whose source paths compare equal are
 * handled together: as rename/rename(1to1) when both targets are the same
 * path, otherwise as rename/rename(1to2).  This adjacency check relies on
 * the queue being ordered by source path (the visible caller sorts the
 * queue with compare_pairs() before calling this function).  Every other
 * entry is handled on its own as a collision (rename/add or
 * rename/rename(2to1)), a rename/delete, a rename vs. typechange, or a
 * normal rename.
 *
 * Entries whose source path is no longer in opt->priv->paths, or whose
 * source is already marked clean, are skipped.
 *
 * Returns clean_merge, which starts out as 1 and is only ever reassigned
 * from the result of handle_content_merge() in the rename/rename(1to2)
 * case.
 */
2086 static int process_renames(struct merge_options *opt,
2087                            struct diff_queue_struct *renames)
2088 {
2089         int clean_merge = 1, i;
2090
2091         for (i = 0; i < renames->nr; ++i) {
2092                 const char *oldpath = NULL, *newpath;
2093                 struct diff_filepair *pair = renames->queue[i];
2094                 struct conflict_info *oldinfo = NULL, *newinfo = NULL;
2095                 struct strmap_entry *old_ent, *new_ent;
2096                 unsigned int old_sidemask;
2097                 int target_index, other_source_index;
2098                 int source_deleted, collision, type_changed;
2099                 const char *rename_branch = NULL, *delete_branch = NULL;
2100
2101                 old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path);
2102                 new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path);
2103                 if (old_ent) {
2104                         oldpath = old_ent->key;
2105                         oldinfo = old_ent->value;
2106                 }
2107                 newpath = pair->two->path;
2108                 if (new_ent) {
2109                         newpath = new_ent->key;
2110                         newinfo = new_ent->value;
2111                 }
2112
2113                 /*
2114                  * If pair->one->path isn't in opt->priv->paths, that means
2115                  * that either directory rename detection removed that
2116                  * path, or a parent directory of oldpath was resolved and
2117                  * we don't even need the rename; in either case, we can
2118                  * skip it.  If oldinfo->merged.clean, then the other side
2119                  * of history had no changes to oldpath and we don't need
2120                  * the rename and can skip it.
2121                  */
2122                 if (!oldinfo || oldinfo->merged.clean)
2123                         continue;
2124
2125                 /*
2126                  * diff_filepairs have copies of pathnames, thus we have to
2127                  * use standard 'strcmp()' (negated) instead of '=='.
2128                  */
2129                 if (i + 1 < renames->nr &&
2130                     !strcmp(oldpath, renames->queue[i+1]->one->path)) {
2131                         /* Handle rename/rename(1to2) or rename/rename(1to1) */
2132                         const char *pathnames[3];
2133                         struct version_info merged;
2134                         struct conflict_info *base, *side1, *side2;
2135                         unsigned was_binary_blob = 0;
2136
2137                         pathnames[0] = oldpath;
2138                         pathnames[1] = newpath;
2139                         pathnames[2] = renames->queue[i+1]->two->path;
2140
2141                         base = strmap_get(&opt->priv->paths, pathnames[0]);
2142                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
2143                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
2144
2145                         VERIFY_CI(base);
2146                         VERIFY_CI(side1);
2147                         VERIFY_CI(side2);
2148
2149                         if (!strcmp(pathnames[1], pathnames[2])) {
2150                                 struct rename_info *ri = &opt->priv->renames;
2151                                 int j;
2152
2153                                 /* Both sides renamed the same way */
2154                                 assert(side1 == side2);
2155                                 memcpy(&side1->stages[0], &base->stages[0],
2156                                        sizeof(merged));
2157                                 side1->filemask |= (1 << MERGE_BASE);
2158                                 /* Mark base as resolved by removal */
2159                                 base->merged.is_null = 1;
2160                                 base->merged.clean = 1;
2161
2162                                 /*
2163                                  * Disable remembering renames optimization;
2164                                  * rename/rename(1to1) is incredibly rare, and
2165                                  * just disabling the optimization is easier
2166                                  * than purging cached_pairs,
2167                                  * cached_target_names, and dir_rename_counts.
2168                                  */
2169                                 for (j = 0; j < 3; j++)
2170                                         ri->merge_trees[j] = NULL;
2171
2172                                 /* We handled both renames, i.e. i+1 handled */
2173                                 i++;
2174                                 /* Move to next rename */
2175                                 continue;
2176                         }
2177
2178                         /* This is a rename/rename(1to2) */
                             /*
                              * NOTE(review): this assignment replaces
                              * clean_merge instead of accumulating into it,
                              * so the returned value reflects only the last
                              * rename/rename(1to2) pair processed -- confirm
                              * that is intended.
                              */
2179                         clean_merge = handle_content_merge(opt,
2180                                                            pair->one->path,
2181                                                            &base->stages[0],
2182                                                            &side1->stages[1],
2183                                                            &side2->stages[2],
2184                                                            pathnames,
2185                                                            1 + 2 * opt->priv->call_depth,
2186                                                            &merged);
2187                         if (!clean_merge &&
2188                             merged.mode == side1->stages[1].mode &&
2189                             oideq(&merged.oid, &side1->stages[1].oid))
2190                                 was_binary_blob = 1;
2191                         memcpy(&side1->stages[1], &merged, sizeof(merged));
2192                         if (was_binary_blob) {
2193                                 /*
2194                                  * Getting here means we were attempting to
2195                                  * merge a binary blob.
2196                                  *
2197                                  * Since we can't merge binaries,
2198                                  * handle_content_merge() just takes one
2199                                  * side.  But we don't want to copy the
2200                                  * contents of one side to both paths.  We
2201                                  * used the contents of side1 above for
2202                                  * side1->stages, let's use the contents of
2203                                  * side2 for side2->stages below.
2204                                  */
2205                                 oidcpy(&merged.oid, &side2->stages[2].oid);
2206                                 merged.mode = side2->stages[2].mode;
2207                         }
2208                         memcpy(&side2->stages[2], &merged, sizeof(merged));
2209
2210                         side1->path_conflict = 1;
2211                         side2->path_conflict = 1;
2212                         /*
2213                          * TODO: For renames we normally remove the path at the
2214                          * old name.  It would thus seem consistent to do the
2215                          * same for rename/rename(1to2) cases, but we haven't
2216                          * done so traditionally and a number of the regression
2217                          * tests now encode an expectation that the file is
2218                          * left there at stage 1.  If we ever decide to change
2219                          * this, add the following two lines here:
2220                          *    base->merged.is_null = 1;
2221                          *    base->merged.clean = 1;
2222                          * and remove the setting of base->path_conflict to 1.
2223                          */
2224                         base->path_conflict = 1;
2225                         path_msg(opt, oldpath, 0,
2226                                  _("CONFLICT (rename/rename): %s renamed to "
2227                                    "%s in %s and to %s in %s."),
2228                                  pathnames[0],
2229                                  pathnames[1], opt->branch1,
2230                                  pathnames[2], opt->branch2);
2231
2232                         i++; /* We handled both renames, i.e. i+1 handled */
2233                         continue;
2234                 }
2235
2236                 VERIFY_CI(oldinfo);
2237                 VERIFY_CI(newinfo);
2238                 target_index = pair->score; /* from collect_renames() */
2239                 assert(target_index == 1 || target_index == 2);
2240                 other_source_index = 3 - target_index;
2241                 old_sidemask = (1 << other_source_index); /* 2 or 4 */
                     /*
                      * A filemask of exactly 1 has only the MERGE_BASE bit
                      * set, i.e. oldpath has an entry in the base and in
                      * neither side.
                      */
2242                 source_deleted = (oldinfo->filemask == 1);
2243                 collision = ((newinfo->filemask & old_sidemask) != 0);
2244                 type_changed = !source_deleted &&
2245                         (S_ISREG(oldinfo->stages[other_source_index].mode) !=
2246                          S_ISREG(newinfo->stages[target_index].mode));
2247                 if (type_changed && collision) {
2248                         /*
2249                          * special handling so later blocks can handle this...
2250                          *
2251                          * if type_changed && collision are both true, then this
2252                          * was really a double rename, but one side wasn't
2253                          * detected due to lack of break detection.  I.e.
2254                          * something like
2255                          *    orig: has normal file 'foo'
2256                          *    side1: renames 'foo' to 'bar', adds 'foo' symlink
2257                          *    side2: renames 'foo' to 'bar'
2258                          * In this case, the foo->bar rename on side1 won't be
2259                          * detected because the new symlink named 'foo' is
2260                          * there and we don't do break detection.  But we detect
2261                          * this here because we don't want to merge the content
2262                          * of the foo symlink with the foo->bar file, so we
2263                          * have some logic to handle this special case.  The
2264                          * easiest way to do that is make 'bar' on side1 not
2265                          * be considered a colliding file but the other part
2266                          * of a normal rename.  If the file is very different,
2267                          * well we're going to get content merge conflicts
2268                          * anyway so it doesn't hurt.  And if the colliding
2269                          * file also has a different type, that'll be handled
2270                          * by the content merge logic in process_entry() too.
2271                          *
2272                          * See also t6430, 'rename vs. rename/symlink'
2273                          */
2274                         collision = 0;
2275                 }
2276                 if (source_deleted) {
2277                         if (target_index == 1) {
2278                                 rename_branch = opt->branch1;
2279                                 delete_branch = opt->branch2;
2280                         } else {
2281                                 rename_branch = opt->branch2;
2282                                 delete_branch = opt->branch1;
2283                         }
2284                 }
2285
2286                 assert(source_deleted || oldinfo->filemask & old_sidemask);
2287
2288                 /* Need to check for special types of rename conflicts... */
2289                 if (collision && !source_deleted) {
2290                         /* collision: rename/add or rename/rename(2to1) */
2291                         const char *pathnames[3];
2292                         struct version_info merged;
2293
2294                         struct conflict_info *base, *side1, *side2;
                             /*
                              * NOTE(review): the result of
                              * handle_content_merge() is kept in an unsigned
                              * here but in an int (clean_merge) in the
                              * rename/rename(1to2) branch; if that function
                              * can return a negative value, "!clean" below
                              * would treat it as clean -- confirm.  This
                              * value is also not folded into clean_merge.
                              */
2295                         unsigned clean;
2296
2297                         pathnames[0] = oldpath;
2298                         pathnames[other_source_index] = oldpath;
2299                         pathnames[target_index] = newpath;
2300
2301                         base = strmap_get(&opt->priv->paths, pathnames[0]);
2302                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
2303                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
2304
2305                         VERIFY_CI(base);
2306                         VERIFY_CI(side1);
2307                         VERIFY_CI(side2);
2308
2309                         clean = handle_content_merge(opt, pair->one->path,
2310                                                      &base->stages[0],
2311                                                      &side1->stages[1],
2312                                                      &side2->stages[2],
2313                                                      pathnames,
2314                                                      1 + 2 * opt->priv->call_depth,
2315                                                      &merged);
2316
2317                         memcpy(&newinfo->stages[target_index], &merged,
2318                                sizeof(merged));
2319                         if (!clean) {
2320                                 path_msg(opt, newpath, 0,
2321                                          _("CONFLICT (rename involved in "
2322                                            "collision): rename of %s -> %s has "
2323                                            "content conflicts AND collides "
2324                                            "with another path; this may result "
2325                                            "in nested conflict markers."),
2326                                          oldpath, newpath);
2327                         }
2328                 } else if (collision && source_deleted) {
2329                         /*
2330                          * rename/add/delete or rename/rename(2to1)/delete:
2331                          * since oldpath was deleted on the side that didn't
2332                          * do the rename, there's not much of a content merge
2333                          * we can do for the rename.  oldinfo->merged.is_null
2334                          * was already set, so we just leave things as-is so
2335                          * they look like an add/add conflict.
2336                          */
2337
2338                         newinfo->path_conflict = 1;
2339                         path_msg(opt, newpath, 0,
2340                                  _("CONFLICT (rename/delete): %s renamed "
2341                                    "to %s in %s, but deleted in %s."),
2342                                  oldpath, newpath, rename_branch, delete_branch);
2343                 } else {
2344                         /*
2345                          * a few different cases...start by copying the
2346                          * existing stage(s) from oldinfo over the newinfo
2347                          * and update the pathname(s).
2348                          */
2349                         memcpy(&newinfo->stages[0], &oldinfo->stages[0],
2350                                sizeof(newinfo->stages[0]));
2351                         newinfo->filemask |= (1 << MERGE_BASE);
2352                         newinfo->pathnames[0] = oldpath;
2353                         if (type_changed) {
2354                                 /* rename vs. typechange */
2355                                 /* Mark the original as resolved by removal */
2356                                 memcpy(&oldinfo->stages[0].oid, null_oid(),
2357                                        sizeof(oldinfo->stages[0].oid));
2358                                 oldinfo->stages[0].mode = 0;
                                     /* 0x06 keeps the side1/side2 bits and clears the MERGE_BASE bit */
2359                                 oldinfo->filemask &= 0x06;
2360                         } else if (source_deleted) {
2361                                 /* rename/delete */
2362                                 newinfo->path_conflict = 1;
2363                                 path_msg(opt, newpath, 0,
2364                                          _("CONFLICT (rename/delete): %s renamed"
2365                                            " to %s in %s, but deleted in %s."),
2366                                          oldpath, newpath,
2367                                          rename_branch, delete_branch);
2368                         } else {
2369                                 /* normal rename */
2370                                 memcpy(&newinfo->stages[other_source_index],
2371                                        &oldinfo->stages[other_source_index],
2372                                        sizeof(newinfo->stages[0]));
2373                                 newinfo->filemask |= (1 << other_source_index);
2374                                 newinfo->pathnames[other_source_index] = oldpath;
2375                         }
2376                 }
2377
2378                 if (!type_changed) {
2379                         /* Mark the original as resolved by removal */
2380                         oldinfo->merged.is_null = 1;
2381                         oldinfo->merged.clean = 1;
2382                 }
2383
2384         }
2385
2386         return clean_merge;
2387 }
2388
2389 static inline int possible_side_renames(struct rename_info *renames,
2390                                         unsigned side_index)
2391 {
2392         return renames->pairs[side_index].nr > 0 &&
2393                !strintmap_empty(&renames->relevant_sources[side_index]);
2394 }
2395
2396 static inline int possible_renames(struct rename_info *renames)
2397 {
2398         return possible_side_renames(renames, 1) ||
2399                possible_side_renames(renames, 2) ||
2400                !strmap_empty(&renames->cached_pairs[1]) ||
2401                !strmap_empty(&renames->cached_pairs[2]);
2402 }
2403
2404 static void resolve_diffpair_statuses(struct diff_queue_struct *q)
2405 {
2406         /*
2407          * A simplified version of diff_resolve_rename_copy(); would probably
2408          * just use that function but it's static...
2409          */
2410         int i;
2411         struct diff_filepair *p;
2412
2413         for (i = 0; i < q->nr; ++i) {
2414                 p = q->queue[i];
2415                 p->status = 0; /* undecided */
2416                 if (!DIFF_FILE_VALID(p->one))
2417                         p->status = DIFF_STATUS_ADDED;
2418                 else if (!DIFF_FILE_VALID(p->two))
2419                         p->status = DIFF_STATUS_DELETED;
2420                 else if (DIFF_PAIR_RENAME(p))
2421                         p->status = DIFF_STATUS_RENAMED;
2422         }
2423 }
2424
2425 static void prune_cached_from_relevant(struct rename_info *renames,
2426                                        unsigned side)
2427 {
2428         /* Reason for this function described in add_pair() */
2429         struct hashmap_iter iter;
2430         struct strmap_entry *entry;
2431
2432         /* Remove from relevant_sources all entries in cached_pairs[side] */
2433         strmap_for_each_entry(&renames->cached_pairs[side], &iter, entry) {
2434                 strintmap_remove(&renames->relevant_sources[side],
2435                                  entry->key);
2436         }
2437         /* Remove from relevant_sources all entries in cached_irrelevant[side] */
2438         strset_for_each_entry(&renames->cached_irrelevant[side], &iter, entry) {
2439                 strintmap_remove(&renames->relevant_sources[side],
2440                                  entry->key);
2441         }
2442 }
2443
2444 static void use_cached_pairs(struct merge_options *opt,
2445                              struct strmap *cached_pairs,
2446                              struct diff_queue_struct *pairs)
2447 {
2448         struct hashmap_iter iter;
2449         struct strmap_entry *entry;
2450
2451         /*
2452          * Add to side_pairs all entries from renames->cached_pairs[side_index].
2453          * (Info in cached_irrelevant[side_index] is not relevant here.)
2454          */
2455         strmap_for_each_entry(cached_pairs, &iter, entry) {
2456                 struct diff_filespec *one, *two;
2457                 const char *old_name = entry->key;
2458                 const char *new_name = entry->value;
2459                 if (!new_name)
2460                         new_name = old_name;
2461
2462                 /* We don't care about oid/mode, only filenames and status */
2463                 one = alloc_filespec(old_name);
2464                 two = alloc_filespec(new_name);
2465                 diff_queue(pairs, one, two);
2466                 pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
2467         }
2468 }
2469
2470 static void cache_new_pair(struct rename_info *renames,
2471                            int side,
2472                            char *old_path,
2473                            char *new_path,
2474                            int free_old_value)
2475 {
2476         char *old_value;
2477         new_path = xstrdup(new_path);
2478         old_value = strmap_put(&renames->cached_pairs[side],
2479                                old_path, new_path);
2480         strset_add(&renames->cached_target_names[side], new_path);
2481         if (free_old_value)
2482                 free(old_value);
2483         else
2484                 assert(!old_value);
2485 }
2486
2487 static void possibly_cache_new_pair(struct rename_info *renames,
2488                                     struct diff_filepair *p,
2489                                     unsigned side,
2490                                     char *new_path)
2491 {
2492         int dir_renamed_side = 0;
2493
2494         if (new_path) {
2495                 /*
2496                  * Directory renames happen on the other side of history from
2497                  * the side that adds new files to the old directory.
2498                  */
2499                 dir_renamed_side = 3 - side;
2500         } else {
2501                 int val = strintmap_get(&renames->relevant_sources[side],
2502                                         p->one->path);
2503                 if (val == RELEVANT_NO_MORE) {
2504                         assert(p->status == 'D');
2505                         strset_add(&renames->cached_irrelevant[side],
2506                                    p->one->path);
2507                 }
2508                 if (val <= 0)
2509                         return;
2510         }
2511
2512         if (p->status == 'D') {
2513                 /*
2514                  * If we already had this delete, we'll just set it's value
2515                  * to NULL again, so no harm.
2516                  */
2517                 strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
2518         } else if (p->status == 'R') {
2519                 if (!new_path)
2520                         new_path = p->two->path;
2521                 else
2522                         cache_new_pair(renames, dir_renamed_side,
2523                                        p->two->path, new_path, 0);
2524                 cache_new_pair(renames, side, p->one->path, new_path, 1);
2525         } else if (p->status == 'A' && new_path) {
2526                 cache_new_pair(renames, dir_renamed_side,
2527                                p->two->path, new_path, 0);
2528         }
2529 }
2530
2531 static int compare_pairs(const void *a_, const void *b_)
2532 {
2533         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
2534         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
2535
2536         return strcmp(a->one->path, b->one->path);
2537 }
2538
2539 /* Call diffcore_rename() to update deleted/added pairs into rename pairs */
2540 static void detect_regular_renames(struct merge_options *opt,
2541                                    unsigned side_index)
2542 {
2543         struct diff_options diff_opts;
2544         struct rename_info *renames = &opt->priv->renames;
2545
2546         prune_cached_from_relevant(renames, side_index);
2547         if (!possible_side_renames(renames, side_index)) {
2548                 /*
2549                  * No rename detection needed for this side, but we still need
2550                  * to make sure 'adds' are marked correctly in case the other
2551                  * side had directory renames.
2552                  */
2553                 resolve_diffpair_statuses(&renames->pairs[side_index]);
2554                 return;
2555         }
2556
2557         partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
2558         repo_diff_setup(opt->repo, &diff_opts);
2559         diff_opts.flags.recursive = 1;
2560         diff_opts.flags.rename_empty = 0;
2561         diff_opts.detect_rename = DIFF_DETECT_RENAME;
2562         diff_opts.rename_limit = opt->rename_limit;
2563         if (opt->rename_limit <= 0)
2564                 diff_opts.rename_limit = 1000;
2565         diff_opts.rename_score = opt->rename_score;
2566         diff_opts.show_rename_progress = opt->show_rename_progress;
2567         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
2568         diff_setup_done(&diff_opts);
2569
2570         diff_queued_diff = renames->pairs[side_index];
2571         trace2_region_enter("diff", "diffcore_rename", opt->repo);
2572         diffcore_rename_extended(&diff_opts,
2573                                  &renames->relevant_sources[side_index],
2574                                  &renames->dirs_removed[side_index],
2575                                  &renames->dir_rename_count[side_index],
2576                                  &renames->cached_pairs[side_index]);
2577         trace2_region_leave("diff", "diffcore_rename", opt->repo);
2578         resolve_diffpair_statuses(&diff_queued_diff);
2579
2580         if (diff_opts.needed_rename_limit > renames->needed_limit)
2581                 renames->needed_limit = diff_opts.needed_rename_limit;
2582
2583         renames->pairs[side_index] = diff_queued_diff;
2584
2585         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
2586         diff_queued_diff.nr = 0;
2587         diff_queued_diff.queue = NULL;
2588         diff_flush(&diff_opts);
2589 }
2590
2591 /*
2592  * Get information of all renames which occurred in 'side_pairs', making use
2593  * of any implicit directory renames in side_dir_renames (also making use of
2594  * implicit directory renames rename_exclusions as needed by
2595  * check_for_directory_rename()).  Add all (updated) renames into result.
2596  */
2597 static int collect_renames(struct merge_options *opt,
2598                            struct diff_queue_struct *result,
2599                            unsigned side_index,
2600                            struct strmap *dir_renames_for_side,
2601                            struct strmap *rename_exclusions)
2602 {
2603         int i, clean = 1;
2604         struct strmap collisions;
2605         struct diff_queue_struct *side_pairs;
2606         struct hashmap_iter iter;
2607         struct strmap_entry *entry;
2608         struct rename_info *renames = &opt->priv->renames;
2609
2610         side_pairs = &renames->pairs[side_index];
2611         compute_collisions(&collisions, dir_renames_for_side, side_pairs);
2612
2613         for (i = 0; i < side_pairs->nr; ++i) {
2614                 struct diff_filepair *p = side_pairs->queue[i];
2615                 char *new_path; /* non-NULL only with directory renames */
2616
2617                 if (p->status != 'A' && p->status != 'R') {
2618                         possibly_cache_new_pair(renames, p, side_index, NULL);
2619                         diff_free_filepair(p);
2620                         continue;
2621                 }
2622
2623                 new_path = check_for_directory_rename(opt, p->two->path,
2624                                                       side_index,
2625                                                       dir_renames_for_side,
2626                                                       rename_exclusions,
2627                                                       &collisions,
2628                                                       &clean);
2629
2630                 possibly_cache_new_pair(renames, p, side_index, new_path);
2631                 if (p->status != 'R' && !new_path) {
2632                         diff_free_filepair(p);
2633                         continue;
2634                 }
2635
2636                 if (new_path)
2637                         apply_directory_rename_modifications(opt, p, new_path);
2638
2639                 /*
2640                  * p->score comes back from diffcore_rename_extended() with
2641                  * the similarity of the renamed file.  The similarity is
2642                  * was used to determine that the two files were related
2643                  * and are a rename, which we have already used, but beyond
2644                  * that we have no use for the similarity.  So p->score is
2645                  * now irrelevant.  However, process_renames() will need to
2646                  * know which side of the merge this rename was associated
2647                  * with, so overwrite p->score with that value.
2648                  */
2649                 p->score = side_index;
2650                 result->queue[result->nr++] = p;
2651         }
2652
2653         /* Free each value in the collisions map */
2654         strmap_for_each_entry(&collisions, &iter, entry) {
2655                 struct collision_info *info = entry->value;
2656                 string_list_clear(&info->source_files, 0);
2657         }
2658         /*
2659          * In compute_collisions(), we set collisions.strdup_strings to 0
2660          * so that we wouldn't have to make another copy of the new_path
2661          * allocated by apply_dir_rename().  But now that we've used them
2662          * and have no other references to these strings, it is time to
2663          * deallocate them.
2664          */
2665         free_strmap_strings(&collisions);
2666         strmap_clear(&collisions, 1);
2667         return clean;
2668 }
2669
2670 static int detect_and_process_renames(struct merge_options *opt,
2671                                       struct tree *merge_base,
2672                                       struct tree *side1,
2673                                       struct tree *side2)
2674 {
2675         struct diff_queue_struct combined;
2676         struct rename_info *renames = &opt->priv->renames;
2677         int need_dir_renames, s, clean = 1;
2678
2679         memset(&combined, 0, sizeof(combined));
2680         if (!possible_renames(renames))
2681                 goto cleanup;
2682
2683         trace2_region_enter("merge", "regular renames", opt->repo);
2684         detect_regular_renames(opt, MERGE_SIDE1);
2685         detect_regular_renames(opt, MERGE_SIDE2);
2686         use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
2687         use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
2688         trace2_region_leave("merge", "regular renames", opt->repo);
2689
2690         trace2_region_enter("merge", "directory renames", opt->repo);
2691         need_dir_renames =
2692           !opt->priv->call_depth &&
2693           (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE ||
2694            opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT);
2695
2696         if (need_dir_renames) {
2697                 get_provisional_directory_renames(opt, MERGE_SIDE1, &clean);
2698                 get_provisional_directory_renames(opt, MERGE_SIDE2, &clean);
2699                 handle_directory_level_conflicts(opt);
2700         }
2701
2702         ALLOC_GROW(combined.queue,
2703                    renames->pairs[1].nr + renames->pairs[2].nr,
2704                    combined.alloc);
2705         clean &= collect_renames(opt, &combined, MERGE_SIDE1,
2706                                  &renames->dir_renames[2],
2707                                  &renames->dir_renames[1]);
2708         clean &= collect_renames(opt, &combined, MERGE_SIDE2,
2709                                  &renames->dir_renames[1],
2710                                  &renames->dir_renames[2]);
2711         STABLE_QSORT(combined.queue, combined.nr, compare_pairs);
2712         trace2_region_leave("merge", "directory renames", opt->repo);
2713
2714         trace2_region_enter("merge", "process renames", opt->repo);
2715         clean &= process_renames(opt, &combined);
2716         trace2_region_leave("merge", "process renames", opt->repo);
2717
2718         goto simple_cleanup; /* collect_renames() handles some of cleanup */
2719
2720 cleanup:
2721         /*
2722          * Free now unneeded filepairs, which would have been handled
2723          * in collect_renames() normally but we skipped that code.
2724          */
2725         for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
2726                 struct diff_queue_struct *side_pairs;
2727                 int i;
2728
2729                 side_pairs = &renames->pairs[s];
2730                 for (i = 0; i < side_pairs->nr; ++i) {
2731                         struct diff_filepair *p = side_pairs->queue[i];
2732                         diff_free_filepair(p);
2733                 }
2734         }
2735
2736 simple_cleanup:
2737         /* Free memory for renames->pairs[] and combined */
2738         for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
2739                 free(renames->pairs[s].queue);
2740                 DIFF_QUEUE_CLEAR(&renames->pairs[s]);
2741         }
2742         if (combined.nr) {
2743                 int i;
2744                 for (i = 0; i < combined.nr; i++)
2745                         diff_free_filepair(combined.queue[i]);
2746                 free(combined.queue);
2747         }
2748
2749         return clean;
2750 }
2751
2752 /*** Function Grouping: functions related to process_entries() ***/
2753
/*
 * Comparison function that orders path names so that a directory entry
 * ends up adjacent to, and before, the files underneath it.
 *
 * Plain strcmp() ordering would produce
 *     foo
 *     foo.txt
 *     foo/bar
 * which puts "foo.txt" between "foo" and its child.  Instead, every name
 * is compared as though a '/' had been appended to it, so "foo" sorts
 * like "foo/" and we get
 *     foo.txt
 *     foo
 *     foo/bar
 * This is implemented as a hand-written strcmp in which reaching the end
 * of either string compares '/' (rather than NUL) against the other
 * string's character.
 *
 * If the "pretend there is a trailing slash" ordering looks odd, keep in
 * mind that this is not the final sort: write_tree() sorts again without
 * the trailing-slash trick, but only among paths directly inside a single
 * tree.
 *
 * df_name_compare was not used directly because it was too expensive here
 * (the string lengths are not at hand), hence this reimplementation.
 *
 * NOTE: callers never pass two equal strings, since this is used to sort
 * the keys of a strmap and those are unique by construction.  That is why
 * the "both characters are equal" case below only has to distinguish
 * which of the two names is the leading directory of the other.
 *
 * Returns a negative value if "one" sorts first, a positive value if "two"
 * sorts first.
 */
static int sort_dirs_next_to_their_children(const char *one, const char *two)
{
        unsigned char lhs, rhs;

        /* Skip over the common prefix of the two names */
        for (; *one != '\0' && *one == *two; one++, two++)
                ; /* nothing */

        /* Substitute '/' for the terminating NUL of either name */
        lhs = (*one != '\0') ? *one : '/';
        rhs = (*two != '\0') ? *two : '/';

        if (lhs != rhs)
                return lhs - rhs;

        /*
         * Both sides read as '/': one name is a leading directory of the
         * other.  The name that has run out is the directory and sorts
         * first.
         */
        return (*one != '\0') ? 1 : -1;
}
2807
2808 static int read_oid_strbuf(struct merge_options *opt,
2809                            const struct object_id *oid,
2810                            struct strbuf *dst)
2811 {
2812         void *buf;
2813         enum object_type type;
2814         unsigned long size;
2815         buf = read_object_file(oid, &type, &size);
2816         if (!buf)
2817                 return err(opt, _("cannot read object %s"), oid_to_hex(oid));
2818         if (type != OBJ_BLOB) {
2819                 free(buf);
2820                 return err(opt, _("object %s is not a blob"), oid_to_hex(oid));
2821         }
2822         strbuf_attach(dst, buf, size, size + 1);
2823         return 0;
2824 }
2825
2826 static int blob_unchanged(struct merge_options *opt,
2827                           const struct version_info *base,
2828                           const struct version_info *side,
2829                           const char *path)
2830 {
2831         struct strbuf basebuf = STRBUF_INIT;
2832         struct strbuf sidebuf = STRBUF_INIT;
2833         int ret = 0; /* assume changed for safety */
2834         struct index_state *idx = &opt->priv->attr_index;
2835
2836         if (!idx->initialized)
2837                 initialize_attr_index(opt);
2838
2839         if (base->mode != side->mode)
2840                 return 0;
2841         if (oideq(&base->oid, &side->oid))
2842                 return 1;
2843
2844         if (read_oid_strbuf(opt, &base->oid, &basebuf) ||
2845             read_oid_strbuf(opt, &side->oid, &sidebuf))
2846                 goto error_return;
2847         /*
2848          * Note: binary | is used so that both renormalizations are
2849          * performed.  Comparison can be skipped if both files are
2850          * unchanged since their sha1s have already been compared.
2851          */
2852         if (renormalize_buffer(idx, path, basebuf.buf, basebuf.len, &basebuf) |
2853             renormalize_buffer(idx, path, sidebuf.buf, sidebuf.len, &sidebuf))
2854                 ret = (basebuf.len == sidebuf.len &&
2855                        !memcmp(basebuf.buf, sidebuf.buf, basebuf.len));
2856
2857 error_return:
2858         strbuf_release(&basebuf);
2859         strbuf_release(&sidebuf);
2860         return ret;
2861 }
2862
2863 struct directory_versions {
2864         /*
2865          * versions: list of (basename -> version_info)
2866          *
2867          * The basenames are in reverse lexicographic order of full pathnames,
2868          * as processed in process_entries().  This puts all entries within
2869          * a directory together, and covers the directory itself after
2870          * everything within it, allowing us to write subtrees before needing
2871          * to record information for the tree itself.
2872          */
2873         struct string_list versions;
2874
2875         /*
2876          * offsets: list of (full relative path directories -> integer offsets)
2877          *
2878          * Since versions contains basenames from files in multiple different
2879          * directories, we need to know which entries in versions correspond
2880          * to which directories.  Values of e.g.
2881          *     ""             0
2882          *     src            2
2883          *     src/moduleA    5
2884          * Would mean that entries 0-1 of versions are files in the toplevel
2885          * directory, entries 2-4 are files under src/, and the remaining
2886          * entries starting at index 5 are files under src/moduleA/.
2887          */
2888         struct string_list offsets;
2889
2890         /*
2891          * last_directory: directory that previously processed file found in
2892          *
2893          * last_directory starts NULL, but records the directory in which the
2894          * previous file was found within.  As soon as
2895          *    directory(current_file) != last_directory
2896          * then we need to start updating accounting in versions & offsets.
2897          * Note that last_directory is always the last path in "offsets" (or
2898          * NULL if "offsets" is empty) so this exists just for quick access.
2899          */
2900         const char *last_directory;
2901
2902         /* last_directory_len: cached computation of strlen(last_directory) */
2903         unsigned last_directory_len;
2904 };
2905
2906 static int tree_entry_order(const void *a_, const void *b_)
2907 {
2908         const struct string_list_item *a = a_;
2909         const struct string_list_item *b = b_;
2910
2911         const struct merged_info *ami = a->util;
2912         const struct merged_info *bmi = b->util;
2913         return base_name_compare(a->string, strlen(a->string), ami->result.mode,
2914                                  b->string, strlen(b->string), bmi->result.mode);
2915 }
2916
2917 static void write_tree(struct object_id *result_oid,
2918                        struct string_list *versions,
2919                        unsigned int offset,
2920                        size_t hash_size)
2921 {
2922         size_t maxlen = 0, extra;
2923         unsigned int nr;
2924         struct strbuf buf = STRBUF_INIT;
2925         int i;
2926
2927         assert(offset <= versions->nr);
2928         nr = versions->nr - offset;
2929         if (versions->nr)
2930                 /* No need for STABLE_QSORT -- filenames must be unique */
2931                 QSORT(versions->items + offset, nr, tree_entry_order);
2932
2933         /* Pre-allocate some space in buf */
2934         extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
2935         for (i = 0; i < nr; i++) {
2936                 maxlen += strlen(versions->items[offset+i].string) + extra;
2937         }
2938         strbuf_grow(&buf, maxlen);
2939
2940         /* Write each entry out to buf */
2941         for (i = 0; i < nr; i++) {
2942                 struct merged_info *mi = versions->items[offset+i].util;
2943                 struct version_info *ri = &mi->result;
2944                 strbuf_addf(&buf, "%o %s%c",
2945                             ri->mode,
2946                             versions->items[offset+i].string, '\0');
2947                 strbuf_add(&buf, ri->oid.hash, hash_size);
2948         }
2949
2950         /* Write this object file out, and record in result_oid */
2951         write_object_file(buf.buf, buf.len, tree_type, result_oid);
2952         strbuf_release(&buf);
2953 }
2954
2955 static void record_entry_for_tree(struct directory_versions *dir_metadata,
2956                                   const char *path,
2957                                   struct merged_info *mi)
2958 {
2959         const char *basename;
2960
2961         if (mi->is_null)
2962                 /* nothing to record */
2963                 return;
2964
2965         basename = path + mi->basename_offset;
2966         assert(strchr(basename, '/') == NULL);
2967         string_list_append(&dir_metadata->versions,
2968                            basename)->util = &mi->result;
2969 }
2970
2971 static void write_completed_directory(struct merge_options *opt,
2972                                       const char *new_directory_name,
2973                                       struct directory_versions *info)
2974 {
2975         const char *prev_dir;
2976         struct merged_info *dir_info = NULL;
2977         unsigned int offset;
2978
2979         /*
2980          * Some explanation of info->versions and info->offsets...
2981          *
2982          * process_entries() iterates over all relevant files AND
2983          * directories in reverse lexicographic order, and calls this
2984          * function.  Thus, an example of the paths that process_entries()
2985          * could operate on (along with the directories for those paths
2986          * being shown) is:
2987          *
2988          *     xtract.c             ""
2989          *     tokens.txt           ""
2990          *     src/moduleB/umm.c    src/moduleB
2991          *     src/moduleB/stuff.h  src/moduleB
2992          *     src/moduleB/baz.c    src/moduleB
2993          *     src/moduleB          src
2994          *     src/moduleA/foo.c    src/moduleA
2995          *     src/moduleA/bar.c    src/moduleA
2996          *     src/moduleA          src
2997          *     src                  ""
2998          *     Makefile             ""
2999          *
3000          * info->versions:
3001          *
3002          *     always contains the unprocessed entries and their
3003          *     version_info information.  For example, after the first five
3004          *     entries above, info->versions would be:
3005          *
3006          *         xtract.c     <xtract.c's version_info>
3007          *         token.txt    <token.txt's version_info>
3008          *         umm.c        <src/moduleB/umm.c's version_info>
3009          *         stuff.h      <src/moduleB/stuff.h's version_info>
3010          *         baz.c        <src/moduleB/baz.c's version_info>
3011          *
3012          *     Once a subdirectory is completed we remove the entries in
3013          *     that subdirectory from info->versions, writing it as a tree
3014          *     (write_tree()).  Thus, as soon as we get to src/moduleB,
3015          *     info->versions would be updated to
3016          *
3017          *         xtract.c     <xtract.c's version_info>
3018          *         token.txt    <token.txt's version_info>
3019          *         moduleB      <src/moduleB's version_info>
3020          *
3021          * info->offsets:
3022          *
3023          *     helps us track which entries in info->versions correspond to
3024          *     which directories.  When we are N directories deep (e.g. 4
3025          *     for src/modA/submod/subdir/), we have up to N+1 unprocessed
3026          *     directories (+1 because of toplevel dir).  Corresponding to
3027          *     the info->versions example above, after processing five entries
3028          *     info->offsets will be:
3029          *
3030          *         ""           0
3031          *         src/moduleB  2
3032          *
3033          *     which is used to know that xtract.c & token.txt are from the
3034          *     toplevel dirctory, while umm.c & stuff.h & baz.c are from the
3035          *     src/moduleB directory.  Again, following the example above,
3036          *     once we need to process src/moduleB, then info->offsets is
3037          *     updated to
3038          *
3039          *         ""           0
3040          *         src          2
3041          *
3042          *     which says that moduleB (and only moduleB so far) is in the
3043          *     src directory.
3044          *
3045          *     One unique thing to note about info->offsets here is that
3046          *     "src" was not added to info->offsets until there was a path
3047          *     (a file OR directory) immediately below src/ that got
3048          *     processed.
3049          *
3050          * Since process_entry() just appends new entries to info->versions,
3051          * write_completed_directory() only needs to do work if the next path
3052          * is in a directory that is different than the last directory found
3053          * in info->offsets.
3054          */
3055
3056         /*
3057          * If we are working with the same directory as the last entry, there
3058          * is no work to do.  (See comments above the directory_name member of
3059          * struct merged_info for why we can use pointer comparison instead of
3060          * strcmp here.)
3061          */
3062         if (new_directory_name == info->last_directory)
3063                 return;
3064
3065         /*
3066          * If we are just starting (last_directory is NULL), or last_directory
3067          * is a prefix of the current directory, then we can just update
3068          * info->offsets to record the offset where we started this directory
3069          * and update last_directory to have quick access to it.
3070          */
3071         if (info->last_directory == NULL ||
3072             !strncmp(new_directory_name, info->last_directory,
3073                      info->last_directory_len)) {
3074                 uintptr_t offset = info->versions.nr;
3075
3076                 info->last_directory = new_directory_name;
3077                 info->last_directory_len = strlen(info->last_directory);
3078                 /*
3079                  * Record the offset into info->versions where we will
3080                  * start recording basenames of paths found within
3081                  * new_directory_name.
3082                  */
3083                 string_list_append(&info->offsets,
3084                                    info->last_directory)->util = (void*)offset;
3085                 return;
3086         }
3087
3088         /*
3089          * The next entry that will be processed will be within
3090          * new_directory_name.  Since at this point we know that
3091          * new_directory_name is within a different directory than
3092          * info->last_directory, we have all entries for info->last_directory
3093          * in info->versions and we need to create a tree object for them.
3094          */
3095         dir_info = strmap_get(&opt->priv->paths, info->last_directory);
3096         assert(dir_info);
3097         offset = (uintptr_t)info->offsets.items[info->offsets.nr-1].util;
3098         if (offset == info->versions.nr) {
3099                 /*
3100                  * Actually, we don't need to create a tree object in this
3101                  * case.  Whenever all files within a directory disappear
3102                  * during the merge (e.g. unmodified on one side and
3103                  * deleted on the other, or files were renamed elsewhere),
3104                  * then we get here and the directory itself needs to be
3105                  * omitted from its parent tree as well.
3106                  */
3107                 dir_info->is_null = 1;
3108         } else {
3109                 /*
3110                  * Write out the tree to the git object directory, and also
3111                  * record the mode and oid in dir_info->result.
3112                  */
3113                 dir_info->is_null = 0;
3114                 dir_info->result.mode = S_IFDIR;
3115                 write_tree(&dir_info->result.oid, &info->versions, offset,
3116                            opt->repo->hash_algo->rawsz);
3117         }
3118
3119         /*
3120          * We've now used several entries from info->versions and one entry
3121          * from info->offsets, so we get rid of those values.
3122          */
3123         info->offsets.nr--;
3124         info->versions.nr = offset;
3125
3126         /*
3127          * Now we've taken care of the completed directory, but we need to
3128          * prepare things since future entries will be in
3129          * new_directory_name.  (In particular, process_entry() will be
3130          * appending new entries to info->versions.)  So, we need to make
3131          * sure new_directory_name is the last entry in info->offsets.
3132          */
3133         prev_dir = info->offsets.nr == 0 ? NULL :
3134                    info->offsets.items[info->offsets.nr-1].string;
3135         if (new_directory_name != prev_dir) {
3136                 uintptr_t c = info->versions.nr;
3137                 string_list_append(&info->offsets,
3138                                    new_directory_name)->util = (void*)c;
3139         }
3140
3141         /* And, of course, we need to update last_directory to match. */
3142         info->last_directory = new_directory_name;
3143         info->last_directory_len = strlen(info->last_directory);
3144 }
3145
3146 /* Per entry merge function */
3147 static void process_entry(struct merge_options *opt,
3148                           const char *path,
3149                           struct conflict_info *ci,
3150                           struct directory_versions *dir_metadata)
3151 {
3152         int df_file_index = 0;
3153
3154         VERIFY_CI(ci);
3155         assert(ci->filemask >= 0 && ci->filemask <= 7);
3156         /* ci->match_mask == 7 was handled in collect_merge_info_callback() */
3157         assert(ci->match_mask == 0 || ci->match_mask == 3 ||
3158                ci->match_mask == 5 || ci->match_mask == 6);
3159
3160         if (ci->dirmask) {
3161                 record_entry_for_tree(dir_metadata, path, &ci->merged);
3162                 if (ci->filemask == 0)
3163                         /* nothing else to handle */
3164                         return;
3165                 assert(ci->df_conflict);
3166         }
3167
3168         if (ci->df_conflict && ci->merged.result.mode == 0) {
3169                 int i;
3170
3171                 /*
3172                  * directory no longer in the way, but we do have a file we
3173                  * need to place here so we need to clean away the "directory
3174                  * merges to nothing" result.
3175                  */
3176                 ci->df_conflict = 0;
3177                 assert(ci->filemask != 0);
3178                 ci->merged.clean = 0;
3179                 ci->merged.is_null = 0;
3180                 /* and we want to zero out any directory-related entries */
3181                 ci->match_mask = (ci->match_mask & ~ci->dirmask);
3182                 ci->dirmask = 0;
3183                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3184                         if (ci->filemask & (1 << i))
3185                                 continue;
3186                         ci->stages[i].mode = 0;
3187                         oidcpy(&ci->stages[i].oid, null_oid());
3188                 }
3189         } else if (ci->df_conflict && ci->merged.result.mode != 0) {
3190                 /*
3191                  * This started out as a D/F conflict, and the entries in
3192                  * the competing directory were not removed by the merge as
3193                  * evidenced by write_completed_directory() writing a value
3194                  * to ci->merged.result.mode.
3195                  */
3196                 struct conflict_info *new_ci;
3197                 const char *branch;
3198                 const char *old_path = path;
3199                 int i;
3200
3201                 assert(ci->merged.result.mode == S_IFDIR);
3202
3203                 /*
3204                  * If filemask is 1, we can just ignore the file as having
3205                  * been deleted on both sides.  We do not want to overwrite
3206                  * ci->merged.result, since it stores the tree for all the
3207                  * files under it.
3208                  */
3209                 if (ci->filemask == 1) {
3210                         ci->filemask = 0;
3211                         return;
3212                 }
3213
3214                 /*
3215                  * This file still exists on at least one side, and we want
3216                  * the directory to remain here, so we need to move this
3217                  * path to some new location.
3218                  */
3219                 CALLOC_ARRAY(new_ci, 1);
3220                 /* We don't really want new_ci->merged.result copied, but it'll
3221                  * be overwritten below so it doesn't matter.  We also don't
3222                  * want any directory mode/oid values copied, but we'll zero
3223                  * those out immediately.  We do want the rest of ci copied.
3224                  */
3225                 memcpy(new_ci, ci, sizeof(*ci));
3226                 new_ci->match_mask = (new_ci->match_mask & ~new_ci->dirmask);
3227                 new_ci->dirmask = 0;
3228                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3229                         if (new_ci->filemask & (1 << i))
3230                                 continue;
3231                         /* zero out any entries related to directories */
3232                         new_ci->stages[i].mode = 0;
3233                         oidcpy(&new_ci->stages[i].oid, null_oid());
3234                 }
3235
3236                 /*
3237                  * Find out which side this file came from; note that we
3238                  * cannot just use ci->filemask, because renames could cause
3239                  * the filemask to go back to 7.  So we use dirmask, then
3240                  * pick the opposite side's index.
3241                  */
3242                 df_file_index = (ci->dirmask & (1 << 1)) ? 2 : 1;
3243                 branch = (df_file_index == 1) ? opt->branch1 : opt->branch2;
3244                 path = unique_path(&opt->priv->paths, path, branch);
3245                 strmap_put(&opt->priv->paths, path, new_ci);
3246
3247                 path_msg(opt, path, 0,
3248                          _("CONFLICT (file/directory): directory in the way "
3249                            "of %s from %s; moving it to %s instead."),
3250                          old_path, branch, path);
3251
3252                 /*
3253                  * Zero out the filemask for the old ci.  At this point, ci
3254                  * was just an entry for a directory, so we don't need to
3255                  * do anything more with it.
3256                  */
3257                 ci->filemask = 0;
3258
3259                 /*
3260                  * Now note that we're working on the new entry (path was
3261                  * updated above.
3262                  */
3263                 ci = new_ci;
3264         }
3265
3266         /*
3267          * NOTE: Below there is a long switch-like if-elseif-elseif... block
3268          *       which the code goes through even for the df_conflict cases
3269          *       above.
3270          */
3271         if (ci->match_mask) {
3272                 ci->merged.clean = 1;
3273                 if (ci->match_mask == 6) {
3274                         /* stages[1] == stages[2] */
3275                         ci->merged.result.mode = ci->stages[1].mode;
3276                         oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
3277                 } else {
3278                         /* determine the mask of the side that didn't match */
3279                         unsigned int othermask = 7 & ~ci->match_mask;
3280                         int side = (othermask == 4) ? 2 : 1;
3281
3282                         ci->merged.result.mode = ci->stages[side].mode;
3283                         ci->merged.is_null = !ci->merged.result.mode;
3284                         oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
3285
3286                         assert(othermask == 2 || othermask == 4);
3287                         assert(ci->merged.is_null ==
3288                                (ci->filemask == ci->match_mask));
3289                 }
3290         } else if (ci->filemask >= 6 &&
3291                    (S_IFMT & ci->stages[1].mode) !=
3292                    (S_IFMT & ci->stages[2].mode)) {
3293                 /* Two different items from (file/submodule/symlink) */
3294                 if (opt->priv->call_depth) {
3295                         /* Just use the version from the merge base */
3296                         ci->merged.clean = 0;
3297                         oidcpy(&ci->merged.result.oid, &ci->stages[0].oid);
3298                         ci->merged.result.mode = ci->stages[0].mode;
3299                         ci->merged.is_null = (ci->merged.result.mode == 0);
3300                 } else {
3301                         /* Handle by renaming one or both to separate paths. */
3302                         unsigned o_mode = ci->stages[0].mode;
3303                         unsigned a_mode = ci->stages[1].mode;
3304                         unsigned b_mode = ci->stages[2].mode;
3305                         struct conflict_info *new_ci;
3306                         const char *a_path = NULL, *b_path = NULL;
3307                         int rename_a = 0, rename_b = 0;
3308
3309                         new_ci = xmalloc(sizeof(*new_ci));
3310
3311                         if (S_ISREG(a_mode))
3312                                 rename_a = 1;
3313                         else if (S_ISREG(b_mode))
3314                                 rename_b = 1;
3315                         else {
3316                                 rename_a = 1;
3317                                 rename_b = 1;
3318                         }
3319
3320                         if (rename_a && rename_b) {
3321                                 path_msg(opt, path, 0,
3322                                          _("CONFLICT (distinct types): %s had "
3323                                            "different types on each side; "
3324                                            "renamed both of them so each can "
3325                                            "be recorded somewhere."),
3326                                          path);
3327                         } else {
3328                                 path_msg(opt, path, 0,
3329                                          _("CONFLICT (distinct types): %s had "
3330                                            "different types on each side; "
3331                                            "renamed one of them so each can be "
3332                                            "recorded somewhere."),
3333                                          path);
3334                         }
3335
3336                         ci->merged.clean = 0;
3337                         memcpy(new_ci, ci, sizeof(*new_ci));
3338
3339                         /* Put b into new_ci, removing a from stages */
3340                         new_ci->merged.result.mode = ci->stages[2].mode;
3341                         oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid);
3342                         new_ci->stages[1].mode = 0;
3343                         oidcpy(&new_ci->stages[1].oid, null_oid());
3344                         new_ci->filemask = 5;
3345                         if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) {
3346                                 new_ci->stages[0].mode = 0;
3347                                 oidcpy(&new_ci->stages[0].oid, null_oid());
3348                                 new_ci->filemask = 4;
3349                         }
3350
3351                         /* Leave only a in ci, fixing stages. */
3352                         ci->merged.result.mode = ci->stages[1].mode;
3353                         oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
3354                         ci->stages[2].mode = 0;
3355                         oidcpy(&ci->stages[2].oid, null_oid());
3356                         ci->filemask = 3;
3357                         if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) {
3358                                 ci->stages[0].mode = 0;
3359                                 oidcpy(&ci->stages[0].oid, null_oid());
3360                                 ci->filemask = 2;
3361                         }
3362
3363                         /* Insert entries into opt->priv_paths */
3364                         assert(rename_a || rename_b);
3365                         if (rename_a) {
3366                                 a_path = unique_path(&opt->priv->paths,
3367                                                      path, opt->branch1);
3368                                 strmap_put(&opt->priv->paths, a_path, ci);
3369                         }
3370
3371                         if (rename_b)
3372                                 b_path = unique_path(&opt->priv->paths,
3373                                                      path, opt->branch2);
3374                         else
3375                                 b_path = path;
3376                         strmap_put(&opt->priv->paths, b_path, new_ci);
3377
3378                         if (rename_a && rename_b) {
3379                                 strmap_remove(&opt->priv->paths, path, 0);
3380                                 /*
3381                                  * We removed path from opt->priv->paths.  path
3382                                  * will also eventually need to be freed, but
3383                                  * it may still be used by e.g.  ci->pathnames.
3384                                  * So, store it in another string-list for now.
3385                                  */
3386                                 string_list_append(&opt->priv->paths_to_free,
3387                                                    path);
3388                         }
3389
3390                         /*
3391                          * Do special handling for b_path since process_entry()
3392                          * won't be called on it specially.
3393                          */
3394                         strmap_put(&opt->priv->conflicted, b_path, new_ci);
3395                         record_entry_for_tree(dir_metadata, b_path,
3396                                               &new_ci->merged);
3397
3398                         /*
3399                          * Remaining code for processing this entry should
3400                          * think in terms of processing a_path.
3401                          */
3402                         if (a_path)
3403                                 path = a_path;
3404                 }
3405         } else if (ci->filemask >= 6) {
3406                 /* Need a two-way or three-way content merge */
3407                 struct version_info merged_file;
3408                 unsigned clean_merge;
3409                 struct version_info *o = &ci->stages[0];
3410                 struct version_info *a = &ci->stages[1];
3411                 struct version_info *b = &ci->stages[2];
3412
3413                 clean_merge = handle_content_merge(opt, path, o, a, b,
3414                                                    ci->pathnames,
3415                                                    opt->priv->call_depth * 2,
3416                                                    &merged_file);
3417                 ci->merged.clean = clean_merge &&
3418                                    !ci->df_conflict && !ci->path_conflict;
3419                 ci->merged.result.mode = merged_file.mode;
3420                 ci->merged.is_null = (merged_file.mode == 0);
3421                 oidcpy(&ci->merged.result.oid, &merged_file.oid);
3422                 if (clean_merge && ci->df_conflict) {
3423                         assert(df_file_index == 1 || df_file_index == 2);
3424                         ci->filemask = 1 << df_file_index;
3425                         ci->stages[df_file_index].mode = merged_file.mode;
3426                         oidcpy(&ci->stages[df_file_index].oid, &merged_file.oid);
3427                 }
3428                 if (!clean_merge) {
3429                         const char *reason = _("content");
3430                         if (ci->filemask == 6)
3431                                 reason = _("add/add");
3432                         if (S_ISGITLINK(merged_file.mode))
3433                                 reason = _("submodule");
3434                         path_msg(opt, path, 0,
3435                                  _("CONFLICT (%s): Merge conflict in %s"),
3436                                  reason, path);
3437                 }
3438         } else if (ci->filemask == 3 || ci->filemask == 5) {
3439                 /* Modify/delete */
3440                 const char *modify_branch, *delete_branch;
3441                 int side = (ci->filemask == 5) ? 2 : 1;
3442                 int index = opt->priv->call_depth ? 0 : side;
3443
3444                 ci->merged.result.mode = ci->stages[index].mode;
3445                 oidcpy(&ci->merged.result.oid, &ci->stages[index].oid);
3446                 ci->merged.clean = 0;
3447
3448                 modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
3449                 delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
3450
3451                 if (opt->renormalize &&
3452                     blob_unchanged(opt, &ci->stages[0], &ci->stages[side],
3453                                    path)) {
3454                         ci->merged.is_null = 1;
3455                         ci->merged.clean = 1;
3456                 } else if (ci->path_conflict &&
3457                            oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
3458                         /*
3459                          * This came from a rename/delete; no action to take,
3460                          * but avoid printing "modify/delete" conflict notice
3461                          * since the contents were not modified.
3462                          */
3463                 } else {
3464                         path_msg(opt, path, 0,
3465                                  _("CONFLICT (modify/delete): %s deleted in %s "
3466                                    "and modified in %s.  Version %s of %s left "
3467                                    "in tree."),
3468                                  path, delete_branch, modify_branch,
3469                                  modify_branch, path);
3470                 }
3471         } else if (ci->filemask == 2 || ci->filemask == 4) {
3472                 /* Added on one side */
3473                 int side = (ci->filemask == 4) ? 2 : 1;
3474                 ci->merged.result.mode = ci->stages[side].mode;
3475                 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
3476                 ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
3477         } else if (ci->filemask == 1) {
3478                 /* Deleted on both sides */
3479                 ci->merged.is_null = 1;
3480                 ci->merged.result.mode = 0;
3481                 oidcpy(&ci->merged.result.oid, null_oid());
3482                 ci->merged.clean = !ci->path_conflict;
3483         }
3484
3485         /*
3486          * If still conflicted, record it separately.  This allows us to later
3487          * iterate over just conflicted entries when updating the index instead
3488          * of iterating over all entries.
3489          */
3490         if (!ci->merged.clean)
3491                 strmap_put(&opt->priv->conflicted, path, ci);
3492
3493         /* Record metadata for ci->merged in dir_metadata */
3494         record_entry_for_tree(dir_metadata, path, &ci->merged);
3495 }
3496
3497 static void process_entries(struct merge_options *opt,
3498                             struct object_id *result_oid)
3499 {
3500         struct hashmap_iter iter;
3501         struct strmap_entry *e;
3502         struct string_list plist = STRING_LIST_INIT_NODUP;
3503         struct string_list_item *entry;
3504         struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
3505                                                    STRING_LIST_INIT_NODUP,
3506                                                    NULL, 0 };
3507
3508         trace2_region_enter("merge", "process_entries setup", opt->repo);
3509         if (strmap_empty(&opt->priv->paths)) {
3510                 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
3511                 return;
3512         }
3513
3514         /* Hack to pre-allocate plist to the desired size */
3515         trace2_region_enter("merge", "plist grow", opt->repo);
3516         ALLOC_GROW(plist.items, strmap_get_size(&opt->priv->paths), plist.alloc);
3517         trace2_region_leave("merge", "plist grow", opt->repo);
3518
3519         /* Put every entry from paths into plist, then sort */
3520         trace2_region_enter("merge", "plist copy", opt->repo);
3521         strmap_for_each_entry(&opt->priv->paths, &iter, e) {
3522                 string_list_append(&plist, e->key)->util = e->value;
3523         }
3524         trace2_region_leave("merge", "plist copy", opt->repo);
3525
3526         trace2_region_enter("merge", "plist special sort", opt->repo);
3527         plist.cmp = sort_dirs_next_to_their_children;
3528         string_list_sort(&plist);
3529         trace2_region_leave("merge", "plist special sort", opt->repo);
3530
3531         trace2_region_leave("merge", "process_entries setup", opt->repo);
3532
3533         /*
3534          * Iterate over the items in reverse order, so we can handle paths
3535          * below a directory before needing to handle the directory itself.
3536          *
3537          * This allows us to write subtrees before we need to write trees,
3538          * and it also enables sane handling of directory/file conflicts
3539          * (because it allows us to know whether the directory is still in
3540          * the way when it is time to process the file at the same path).
3541          */
3542         trace2_region_enter("merge", "processing", opt->repo);
3543         for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
3544                 char *path = entry->string;
3545                 /*
3546                  * NOTE: mi may actually be a pointer to a conflict_info, but
3547                  * we have to check mi->clean first to see if it's safe to
3548                  * reassign to such a pointer type.
3549                  */
3550                 struct merged_info *mi = entry->util;
3551
3552                 write_completed_directory(opt, mi->directory_name,
3553                                           &dir_metadata);
3554                 if (mi->clean)
3555                         record_entry_for_tree(&dir_metadata, path, mi);
3556                 else {
3557                         struct conflict_info *ci = (struct conflict_info *)mi;
3558                         process_entry(opt, path, ci, &dir_metadata);
3559                 }
3560         }
3561         trace2_region_leave("merge", "processing", opt->repo);
3562
3563         trace2_region_enter("merge", "process_entries cleanup", opt->repo);
3564         if (dir_metadata.offsets.nr != 1 ||
3565             (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
3566                 printf("dir_metadata.offsets.nr = %d (should be 1)\n",
3567                        dir_metadata.offsets.nr);
3568                 printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
3569                        (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
3570                 fflush(stdout);
3571                 BUG("dir_metadata accounting completely off; shouldn't happen");
3572         }
3573         write_tree(result_oid, &dir_metadata.versions, 0,
3574                    opt->repo->hash_algo->rawsz);
3575         string_list_clear(&plist, 0);
3576         string_list_clear(&dir_metadata.versions, 0);
3577         string_list_clear(&dir_metadata.offsets, 0);
3578         trace2_region_leave("merge", "process_entries cleanup", opt->repo);
3579 }
3580
3581 /*** Function Grouping: functions related to merge_switch_to_result() ***/
3582
3583 static int checkout(struct merge_options *opt,
3584                     struct tree *prev,
3585                     struct tree *next)
3586 {
3587         /* Switch the index/working copy from old to new */
3588         int ret;
3589         struct tree_desc trees[2];
3590         struct unpack_trees_options unpack_opts;
3591
3592         memset(&unpack_opts, 0, sizeof(unpack_opts));
3593         unpack_opts.head_idx = -1;
3594         unpack_opts.src_index = opt->repo->index;
3595         unpack_opts.dst_index = opt->repo->index;
3596
3597         setup_unpack_trees_porcelain(&unpack_opts, "merge");
3598
3599         /*
3600          * NOTE: if this were just "git checkout" code, we would probably
3601          * read or refresh the cache and check for a conflicted index, but
3602          * builtin/merge.c or sequencer.c really needs to read the index
3603          * and check for conflicted entries before starting merging for a
3604          * good user experience (no sense waiting for merges/rebases before
3605          * erroring out), so there's no reason to duplicate that work here.
3606          */
3607
3608         /* 2-way merge to the new branch */
3609         unpack_opts.update = 1;
3610         unpack_opts.merge = 1;
3611         unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
3612         unpack_opts.verbose_update = (opt->verbosity > 2);
3613         unpack_opts.fn = twoway_merge;
3614         if (1/* FIXME: opts->overwrite_ignore*/) {
3615                 CALLOC_ARRAY(unpack_opts.dir, 1);
3616                 unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
3617                 setup_standard_excludes(unpack_opts.dir);
3618         }
3619         parse_tree(prev);
3620         init_tree_desc(&trees[0], prev->buffer, prev->size);
3621         parse_tree(next);
3622         init_tree_desc(&trees[1], next->buffer, next->size);
3623
3624         ret = unpack_trees(2, trees, &unpack_opts);
3625         clear_unpack_trees_porcelain(&unpack_opts);
3626         dir_clear(unpack_opts.dir);
3627         FREE_AND_NULL(unpack_opts.dir);
3628         return ret;
3629 }
3630
3631 static int record_conflicted_index_entries(struct merge_options *opt)
3632 {
3633         struct hashmap_iter iter;
3634         struct strmap_entry *e;
3635         struct index_state *index = opt->repo->index;
3636         struct checkout state = CHECKOUT_INIT;
3637         int errs = 0;
3638         int original_cache_nr;
3639
3640         if (strmap_empty(&opt->priv->conflicted))
3641                 return 0;
3642
3643         /* If any entries have skip_worktree set, we'll have to check 'em out */
3644         state.force = 1;
3645         state.quiet = 1;
3646         state.refresh_cache = 1;
3647         state.istate = index;
3648         original_cache_nr = index->cache_nr;
3649
3650         /* Put every entry from paths into plist, then sort */
3651         strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
3652                 const char *path = e->key;
3653                 struct conflict_info *ci = e->value;
3654                 int pos;
3655                 struct cache_entry *ce;
3656                 int i;
3657
3658                 VERIFY_CI(ci);
3659
3660                 /*
3661                  * The index will already have a stage=0 entry for this path,
3662                  * because we created an as-merged-as-possible version of the
3663                  * file and checkout() moved the working copy and index over
3664                  * to that version.
3665                  *
3666                  * However, previous iterations through this loop will have
3667                  * added unstaged entries to the end of the cache which
3668                  * ignore the standard alphabetical ordering of cache
3669                  * entries and break invariants needed for index_name_pos()
3670                  * to work.  However, we know the entry we want is before
3671                  * those appended cache entries, so do a temporary swap on
3672                  * cache_nr to only look through entries of interest.
3673                  */
3674                 SWAP(index->cache_nr, original_cache_nr);
3675                 pos = index_name_pos(index, path, strlen(path));
3676                 SWAP(index->cache_nr, original_cache_nr);
3677                 if (pos < 0) {
3678                         if (ci->filemask != 1)
3679                                 BUG("Conflicted %s but nothing in basic working tree or index; this shouldn't happen", path);
3680                         cache_tree_invalidate_path(index, path);
3681                 } else {
3682                         ce = index->cache[pos];
3683
3684                         /*
3685                          * Clean paths with CE_SKIP_WORKTREE set will not be
3686                          * written to the working tree by the unpack_trees()
3687                          * call in checkout().  Our conflicted entries would
3688                          * have appeared clean to that code since we ignored
3689                          * the higher order stages.  Thus, we need override
3690                          * the CE_SKIP_WORKTREE bit and manually write those
3691                          * files to the working disk here.
3692                          */
3693                         if (ce_skip_worktree(ce)) {
3694                                 struct stat st;
3695
3696                                 if (!lstat(path, &st)) {
3697                                         char *new_name = unique_path(&opt->priv->paths,
3698                                                                      path,
3699                                                                      "cruft");
3700
3701                                         path_msg(opt, path, 1,
3702                                                  _("Note: %s not up to date and in way of checking out conflicted version; old copy renamed to %s"),
3703                                                  path, new_name);
3704                                         errs |= rename(path, new_name);
3705                                         free(new_name);
3706                                 }
3707                                 errs |= checkout_entry(ce, &state, NULL, NULL);
3708                         }
3709
3710                         /*
3711                          * Mark this cache entry for removal and instead add
3712                          * new stage>0 entries corresponding to the
3713                          * conflicts.  If there are many conflicted entries, we
3714                          * want to avoid memmove'ing O(NM) entries by
3715                          * inserting the new entries one at a time.  So,
3716                          * instead, we just add the new cache entries to the
3717                          * end (ignoring normal index requirements on sort
3718                          * order) and sort the index once we're all done.
3719                          */
3720                         ce->ce_flags |= CE_REMOVE;
3721                 }
3722
3723                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3724                         struct version_info *vi;
3725                         if (!(ci->filemask & (1ul << i)))
3726                                 continue;
3727                         vi = &ci->stages[i];
3728                         ce = make_cache_entry(index, vi->mode, &vi->oid,
3729                                               path, i+1, 0);
3730                         add_index_entry(index, ce, ADD_CACHE_JUST_APPEND);
3731                 }
3732         }
3733
3734         /*
3735          * Remove the unused cache entries (and invalidate the relevant
3736          * cache-trees), then sort the index entries to get the conflicted
3737          * entries we added to the end into their right locations.
3738          */
3739         remove_marked_cache_entries(index, 1);
3740         /*
3741          * No need for STABLE_QSORT -- cmp_cache_name_compare sorts primarily
3742          * on filename and secondarily on stage, and (name, stage #) are a
3743          * unique tuple.
3744          */
3745         QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
3746
3747         return errs;
3748 }
3749
3750 void merge_switch_to_result(struct merge_options *opt,
3751                             struct tree *head,
3752                             struct merge_result *result,
3753                             int update_worktree_and_index,
3754                             int display_update_msgs)
3755 {
3756         assert(opt->priv == NULL);
3757         if (result->clean >= 0 && update_worktree_and_index) {
3758                 const char *filename;
3759                 FILE *fp;
3760
3761                 trace2_region_enter("merge", "checkout", opt->repo);
3762                 if (checkout(opt, head, result->tree)) {
3763                         /* failure to function */
3764                         result->clean = -1;
3765                         return;
3766                 }
3767                 trace2_region_leave("merge", "checkout", opt->repo);
3768
3769                 trace2_region_enter("merge", "record_conflicted", opt->repo);
3770                 opt->priv = result->priv;
3771                 if (record_conflicted_index_entries(opt)) {
3772                         /* failure to function */
3773                         opt->priv = NULL;
3774                         result->clean = -1;
3775                         return;
3776                 }
3777                 opt->priv = NULL;
3778                 trace2_region_leave("merge", "record_conflicted", opt->repo);
3779
3780                 trace2_region_enter("merge", "write_auto_merge", opt->repo);
3781                 filename = git_path_auto_merge(opt->repo);
3782                 fp = xfopen(filename, "w");
3783                 fprintf(fp, "%s\n", oid_to_hex(&result->tree->object.oid));
3784                 fclose(fp);
3785                 trace2_region_leave("merge", "write_auto_merge", opt->repo);
3786         }
3787
3788         if (display_update_msgs) {
3789                 struct merge_options_internal *opti = result->priv;
3790                 struct hashmap_iter iter;
3791                 struct strmap_entry *e;
3792                 struct string_list olist = STRING_LIST_INIT_NODUP;
3793                 int i;
3794
3795                 trace2_region_enter("merge", "display messages", opt->repo);
3796
3797                 /* Hack to pre-allocate olist to the desired size */
3798                 ALLOC_GROW(olist.items, strmap_get_size(&opti->output),
3799                            olist.alloc);
3800
3801                 /* Put every entry from output into olist, then sort */
3802                 strmap_for_each_entry(&opti->output, &iter, e) {
3803                         string_list_append(&olist, e->key)->util = e->value;
3804                 }
3805                 string_list_sort(&olist);
3806
3807                 /* Iterate over the items, printing them */
3808                 for (i = 0; i < olist.nr; ++i) {
3809                         struct strbuf *sb = olist.items[i].util;
3810
3811                         printf("%s", sb->buf);
3812                 }
3813                 string_list_clear(&olist, 0);
3814
3815                 /* Also include needed rename limit adjustment now */
3816                 diff_warn_rename_limit("merge.renamelimit",
3817                                        opti->renames.needed_limit, 0);
3818
3819                 trace2_region_leave("merge", "display messages", opt->repo);
3820         }
3821
3822         merge_finalize(opt, result);
3823 }
3824
3825 void merge_finalize(struct merge_options *opt,
3826                     struct merge_result *result)
3827 {
3828         struct merge_options_internal *opti = result->priv;
3829
3830         if (opt->renormalize)
3831                 git_attr_set_direction(GIT_ATTR_CHECKIN);
3832         assert(opt->priv == NULL);
3833
3834         clear_or_reinit_internal_opts(opti, 0);
3835         FREE_AND_NULL(opti);
3836 }
3837
3838 /*** Function Grouping: helper functions for merge_incore_*() ***/
3839
3840 static struct tree *shift_tree_object(struct repository *repo,
3841                                       struct tree *one, struct tree *two,
3842                                       const char *subtree_shift)
3843 {
3844         struct object_id shifted;
3845
3846         if (!*subtree_shift) {
3847                 shift_tree(repo, &one->object.oid, &two->object.oid, &shifted, 0);
3848         } else {
3849                 shift_tree_by(repo, &one->object.oid, &two->object.oid, &shifted,
3850                               subtree_shift);
3851         }
3852         if (oideq(&two->object.oid, &shifted))
3853                 return two;
3854         return lookup_tree(repo, &shifted);
3855 }
3856
3857 static inline void set_commit_tree(struct commit *c, struct tree *t)
3858 {
3859         c->maybe_tree = t;
3860 }
3861
3862 static struct commit *make_virtual_commit(struct repository *repo,
3863                                           struct tree *tree,
3864                                           const char *comment)
3865 {
3866         struct commit *commit = alloc_commit_node(repo);
3867
3868         set_merge_remote_desc(commit, comment, (struct object *)commit);
3869         set_commit_tree(commit, tree);
3870         commit->object.parsed = 1;
3871         return commit;
3872 }
3873
3874 static void merge_start(struct merge_options *opt, struct merge_result *result)
3875 {
3876         struct rename_info *renames;
3877         int i;
3878
3879         /* Sanity checks on opt */
3880         trace2_region_enter("merge", "sanity checks", opt->repo);
3881         assert(opt->repo);
3882
3883         assert(opt->branch1 && opt->branch2);
3884
3885         assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
3886                opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
3887         assert(opt->rename_limit >= -1);
3888         assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
3889         assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
3890
3891         assert(opt->xdl_opts >= 0);
3892         assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
3893                opt->recursive_variant <= MERGE_VARIANT_THEIRS);
3894
3895         /*
3896          * detect_renames, verbosity, buffer_output, and obuf are ignored
3897          * fields that were used by "recursive" rather than "ort" -- but
3898          * sanity check them anyway.
3899          */
3900         assert(opt->detect_renames >= -1 &&
3901                opt->detect_renames <= DIFF_DETECT_COPY);
3902         assert(opt->verbosity >= 0 && opt->verbosity <= 5);
3903         assert(opt->buffer_output <= 2);
3904         assert(opt->obuf.len == 0);
3905
3906         assert(opt->priv == NULL);
3907         if (result->_properly_initialized != 0 &&
3908             result->_properly_initialized != RESULT_INITIALIZED)
3909                 BUG("struct merge_result passed to merge_incore_*recursive() must be zeroed or filled with values from a previous run");
3910         assert(!!result->priv == !!result->_properly_initialized);
3911         if (result->priv) {
3912                 opt->priv = result->priv;
3913                 result->priv = NULL;
3914                 /*
3915                  * opt->priv non-NULL means we had results from a previous
3916                  * run; do a few sanity checks that user didn't mess with
3917                  * it in an obvious fashion.
3918                  */
3919                 assert(opt->priv->call_depth == 0);
3920                 assert(!opt->priv->toplevel_dir ||
3921                        0 == strlen(opt->priv->toplevel_dir));
3922         }
3923         trace2_region_leave("merge", "sanity checks", opt->repo);
3924
3925         /* Default to histogram diff.  Actually, just hardcode it...for now. */
3926         opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
3927
3928         /* Handle attr direction stuff for renormalization */
3929         if (opt->renormalize)
3930                 git_attr_set_direction(GIT_ATTR_CHECKOUT);
3931
3932         /* Initialization of opt->priv, our internal merge data */
3933         trace2_region_enter("merge", "allocate/init", opt->repo);
3934         if (opt->priv) {
3935                 clear_or_reinit_internal_opts(opt->priv, 1);
3936                 trace2_region_leave("merge", "allocate/init", opt->repo);
3937                 return;
3938         }
3939         opt->priv = xcalloc(1, sizeof(*opt->priv));
3940
3941         /* Initialization of various renames fields */
3942         renames = &opt->priv->renames;
3943         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
3944                 strintmap_init_with_options(&renames->dirs_removed[i],
3945                                             NOT_RELEVANT, NULL, 0);
3946                 strmap_init_with_options(&renames->dir_rename_count[i],
3947                                          NULL, 1);
3948                 strmap_init_with_options(&renames->dir_renames[i],
3949                                          NULL, 0);
3950                 /*
3951                  * relevant_sources uses -1 for the default, because we need
3952                  * to be able to distinguish not-in-strintmap from valid
3953                  * relevant_source values from enum file_rename_relevance.
3954                  * In particular, possibly_cache_new_pair() expects a negative
3955                  * value for not-found entries.
3956                  */
3957                 strintmap_init_with_options(&renames->relevant_sources[i],
3958                                             -1 /* explicitly invalid */,
3959                                             NULL, 0);
3960                 strmap_init_with_options(&renames->cached_pairs[i],
3961                                          NULL, 1);
3962                 strset_init_with_options(&renames->cached_irrelevant[i],
3963                                          NULL, 1);
3964                 strset_init_with_options(&renames->cached_target_names[i],
3965                                          NULL, 0);
3966         }
3967
3968         /*
3969          * Although we initialize opt->priv->paths with strdup_strings=0,
3970          * that's just to avoid making yet another copy of an allocated
3971          * string.  Putting the entry into paths means we are taking
3972          * ownership, so we will later free it.  paths_to_free is similar.
3973          *
3974          * In contrast, conflicted just has a subset of keys from paths, so
3975          * we don't want to free those (it'd be a duplicate free).
3976          */
3977         strmap_init_with_options(&opt->priv->paths, NULL, 0);
3978         strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
3979         string_list_init(&opt->priv->paths_to_free, 0);
3980
3981         /*
3982          * keys & strbufs in output will sometimes need to outlive "paths",
3983          * so it will have a copy of relevant keys.  It's probably a small
3984          * subset of the overall paths that have special output.
3985          */
3986         strmap_init(&opt->priv->output);
3987
3988         trace2_region_leave("merge", "allocate/init", opt->repo);
3989 }
3990
3991 static void merge_check_renames_reusable(struct merge_options *opt,
3992                                          struct merge_result *result,
3993                                          struct tree *merge_base,
3994                                          struct tree *side1,
3995                                          struct tree *side2)
3996 {
3997         struct rename_info *renames;
3998         struct tree **merge_trees;
3999         struct merge_options_internal *opti = result->priv;
4000
4001         if (!opti)
4002                 return;
4003
4004         renames = &opti->renames;
4005         merge_trees = renames->merge_trees;
4006
4007         /*
4008          * Handle case where previous merge operation did not want cache to
4009          * take effect, e.g. because rename/rename(1to1) makes it invalid.
4010          */
4011         if (!merge_trees[0]) {
4012                 assert(!merge_trees[0] && !merge_trees[1] && !merge_trees[2]);
4013                 renames->cached_pairs_valid_side = 0; /* neither side valid */
4014                 return;
4015         }
4016
4017         /*
4018          * Handle other cases; note that merge_trees[0..2] will only
4019          * be NULL if opti is, or if all three were manually set to
4020          * NULL by e.g. rename/rename(1to1) handling.
4021          */
4022         assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
4023
4024         /* Check if we meet a condition for re-using cached_pairs */
4025         if (oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) &&
4026             oideq(&side1->object.oid, &result->tree->object.oid))
4027                 renames->cached_pairs_valid_side = MERGE_SIDE1;
4028         else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) &&
4029                  oideq(&side2->object.oid, &result->tree->object.oid))
4030                 renames->cached_pairs_valid_side = MERGE_SIDE2;
4031         else
4032                 renames->cached_pairs_valid_side = 0; /* neither side valid */
4033 }
4034
4035 /*** Function Grouping: merge_incore_*() and their internal variants ***/
4036
4037 /*
4038  * Originally from merge_trees_internal(); heavily adapted, though.
4039  */
4040 static void merge_ort_nonrecursive_internal(struct merge_options *opt,
4041                                             struct tree *merge_base,
4042                                             struct tree *side1,
4043                                             struct tree *side2,
4044                                             struct merge_result *result)
4045 {
4046         struct object_id working_tree_oid;
4047
4048         if (opt->subtree_shift) {
4049                 side2 = shift_tree_object(opt->repo, side1, side2,
4050                                           opt->subtree_shift);
4051                 merge_base = shift_tree_object(opt->repo, side1, merge_base,
4052                                                opt->subtree_shift);
4053         }
4054
4055         trace2_region_enter("merge", "collect_merge_info", opt->repo);
4056         if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
4057                 /*
4058                  * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
4059                  * base, and 2-3) the trees for the two trees we're merging.
4060                  */
4061                 err(opt, _("collecting merge info failed for trees %s, %s, %s"),
4062                     oid_to_hex(&merge_base->object.oid),
4063                     oid_to_hex(&side1->object.oid),
4064                     oid_to_hex(&side2->object.oid));
4065                 result->clean = -1;
4066                 return;
4067         }
4068         trace2_region_leave("merge", "collect_merge_info", opt->repo);
4069
4070         trace2_region_enter("merge", "renames", opt->repo);
4071         result->clean = detect_and_process_renames(opt, merge_base,
4072                                                    side1, side2);
4073         trace2_region_leave("merge", "renames", opt->repo);
4074
4075         trace2_region_enter("merge", "process_entries", opt->repo);
4076         process_entries(opt, &working_tree_oid);
4077         trace2_region_leave("merge", "process_entries", opt->repo);
4078
4079         /* Set return values */
4080         result->tree = parse_tree_indirect(&working_tree_oid);
4081         /* existence of conflicted entries implies unclean */
4082         result->clean &= strmap_empty(&opt->priv->conflicted);
4083         if (!opt->priv->call_depth) {
4084                 result->priv = opt->priv;
4085                 result->_properly_initialized = RESULT_INITIALIZED;
4086                 opt->priv = NULL;
4087         }
4088 }
4089
4090 /*
4091  * Originally from merge_recursive_internal(); somewhat adapted, though.
4092  */
4093 static void merge_ort_internal(struct merge_options *opt,
4094                                struct commit_list *merge_bases,
4095                                struct commit *h1,
4096                                struct commit *h2,
4097                                struct merge_result *result)
4098 {
4099         struct commit_list *iter;
4100         struct commit *merged_merge_bases;
4101         const char *ancestor_name;
4102         struct strbuf merge_base_abbrev = STRBUF_INIT;
4103
4104         if (!merge_bases) {
4105                 merge_bases = get_merge_bases(h1, h2);
4106                 /* See merge-ort.h:merge_incore_recursive() declaration NOTE */
4107                 merge_bases = reverse_commit_list(merge_bases);
4108         }
4109
4110         merged_merge_bases = pop_commit(&merge_bases);
4111         if (merged_merge_bases == NULL) {
4112                 /* if there is no common ancestor, use an empty tree */
4113                 struct tree *tree;
4114
4115                 tree = lookup_tree(opt->repo, opt->repo->hash_algo->empty_tree);
4116                 merged_merge_bases = make_virtual_commit(opt->repo, tree,
4117                                                          "ancestor");
4118                 ancestor_name = "empty tree";
4119         } else if (merge_bases) {
4120                 ancestor_name = "merged common ancestors";
4121         } else {
4122                 strbuf_add_unique_abbrev(&merge_base_abbrev,
4123                                          &merged_merge_bases->object.oid,
4124                                          DEFAULT_ABBREV);
4125                 ancestor_name = merge_base_abbrev.buf;
4126         }
4127
4128         for (iter = merge_bases; iter; iter = iter->next) {
4129                 const char *saved_b1, *saved_b2;
4130                 struct commit *prev = merged_merge_bases;
4131
4132                 opt->priv->call_depth++;
4133                 /*
4134                  * When the merge fails, the result contains files
4135                  * with conflict markers. The cleanness flag is
4136                  * ignored (unless indicating an error), it was never
4137                  * actually used, as result of merge_trees has always
4138                  * overwritten it: the committed "conflicts" were
4139                  * already resolved.
4140                  */
4141                 saved_b1 = opt->branch1;
4142                 saved_b2 = opt->branch2;
4143                 opt->branch1 = "Temporary merge branch 1";
4144                 opt->branch2 = "Temporary merge branch 2";
4145                 merge_ort_internal(opt, NULL, prev, iter->item, result);
4146                 if (result->clean < 0)
4147                         return;
4148                 opt->branch1 = saved_b1;
4149                 opt->branch2 = saved_b2;
4150                 opt->priv->call_depth--;
4151
4152                 merged_merge_bases = make_virtual_commit(opt->repo,
4153                                                          result->tree,
4154                                                          "merged tree");
4155                 commit_list_insert(prev, &merged_merge_bases->parents);
4156                 commit_list_insert(iter->item,
4157                                    &merged_merge_bases->parents->next);
4158
4159                 clear_or_reinit_internal_opts(opt->priv, 1);
4160         }
4161
4162         opt->ancestor = ancestor_name;
4163         merge_ort_nonrecursive_internal(opt,
4164                                         repo_get_commit_tree(opt->repo,
4165                                                              merged_merge_bases),
4166                                         repo_get_commit_tree(opt->repo, h1),
4167                                         repo_get_commit_tree(opt->repo, h2),
4168                                         result);
4169         strbuf_release(&merge_base_abbrev);
4170         opt->ancestor = NULL;  /* avoid accidental re-use of opt->ancestor */
4171 }
4172
4173 void merge_incore_nonrecursive(struct merge_options *opt,
4174                                struct tree *merge_base,
4175                                struct tree *side1,
4176                                struct tree *side2,
4177                                struct merge_result *result)
4178 {
4179         trace2_region_enter("merge", "incore_nonrecursive", opt->repo);
4180
4181         trace2_region_enter("merge", "merge_start", opt->repo);
4182         assert(opt->ancestor != NULL);
4183         merge_check_renames_reusable(opt, result, merge_base, side1, side2);
4184         merge_start(opt, result);
4185         /*
4186          * Record the trees used in this merge, so if there's a next merge in
4187          * a cherry-pick or rebase sequence it might be able to take advantage
4188          * of the cached_pairs in that next merge.
4189          */
4190         opt->priv->renames.merge_trees[0] = merge_base;
4191         opt->priv->renames.merge_trees[1] = side1;
4192         opt->priv->renames.merge_trees[2] = side2;
4193         trace2_region_leave("merge", "merge_start", opt->repo);
4194
4195         merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
4196         trace2_region_leave("merge", "incore_nonrecursive", opt->repo);
4197 }
4198
4199 void merge_incore_recursive(struct merge_options *opt,
4200                             struct commit_list *merge_bases,
4201                             struct commit *side1,
4202                             struct commit *side2,
4203                             struct merge_result *result)
4204 {
4205         trace2_region_enter("merge", "incore_recursive", opt->repo);
4206
4207         /* We set the ancestor label based on the merge_bases */
4208         assert(opt->ancestor == NULL);
4209
4210         trace2_region_enter("merge", "merge_start", opt->repo);
4211         merge_start(opt, result);
4212         trace2_region_leave("merge", "merge_start", opt->repo);
4213
4214         merge_ort_internal(opt, merge_bases, side1, side2, result);
4215         trace2_region_leave("merge", "incore_recursive", opt->repo);
4216 }