merge-ort: modify collect_renames() for directory rename handling
[git] / merge-ort.c
1 /*
2  * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
3  * as a drop-in replacement for the "recursive" merge strategy, allowing one
4  * to replace
5  *
6  *   git merge [-s recursive]
7  *
8  * with
9  *
10  *   git merge -s ort
11  *
12  * Note: git's parser allows the space between '-s' and its argument to be
13  * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
14  * "cale", "peedy", or "ins" instead of "ort"?)
15  */
16
17 #include "cache.h"
18 #include "merge-ort.h"
19
20 #include "alloc.h"
21 #include "blob.h"
22 #include "cache-tree.h"
23 #include "commit.h"
24 #include "commit-reach.h"
25 #include "diff.h"
26 #include "diffcore.h"
27 #include "dir.h"
28 #include "object-store.h"
29 #include "strmap.h"
30 #include "tree.h"
31 #include "unpack-trees.h"
32 #include "xdiff-interface.h"
33
34 /*
35  * We have many arrays of size 3.  Whenever we have such an array, the
36  * indices refer to one of the sides of the three-way merge.  This is so
37  * pervasive that the constants 0, 1, and 2 are used in many places in the
38  * code (especially in arithmetic operations to find the other side's index
39  * or to compute a relevant mask), but sometimes these enum names are used
40  * to aid code clarity.
41  *
42  * See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
43  * referred to there is one of these three sides.
44  */
45 enum merge_side {
46         MERGE_BASE = 0,  /* common ancestor; index 0 of the size-3 arrays */
47         MERGE_SIDE1 = 1, /* head of side 1; index 1 of the size-3 arrays */
48         MERGE_SIDE2 = 2  /* head of side 2; index 2 of the size-3 arrays */
49 };
50
51 struct rename_info {
52         /*
53          * All variables that are arrays of size 3 correspond to data tracked
54          * for the sides in enum merge_side.  Index 0 is almost always unused
55          * because we often only need to track information for MERGE_SIDE1 and
56          * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
57          * are determined relative to what changed since the MERGE_BASE).
58          */
59
60         /*
61          * pairs: pairing of filenames from diffcore_rename()
62          */
63         struct diff_queue_struct pairs[3];
64
65         /*
66          * dirs_removed: directories removed on a given side of history; filled by collect_rename_info().
67          */
68         struct strset dirs_removed[3];
69
70         /*
71          * dir_rename_count: tracking where parts of a directory were renamed to
72          *
73          * When files in a directory are renamed, they may not all go to the
74          * same location.  Each strmap here tracks:
75          *      old_dir => {new_dir => int}
76          * That is, dir_rename_count[side] maps old_dir to a (struct strintmap *).
77          */
78         struct strmap dir_rename_count[3];
79
80         /*
81          * dir_renames: computed directory renames
82          *
83          * This is a map of old_dir => new_dir and is derived in part from
84          * dir_rename_count.
85          */
86         struct strmap dir_renames[3];
87
88         /*
89          * needed_limit: value needed for inexact rename detection to run
90          *
91          * If the current rename limit wasn't high enough for inexact
92          * rename detection to run, this records the limit needed.  Otherwise,
93          * this value remains 0.
94          */
95         int needed_limit;
96 };
97
98 struct merge_options_internal {
99         /*
100          * paths: primary data structure in all of merge ort.
101          *
102          * The keys of paths:
103          *   * are full relative paths from the toplevel of the repository
104          *     (e.g. "drivers/firmware/raspberrypi.c").
105          *   * store all relevant paths in the repo, both directories and
106          *     files (e.g. drivers, drivers/firmware would also be included)
107          *   * these keys serve to intern all the path strings, which allows
108          *     us to do pointer comparison on directory names instead of
109          *     strcmp; we just have to be careful to use the interned strings.
110          *     (Technically paths_to_free may track some strings that were
111          *      removed from paths.)
112          *
113          * The values of paths:
114          *   * either a pointer to a merged_info, or a conflict_info struct
115          *   * merged_info contains all relevant information for a
116          *     non-conflicted entry.
117          *   * conflict_info contains a merged_info, plus any additional
118          *     information about a conflict such as the higher order stages
119          *     involved and the names of the paths those came from (handy
120          *     once renames get involved).
121          *   * a path may start "conflicted" (i.e. point to a conflict_info)
122          *     and then a later step (e.g. three-way content merge) determines
123          *     it can be cleanly merged, at which point it'll be marked clean
124          *     and the algorithm will ignore any data outside the contained
125          *     merged_info for that entry
126          *   * If an entry remains conflicted, the merged_info portion of a
127          *     conflict_info will later be filled with whatever version of
128          *     the file should be placed in the working directory (e.g. an
129          *     as-merged-as-possible variation that contains conflict markers).
130          */
131         struct strmap paths;
132
133         /*
134          * conflicted: a subset of keys->values from "paths"
135          *
136          * conflicted is basically an optimization between process_entries()
137          * and record_conflicted_index_entries(); the latter could loop over
138          * ALL the entries in paths AGAIN and look for the ones that are
139          * still conflicted, but since process_entries() has to loop over
140          * all of them, it saves the ones it couldn't resolve in this strmap
141          * so that record_conflicted_index_entries() can iterate just the
142          * relevant entries.
143          */
144         struct strmap conflicted;
145
146         /*
147          * paths_to_free: additional list of strings to free
148          *
149          * If keys are removed from "paths", they are added to paths_to_free
150          * to ensure they are later freed.  We avoid free'ing immediately since
151          * other places (e.g. conflict_info.pathnames[]) may still be
152          * referencing these paths.
153          */
154         struct string_list paths_to_free;
155
156         /*
157          * output: special messages and conflict notices for various paths
158          *
159          * This is a map of pathnames (a subset of the keys in "paths" above)
160          * to strbufs.  It gathers various warning/conflict/notice messages
161          * for later processing.
162          */
163         struct strmap output;
164
165         /*
166          * renames: various data relating to rename detection
167          */
168         struct rename_info renames;
169
170         /*
171          * current_dir_name: temporary var used in collect_merge_info_callback()
172          *
173          * Used to set merged_info.directory_name; see documentation for that
174          * variable and the requirements placed on that field.
175          */
176         const char *current_dir_name;
177
178         /* call_depth: recursion level counter for merging merge bases */
179         int call_depth;
180 };
181
182 struct version_info {
183         struct object_id oid;  /* object id of one version of a path */
184         unsigned short mode;   /* mode that goes with oid */
185 };
186
187 struct merged_info {
188         /* if is_null, ignore result.  otherwise result has oid & mode */
189         struct version_info result;
190         unsigned is_null:1;
191
192         /*
193          * clean: whether the path in question is cleanly merged.
194          *
195          * see conflict_info.merged for more details.
196          */
197         unsigned clean:1;
198
199         /*
200          * basename_offset: offset of basename of path.
201          *
202          * perf optimization to avoid recomputing offset of final '/'
203          * character in pathname (0 if no '/' in pathname).
204          */
205         size_t basename_offset;
206
207          /*
208           * directory_name: containing directory name.
209           *
210           * Note that we assume directory_name is constructed such that
211           *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
212           * i.e. string equality is equivalent to pointer equality.  For this
213           * to hold, we have to be careful setting it (see setup_path_info()).
214           */
215         const char *directory_name;
216 };
217
218 struct conflict_info {
219         /*
220          * merged: the version of the path that will be written to working tree
221          *
222          * WARNING: It is critical to check merged.clean and ensure it is 0
223          * before reading any conflict_info fields outside of merged.
224          * Allocated merged_info structs will always have clean set to 1.
225          * Allocated conflict_info structs will have merged.clean set to 0
226          * initially.  The merged.clean field is how we know if it is safe
227          * to access other parts of conflict_info besides merged; if a
228          * conflict_info's merged.clean is changed to 1, the rest of the
229          * algorithm is not allowed to look at anything outside of the
230          * merged member anymore.
231          */
232         struct merged_info merged;
233
234         /* oids & modes from each of the three trees for this path */
235         struct version_info stages[3];
236
237         /* pathnames for each stage; may differ due to rename detection */
238         const char *pathnames[3];
239
240         /* Whether this path is/was involved in a directory/file conflict */
241         unsigned df_conflict:1;
242
243         /*
244          * Whether this path is/was involved in a non-content conflict other
245          * than a directory/file conflict (e.g. rename/rename, rename/delete,
246          * file location based on possible directory rename).
247          */
248         unsigned path_conflict:1;
249
250         /*
251          * For filemask and dirmask, the ith bit corresponds to whether the
252          * ith entry is a file (filemask) or a directory (dirmask).  Thus,
253          * filemask & dirmask is always zero, and filemask | dirmask is at
254          * most 7 but can be less when a path does not appear as either a
255          * file or a directory on at least one side of history.
256          *
257          * Note that these masks are related to enum merge_side, as the ith
258          * entry corresponds to side i.
259          *
260          * These values come from a traverse_trees() call; more info may be
261          * found looking at tree-walk.h's struct traverse_info,
262          * particularly the documentation above the "fn" member (note that
263          * filemask = mask & ~dirmask from that documentation).
264          */
265         unsigned filemask:3;
266         unsigned dirmask:3;
267
268         /*
269          * Optimization to track which stages match, to avoid the need to
270          * recompute it in multiple steps. Either 0 or at least 2 bits are
271          * set; if at least 2 bits are set, their corresponding stages match.
272          */
273         unsigned match_mask:3;
274 };
275
276 /*** Function Grouping: various utility functions ***/
277
/*
 * For the next three macros, see the warning on conflict_info.merged.
 *
 * In each of the below, mi is a struct merged_info*, and ci was defined
 * as a struct conflict_info* (but we need to verify ci isn't actually
 * pointed at a struct merged_info*).
 *
 * INITIALIZE_CI: Assign mi to ci, but only if mi is non-NULL and not marked
 *                clean; ci is set to NULL otherwise.
 * VERIFY_CI: Assert that something we assigned to a conflict_info* is one
 *            (non-NULL and not marked clean).
 * ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do the assignment first.
 *
 * All three expand to a single statement that needs a terminating ';' at
 * the call site, so they are safe inside unbraced if/else bodies, and all
 * macro arguments are parenthesized in the expansion.
 */
#define INITIALIZE_CI(ci, mi) do {                                           \
	(ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \
} while (0)
#define VERIFY_CI(ci) assert((ci) && !(ci)->merged.clean)
#define ASSIGN_AND_VERIFY_CI(ci, mi) do {    \
	(ci) = (struct conflict_info *)(mi);  \
	assert((ci) && !(mi)->clean);        \
} while (0)
297
298 static void free_strmap_strings(struct strmap *map)
299 {
300         struct hashmap_iter iter;
301         struct strmap_entry *entry;
302
303         strmap_for_each_entry(map, &iter, entry) {
304                 free((char*)entry->key);
305         }
306 }
307
308 static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
309                                           int reinitialize)
310 {
311         struct rename_info *renames = &opti->renames;
312         int i;
313         void (*strmap_func)(struct strmap *, int) =
314                 reinitialize ? strmap_partial_clear : strmap_clear;
315         void (*strset_func)(struct strset *) =
316                 reinitialize ? strset_partial_clear : strset_clear;
317
318         /*
319          * We marked opti->paths with strdup_strings = 0, so that we
320          * wouldn't have to make another copy of the fullpath created by
321          * make_traverse_path from setup_path_info().  But, now that we've
322          * used it and have no other references to these strings, it is time
323          * to deallocate them.
324          */
325         free_strmap_strings(&opti->paths);
326         strmap_func(&opti->paths, 1);
327
328         /*
329          * All keys and values in opti->conflicted are a subset of those in
330          * opti->paths.  We don't want to deallocate anything twice, so we
331          * don't free the keys and we pass 0 for free_values.
332          */
333         strmap_func(&opti->conflicted, 0);
334
335         /*
336          * opti->paths_to_free is similar to opti->paths; we created it with
337          * strdup_strings = 0 to avoid making _another_ copy of the fullpath
338          * but now that we've used it and have no other references to these
339          * strings, it is time to deallocate them.  We do so by temporarily
340          * setting strdup_strings to 1.
341          */
342         opti->paths_to_free.strdup_strings = 1;
343         string_list_clear(&opti->paths_to_free, 0);
344         opti->paths_to_free.strdup_strings = 0;
345
346         /* Free memory used by various renames maps */
347         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
348                 struct hashmap_iter iter;
349                 struct strmap_entry *entry;
350
351                 strset_func(&renames->dirs_removed[i]);
352
353                 strmap_for_each_entry(&renames->dir_rename_count[i],
354                                       &iter, entry) {
355                         struct strintmap *counts = entry->value;
356                         strintmap_clear(counts);
357                 }
358                 strmap_func(&renames->dir_rename_count[i], 1);
359
360                 strmap_func(&renames->dir_renames[i], 0);
361         }
362
363         if (!reinitialize) {
364                 struct hashmap_iter iter;
365                 struct strmap_entry *e;
366
367                 /* Release and free each strbuf found in output */
368                 strmap_for_each_entry(&opti->output, &iter, e) {
369                         struct strbuf *sb = e->value;
370                         strbuf_release(sb);
371                         /*
372                          * While strictly speaking we don't need to free(sb)
373                          * here because we could pass free_values=1 when
374                          * calling strmap_clear() on opti->output, that would
375                          * require strmap_clear to do another
376                          * strmap_for_each_entry() loop, so we just free it
377                          * while we're iterating anyway.
378                          */
379                         free(sb);
380                 }
381                 strmap_clear(&opti->output, 0);
382         }
383 }
384
385 static int err(struct merge_options *opt, const char *err, ...)
386 {
387         va_list params;
388         struct strbuf sb = STRBUF_INIT;
389
390         strbuf_addstr(&sb, "error: ");
391         va_start(params, err);
392         strbuf_vaddf(&sb, err, params);
393         va_end(params);
394
395         error("%s", sb.buf);
396         strbuf_release(&sb);
397
398         return -1;
399 }
400
401 __attribute__((format (printf, 4, 5)))
402 static void path_msg(struct merge_options *opt,
403                      const char *path,
404                      int omittable_hint, /* skippable under --remerge-diff */
405                      const char *fmt, ...)
406 {
407         va_list ap;
408         struct strbuf *sb = strmap_get(&opt->priv->output, path);
409         if (!sb) {
410                 sb = xmalloc(sizeof(*sb));
411                 strbuf_init(sb, 0);
412                 strmap_put(&opt->priv->output, path, sb);
413         }
414
415         va_start(ap, fmt);
416         strbuf_vaddf(sb, fmt, ap);
417         va_end(ap);
418
419         strbuf_addch(sb, '\n');
420 }
421
422 /*** Function Grouping: functions related to collect_merge_info() ***/
423
424 static void setup_path_info(struct merge_options *opt,
425                             struct string_list_item *result,
426                             const char *current_dir_name,
427                             int current_dir_name_len,
428                             char *fullpath, /* we'll take over ownership */
429                             struct name_entry *names,
430                             struct name_entry *merged_version,
431                             unsigned is_null,     /* boolean */
432                             unsigned df_conflict, /* boolean */
433                             unsigned filemask,
434                             unsigned dirmask,
435                             int resolved          /* boolean */)
436 {
437         /* result->util is void*, so mi is a convenience typed variable */
438         struct merged_info *mi;
439
440         assert(!is_null || resolved);
441         assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
442         assert(resolved == (merged_version != NULL));
443
444         mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
445                                    sizeof(struct conflict_info));
446         mi->directory_name = current_dir_name;
447         mi->basename_offset = current_dir_name_len;
448         mi->clean = !!resolved;
449         if (resolved) {
450                 mi->result.mode = merged_version->mode;
451                 oidcpy(&mi->result.oid, &merged_version->oid);
452                 mi->is_null = !!is_null;
453         } else {
454                 int i;
455                 struct conflict_info *ci;
456
457                 ASSIGN_AND_VERIFY_CI(ci, mi);
458                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
459                         ci->pathnames[i] = fullpath;
460                         ci->stages[i].mode = names[i].mode;
461                         oidcpy(&ci->stages[i].oid, &names[i].oid);
462                 }
463                 ci->filemask = filemask;
464                 ci->dirmask = dirmask;
465                 ci->df_conflict = !!df_conflict;
466                 if (dirmask)
467                         /*
468                          * Assume is_null for now, but if we have entries
469                          * under the directory then when it is complete in
470                          * write_completed_directory() it'll update this.
471                          * Also, for D/F conflicts, we have to handle the
472                          * directory first, then clear this bit and process
473                          * the file to see how it is handled -- that occurs
474                          * near the top of process_entry().
475                          */
476                         mi->is_null = 1;
477         }
478         strmap_put(&opt->priv->paths, fullpath, mi);
479         result->string = fullpath;
480         result->util = mi;
481 }
482
483 static void collect_rename_info(struct merge_options *opt,
484                                 struct name_entry *names,
485                                 const char *dirname,
486                                 const char *fullname,
487                                 unsigned filemask,
488                                 unsigned dirmask,
489                                 unsigned match_mask)
490 {
491         struct rename_info *renames = &opt->priv->renames;
492
493         /* Update dirs_removed, as needed */
494         if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
495                 /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
496                 unsigned sides = (0x07 - dirmask)/2;
497                 if (sides & 1)
498                         strset_add(&renames->dirs_removed[1], fullname);
499                 if (sides & 2)
500                         strset_add(&renames->dirs_removed[2], fullname);
501         }
502 }
503
504 static int collect_merge_info_callback(int n,
505                                        unsigned long mask,
506                                        unsigned long dirmask,
507                                        struct name_entry *names,
508                                        struct traverse_info *info)
509 {
510         /*
511          * n is 3.  Always.
512          * common ancestor (mbase) has mask 1, and stored in index 0 of names
513          * head of side 1  (side1) has mask 2, and stored in index 1 of names
514          * head of side 2  (side2) has mask 4, and stored in index 2 of names
515          */
516         struct merge_options *opt = info->data;
517         struct merge_options_internal *opti = opt->priv;
518         struct string_list_item pi;  /* Path Info */
519         struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
520         struct name_entry *p;
521         size_t len;
522         char *fullpath;
523         const char *dirname = opti->current_dir_name;
524         unsigned filemask = mask & ~dirmask;
525         unsigned match_mask = 0; /* will be updated below */
526         unsigned mbase_null = !(mask & 1);
527         unsigned side1_null = !(mask & 2);
528         unsigned side2_null = !(mask & 4);
529         unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
530                                         names[0].mode == names[1].mode &&
531                                         oideq(&names[0].oid, &names[1].oid));
532         unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
533                                         names[0].mode == names[2].mode &&
534                                         oideq(&names[0].oid, &names[2].oid));
535         unsigned sides_match = (!side1_null && !side2_null &&
536                                 names[1].mode == names[2].mode &&
537                                 oideq(&names[1].oid, &names[2].oid));
538
539         /*
540          * Note: When a path is a file on one side of history and a directory
541          * in another, we have a directory/file conflict.  In such cases, if
542          * the conflict doesn't resolve from renames and deletions, then we
543          * always leave directories where they are and move files out of the
544          * way.  Thus, while struct conflict_info has a df_conflict field to
545          * track such conflicts, we ignore that field for any directories at
546          * a path and only pay attention to it for files at the given path.
547          * The fact that we leave directories were they are also means that
548          * we do not need to worry about getting additional df_conflict
549          * information propagated from parent directories down to children
550          * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
551          * sets a newinfo.df_conflicts field specifically to propagate it).
552          */
553         unsigned df_conflict = (filemask != 0) && (dirmask != 0);
554
555         /* n = 3 is a fundamental assumption. */
556         if (n != 3)
557                 BUG("Called collect_merge_info_callback wrong");
558
559         /*
560          * A bunch of sanity checks verifying that traverse_trees() calls
561          * us the way I expect.  Could just remove these at some point,
562          * though maybe they are helpful to future code readers.
563          */
564         assert(mbase_null == is_null_oid(&names[0].oid));
565         assert(side1_null == is_null_oid(&names[1].oid));
566         assert(side2_null == is_null_oid(&names[2].oid));
567         assert(!mbase_null || !side1_null || !side2_null);
568         assert(mask > 0 && mask < 8);
569
570         /* Determine match_mask */
571         if (side1_matches_mbase)
572                 match_mask = (side2_matches_mbase ? 7 : 3);
573         else if (side2_matches_mbase)
574                 match_mask = 5;
575         else if (sides_match)
576                 match_mask = 6;
577
578         /*
579          * Get the name of the relevant filepath, which we'll pass to
580          * setup_path_info() for tracking.
581          */
582         p = names;
583         while (!p->mode)
584                 p++;
585         len = traverse_path_len(info, p->pathlen);
586
587         /* +1 in both of the following lines to include the NUL byte */
588         fullpath = xmalloc(len + 1);
589         make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
590
591         /*
592          * If mbase, side1, and side2 all match, we can resolve early.  Even
593          * if these are trees, there will be no renames or anything
594          * underneath.
595          */
596         if (side1_matches_mbase && side2_matches_mbase) {
597                 /* mbase, side1, & side2 all match; use mbase as resolution */
598                 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
599                                 names, names+0, mbase_null, 0,
600                                 filemask, dirmask, 1);
601                 return mask;
602         }
603
604         /*
605          * Gather additional information used in rename detection.
606          */
607         collect_rename_info(opt, names, dirname, fullpath,
608                             filemask, dirmask, match_mask);
609
610         /*
611          * Record information about the path so we can resolve later in
612          * process_entries.
613          */
614         setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
615                         names, NULL, 0, df_conflict, filemask, dirmask, 0);
616
617         ci = pi.util;
618         VERIFY_CI(ci);
619         ci->match_mask = match_mask;
620
621         /* If dirmask, recurse into subdirectories */
622         if (dirmask) {
623                 struct traverse_info newinfo;
624                 struct tree_desc t[3];
625                 void *buf[3] = {NULL, NULL, NULL};
626                 const char *original_dir_name;
627                 int i, ret;
628
629                 ci->match_mask &= filemask;
630                 newinfo = *info;
631                 newinfo.prev = info;
632                 newinfo.name = p->path;
633                 newinfo.namelen = p->pathlen;
634                 newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
635                 /*
636                  * If this directory we are about to recurse into cared about
637                  * its parent directory (the current directory) having a D/F
638                  * conflict, then we'd propagate the masks in this way:
639                  *    newinfo.df_conflicts |= (mask & ~dirmask);
640                  * But we don't worry about propagating D/F conflicts.  (See
641                  * comment near setting of local df_conflict variable near
642                  * the beginning of this function).
643                  */
644
645                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
646                         if (i == 1 && side1_matches_mbase)
647                                 t[1] = t[0];
648                         else if (i == 2 && side2_matches_mbase)
649                                 t[2] = t[0];
650                         else if (i == 2 && sides_match)
651                                 t[2] = t[1];
652                         else {
653                                 const struct object_id *oid = NULL;
654                                 if (dirmask & 1)
655                                         oid = &names[i].oid;
656                                 buf[i] = fill_tree_descriptor(opt->repo,
657                                                               t + i, oid);
658                         }
659                         dirmask >>= 1;
660                 }
661
662                 original_dir_name = opti->current_dir_name;
663                 opti->current_dir_name = pi.string;
664                 ret = traverse_trees(NULL, 3, t, &newinfo);
665                 opti->current_dir_name = original_dir_name;
666
667                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
668                         free(buf[i]);
669
670                 if (ret < 0)
671                         return -1;
672         }
673
674         return mask;
675 }
676
677 static int collect_merge_info(struct merge_options *opt,
678                               struct tree *merge_base,
679                               struct tree *side1,
680                               struct tree *side2)
681 {
682         int ret;
683         struct tree_desc t[3];
684         struct traverse_info info;
685         const char *toplevel_dir_placeholder = "";
686
687         opt->priv->current_dir_name = toplevel_dir_placeholder;
688         setup_traverse_info(&info, toplevel_dir_placeholder);
689         info.fn = collect_merge_info_callback;
690         info.data = opt;
691         info.show_all_errors = 1;
692
693         parse_tree(merge_base);
694         parse_tree(side1);
695         parse_tree(side2);
696         init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
697         init_tree_desc(t + 1, side1->buffer, side1->size);
698         init_tree_desc(t + 2, side2->buffer, side2->size);
699
700         ret = traverse_trees(NULL, 3, t, &info);
701
702         return ret;
703 }
704
705 /*** Function Grouping: functions related to threeway content merges ***/
706
/*
 * Placeholder for the three-way content merge of a single path.
 *
 * No merging is performed yet: every call ends in die() and none of the
 * arguments are examined.
 */
static int handle_content_merge(struct merge_options *opt,
				const char *path,
				const struct version_info *o,
				const struct version_info *a,
				const struct version_info *b,
				const char *pathnames[3],
				const int extra_marker_size,
				struct version_info *result)
{
	/* Nothing is implemented; bail out unconditionally. */
	die("Not yet implemented");
}
718
719 /*** Function Grouping: functions related to detect_and_process_renames(), ***
720  *** which are split into directory and regular rename detection sections. ***/
721
722 /*** Function Grouping: functions related to directory rename detection ***/
723
724 struct collision_info { /* NOTE(review): users of this struct are not visible here */
725         struct string_list source_files; /* presumably the source paths colliding at one target -- confirm */
726         unsigned reported_already:1;     /* presumably set once the collision was reported -- confirm */
727 };
728
/*
 * Given a file rename old_path -> new_path, determine which leading
 * directory (if any) was renamed.
 *
 * Example:
 *    "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
 * shares the trailing "e/" directory component, so the directory rename
 * is "a/b/c/d" -> "a/b/some/thing/else".  Those two strings are returned
 * in *old_dir and *new_dir as freshly allocated copies.
 *
 * When no directory rename can be inferred, both *old_dir and *new_dir
 * are set to NULL.  A rename into the toplevel directory is reported with
 * *new_dir set to an allocated empty string.
 */
static void get_renamed_dir_portion(const char *old_path, const char *new_path,
                                    char **old_dir, char **new_dir)
{
        char *old_cursor, *new_cursor;

        /* Start from "no directory rename" and only override on success. */
        *new_dir = NULL;
        *old_dir = NULL;

        /*
         * Basename changes are irrelevant here; only the directory part
         * matters, so begin at the last '/' of each path.
         */
        old_cursor = strrchr(old_path, '/');
        new_cursor = strrchr(new_path, '/');

        /*
         * A source file living in the toplevel directory cannot take part
         * in a directory rename: the root always exists, and a directory
         * that still exists is never treated as renamed.
         */
        if (!old_cursor)
                return;

        /*
         * Destination in the toplevel directory: the whole directory of
         * old_path was renamed to the root.
         */
        if (!new_cursor) {
                *old_dir = xstrndup(old_path, old_cursor - old_path);
                *new_dir = xstrdup("");
                return;
        }

        /*
         * Walk both paths backwards in lockstep until the characters
         * differ or either cursor reaches the start of its string.  Both
         * cursors always step back at least once, off the final '/'.
         */
        do {
                --new_cursor;
                --old_cursor;
        } while (*new_cursor == *old_cursor &&
                 old_cursor != old_path &&
                 new_cursor != new_path);

        /*
         * Both cursors hit the start while still matching: the directory
         * portions are identical, so only the basename changed.
         */
        if (old_cursor == old_path && new_cursor == new_path &&
            *old_cursor == *new_cursor)
                return;

        /*
         * The new path was consumed entirely while the old cursor sits at
         * the first character of some subdirectory: that subdirectory's
         * parent was renamed/merged into the root.  The reverse situation
         * (root renamed into a subdirectory) cannot occur because the
         * root directory always exists.
         */
        if (new_cursor == new_path &&
            old_cursor != old_path && old_cursor[-1] == '/') {
                *old_dir = xstrndup(old_path, (old_cursor - 1) - old_path);
                *new_dir = xstrdup("");
                return;
        }

        /*
         * The cursors now sit inside the last directory component that
         * differs.  Extend each one forward to the next '/' so that the
         * whole component is included.  The search starts one character
         * past the cursor; for
         *    a/b/star/foo/whatever.c -> a/b/tar/foo/random.c
         * the backwards walk stops with the new cursor on the '/' after
         * "a/b", and without the +1 the new directory would wrongly be
         * reported as "a/b" instead of "a/b/tar".
         */
        old_cursor = strchr(old_cursor + 1, '/');
        new_cursor = strchr(new_cursor + 1, '/');

        /* Everything before the located '/' is the renamed directory. */
        *old_dir = xstrndup(old_path, old_cursor - old_path);
        *new_dir = xstrndup(new_path, new_cursor - new_path);
}
831
832 static void increment_count(struct strmap *dir_rename_count,
833                             char *old_dir,
834                             char *new_dir)
835 {
836         struct strintmap *counts;
837         struct strmap_entry *e;
838
839         /* Get the {new_dirs -> counts} mapping using old_dir */
840         e = strmap_get_entry(dir_rename_count, old_dir);
841         if (e) {
842                 counts = e->value;
843         } else {
844                 counts = xmalloc(sizeof(*counts));
845                 strintmap_init_with_options(counts, 0, NULL, 1);
846                 strmap_put(dir_rename_count, old_dir, counts);
847         }
848
849         /* Increment the count for new_dir */
850         strintmap_incr(counts, new_dir, 1);
851 }
852
853 static void compute_rename_counts(struct diff_queue_struct *pairs,
854                                   struct strmap *dir_rename_count,
855                                   struct strset *dirs_removed)
856 {
857         int i;
858
859         for (i = 0; i < pairs->nr; ++i) {
860                 char *old_dir, *new_dir;
861                 struct diff_filepair *pair = pairs->queue[i];
862
863                 /* File not part of directory rename if it wasn't renamed */
864                 if (pair->status != 'R')
865                         continue;
866
867                 /* Get the old and new directory names */
868                 get_renamed_dir_portion(pair->one->path, pair->two->path,
869                                         &old_dir,        &new_dir);
870                 if (!old_dir)
871                         /* Directory didn't change at all; ignore this one. */
872                         continue;
873
874                 /*
875                  * Make dir_rename_count contain a map of a map:
876                  *   old_directory -> {new_directory -> count}
877                  * In other words, for every pair look at the directories for
878                  * the old filename and the new filename and count how many
879                  * times that pairing occurs.
880                  */
881                 if (strset_contains(dirs_removed, old_dir))
882                         increment_count(dir_rename_count, old_dir, new_dir);
883
884                 /* Free resources we don't need anymore */
885                 free(old_dir);
886                 free(new_dir);
887         }
888 }
889
890 static void get_provisional_directory_renames(struct merge_options *opt,
891                                               unsigned side,
892                                               int *clean)
893 {
894         struct hashmap_iter iter;
895         struct strmap_entry *entry;
896         struct rename_info *renames = &opt->priv->renames;
897
898         compute_rename_counts(&renames->pairs[side],
899                               &renames->dir_rename_count[side],
900                               &renames->dirs_removed[side]);
901         /*
902          * Collapse
903          *    dir_rename_count: old_directory -> {new_directory -> count}
904          * down to
905          *    dir_renames: old_directory -> best_new_directory
906          * where best_new_directory is the one with the unique highest count.
907          */
908         strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
909                 const char *source_dir = entry->key;
910                 struct strintmap *counts = entry->value;
911                 struct hashmap_iter count_iter;
912                 struct strmap_entry *count_entry;
913                 int max = 0;
914                 int bad_max = 0;
915                 const char *best = NULL;
916
917                 strintmap_for_each_entry(counts, &count_iter, count_entry) {
918                         const char *target_dir = count_entry->key;
919                         intptr_t count = (intptr_t)count_entry->value;
920
921                         if (count == max)
922                                 bad_max = max;
923                         else if (count > max) {
924                                 max = count;
925                                 best = target_dir;
926                         }
927                 }
928
929                 if (bad_max == max) {
930                         path_msg(opt, source_dir, 0,
931                                _("CONFLICT (directory rename split): "
932                                  "Unclear where to rename %s to; it was "
933                                  "renamed to multiple other directories, with "
934                                  "no destination getting a majority of the "
935                                  "files."),
936                                source_dir);
937                         *clean = 0;
938                 } else {
939                         strmap_put(&renames->dir_renames[side],
940                                    source_dir, (void*)best);
941                 }
942         }
943 }
944
945 static void handle_directory_level_conflicts(struct merge_options *opt)
946 {
947         struct hashmap_iter iter;
948         struct strmap_entry *entry;
949         struct string_list duplicated = STRING_LIST_INIT_NODUP;
950         struct rename_info *renames = &opt->priv->renames;
951         struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1];
952         struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2];
953         int i;
954
955         strmap_for_each_entry(side1_dir_renames, &iter, entry) {
956                 if (strmap_contains(side2_dir_renames, entry->key))
957                         string_list_append(&duplicated, entry->key);
958         }
959
960         for (i = 0; i < duplicated.nr; i++) {
961                 strmap_remove(side1_dir_renames, duplicated.items[i].string, 0);
962                 strmap_remove(side2_dir_renames, duplicated.items[i].string, 0);
963         }
964         string_list_clear(&duplicated, 0);
965 }
966
/*
 * Placeholder: not implemented yet, every call terminates via die().
 *
 * collect_renames() passes an uninitialized 'collisions' map here and
 * afterwards treats its values as struct collision_info, so the eventual
 * implementation is expected to initialize and populate that map.
 */
static void compute_collisions(struct strmap *collisions,
                               struct strmap *dir_renames,
                               struct diff_queue_struct *pairs)
{
        die("Not yet implemented.");
}
973
/*
 * Placeholder: not implemented yet, every call terminates via die().
 *
 * collect_renames() treats a non-NULL return value as the new path that
 * 'path' gets because of a directory rename, and NULL as "no directory
 * rename applies"; it passes the address of its 'clean' flag as
 * 'clean_merge'.
 */
static char *check_for_directory_rename(struct merge_options *opt,
                                        const char *path,
                                        unsigned side_index,
                                        struct strmap *dir_renames,
                                        struct strmap *dir_rename_exclusions,
                                        struct strmap *collisions,
                                        int *clean_merge)
{
        /* Stub: no return statement is reached because die() is called. */
        die("Not yet implemented.");
}
984
/*
 * Placeholder: not implemented yet, every call terminates via die().
 *
 * collect_renames() calls this with a filepair and the non-NULL new path
 * returned by check_for_directory_rename() for that pair.
 */
static void apply_directory_rename_modifications(struct merge_options *opt,
                                                 struct diff_filepair *pair,
                                                 char *new_path)
{
        die("Not yet implemented.");
}
991
992 /*** Function Grouping: functions related to regular rename detection ***/
993
/*
 * Apply the collected renames to the per-path conflict_info records stored
 * in opt->priv->paths.
 *
 * For every filepair in 'renames' the records for the old and the new path
 * are looked up and updated so that later processing sees the rename:
 *   - two consecutive pairs with the same source path are handled together
 *     as rename/rename(1to1) or rename/rename(1to2);
 *   - otherwise the pair is handled as a collision (rename/add or
 *     rename/rename(2to1)), a rename/delete, a rename vs. typechange, or a
 *     normal rename.
 *
 * pair->score is expected to hold the merge side (1 or 2) that performed
 * the rename; collect_renames() stores it there.
 *
 * The adjacency test for pairs with the same source path presumably relies
 * on the caller having sorted the queue by source path (see
 * compare_pairs()) -- confirm against the caller.
 *
 * Returns clean_merge, which starts out as 1 and is only overwritten by the
 * result of handle_content_merge() in the rename/rename(1to2) case.
 */
static int process_renames(struct merge_options *opt,
                           struct diff_queue_struct *renames)
{
        int clean_merge = 1, i;

        for (i = 0; i < renames->nr; ++i) {
                const char *oldpath = NULL, *newpath;
                struct diff_filepair *pair = renames->queue[i];
                struct conflict_info *oldinfo = NULL, *newinfo = NULL;
                struct strmap_entry *old_ent, *new_ent;
                unsigned int old_sidemask;
                int target_index, other_source_index;
                int source_deleted, collision, type_changed;
                const char *rename_branch = NULL, *delete_branch = NULL;

                /*
                 * Use the map's own key strings rather than the copies in
                 * the filepair; both lookups are assumed to succeed.
                 */
                old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path);
                oldpath = old_ent->key;
                oldinfo = old_ent->value;

                new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path);
                newpath = new_ent->key;
                newinfo = new_ent->value;

                /*
                 * diff_filepairs have copies of pathnames, thus we have to
                 * use standard 'strcmp()' (negated) instead of '=='.
                 */
                if (i + 1 < renames->nr &&
                    !strcmp(oldpath, renames->queue[i+1]->one->path)) {
                        /* Handle rename/rename(1to2) or rename/rename(1to1) */
                        const char *pathnames[3];
                        struct version_info merged;
                        struct conflict_info *base, *side1, *side2;
                        unsigned was_binary_blob = 0;

                        pathnames[0] = oldpath;
                        pathnames[1] = newpath;
                        pathnames[2] = renames->queue[i+1]->two->path;

                        base = strmap_get(&opt->priv->paths, pathnames[0]);
                        side1 = strmap_get(&opt->priv->paths, pathnames[1]);
                        side2 = strmap_get(&opt->priv->paths, pathnames[2]);

                        VERIFY_CI(base);
                        VERIFY_CI(side1);
                        VERIFY_CI(side2);

                        if (!strcmp(pathnames[1], pathnames[2])) {
                                /* Both sides renamed the same way */
                                assert(side1 == side2);
                                /* 'merged' is only used for its size here */
                                memcpy(&side1->stages[0], &base->stages[0],
                                       sizeof(merged));
                                side1->filemask |= (1 << MERGE_BASE);
                                /* Mark base as resolved by removal */
                                base->merged.is_null = 1;
                                base->merged.clean = 1;

                                /* We handled both renames, i.e. i+1 handled */
                                i++;
                                /* Move to next rename */
                                continue;
                        }

                        /* This is a rename/rename(1to2) */
                        clean_merge = handle_content_merge(opt,
                                                           pair->one->path,
                                                           &base->stages[0],
                                                           &side1->stages[1],
                                                           &side2->stages[2],
                                                           pathnames,
                                                           1 + 2 * opt->priv->call_depth,
                                                           &merged);
                        /*
                         * An unclean result that is identical to side1's
                         * version is taken as the sign of a binary blob.
                         */
                        if (!clean_merge &&
                            merged.mode == side1->stages[1].mode &&
                            oideq(&merged.oid, &side1->stages[1].oid))
                                was_binary_blob = 1;
                        memcpy(&side1->stages[1], &merged, sizeof(merged));
                        if (was_binary_blob) {
                                /*
                                 * Getting here means we were attempting to
                                 * merge a binary blob.
                                 *
                                 * Since we can't merge binaries,
                                 * handle_content_merge() just takes one
                                 * side.  But we don't want to copy the
                                 * contents of one side to both paths.  We
                                 * used the contents of side1 above for
                                 * side1->stages, let's use the contents of
                                 * side2 for side2->stages below.
                                 */
                                oidcpy(&merged.oid, &side2->stages[2].oid);
                                merged.mode = side2->stages[2].mode;
                        }
                        memcpy(&side2->stages[2], &merged, sizeof(merged));

                        side1->path_conflict = 1;
                        side2->path_conflict = 1;
                        /*
                         * TODO: For renames we normally remove the path at the
                         * old name.  It would thus seem consistent to do the
                         * same for rename/rename(1to2) cases, but we haven't
                         * done so traditionally and a number of the regression
                         * tests now encode an expectation that the file is
                         * left there at stage 1.  If we ever decide to change
                         * this, add the following two lines here:
                         *    base->merged.is_null = 1;
                         *    base->merged.clean = 1;
                         * and remove the setting of base->path_conflict to 1.
                         */
                        base->path_conflict = 1;
                        path_msg(opt, oldpath, 0,
                                 _("CONFLICT (rename/rename): %s renamed to "
                                   "%s in %s and to %s in %s."),
                                 pathnames[0],
                                 pathnames[1], opt->branch1,
                                 pathnames[2], opt->branch2);

                        i++; /* We handled both renames, i.e. i+1 handled */
                        continue;
                }

                VERIFY_CI(oldinfo);
                VERIFY_CI(newinfo);
                target_index = pair->score; /* from collect_renames() */
                assert(target_index == 1 || target_index == 2);
                /* Sides are 1 and 2, so "3 - x" yields the other side */
                other_source_index = 3 - target_index;
                old_sidemask = (1 << other_source_index); /* 2 or 4 */
                /* filemask == 1: the old path only exists in the merge base */
                source_deleted = (oldinfo->filemask == 1);
                /* The other side also has a file at the rename target */
                collision = ((newinfo->filemask & old_sidemask) != 0);
                type_changed = !source_deleted &&
                        (S_ISREG(oldinfo->stages[other_source_index].mode) !=
                         S_ISREG(newinfo->stages[target_index].mode));
                if (type_changed && collision) {
                        /*
                         * special handling so later blocks can handle this...
                         *
                         * if type_changed && collision are both true, then this
                         * was really a double rename, but one side wasn't
                         * detected due to lack of break detection.  I.e.
                         * something like
                         *    orig: has normal file 'foo'
                         *    side1: renames 'foo' to 'bar', adds 'foo' symlink
                         *    side2: renames 'foo' to 'bar'
                         * In this case, the foo->bar rename on side1 won't be
                         * detected because the new symlink named 'foo' is
                         * there and we don't do break detection.  But we detect
                         * this here because we don't want to merge the content
                         * of the foo symlink with the foo->bar file, so we
                         * have some logic to handle this special case.  The
                         * easiest way to do that is make 'bar' on side1 not
                         * be considered a colliding file but the other part
                         * of a normal rename.  If the file is very different,
                         * well we're going to get content merge conflicts
                         * anyway so it doesn't hurt.  And if the colliding
                         * file also has a different type, that'll be handled
                         * by the content merge logic in process_entry() too.
                         *
                         * See also t6430, 'rename vs. rename/symlink'
                         */
                        collision = 0;
                }
                if (source_deleted) {
                        /* Branch names are only needed for conflict messages */
                        if (target_index == 1) {
                                rename_branch = opt->branch1;
                                delete_branch = opt->branch2;
                        } else {
                                rename_branch = opt->branch2;
                                delete_branch = opt->branch1;
                        }
                }

                assert(source_deleted || oldinfo->filemask & old_sidemask);

                /* Need to check for special types of rename conflicts... */
                if (collision && !source_deleted) {
                        /* collision: rename/add or rename/rename(2to1) */
                        const char *pathnames[3];
                        struct version_info merged;

                        struct conflict_info *base, *side1, *side2;
                        unsigned clean;

                        pathnames[0] = oldpath;
                        pathnames[other_source_index] = oldpath;
                        pathnames[target_index] = newpath;

                        base = strmap_get(&opt->priv->paths, pathnames[0]);
                        side1 = strmap_get(&opt->priv->paths, pathnames[1]);
                        side2 = strmap_get(&opt->priv->paths, pathnames[2]);

                        VERIFY_CI(base);
                        VERIFY_CI(side1);
                        VERIFY_CI(side2);

                        /*
                         * NOTE(review): the result goes into the local
                         * 'clean' only; it is not folded into clean_merge.
                         */
                        clean = handle_content_merge(opt, pair->one->path,
                                                     &base->stages[0],
                                                     &side1->stages[1],
                                                     &side2->stages[2],
                                                     pathnames,
                                                     1 + 2 * opt->priv->call_depth,
                                                     &merged);

                        memcpy(&newinfo->stages[target_index], &merged,
                               sizeof(merged));
                        if (!clean) {
                                path_msg(opt, newpath, 0,
                                         _("CONFLICT (rename involved in "
                                           "collision): rename of %s -> %s has "
                                           "content conflicts AND collides "
                                           "with another path; this may result "
                                           "in nested conflict markers."),
                                         oldpath, newpath);
                        }
                } else if (collision && source_deleted) {
                        /*
                         * rename/add/delete or rename/rename(2to1)/delete:
                         * since oldpath was deleted on the side that didn't
                         * do the rename, there's not much of a content merge
                         * we can do for the rename.  oldinfo->merged.is_null
                         * was already set, so we just leave things as-is so
                         * they look like an add/add conflict.
                         */

                        newinfo->path_conflict = 1;
                        path_msg(opt, newpath, 0,
                                 _("CONFLICT (rename/delete): %s renamed "
                                   "to %s in %s, but deleted in %s."),
                                 oldpath, newpath, rename_branch, delete_branch);
                } else {
                        /*
                         * a few different cases...start by copying the
                         * existing stage(s) from oldinfo over the newinfo
                         * and update the pathname(s).
                         */
                        memcpy(&newinfo->stages[0], &oldinfo->stages[0],
                               sizeof(newinfo->stages[0]));
                        newinfo->filemask |= (1 << MERGE_BASE);
                        newinfo->pathnames[0] = oldpath;
                        if (type_changed) {
                                /* rename vs. typechange */
                                /* Mark the original as resolved by removal */
                                memcpy(&oldinfo->stages[0].oid, &null_oid,
                                       sizeof(oldinfo->stages[0].oid));
                                oldinfo->stages[0].mode = 0;
                                /* 0x06 keeps the two side bits, drops the base bit */
                                oldinfo->filemask &= 0x06;
                        } else if (source_deleted) {
                                /* rename/delete */
                                newinfo->path_conflict = 1;
                                path_msg(opt, newpath, 0,
                                         _("CONFLICT (rename/delete): %s renamed"
                                           " to %s in %s, but deleted in %s."),
                                         oldpath, newpath,
                                         rename_branch, delete_branch);
                        } else {
                                /* normal rename */
                                memcpy(&newinfo->stages[other_source_index],
                                       &oldinfo->stages[other_source_index],
                                       sizeof(newinfo->stages[0]));
                                newinfo->filemask |= (1 << other_source_index);
                                newinfo->pathnames[other_source_index] = oldpath;
                        }
                }

                if (!type_changed) {
                        /* Mark the original as resolved by removal */
                        oldinfo->merged.is_null = 1;
                        oldinfo->merged.clean = 1;
                }

        }

        return clean_merge;
}
1267
1268 static int compare_pairs(const void *a_, const void *b_)
1269 {
1270         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
1271         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
1272
1273         return strcmp(a->one->path, b->one->path);
1274 }
1275
1276 /* Call diffcore_rename() to compute which files have changed on given side */
1277 static void detect_regular_renames(struct merge_options *opt,
1278                                    struct tree *merge_base,
1279                                    struct tree *side,
1280                                    unsigned side_index)
1281 {
1282         struct diff_options diff_opts;
1283         struct rename_info *renames = &opt->priv->renames;
1284
1285         repo_diff_setup(opt->repo, &diff_opts);
1286         diff_opts.flags.recursive = 1;
1287         diff_opts.flags.rename_empty = 0;
1288         diff_opts.detect_rename = DIFF_DETECT_RENAME;
1289         diff_opts.rename_limit = opt->rename_limit;
1290         if (opt->rename_limit <= 0)
1291                 diff_opts.rename_limit = 1000;
1292         diff_opts.rename_score = opt->rename_score;
1293         diff_opts.show_rename_progress = opt->show_rename_progress;
1294         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1295         diff_setup_done(&diff_opts);
1296         diff_tree_oid(&merge_base->object.oid, &side->object.oid, "",
1297                       &diff_opts);
1298         diffcore_std(&diff_opts);
1299
1300         if (diff_opts.needed_rename_limit > renames->needed_limit)
1301                 renames->needed_limit = diff_opts.needed_rename_limit;
1302
1303         renames->pairs[side_index] = diff_queued_diff;
1304
1305         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1306         diff_queued_diff.nr = 0;
1307         diff_queued_diff.queue = NULL;
1308         diff_flush(&diff_opts);
1309 }
1310
1311 /*
1312  * Get information of all renames which occurred in 'side_pairs', discarding
1313  * non-renames.
1314  */
1315 static int collect_renames(struct merge_options *opt,
1316                            struct diff_queue_struct *result,
1317                            unsigned side_index,
1318                            struct strmap *dir_renames_for_side,
1319                            struct strmap *rename_exclusions)
1320 {
1321         int i, clean = 1;
1322         struct strmap collisions;
1323         struct diff_queue_struct *side_pairs;
1324         struct hashmap_iter iter;
1325         struct strmap_entry *entry;
1326         struct rename_info *renames = &opt->priv->renames;
1327
1328         side_pairs = &renames->pairs[side_index];
1329         compute_collisions(&collisions, dir_renames_for_side, side_pairs);
1330
1331         for (i = 0; i < side_pairs->nr; ++i) {
1332                 struct diff_filepair *p = side_pairs->queue[i];
1333                 char *new_path; /* non-NULL only with directory renames */
1334
1335                 if (p->status != 'A' && p->status != 'R') {
1336                         diff_free_filepair(p);
1337                         continue;
1338                 }
1339
1340                 new_path = check_for_directory_rename(opt, p->two->path,
1341                                                       side_index,
1342                                                       dir_renames_for_side,
1343                                                       rename_exclusions,
1344                                                       &collisions,
1345                                                       &clean);
1346
1347                 if (p->status != 'R' && !new_path) {
1348                         diff_free_filepair(p);
1349                         continue;
1350                 }
1351
1352                 if (new_path)
1353                         apply_directory_rename_modifications(opt, p, new_path);
1354
1355                 /*
1356                  * p->score comes back from diffcore_rename_extended() with
1357                  * the similarity of the renamed file.  The similarity is
1358                  * was used to determine that the two files were related
1359                  * and are a rename, which we have already used, but beyond
1360                  * that we have no use for the similarity.  So p->score is
1361                  * now irrelevant.  However, process_renames() will need to
1362                  * know which side of the merge this rename was associated
1363                  * with, so overwrite p->score with that value.
1364                  */
1365                 p->score = side_index;
1366                 result->queue[result->nr++] = p;
1367         }
1368
1369         /* Free each value in the collisions map */
1370         strmap_for_each_entry(&collisions, &iter, entry) {
1371                 struct collision_info *info = entry->value;
1372                 string_list_clear(&info->source_files, 0);
1373         }
1374         /*
1375          * In compute_collisions(), we set collisions.strdup_strings to 0
1376          * so that we wouldn't have to make another copy of the new_path
1377          * allocated by apply_dir_rename().  But now that we've used them
1378          * and have no other references to these strings, it is time to
1379          * deallocate them.
1380          */
1381         free_strmap_strings(&collisions);
1382         strmap_clear(&collisions, 1);
1383         return clean;
1384 }
1385
1386 static int detect_and_process_renames(struct merge_options *opt,
1387                                       struct tree *merge_base,
1388                                       struct tree *side1,
1389                                       struct tree *side2)
1390 {
1391         struct diff_queue_struct combined;
1392         struct rename_info *renames = &opt->priv->renames;
1393         int need_dir_renames, s, clean = 1;
1394
1395         memset(&combined, 0, sizeof(combined));
1396
1397         detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1);
1398         detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2);
1399
1400         need_dir_renames =
1401           !opt->priv->call_depth &&
1402           (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE ||
1403            opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT);
1404
1405         if (need_dir_renames) {
1406                 get_provisional_directory_renames(opt, MERGE_SIDE1, &clean);
1407                 get_provisional_directory_renames(opt, MERGE_SIDE2, &clean);
1408                 handle_directory_level_conflicts(opt);
1409         }
1410
1411         ALLOC_GROW(combined.queue,
1412                    renames->pairs[1].nr + renames->pairs[2].nr,
1413                    combined.alloc);
1414         clean &= collect_renames(opt, &combined, MERGE_SIDE1,
1415                                  &renames->dir_renames[2],
1416                                  &renames->dir_renames[1]);
1417         clean &= collect_renames(opt, &combined, MERGE_SIDE2,
1418                                  &renames->dir_renames[1],
1419                                  &renames->dir_renames[2]);
1420         QSORT(combined.queue, combined.nr, compare_pairs);
1421
1422         clean &= process_renames(opt, &combined);
1423
1424         /* Free memory for renames->pairs[] and combined */
1425         for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
1426                 free(renames->pairs[s].queue);
1427                 DIFF_QUEUE_CLEAR(&renames->pairs[s]);
1428         }
1429         if (combined.nr) {
1430                 int i;
1431                 for (i = 0; i < combined.nr; i++)
1432                         diff_free_filepair(combined.queue[i]);
1433                 free(combined.queue);
1434         }
1435
1436         return clean;
1437 }
1438
1439 /*** Function Grouping: functions related to process_entries() ***/
1440
1441 static int string_list_df_name_compare(const char *one, const char *two)
1442 {
1443         int onelen = strlen(one);
1444         int twolen = strlen(two);
1445         /*
1446          * Here we only care that entries for D/F conflicts are
1447          * adjacent, in particular with the file of the D/F conflict
1448          * appearing before files below the corresponding directory.
1449          * The order of the rest of the list is irrelevant for us.
1450          *
1451          * To achieve this, we sort with df_name_compare and provide
1452          * the mode S_IFDIR so that D/F conflicts will sort correctly.
1453          * We use the mode S_IFDIR for everything else for simplicity,
1454          * since in other cases any changes in their order due to
1455          * sorting cause no problems for us.
1456          */
1457         int cmp = df_name_compare(one, onelen, S_IFDIR,
1458                                   two, twolen, S_IFDIR);
1459         /*
1460          * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
1461          * that 'foo' comes before 'foo/bar'.
1462          */
1463         if (cmp)
1464                 return cmp;
1465         return onelen - twolen;
1466 }
1467
1468 struct directory_versions {
1469         /*
1470          * versions: list of (basename -> version_info)
1471          *
1472          * The basenames are in reverse lexicographic order of full pathnames,
1473          * as processed in process_entries().  This puts all entries within
1474          * a directory together, and covers the directory itself after
1475          * everything within it, allowing us to write subtrees before needing
1476          * to record information for the tree itself.
1477          */
1478         struct string_list versions;
1479
1480         /*
1481          * offsets: list of (full relative path directories -> integer offsets)
1482          *
1483          * Since versions contains basenames from files in multiple different
1484          * directories, we need to know which entries in versions correspond
1485          * to which directories.  Values of e.g.
1486          *     ""             0
1487          *     src            2
1488          *     src/moduleA    5
1489          * Would mean that entries 0-1 of versions are files in the toplevel
1490          * directory, entries 2-4 are files under src/, and the remaining
1491          * entries starting at index 5 are files under src/moduleA/.
1492          */
1493         struct string_list offsets;
1494
1495         /*
1496          * last_directory: directory that previously processed file found in
1497          *
1498          * last_directory starts NULL, but records the directory in which the
1499          * previous file was found within.  As soon as
1500          *    directory(current_file) != last_directory
1501          * then we need to start updating accounting in versions & offsets.
1502          * Note that last_directory is always the last path in "offsets" (or
1503          * NULL if "offsets" is empty) so this exists just for quick access.
1504          */
1505         const char *last_directory;
1506
1507         /* last_directory_len: cached computation of strlen(last_directory) */
1508         unsigned last_directory_len;
1509 };
1510
1511 static int tree_entry_order(const void *a_, const void *b_)
1512 {
1513         const struct string_list_item *a = a_;
1514         const struct string_list_item *b = b_;
1515
1516         const struct merged_info *ami = a->util;
1517         const struct merged_info *bmi = b->util;
1518         return base_name_compare(a->string, strlen(a->string), ami->result.mode,
1519                                  b->string, strlen(b->string), bmi->result.mode);
1520 }
1521
1522 static void write_tree(struct object_id *result_oid,
1523                        struct string_list *versions,
1524                        unsigned int offset,
1525                        size_t hash_size)
1526 {
1527         size_t maxlen = 0, extra;
1528         unsigned int nr = versions->nr - offset;
1529         struct strbuf buf = STRBUF_INIT;
1530         struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
1531         int i;
1532
1533         /*
1534          * We want to sort the last (versions->nr-offset) entries in versions.
1535          * Do so by abusing the string_list API a bit: make another string_list
1536          * that contains just those entries and then sort them.
1537          *
1538          * We won't use relevant_entries again and will let it just pop off the
1539          * stack, so there won't be allocation worries or anything.
1540          */
1541         relevant_entries.items = versions->items + offset;
1542         relevant_entries.nr = versions->nr - offset;
1543         QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
1544
1545         /* Pre-allocate some space in buf */
1546         extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
1547         for (i = 0; i < nr; i++) {
1548                 maxlen += strlen(versions->items[offset+i].string) + extra;
1549         }
1550         strbuf_grow(&buf, maxlen);
1551
1552         /* Write each entry out to buf */
1553         for (i = 0; i < nr; i++) {
1554                 struct merged_info *mi = versions->items[offset+i].util;
1555                 struct version_info *ri = &mi->result;
1556                 strbuf_addf(&buf, "%o %s%c",
1557                             ri->mode,
1558                             versions->items[offset+i].string, '\0');
1559                 strbuf_add(&buf, ri->oid.hash, hash_size);
1560         }
1561
1562         /* Write this object file out, and record in result_oid */
1563         write_object_file(buf.buf, buf.len, tree_type, result_oid);
1564         strbuf_release(&buf);
1565 }
1566
1567 static void record_entry_for_tree(struct directory_versions *dir_metadata,
1568                                   const char *path,
1569                                   struct merged_info *mi)
1570 {
1571         const char *basename;
1572
1573         if (mi->is_null)
1574                 /* nothing to record */
1575                 return;
1576
1577         basename = path + mi->basename_offset;
1578         assert(strchr(basename, '/') == NULL);
1579         string_list_append(&dir_metadata->versions,
1580                            basename)->util = &mi->result;
1581 }
1582
1583 static void write_completed_directory(struct merge_options *opt,
1584                                       const char *new_directory_name,
1585                                       struct directory_versions *info)
1586 {
1587         const char *prev_dir;
1588         struct merged_info *dir_info = NULL;
1589         unsigned int offset;
1590
1591         /*
1592          * Some explanation of info->versions and info->offsets...
1593          *
1594          * process_entries() iterates over all relevant files AND
1595          * directories in reverse lexicographic order, and calls this
1596          * function.  Thus, an example of the paths that process_entries()
1597          * could operate on (along with the directories for those paths
1598          * being shown) is:
1599          *
1600          *     xtract.c             ""
1601          *     tokens.txt           ""
1602          *     src/moduleB/umm.c    src/moduleB
1603          *     src/moduleB/stuff.h  src/moduleB
1604          *     src/moduleB/baz.c    src/moduleB
1605          *     src/moduleB          src
1606          *     src/moduleA/foo.c    src/moduleA
1607          *     src/moduleA/bar.c    src/moduleA
1608          *     src/moduleA          src
1609          *     src                  ""
1610          *     Makefile             ""
1611          *
1612          * info->versions:
1613          *
1614          *     always contains the unprocessed entries and their
1615          *     version_info information.  For example, after the first five
1616          *     entries above, info->versions would be:
1617          *
1618          *         xtract.c     <xtract.c's version_info>
1619          *         token.txt    <token.txt's version_info>
1620          *         umm.c        <src/moduleB/umm.c's version_info>
1621          *         stuff.h      <src/moduleB/stuff.h's version_info>
1622          *         baz.c        <src/moduleB/baz.c's version_info>
1623          *
1624          *     Once a subdirectory is completed we remove the entries in
1625          *     that subdirectory from info->versions, writing it as a tree
1626          *     (write_tree()).  Thus, as soon as we get to src/moduleB,
1627          *     info->versions would be updated to
1628          *
1629          *         xtract.c     <xtract.c's version_info>
1630          *         token.txt    <token.txt's version_info>
1631          *         moduleB      <src/moduleB's version_info>
1632          *
1633          * info->offsets:
1634          *
1635          *     helps us track which entries in info->versions correspond to
1636          *     which directories.  When we are N directories deep (e.g. 4
1637          *     for src/modA/submod/subdir/), we have up to N+1 unprocessed
1638          *     directories (+1 because of toplevel dir).  Corresponding to
1639          *     the info->versions example above, after processing five entries
1640          *     info->offsets will be:
1641          *
1642          *         ""           0
1643          *         src/moduleB  2
1644          *
1645          *     which is used to know that xtract.c & token.txt are from the
1646          *     toplevel dirctory, while umm.c & stuff.h & baz.c are from the
1647          *     src/moduleB directory.  Again, following the example above,
1648          *     once we need to process src/moduleB, then info->offsets is
1649          *     updated to
1650          *
1651          *         ""           0
1652          *         src          2
1653          *
1654          *     which says that moduleB (and only moduleB so far) is in the
1655          *     src directory.
1656          *
1657          *     One unique thing to note about info->offsets here is that
1658          *     "src" was not added to info->offsets until there was a path
1659          *     (a file OR directory) immediately below src/ that got
1660          *     processed.
1661          *
1662          * Since process_entry() just appends new entries to info->versions,
1663          * write_completed_directory() only needs to do work if the next path
1664          * is in a directory that is different than the last directory found
1665          * in info->offsets.
1666          */
1667
1668         /*
1669          * If we are working with the same directory as the last entry, there
1670          * is no work to do.  (See comments above the directory_name member of
1671          * struct merged_info for why we can use pointer comparison instead of
1672          * strcmp here.)
1673          */
1674         if (new_directory_name == info->last_directory)
1675                 return;
1676
1677         /*
1678          * If we are just starting (last_directory is NULL), or last_directory
1679          * is a prefix of the current directory, then we can just update
1680          * info->offsets to record the offset where we started this directory
1681          * and update last_directory to have quick access to it.
1682          */
1683         if (info->last_directory == NULL ||
1684             !strncmp(new_directory_name, info->last_directory,
1685                      info->last_directory_len)) {
1686                 uintptr_t offset = info->versions.nr;
1687
1688                 info->last_directory = new_directory_name;
1689                 info->last_directory_len = strlen(info->last_directory);
1690                 /*
1691                  * Record the offset into info->versions where we will
1692                  * start recording basenames of paths found within
1693                  * new_directory_name.
1694                  */
1695                 string_list_append(&info->offsets,
1696                                    info->last_directory)->util = (void*)offset;
1697                 return;
1698         }
1699
1700         /*
1701          * The next entry that will be processed will be within
1702          * new_directory_name.  Since at this point we know that
1703          * new_directory_name is within a different directory than
1704          * info->last_directory, we have all entries for info->last_directory
1705          * in info->versions and we need to create a tree object for them.
1706          */
1707         dir_info = strmap_get(&opt->priv->paths, info->last_directory);
1708         assert(dir_info);
1709         offset = (uintptr_t)info->offsets.items[info->offsets.nr-1].util;
1710         if (offset == info->versions.nr) {
1711                 /*
1712                  * Actually, we don't need to create a tree object in this
1713                  * case.  Whenever all files within a directory disappear
1714                  * during the merge (e.g. unmodified on one side and
1715                  * deleted on the other, or files were renamed elsewhere),
1716                  * then we get here and the directory itself needs to be
1717                  * omitted from its parent tree as well.
1718                  */
1719                 dir_info->is_null = 1;
1720         } else {
1721                 /*
1722                  * Write out the tree to the git object directory, and also
1723                  * record the mode and oid in dir_info->result.
1724                  */
1725                 dir_info->is_null = 0;
1726                 dir_info->result.mode = S_IFDIR;
1727                 write_tree(&dir_info->result.oid, &info->versions, offset,
1728                            opt->repo->hash_algo->rawsz);
1729         }
1730
1731         /*
1732          * We've now used several entries from info->versions and one entry
1733          * from info->offsets, so we get rid of those values.
1734          */
1735         info->offsets.nr--;
1736         info->versions.nr = offset;
1737
1738         /*
1739          * Now we've taken care of the completed directory, but we need to
1740          * prepare things since future entries will be in
1741          * new_directory_name.  (In particular, process_entry() will be
1742          * appending new entries to info->versions.)  So, we need to make
1743          * sure new_directory_name is the last entry in info->offsets.
1744          */
1745         prev_dir = info->offsets.nr == 0 ? NULL :
1746                    info->offsets.items[info->offsets.nr-1].string;
1747         if (new_directory_name != prev_dir) {
1748                 uintptr_t c = info->versions.nr;
1749                 string_list_append(&info->offsets,
1750                                    new_directory_name)->util = (void*)c;
1751         }
1752
1753         /* And, of course, we need to update last_directory to match. */
1754         info->last_directory = new_directory_name;
1755         info->last_directory_len = strlen(info->last_directory);
1756 }
1757
1758 /* Per entry merge function */
1759 static void process_entry(struct merge_options *opt,
1760                           const char *path,
1761                           struct conflict_info *ci,
1762                           struct directory_versions *dir_metadata)
1763 {
1764         VERIFY_CI(ci);
1765         assert(ci->filemask >= 0 && ci->filemask <= 7);
1766         /* ci->match_mask == 7 was handled in collect_merge_info_callback() */
1767         assert(ci->match_mask == 0 || ci->match_mask == 3 ||
1768                ci->match_mask == 5 || ci->match_mask == 6);
1769
1770         if (ci->dirmask) {
1771                 record_entry_for_tree(dir_metadata, path, &ci->merged);
1772                 if (ci->filemask == 0)
1773                         /* nothing else to handle */
1774                         return;
1775                 assert(ci->df_conflict);
1776         }
1777
1778         if (ci->df_conflict) {
1779                 die("Not yet implemented.");
1780         }
1781
1782         /*
1783          * NOTE: Below there is a long switch-like if-elseif-elseif... block
1784          *       which the code goes through even for the df_conflict cases
1785          *       above.  Well, it will once we don't die-not-implemented above.
1786          */
1787         if (ci->match_mask) {
1788                 ci->merged.clean = 1;
1789                 if (ci->match_mask == 6) {
1790                         /* stages[1] == stages[2] */
1791                         ci->merged.result.mode = ci->stages[1].mode;
1792                         oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
1793                 } else {
1794                         /* determine the mask of the side that didn't match */
1795                         unsigned int othermask = 7 & ~ci->match_mask;
1796                         int side = (othermask == 4) ? 2 : 1;
1797
1798                         ci->merged.result.mode = ci->stages[side].mode;
1799                         ci->merged.is_null = !ci->merged.result.mode;
1800                         oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
1801
1802                         assert(othermask == 2 || othermask == 4);
1803                         assert(ci->merged.is_null ==
1804                                (ci->filemask == ci->match_mask));
1805                 }
1806         } else if (ci->filemask >= 6 &&
1807                    (S_IFMT & ci->stages[1].mode) !=
1808                    (S_IFMT & ci->stages[2].mode)) {
1809                 /*
1810                  * Two different items from (file/submodule/symlink)
1811                  */
1812                 die("Not yet implemented.");
1813         } else if (ci->filemask >= 6) {
1814                 /*
1815                  * TODO: Needs a two-way or three-way content merge, but we're
1816                  * just being lazy and copying the version from HEAD and
1817                  * leaving it as conflicted.
1818                  */
1819                 ci->merged.clean = 0;
1820                 ci->merged.result.mode = ci->stages[1].mode;
1821                 oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
1822                 /* When we fix above, we'll call handle_content_merge() */
1823                 (void)handle_content_merge;
1824         } else if (ci->filemask == 3 || ci->filemask == 5) {
1825                 /* Modify/delete */
1826                 const char *modify_branch, *delete_branch;
1827                 int side = (ci->filemask == 5) ? 2 : 1;
1828                 int index = opt->priv->call_depth ? 0 : side;
1829
1830                 ci->merged.result.mode = ci->stages[index].mode;
1831                 oidcpy(&ci->merged.result.oid, &ci->stages[index].oid);
1832                 ci->merged.clean = 0;
1833
1834                 modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
1835                 delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
1836
1837                 if (ci->path_conflict &&
1838                     oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
1839                         /*
1840                          * This came from a rename/delete; no action to take,
1841                          * but avoid printing "modify/delete" conflict notice
1842                          * since the contents were not modified.
1843                          */
1844                 } else {
1845                         path_msg(opt, path, 0,
1846                                  _("CONFLICT (modify/delete): %s deleted in %s "
1847                                    "and modified in %s.  Version %s of %s left "
1848                                    "in tree."),
1849                                  path, delete_branch, modify_branch,
1850                                  modify_branch, path);
1851                 }
1852         } else if (ci->filemask == 2 || ci->filemask == 4) {
1853                 /* Added on one side */
1854                 int side = (ci->filemask == 4) ? 2 : 1;
1855                 ci->merged.result.mode = ci->stages[side].mode;
1856                 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
1857                 ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
1858         } else if (ci->filemask == 1) {
1859                 /* Deleted on both sides */
1860                 ci->merged.is_null = 1;
1861                 ci->merged.result.mode = 0;
1862                 oidcpy(&ci->merged.result.oid, &null_oid);
1863                 ci->merged.clean = !ci->path_conflict;
1864         }
1865
1866         /*
1867          * If still conflicted, record it separately.  This allows us to later
1868          * iterate over just conflicted entries when updating the index instead
1869          * of iterating over all entries.
1870          */
1871         if (!ci->merged.clean)
1872                 strmap_put(&opt->priv->conflicted, path, ci);
1873         record_entry_for_tree(dir_metadata, path, &ci->merged);
1874 }
1875
1876 static void process_entries(struct merge_options *opt,
1877                             struct object_id *result_oid)
1878 {
1879         struct hashmap_iter iter;
1880         struct strmap_entry *e;
1881         struct string_list plist = STRING_LIST_INIT_NODUP;
1882         struct string_list_item *entry;
1883         struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
1884                                                    STRING_LIST_INIT_NODUP,
1885                                                    NULL, 0 };
1886
1887         if (strmap_empty(&opt->priv->paths)) {
1888                 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
1889                 return;
1890         }
1891
1892         /* Hack to pre-allocate plist to the desired size */
1893         ALLOC_GROW(plist.items, strmap_get_size(&opt->priv->paths), plist.alloc);
1894
1895         /* Put every entry from paths into plist, then sort */
1896         strmap_for_each_entry(&opt->priv->paths, &iter, e) {
1897                 string_list_append(&plist, e->key)->util = e->value;
1898         }
1899         plist.cmp = string_list_df_name_compare;
1900         string_list_sort(&plist);
1901
1902         /*
1903          * Iterate over the items in reverse order, so we can handle paths
1904          * below a directory before needing to handle the directory itself.
1905          *
1906          * This allows us to write subtrees before we need to write trees,
1907          * and it also enables sane handling of directory/file conflicts
1908          * (because it allows us to know whether the directory is still in
1909          * the way when it is time to process the file at the same path).
1910          */
1911         for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
1912                 char *path = entry->string;
1913                 /*
1914                  * NOTE: mi may actually be a pointer to a conflict_info, but
1915                  * we have to check mi->clean first to see if it's safe to
1916                  * reassign to such a pointer type.
1917                  */
1918                 struct merged_info *mi = entry->util;
1919
1920                 write_completed_directory(opt, mi->directory_name,
1921                                           &dir_metadata);
1922                 if (mi->clean)
1923                         record_entry_for_tree(&dir_metadata, path, mi);
1924                 else {
1925                         struct conflict_info *ci = (struct conflict_info *)mi;
1926                         process_entry(opt, path, ci, &dir_metadata);
1927                 }
1928         }
1929
1930         if (dir_metadata.offsets.nr != 1 ||
1931             (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
1932                 printf("dir_metadata.offsets.nr = %d (should be 1)\n",
1933                        dir_metadata.offsets.nr);
1934                 printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
1935                        (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
1936                 fflush(stdout);
1937                 BUG("dir_metadata accounting completely off; shouldn't happen");
1938         }
1939         write_tree(result_oid, &dir_metadata.versions, 0,
1940                    opt->repo->hash_algo->rawsz);
1941         string_list_clear(&plist, 0);
1942         string_list_clear(&dir_metadata.versions, 0);
1943         string_list_clear(&dir_metadata.offsets, 0);
1944 }
1945
1946 /*** Function Grouping: functions related to merge_switch_to_result() ***/
1947
1948 static int checkout(struct merge_options *opt,
1949                     struct tree *prev,
1950                     struct tree *next)
1951 {
1952         /* Switch the index/working copy from old to new */
1953         int ret;
1954         struct tree_desc trees[2];
1955         struct unpack_trees_options unpack_opts;
1956
1957         memset(&unpack_opts, 0, sizeof(unpack_opts));
1958         unpack_opts.head_idx = -1;
1959         unpack_opts.src_index = opt->repo->index;
1960         unpack_opts.dst_index = opt->repo->index;
1961
1962         setup_unpack_trees_porcelain(&unpack_opts, "merge");
1963
1964         /*
1965          * NOTE: if this were just "git checkout" code, we would probably
1966          * read or refresh the cache and check for a conflicted index, but
1967          * builtin/merge.c or sequencer.c really needs to read the index
1968          * and check for conflicted entries before starting merging for a
1969          * good user experience (no sense waiting for merges/rebases before
1970          * erroring out), so there's no reason to duplicate that work here.
1971          */
1972
1973         /* 2-way merge to the new branch */
1974         unpack_opts.update = 1;
1975         unpack_opts.merge = 1;
1976         unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
1977         unpack_opts.verbose_update = (opt->verbosity > 2);
1978         unpack_opts.fn = twoway_merge;
1979         if (1/* FIXME: opts->overwrite_ignore*/) {
1980                 unpack_opts.dir = xcalloc(1, sizeof(*unpack_opts.dir));
1981                 unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
1982                 setup_standard_excludes(unpack_opts.dir);
1983         }
1984         parse_tree(prev);
1985         init_tree_desc(&trees[0], prev->buffer, prev->size);
1986         parse_tree(next);
1987         init_tree_desc(&trees[1], next->buffer, next->size);
1988
1989         ret = unpack_trees(2, trees, &unpack_opts);
1990         clear_unpack_trees_porcelain(&unpack_opts);
1991         dir_clear(unpack_opts.dir);
1992         FREE_AND_NULL(unpack_opts.dir);
1993         return ret;
1994 }
1995
1996 static int record_conflicted_index_entries(struct merge_options *opt,
1997                                            struct index_state *index,
1998                                            struct strmap *paths,
1999                                            struct strmap *conflicted)
2000 {
2001         struct hashmap_iter iter;
2002         struct strmap_entry *e;
2003         int errs = 0;
2004         int original_cache_nr;
2005
2006         if (strmap_empty(conflicted))
2007                 return 0;
2008
2009         original_cache_nr = index->cache_nr;
2010
2011         /* Put every entry from paths into plist, then sort */
2012         strmap_for_each_entry(conflicted, &iter, e) {
2013                 const char *path = e->key;
2014                 struct conflict_info *ci = e->value;
2015                 int pos;
2016                 struct cache_entry *ce;
2017                 int i;
2018
2019                 VERIFY_CI(ci);
2020
2021                 /*
2022                  * The index will already have a stage=0 entry for this path,
2023                  * because we created an as-merged-as-possible version of the
2024                  * file and checkout() moved the working copy and index over
2025                  * to that version.
2026                  *
2027                  * However, previous iterations through this loop will have
2028                  * added unstaged entries to the end of the cache which
2029                  * ignore the standard alphabetical ordering of cache
2030                  * entries and break invariants needed for index_name_pos()
2031                  * to work.  However, we know the entry we want is before
2032                  * those appended cache entries, so do a temporary swap on
2033                  * cache_nr to only look through entries of interest.
2034                  */
2035                 SWAP(index->cache_nr, original_cache_nr);
2036                 pos = index_name_pos(index, path, strlen(path));
2037                 SWAP(index->cache_nr, original_cache_nr);
2038                 if (pos < 0) {
2039                         if (ci->filemask != 1)
2040                                 BUG("Conflicted %s but nothing in basic working tree or index; this shouldn't happen", path);
2041                         cache_tree_invalidate_path(index, path);
2042                 } else {
2043                         ce = index->cache[pos];
2044
2045                         /*
2046                          * Clean paths with CE_SKIP_WORKTREE set will not be
2047                          * written to the working tree by the unpack_trees()
2048                          * call in checkout().  Our conflicted entries would
2049                          * have appeared clean to that code since we ignored
2050                          * the higher order stages.  Thus, we need override
2051                          * the CE_SKIP_WORKTREE bit and manually write those
2052                          * files to the working disk here.
2053                          *
2054                          * TODO: Implement this CE_SKIP_WORKTREE fixup.
2055                          */
2056
2057                         /*
2058                          * Mark this cache entry for removal and instead add
2059                          * new stage>0 entries corresponding to the
2060                          * conflicts.  If there are many conflicted entries, we
2061                          * want to avoid memmove'ing O(NM) entries by
2062                          * inserting the new entries one at a time.  So,
2063                          * instead, we just add the new cache entries to the
2064                          * end (ignoring normal index requirements on sort
2065                          * order) and sort the index once we're all done.
2066                          */
2067                         ce->ce_flags |= CE_REMOVE;
2068                 }
2069
2070                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
2071                         struct version_info *vi;
2072                         if (!(ci->filemask & (1ul << i)))
2073                                 continue;
2074                         vi = &ci->stages[i];
2075                         ce = make_cache_entry(index, vi->mode, &vi->oid,
2076                                               path, i+1, 0);
2077                         add_index_entry(index, ce, ADD_CACHE_JUST_APPEND);
2078                 }
2079         }
2080
2081         /*
2082          * Remove the unused cache entries (and invalidate the relevant
2083          * cache-trees), then sort the index entries to get the conflicted
2084          * entries we added to the end into their right locations.
2085          */
2086         remove_marked_cache_entries(index, 1);
2087         QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
2088
2089         return errs;
2090 }
2091
2092 void merge_switch_to_result(struct merge_options *opt,
2093                             struct tree *head,
2094                             struct merge_result *result,
2095                             int update_worktree_and_index,
2096                             int display_update_msgs)
2097 {
2098         assert(opt->priv == NULL);
2099         if (result->clean >= 0 && update_worktree_and_index) {
2100                 struct merge_options_internal *opti = result->priv;
2101
2102                 if (checkout(opt, head, result->tree)) {
2103                         /* failure to function */
2104                         result->clean = -1;
2105                         return;
2106                 }
2107
2108                 if (record_conflicted_index_entries(opt, opt->repo->index,
2109                                                     &opti->paths,
2110                                                     &opti->conflicted)) {
2111                         /* failure to function */
2112                         result->clean = -1;
2113                         return;
2114                 }
2115         }
2116
2117         if (display_update_msgs) {
2118                 struct merge_options_internal *opti = result->priv;
2119                 struct hashmap_iter iter;
2120                 struct strmap_entry *e;
2121                 struct string_list olist = STRING_LIST_INIT_NODUP;
2122                 int i;
2123
2124                 /* Hack to pre-allocate olist to the desired size */
2125                 ALLOC_GROW(olist.items, strmap_get_size(&opti->output),
2126                            olist.alloc);
2127
2128                 /* Put every entry from output into olist, then sort */
2129                 strmap_for_each_entry(&opti->output, &iter, e) {
2130                         string_list_append(&olist, e->key)->util = e->value;
2131                 }
2132                 string_list_sort(&olist);
2133
2134                 /* Iterate over the items, printing them */
2135                 for (i = 0; i < olist.nr; ++i) {
2136                         struct strbuf *sb = olist.items[i].util;
2137
2138                         printf("%s", sb->buf);
2139                 }
2140                 string_list_clear(&olist, 0);
2141
2142                 /* Also include needed rename limit adjustment now */
2143                 diff_warn_rename_limit("merge.renamelimit",
2144                                        opti->renames.needed_limit, 0);
2145         }
2146
2147         merge_finalize(opt, result);
2148 }
2149
2150 void merge_finalize(struct merge_options *opt,
2151                     struct merge_result *result)
2152 {
2153         struct merge_options_internal *opti = result->priv;
2154
2155         assert(opt->priv == NULL);
2156
2157         clear_or_reinit_internal_opts(opti, 0);
2158         FREE_AND_NULL(opti);
2159 }
2160
2161 /*** Function Grouping: helper functions for merge_incore_*() ***/
2162
2163 static inline void set_commit_tree(struct commit *c, struct tree *t)
2164 {
2165         c->maybe_tree = t;
2166 }
2167
2168 static struct commit *make_virtual_commit(struct repository *repo,
2169                                           struct tree *tree,
2170                                           const char *comment)
2171 {
2172         struct commit *commit = alloc_commit_node(repo);
2173
2174         set_merge_remote_desc(commit, comment, (struct object *)commit);
2175         set_commit_tree(commit, tree);
2176         commit->object.parsed = 1;
2177         return commit;
2178 }
2179
2180 static void merge_start(struct merge_options *opt, struct merge_result *result)
2181 {
2182         struct rename_info *renames;
2183         int i;
2184
2185         /* Sanity checks on opt */
2186         assert(opt->repo);
2187
2188         assert(opt->branch1 && opt->branch2);
2189
2190         assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
2191                opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
2192         assert(opt->rename_limit >= -1);
2193         assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
2194         assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
2195
2196         assert(opt->xdl_opts >= 0);
2197         assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
2198                opt->recursive_variant <= MERGE_VARIANT_THEIRS);
2199
2200         /*
2201          * detect_renames, verbosity, buffer_output, and obuf are ignored
2202          * fields that were used by "recursive" rather than "ort" -- but
2203          * sanity check them anyway.
2204          */
2205         assert(opt->detect_renames >= -1 &&
2206                opt->detect_renames <= DIFF_DETECT_COPY);
2207         assert(opt->verbosity >= 0 && opt->verbosity <= 5);
2208         assert(opt->buffer_output <= 2);
2209         assert(opt->obuf.len == 0);
2210
2211         assert(opt->priv == NULL);
2212
2213         /* Default to histogram diff.  Actually, just hardcode it...for now. */
2214         opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
2215
2216         /* Initialization of opt->priv, our internal merge data */
2217         opt->priv = xcalloc(1, sizeof(*opt->priv));
2218
2219         /* Initialization of various renames fields */
2220         renames = &opt->priv->renames;
2221         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
2222                 strset_init_with_options(&renames->dirs_removed[i],
2223                                          NULL, 0);
2224                 strmap_init_with_options(&renames->dir_rename_count[i],
2225                                          NULL, 1);
2226                 strmap_init_with_options(&renames->dir_renames[i],
2227                                          NULL, 0);
2228         }
2229
2230         /*
2231          * Although we initialize opt->priv->paths with strdup_strings=0,
2232          * that's just to avoid making yet another copy of an allocated
2233          * string.  Putting the entry into paths means we are taking
2234          * ownership, so we will later free it.  paths_to_free is similar.
2235          *
2236          * In contrast, conflicted just has a subset of keys from paths, so
2237          * we don't want to free those (it'd be a duplicate free).
2238          */
2239         strmap_init_with_options(&opt->priv->paths, NULL, 0);
2240         strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
2241         string_list_init(&opt->priv->paths_to_free, 0);
2242
2243         /*
2244          * keys & strbufs in output will sometimes need to outlive "paths",
2245          * so it will have a copy of relevant keys.  It's probably a small
2246          * subset of the overall paths that have special output.
2247          */
2248         strmap_init(&opt->priv->output);
2249 }
2250
2251 /*** Function Grouping: merge_incore_*() and their internal variants ***/
2252
2253 /*
2254  * Originally from merge_trees_internal(); heavily adapted, though.
2255  */
2256 static void merge_ort_nonrecursive_internal(struct merge_options *opt,
2257                                             struct tree *merge_base,
2258                                             struct tree *side1,
2259                                             struct tree *side2,
2260                                             struct merge_result *result)
2261 {
2262         struct object_id working_tree_oid;
2263
2264         if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
2265                 /*
2266                  * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
2267                  * base, and 2-3) the trees for the two trees we're merging.
2268                  */
2269                 err(opt, _("collecting merge info failed for trees %s, %s, %s"),
2270                     oid_to_hex(&merge_base->object.oid),
2271                     oid_to_hex(&side1->object.oid),
2272                     oid_to_hex(&side2->object.oid));
2273                 result->clean = -1;
2274                 return;
2275         }
2276
2277         result->clean = detect_and_process_renames(opt, merge_base,
2278                                                    side1, side2);
2279         process_entries(opt, &working_tree_oid);
2280
2281         /* Set return values */
2282         result->tree = parse_tree_indirect(&working_tree_oid);
2283         /* existence of conflicted entries implies unclean */
2284         result->clean &= strmap_empty(&opt->priv->conflicted);
2285         if (!opt->priv->call_depth) {
2286                 result->priv = opt->priv;
2287                 opt->priv = NULL;
2288         }
2289 }
2290
2291 /*
2292  * Originally from merge_recursive_internal(); somewhat adapted, though.
2293  */
2294 static void merge_ort_internal(struct merge_options *opt,
2295                                struct commit_list *merge_bases,
2296                                struct commit *h1,
2297                                struct commit *h2,
2298                                struct merge_result *result)
2299 {
2300         struct commit_list *iter;
2301         struct commit *merged_merge_bases;
2302         const char *ancestor_name;
2303         struct strbuf merge_base_abbrev = STRBUF_INIT;
2304
2305         if (!merge_bases) {
2306                 merge_bases = get_merge_bases(h1, h2);
2307                 /* See merge-ort.h:merge_incore_recursive() declaration NOTE */
2308                 merge_bases = reverse_commit_list(merge_bases);
2309         }
2310
2311         merged_merge_bases = pop_commit(&merge_bases);
2312         if (merged_merge_bases == NULL) {
2313                 /* if there is no common ancestor, use an empty tree */
2314                 struct tree *tree;
2315
2316                 tree = lookup_tree(opt->repo, opt->repo->hash_algo->empty_tree);
2317                 merged_merge_bases = make_virtual_commit(opt->repo, tree,
2318                                                          "ancestor");
2319                 ancestor_name = "empty tree";
2320         } else if (merge_bases) {
2321                 ancestor_name = "merged common ancestors";
2322         } else {
2323                 strbuf_add_unique_abbrev(&merge_base_abbrev,
2324                                          &merged_merge_bases->object.oid,
2325                                          DEFAULT_ABBREV);
2326                 ancestor_name = merge_base_abbrev.buf;
2327         }
2328
2329         for (iter = merge_bases; iter; iter = iter->next) {
2330                 const char *saved_b1, *saved_b2;
2331                 struct commit *prev = merged_merge_bases;
2332
2333                 opt->priv->call_depth++;
2334                 /*
2335                  * When the merge fails, the result contains files
2336                  * with conflict markers. The cleanness flag is
2337                  * ignored (unless indicating an error), it was never
2338                  * actually used, as result of merge_trees has always
2339                  * overwritten it: the committed "conflicts" were
2340                  * already resolved.
2341                  */
2342                 saved_b1 = opt->branch1;
2343                 saved_b2 = opt->branch2;
2344                 opt->branch1 = "Temporary merge branch 1";
2345                 opt->branch2 = "Temporary merge branch 2";
2346                 merge_ort_internal(opt, NULL, prev, iter->item, result);
2347                 if (result->clean < 0)
2348                         return;
2349                 opt->branch1 = saved_b1;
2350                 opt->branch2 = saved_b2;
2351                 opt->priv->call_depth--;
2352
2353                 merged_merge_bases = make_virtual_commit(opt->repo,
2354                                                          result->tree,
2355                                                          "merged tree");
2356                 commit_list_insert(prev, &merged_merge_bases->parents);
2357                 commit_list_insert(iter->item,
2358                                    &merged_merge_bases->parents->next);
2359
2360                 clear_or_reinit_internal_opts(opt->priv, 1);
2361         }
2362
2363         opt->ancestor = ancestor_name;
2364         merge_ort_nonrecursive_internal(opt,
2365                                         repo_get_commit_tree(opt->repo,
2366                                                              merged_merge_bases),
2367                                         repo_get_commit_tree(opt->repo, h1),
2368                                         repo_get_commit_tree(opt->repo, h2),
2369                                         result);
2370         strbuf_release(&merge_base_abbrev);
2371         opt->ancestor = NULL;  /* avoid accidental re-use of opt->ancestor */
2372 }
2373
2374 void merge_incore_nonrecursive(struct merge_options *opt,
2375                                struct tree *merge_base,
2376                                struct tree *side1,
2377                                struct tree *side2,
2378                                struct merge_result *result)
2379 {
2380         assert(opt->ancestor != NULL);
2381         merge_start(opt, result);
2382         merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
2383 }
2384
2385 void merge_incore_recursive(struct merge_options *opt,
2386                             struct commit_list *merge_bases,
2387                             struct commit *side1,
2388                             struct commit *side2,
2389                             struct merge_result *result)
2390 {
2391         /* We set the ancestor label based on the merge_bases */
2392         assert(opt->ancestor == NULL);
2393
2394         merge_start(opt, result);
2395         merge_ort_internal(opt, merge_bases, side1, side2, result);
2396 }