midx: write object id fanout chunk
[git] / match-trees.c
1 #include "cache.h"
2 #include "tree.h"
3 #include "tree-walk.h"
4
5 static int score_missing(unsigned mode, const char *path)
6 {
7         int score;
8
9         if (S_ISDIR(mode))
10                 score = -1000;
11         else if (S_ISLNK(mode))
12                 score = -500;
13         else
14                 score = -50;
15         return score;
16 }
17
18 static int score_differs(unsigned mode1, unsigned mode2, const char *path)
19 {
20         int score;
21
22         if (S_ISDIR(mode1) != S_ISDIR(mode2))
23                 score = -100;
24         else if (S_ISLNK(mode1) != S_ISLNK(mode2))
25                 score = -50;
26         else
27                 score = -5;
28         return score;
29 }
30
31 static int score_matches(unsigned mode1, unsigned mode2, const char *path)
32 {
33         int score;
34
35         /* Heh, we found SHA-1 collisions between different kind of objects */
36         if (S_ISDIR(mode1) != S_ISDIR(mode2))
37                 score = -100;
38         else if (S_ISLNK(mode1) != S_ISLNK(mode2))
39                 score = -50;
40
41         else if (S_ISDIR(mode1))
42                 score = 1000;
43         else if (S_ISLNK(mode1))
44                 score = 500;
45         else
46                 score = 250;
47         return score;
48 }
49
50 static void *fill_tree_desc_strict(struct tree_desc *desc,
51                                    const struct object_id *hash)
52 {
53         void *buffer;
54         enum object_type type;
55         unsigned long size;
56
57         buffer = read_object_file(hash, &type, &size);
58         if (!buffer)
59                 die("unable to read tree (%s)", oid_to_hex(hash));
60         if (type != OBJ_TREE)
61                 die("%s is not a tree", oid_to_hex(hash));
62         init_tree_desc(desc, buffer, size);
63         return buffer;
64 }
65
66 static int base_name_entries_compare(const struct name_entry *a,
67                                      const struct name_entry *b)
68 {
69         return base_name_compare(a->path, tree_entry_len(a), a->mode,
70                                  b->path, tree_entry_len(b), b->mode);
71 }
72
73 /*
74  * Inspect two trees, and give a score that tells how similar they are.
75  */
76 static int score_trees(const struct object_id *hash1, const struct object_id *hash2)
77 {
78         struct tree_desc one;
79         struct tree_desc two;
80         void *one_buf = fill_tree_desc_strict(&one, hash1);
81         void *two_buf = fill_tree_desc_strict(&two, hash2);
82         int score = 0;
83
84         for (;;) {
85                 struct name_entry e1, e2;
86                 int got_entry_from_one = tree_entry(&one, &e1);
87                 int got_entry_from_two = tree_entry(&two, &e2);
88                 int cmp;
89
90                 if (got_entry_from_one && got_entry_from_two)
91                         cmp = base_name_entries_compare(&e1, &e2);
92                 else if (got_entry_from_one)
93                         /* two lacks this entry */
94                         cmp = -1;
95                 else if (got_entry_from_two)
96                         /* two has more entries */
97                         cmp = 1;
98                 else
99                         break;
100
101                 if (cmp < 0)
102                         /* path1 does not appear in two */
103                         score += score_missing(e1.mode, e1.path);
104                 else if (cmp > 0)
105                         /* path2 does not appear in one */
106                         score += score_missing(e2.mode, e2.path);
107                 else if (oidcmp(e1.oid, e2.oid))
108                         /* they are different */
109                         score += score_differs(e1.mode, e2.mode, e1.path);
110                 else
111                         /* same subtree or blob */
112                         score += score_matches(e1.mode, e2.mode, e1.path);
113         }
114         free(one_buf);
115         free(two_buf);
116         return score;
117 }
118
119 /*
120  * Match one itself and its subtrees with two and pick the best match.
121  */
122 static void match_trees(const struct object_id *hash1,
123                         const struct object_id *hash2,
124                         int *best_score,
125                         char **best_match,
126                         const char *base,
127                         int recurse_limit)
128 {
129         struct tree_desc one;
130         void *one_buf = fill_tree_desc_strict(&one, hash1);
131
132         while (one.size) {
133                 const char *path;
134                 const struct object_id *elem;
135                 unsigned mode;
136                 int score;
137
138                 elem = tree_entry_extract(&one, &path, &mode);
139                 if (!S_ISDIR(mode))
140                         goto next;
141                 score = score_trees(elem, hash2);
142                 if (*best_score < score) {
143                         free(*best_match);
144                         *best_match = xstrfmt("%s%s", base, path);
145                         *best_score = score;
146                 }
147                 if (recurse_limit) {
148                         char *newbase = xstrfmt("%s%s/", base, path);
149                         match_trees(elem, hash2, best_score, best_match,
150                                     newbase, recurse_limit - 1);
151                         free(newbase);
152                 }
153
154         next:
155                 update_tree_entry(&one);
156         }
157         free(one_buf);
158 }
159
160 /*
161  * A tree "oid1" has a subdirectory at "prefix".  Come up with a tree object by
162  * replacing it with another tree "oid2".
163  */
164 static int splice_tree(const struct object_id *oid1, const char *prefix,
165                        const struct object_id *oid2, struct object_id *result)
166 {
167         char *subpath;
168         int toplen;
169         char *buf;
170         unsigned long sz;
171         struct tree_desc desc;
172         struct object_id *rewrite_here;
173         const struct object_id *rewrite_with;
174         struct object_id subtree;
175         enum object_type type;
176         int status;
177
178         subpath = strchrnul(prefix, '/');
179         toplen = subpath - prefix;
180         if (*subpath)
181                 subpath++;
182
183         buf = read_object_file(oid1, &type, &sz);
184         if (!buf)
185                 die("cannot read tree %s", oid_to_hex(oid1));
186         init_tree_desc(&desc, buf, sz);
187
188         rewrite_here = NULL;
189         while (desc.size) {
190                 const char *name;
191                 unsigned mode;
192                 const struct object_id *oid;
193
194                 oid = tree_entry_extract(&desc, &name, &mode);
195                 if (strlen(name) == toplen &&
196                     !memcmp(name, prefix, toplen)) {
197                         if (!S_ISDIR(mode))
198                                 die("entry %s in tree %s is not a tree", name,
199                                     oid_to_hex(oid1));
200                         rewrite_here = (struct object_id *)oid;
201                         break;
202                 }
203                 update_tree_entry(&desc);
204         }
205         if (!rewrite_here)
206                 die("entry %.*s not found in tree %s", toplen, prefix,
207                     oid_to_hex(oid1));
208         if (*subpath) {
209                 status = splice_tree(rewrite_here, subpath, oid2, &subtree);
210                 if (status)
211                         return status;
212                 rewrite_with = &subtree;
213         } else {
214                 rewrite_with = oid2;
215         }
216         oidcpy(rewrite_here, rewrite_with);
217         status = write_object_file(buf, sz, tree_type, result);
218         free(buf);
219         return status;
220 }
221
222 /*
223  * We are trying to come up with a merge between one and two that
224  * results in a tree shape similar to one.  The tree two might
225  * correspond to a subtree of one, in which case it needs to be
226  * shifted down by prefixing otherwise empty directories.  On the
227  * other hand, it could cover tree one and we might need to pick a
228  * subtree of it.
229  */
230 void shift_tree(const struct object_id *hash1,
231                 const struct object_id *hash2,
232                 struct object_id *shifted,
233                 int depth_limit)
234 {
235         char *add_prefix;
236         char *del_prefix;
237         int add_score, del_score;
238
239         /*
240          * NEEDSWORK: this limits the recursion depth to hardcoded
241          * value '2' to avoid excessive overhead.
242          */
243         if (!depth_limit)
244                 depth_limit = 2;
245
246         add_score = del_score = score_trees(hash1, hash2);
247         add_prefix = xcalloc(1, 1);
248         del_prefix = xcalloc(1, 1);
249
250         /*
251          * See if one's subtree resembles two; if so we need to prefix
252          * two with a few fake trees to match the prefix.
253          */
254         match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
255
256         /*
257          * See if two's subtree resembles one; if so we need to
258          * pick only subtree of two.
259          */
260         match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
261
262         /* Assume we do not have to do any shifting */
263         oidcpy(shifted, hash2);
264
265         if (add_score < del_score) {
266                 /* We need to pick a subtree of two */
267                 unsigned mode;
268
269                 if (!*del_prefix)
270                         return;
271
272                 if (get_tree_entry(hash2, del_prefix, shifted, &mode))
273                         die("cannot find path %s in tree %s",
274                             del_prefix, oid_to_hex(hash2));
275                 return;
276         }
277
278         if (!*add_prefix)
279                 return;
280
281         splice_tree(hash1, add_prefix, hash2, shifted);
282 }
283
284 /*
285  * The user says the trees will be shifted by this much.
286  * Unfortunately we cannot fundamentally tell which one to
287  * be prefixed, as recursive merge can work in either direction.
288  */
289 void shift_tree_by(const struct object_id *hash1,
290                    const struct object_id *hash2,
291                    struct object_id *shifted,
292                    const char *shift_prefix)
293 {
294         struct object_id sub1, sub2;
295         unsigned mode1, mode2;
296         unsigned candidate = 0;
297
298         /* Can hash2 be a tree at shift_prefix in tree hash1? */
299         if (!get_tree_entry(hash1, shift_prefix, &sub1, &mode1) &&
300             S_ISDIR(mode1))
301                 candidate |= 1;
302
303         /* Can hash1 be a tree at shift_prefix in tree hash2? */
304         if (!get_tree_entry(hash2, shift_prefix, &sub2, &mode2) &&
305             S_ISDIR(mode2))
306                 candidate |= 2;
307
308         if (candidate == 3) {
309                 /* Both are plausible -- we need to evaluate the score */
310                 int best_score = score_trees(hash1, hash2);
311                 int score;
312
313                 candidate = 0;
314                 score = score_trees(&sub1, hash2);
315                 if (score > best_score) {
316                         candidate = 1;
317                         best_score = score;
318                 }
319                 score = score_trees(&sub2, hash1);
320                 if (score > best_score)
321                         candidate = 2;
322         }
323
324         if (!candidate) {
325                 /* Neither is plausible -- do not shift */
326                 oidcpy(shifted, hash2);
327                 return;
328         }
329
330         if (candidate == 1)
331                 /*
332                  * shift tree2 down by adding shift_prefix above it
333                  * to match tree1.
334                  */
335                 splice_tree(hash1, shift_prefix, hash2, shifted);
336         else
337                 /*
338                  * shift tree2 up by removing shift_prefix from it
339                  * to match tree1.
340                  */
341                 oidcpy(shifted, &sub2);
342 }