Documentation: diff can compare blobs
[git] / match-trees.c
1 #include "cache.h"
2 #include "tree.h"
3 #include "tree-walk.h"
4
5 static int score_missing(unsigned mode, const char *path)
6 {
7         int score;
8
9         if (S_ISDIR(mode))
10                 score = -1000;
11         else if (S_ISLNK(mode))
12                 score = -500;
13         else
14                 score = -50;
15         return score;
16 }
17
18 static int score_differs(unsigned mode1, unsigned mode2, const char *path)
19 {
20         int score;
21
22         if (S_ISDIR(mode1) != S_ISDIR(mode2))
23                 score = -100;
24         else if (S_ISLNK(mode1) != S_ISLNK(mode2))
25                 score = -50;
26         else
27                 score = -5;
28         return score;
29 }
30
31 static int score_matches(unsigned mode1, unsigned mode2, const char *path)
32 {
33         int score;
34
35         /* Heh, we found SHA-1 collisions between different kind of objects */
36         if (S_ISDIR(mode1) != S_ISDIR(mode2))
37                 score = -100;
38         else if (S_ISLNK(mode1) != S_ISLNK(mode2))
39                 score = -50;
40
41         else if (S_ISDIR(mode1))
42                 score = 1000;
43         else if (S_ISLNK(mode1))
44                 score = 500;
45         else
46                 score = 250;
47         return score;
48 }
49
50 /*
51  * Inspect two trees, and give a score that tells how similar they are.
52  */
53 static int score_trees(const unsigned char *hash1, const unsigned char *hash2)
54 {
55         struct tree_desc one;
56         struct tree_desc two;
57         void *one_buf, *two_buf;
58         int score = 0;
59         enum object_type type;
60         unsigned long size;
61
62         one_buf = read_sha1_file(hash1, &type, &size);
63         if (!one_buf)
64                 die("unable to read tree (%s)", sha1_to_hex(hash1));
65         if (type != OBJ_TREE)
66                 die("%s is not a tree", sha1_to_hex(hash1));
67         init_tree_desc(&one, one_buf, size);
68         two_buf = read_sha1_file(hash2, &type, &size);
69         if (!two_buf)
70                 die("unable to read tree (%s)", sha1_to_hex(hash2));
71         if (type != OBJ_TREE)
72                 die("%s is not a tree", sha1_to_hex(hash2));
73         init_tree_desc(&two, two_buf, size);
74         while (one.size | two.size) {
75                 const unsigned char *elem1 = elem1;
76                 const unsigned char *elem2 = elem2;
77                 const char *path1 = path1;
78                 const char *path2 = path2;
79                 unsigned mode1 = mode1;
80                 unsigned mode2 = mode2;
81                 int cmp;
82
83                 if (one.size)
84                         elem1 = tree_entry_extract(&one, &path1, &mode1);
85                 if (two.size)
86                         elem2 = tree_entry_extract(&two, &path2, &mode2);
87
88                 if (!one.size) {
89                         /* two has more entries */
90                         score += score_missing(mode2, path2);
91                         update_tree_entry(&two);
92                         continue;
93                 }
94                 if (!two.size) {
95                         /* two lacks this entry */
96                         score += score_missing(mode1, path1);
97                         update_tree_entry(&one);
98                         continue;
99                 }
100                 cmp = base_name_compare(path1, strlen(path1), mode1,
101                                         path2, strlen(path2), mode2);
102                 if (cmp < 0) {
103                         /* path1 does not appear in two */
104                         score += score_missing(mode1, path1);
105                         update_tree_entry(&one);
106                         continue;
107                 }
108                 else if (cmp > 0) {
109                         /* path2 does not appear in one */
110                         score += score_missing(mode2, path2);
111                         update_tree_entry(&two);
112                         continue;
113                 }
114                 else if (hashcmp(elem1, elem2))
115                         /* they are different */
116                         score += score_differs(mode1, mode2, path1);
117                 else
118                         /* same subtree or blob */
119                         score += score_matches(mode1, mode2, path1);
120                 update_tree_entry(&one);
121                 update_tree_entry(&two);
122         }
123         free(one_buf);
124         free(two_buf);
125         return score;
126 }
127
128 /*
129  * Match one itself and its subtrees with two and pick the best match.
130  */
131 static void match_trees(const unsigned char *hash1,
132                         const unsigned char *hash2,
133                         int *best_score,
134                         char **best_match,
135                         const char *base,
136                         int recurse_limit)
137 {
138         struct tree_desc one;
139         void *one_buf;
140         enum object_type type;
141         unsigned long size;
142
143         one_buf = read_sha1_file(hash1, &type, &size);
144         if (!one_buf)
145                 die("unable to read tree (%s)", sha1_to_hex(hash1));
146         if (type != OBJ_TREE)
147                 die("%s is not a tree", sha1_to_hex(hash1));
148         init_tree_desc(&one, one_buf, size);
149
150         while (one.size) {
151                 const char *path;
152                 const unsigned char *elem;
153                 unsigned mode;
154                 int score;
155
156                 elem = tree_entry_extract(&one, &path, &mode);
157                 if (!S_ISDIR(mode))
158                         goto next;
159                 score = score_trees(elem, hash2);
160                 if (*best_score < score) {
161                         char *newpath;
162                         newpath = xmalloc(strlen(base) + strlen(path) + 1);
163                         sprintf(newpath, "%s%s", base, path);
164                         free(*best_match);
165                         *best_match = newpath;
166                         *best_score = score;
167                 }
168                 if (recurse_limit) {
169                         char *newbase;
170                         newbase = xmalloc(strlen(base) + strlen(path) + 2);
171                         sprintf(newbase, "%s%s/", base, path);
172                         match_trees(elem, hash2, best_score, best_match,
173                                     newbase, recurse_limit - 1);
174                         free(newbase);
175                 }
176
177         next:
178                 update_tree_entry(&one);
179         }
180         free(one_buf);
181 }
182
183 /*
184  * A tree "hash1" has a subdirectory at "prefix".  Come up with a
185  * tree object by replacing it with another tree "hash2".
186  */
187 static int splice_tree(const unsigned char *hash1,
188                        const char *prefix,
189                        const unsigned char *hash2,
190                        unsigned char *result)
191 {
192         char *subpath;
193         int toplen;
194         char *buf;
195         unsigned long sz;
196         struct tree_desc desc;
197         unsigned char *rewrite_here;
198         const unsigned char *rewrite_with;
199         unsigned char subtree[20];
200         enum object_type type;
201         int status;
202
203         subpath = strchr(prefix, '/');
204         if (!subpath)
205                 toplen = strlen(prefix);
206         else {
207                 toplen = subpath - prefix;
208                 subpath++;
209         }
210
211         buf = read_sha1_file(hash1, &type, &sz);
212         if (!buf)
213                 die("cannot read tree %s", sha1_to_hex(hash1));
214         init_tree_desc(&desc, buf, sz);
215
216         rewrite_here = NULL;
217         while (desc.size) {
218                 const char *name;
219                 unsigned mode;
220                 const unsigned char *sha1;
221
222                 sha1 = tree_entry_extract(&desc, &name, &mode);
223                 if (strlen(name) == toplen &&
224                     !memcmp(name, prefix, toplen)) {
225                         if (!S_ISDIR(mode))
226                                 die("entry %s in tree %s is not a tree",
227                                     name, sha1_to_hex(hash1));
228                         rewrite_here = (unsigned char *) sha1;
229                         break;
230                 }
231                 update_tree_entry(&desc);
232         }
233         if (!rewrite_here)
234                 die("entry %.*s not found in tree %s",
235                     toplen, prefix, sha1_to_hex(hash1));
236         if (subpath) {
237                 status = splice_tree(rewrite_here, subpath, hash2, subtree);
238                 if (status)
239                         return status;
240                 rewrite_with = subtree;
241         }
242         else
243                 rewrite_with = hash2;
244         hashcpy(rewrite_here, rewrite_with);
245         status = write_sha1_file(buf, sz, tree_type, result);
246         free(buf);
247         return status;
248 }
249
250 /*
251  * We are trying to come up with a merge between one and two that
252  * results in a tree shape similar to one.  The tree two might
253  * correspond to a subtree of one, in which case it needs to be
254  * shifted down by prefixing otherwise empty directories.  On the
255  * other hand, it could cover tree one and we might need to pick a
256  * subtree of it.
257  */
258 void shift_tree(const unsigned char *hash1,
259                 const unsigned char *hash2,
260                 unsigned char *shifted,
261                 int depth_limit)
262 {
263         char *add_prefix;
264         char *del_prefix;
265         int add_score, del_score;
266
267         /*
268          * NEEDSWORK: this limits the recursion depth to hardcoded
269          * value '2' to avoid excessive overhead.
270          */
271         if (!depth_limit)
272                 depth_limit = 2;
273
274         add_score = del_score = score_trees(hash1, hash2);
275         add_prefix = xcalloc(1, 1);
276         del_prefix = xcalloc(1, 1);
277
278         /*
279          * See if one's subtree resembles two; if so we need to prefix
280          * two with a few fake trees to match the prefix.
281          */
282         match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit);
283
284         /*
285          * See if two's subtree resembles one; if so we need to
286          * pick only subtree of two.
287          */
288         match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit);
289
290         /* Assume we do not have to do any shifting */
291         hashcpy(shifted, hash2);
292
293         if (add_score < del_score) {
294                 /* We need to pick a subtree of two */
295                 unsigned mode;
296
297                 if (!*del_prefix)
298                         return;
299
300                 if (get_tree_entry(hash2, del_prefix, shifted, &mode))
301                         die("cannot find path %s in tree %s",
302                             del_prefix, sha1_to_hex(hash2));
303                 return;
304         }
305
306         if (!*add_prefix)
307                 return;
308
309         splice_tree(hash1, add_prefix, hash2, shifted);
310 }
311
312 /*
313  * The user says the trees will be shifted by this much.
314  * Unfortunately we cannot fundamentally tell which one to
315  * be prefixed, as recursive merge can work in either direction.
316  */
317 void shift_tree_by(const unsigned char *hash1,
318                    const unsigned char *hash2,
319                    unsigned char *shifted,
320                    const char *shift_prefix)
321 {
322         unsigned char sub1[20], sub2[20];
323         unsigned mode1, mode2;
324         unsigned candidate = 0;
325
326         /* Can hash2 be a tree at shift_prefix in tree hash1? */
327         if (!get_tree_entry(hash1, shift_prefix, sub1, &mode1) &&
328             S_ISDIR(mode1))
329                 candidate |= 1;
330
331         /* Can hash1 be a tree at shift_prefix in tree hash2? */
332         if (!get_tree_entry(hash2, shift_prefix, sub2, &mode2) &&
333             S_ISDIR(mode2))
334                 candidate |= 2;
335
336         if (candidate == 3) {
337                 /* Both are plausible -- we need to evaluate the score */
338                 int best_score = score_trees(hash1, hash2);
339                 int score;
340
341                 candidate = 0;
342                 score = score_trees(sub1, hash2);
343                 if (score > best_score) {
344                         candidate = 1;
345                         best_score = score;
346                 }
347                 score = score_trees(sub2, hash1);
348                 if (score > best_score)
349                         candidate = 2;
350         }
351
352         if (!candidate) {
353                 /* Neither is plausible -- do not shift */
354                 hashcpy(shifted, hash2);
355                 return;
356         }
357
358         if (candidate == 1)
359                 /*
360                  * shift tree2 down by adding shift_prefix above it
361                  * to match tree1.
362                  */
363                 splice_tree(hash1, shift_prefix, hash2, shifted);
364         else
365                 /*
366                  * shift tree2 up by removing shift_prefix from it
367                  * to match tree1.
368                  */
369                 hashcpy(shifted, sub2);
370 }