tests: cleanup binary test vector files
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "unpack-trees.h"
4 #include "dir.h"
5 #include "tree.h"
6
7 static const char *get_mode(const char *str, unsigned int *modep)
8 {
9         unsigned char c;
10         unsigned int mode = 0;
11
12         if (*str == ' ')
13                 return NULL;
14
15         while ((c = *str++) != ' ') {
16                 if (c < '0' || c > '7')
17                         return NULL;
18                 mode = (mode << 3) + (c - '0');
19         }
20         *modep = mode;
21         return str;
22 }
23
24 static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
25 {
26         const char *path;
27         unsigned int mode, len;
28
29         if (size < 24 || buf[size - 21])
30                 die("corrupt tree file");
31
32         path = get_mode(buf, &mode);
33         if (!path || !*path)
34                 die("corrupt tree file");
35         len = strlen(path) + 1;
36
37         /* Initialize the descriptor entry */
38         desc->entry.path = path;
39         desc->entry.mode = mode;
40         desc->entry.sha1 = (const unsigned char *)(path + len);
41 }
42
43 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
44 {
45         desc->buffer = buffer;
46         desc->size = size;
47         if (size)
48                 decode_tree_entry(desc, buffer, size);
49 }
50
51 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
52 {
53         unsigned long size = 0;
54         void *buf = NULL;
55
56         if (sha1) {
57                 buf = read_object_with_reference(sha1, tree_type, &size, NULL);
58                 if (!buf)
59                         die("unable to read tree %s", sha1_to_hex(sha1));
60         }
61         init_tree_desc(desc, buf, size);
62         return buf;
63 }
64
65 static void entry_clear(struct name_entry *a)
66 {
67         memset(a, 0, sizeof(*a));
68 }
69
70 static void entry_extract(struct tree_desc *t, struct name_entry *a)
71 {
72         *a = t->entry;
73 }
74
75 void update_tree_entry(struct tree_desc *desc)
76 {
77         const void *buf = desc->buffer;
78         const unsigned char *end = desc->entry.sha1 + 20;
79         unsigned long size = desc->size;
80         unsigned long len = end - (const unsigned char *)buf;
81
82         if (size < len)
83                 die("corrupt tree file");
84         buf = end;
85         size -= len;
86         desc->buffer = buf;
87         desc->size = size;
88         if (size)
89                 decode_tree_entry(desc, buf, size);
90 }
91
92 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
93 {
94         if (!desc->size)
95                 return 0;
96
97         *entry = desc->entry;
98         update_tree_entry(desc);
99         return 1;
100 }
101
102 void setup_traverse_info(struct traverse_info *info, const char *base)
103 {
104         int pathlen = strlen(base);
105         static struct traverse_info dummy;
106
107         memset(info, 0, sizeof(*info));
108         if (pathlen && base[pathlen-1] == '/')
109                 pathlen--;
110         info->pathlen = pathlen ? pathlen + 1 : 0;
111         info->name.path = base;
112         info->name.sha1 = (void *)(base + pathlen + 1);
113         if (pathlen)
114                 info->prev = &dummy;
115 }
116
117 char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
118 {
119         int len = tree_entry_len(n->path, n->sha1);
120         int pathlen = info->pathlen;
121
122         path[pathlen + len] = 0;
123         for (;;) {
124                 memcpy(path + pathlen, n->path, len);
125                 if (!pathlen)
126                         break;
127                 path[--pathlen] = '/';
128                 n = &info->name;
129                 len = tree_entry_len(n->path, n->sha1);
130                 info = info->prev;
131                 pathlen -= len;
132         }
133         return path;
134 }
135
136 struct tree_desc_skip {
137         struct tree_desc_skip *prev;
138         const void *ptr;
139 };
140
141 struct tree_desc_x {
142         struct tree_desc d;
143         struct tree_desc_skip *skip;
144 };
145
146 static int name_compare(const char *a, int a_len,
147                         const char *b, int b_len)
148 {
149         int len = (a_len < b_len) ? a_len : b_len;
150         int cmp = memcmp(a, b, len);
151         if (cmp)
152                 return cmp;
153         return (a_len - b_len);
154 }
155
156 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
157 {
158         /*
159          * The caller wants to pick *a* from a tree or nothing.
160          * We are looking at *b* in a tree.
161          *
162          * (0) If a and b are the same name, we are trivially happy.
163          *
164          * There are three possibilities where *a* could be hiding
165          * behind *b*.
166          *
167          * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
168          *                                matter what.
169          * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
170          * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
171          *
172          * Otherwise we know *a* won't appear in the tree without
173          * scanning further.
174          */
175
176         int cmp = name_compare(a, a_len, b, b_len);
177
178         /* Most common case first -- reading sync'd trees */
179         if (!cmp)
180                 return cmp;
181
182         if (0 < cmp) {
183                 /* a comes after b; it does not matter if it is case (3)
184                 if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
185                         return 1;
186                 */
187                 return 1; /* keep looking */
188         }
189
190         /* b comes after a; are we looking at case (2)? */
191         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
192                 return 1; /* keep looking */
193
194         return -1; /* a cannot appear in the tree */
195 }
196
197 /*
198  * From the extended tree_desc, extract the first name entry, while
199  * paying attention to the candidate "first" name.  Most importantly,
200  * when looking for an entry, if there are entries that sorts earlier
201  * in the tree object representation than that name, skip them and
202  * process the named entry first.  We will remember that we haven't
203  * processed the first entry yet, and in the later call skip the
204  * entry we processed early when update_extended_entry() is called.
205  *
206  * E.g. if the underlying tree object has these entries:
207  *
208  *    blob    "t-1"
209  *    blob    "t-2"
210  *    tree    "t"
211  *    blob    "t=1"
212  *
213  * and the "first" asks for "t", remember that we still need to
214  * process "t-1" and "t-2" but extract "t".  After processing the
215  * entry "t" from this call, the caller will let us know by calling
216  * update_extended_entry() that we can remember "t" has been processed
217  * already.
218  */
219
220 static void extended_entry_extract(struct tree_desc_x *t,
221                                    struct name_entry *a,
222                                    const char *first,
223                                    int first_len)
224 {
225         const char *path;
226         int len;
227         struct tree_desc probe;
228         struct tree_desc_skip *skip;
229
230         /*
231          * Extract the first entry from the tree_desc, but skip the
232          * ones that we already returned in earlier rounds.
233          */
234         while (1) {
235                 if (!t->d.size) {
236                         entry_clear(a);
237                         break; /* not found */
238                 }
239                 entry_extract(&t->d, a);
240                 for (skip = t->skip; skip; skip = skip->prev)
241                         if (a->path == skip->ptr)
242                                 break; /* found */
243                 if (!skip)
244                         break;
245                 /* We have processed this entry already. */
246                 update_tree_entry(&t->d);
247         }
248
249         if (!first || !a->path)
250                 return;
251
252         /*
253          * The caller wants "first" from this tree, or nothing.
254          */
255         path = a->path;
256         len = tree_entry_len(a->path, a->sha1);
257         switch (check_entry_match(first, first_len, path, len)) {
258         case -1:
259                 entry_clear(a);
260         case 0:
261                 return;
262         default:
263                 break;
264         }
265
266         /*
267          * We need to look-ahead -- we suspect that a subtree whose
268          * name is "first" may be hiding behind the current entry "path".
269          */
270         probe = t->d;
271         while (probe.size) {
272                 entry_extract(&probe, a);
273                 path = a->path;
274                 len = tree_entry_len(a->path, a->sha1);
275                 switch (check_entry_match(first, first_len, path, len)) {
276                 case -1:
277                         entry_clear(a);
278                 case 0:
279                         return;
280                 default:
281                         update_tree_entry(&probe);
282                         break;
283                 }
284                 /* keep looking */
285         }
286         entry_clear(a);
287 }
288
289 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
290 {
291         if (t->d.entry.path == a->path) {
292                 update_tree_entry(&t->d);
293         } else {
294                 /* we have returned this entry early */
295                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
296                 skip->ptr = a->path;
297                 skip->prev = t->skip;
298                 t->skip = skip;
299         }
300 }
301
302 static void free_extended_entry(struct tree_desc_x *t)
303 {
304         struct tree_desc_skip *p, *s;
305
306         for (s = t->skip; s; s = p) {
307                 p = s->prev;
308                 free(s);
309         }
310 }
311
312 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
313 {
314         int ret = 0;
315         int error = 0;
316         struct name_entry *entry = xmalloc(n*sizeof(*entry));
317         int i;
318         struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
319
320         for (i = 0; i < n; i++)
321                 tx[i].d = t[i];
322
323         for (;;) {
324                 unsigned long mask, dirmask;
325                 const char *first = NULL;
326                 int first_len = 0;
327                 struct name_entry *e;
328                 int len;
329
330                 for (i = 0; i < n; i++) {
331                         e = entry + i;
332                         extended_entry_extract(tx + i, e, NULL, 0);
333                 }
334
335                 /*
336                  * A tree may have "t-2" at the current location even
337                  * though it may have "t" that is a subtree behind it,
338                  * and another tree may return "t".  We want to grab
339                  * all "t" from all trees to match in such a case.
340                  */
341                 for (i = 0; i < n; i++) {
342                         e = entry + i;
343                         if (!e->path)
344                                 continue;
345                         len = tree_entry_len(e->path, e->sha1);
346                         if (!first) {
347                                 first = e->path;
348                                 first_len = len;
349                                 continue;
350                         }
351                         if (name_compare(e->path, len, first, first_len) < 0) {
352                                 first = e->path;
353                                 first_len = len;
354                         }
355                 }
356
357                 if (first) {
358                         for (i = 0; i < n; i++) {
359                                 e = entry + i;
360                                 extended_entry_extract(tx + i, e, first, first_len);
361                                 /* Cull the ones that are not the earliest */
362                                 if (!e->path)
363                                         continue;
364                                 len = tree_entry_len(e->path, e->sha1);
365                                 if (name_compare(e->path, len, first, first_len))
366                                         entry_clear(e);
367                         }
368                 }
369
370                 /* Now we have in entry[i] the earliest name from the trees */
371                 mask = 0;
372                 dirmask = 0;
373                 for (i = 0; i < n; i++) {
374                         if (!entry[i].path)
375                                 continue;
376                         mask |= 1ul << i;
377                         if (S_ISDIR(entry[i].mode))
378                                 dirmask |= 1ul << i;
379                 }
380                 if (!mask)
381                         break;
382                 ret = info->fn(n, mask, dirmask, entry, info);
383                 if (ret < 0) {
384                         error = ret;
385                         if (!info->show_all_errors)
386                                 break;
387                 }
388                 mask &= ret;
389                 ret = 0;
390                 for (i = 0; i < n; i++)
391                         if (mask & (1ul << i))
392                                 update_extended_entry(tx + i, entry + i);
393         }
394         free(entry);
395         for (i = 0; i < n; i++)
396                 free_extended_entry(tx + i);
397         free(tx);
398         return error;
399 }
400
401 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
402 {
403         int namelen = strlen(name);
404         while (t->size) {
405                 const char *entry;
406                 const unsigned char *sha1;
407                 int entrylen, cmp;
408
409                 sha1 = tree_entry_extract(t, &entry, mode);
410                 update_tree_entry(t);
411                 entrylen = tree_entry_len(entry, sha1);
412                 if (entrylen > namelen)
413                         continue;
414                 cmp = memcmp(name, entry, entrylen);
415                 if (cmp > 0)
416                         continue;
417                 if (cmp < 0)
418                         break;
419                 if (entrylen == namelen) {
420                         hashcpy(result, sha1);
421                         return 0;
422                 }
423                 if (name[entrylen] != '/')
424                         continue;
425                 if (!S_ISDIR(*mode))
426                         break;
427                 if (++entrylen == namelen) {
428                         hashcpy(result, sha1);
429                         return 0;
430                 }
431                 return get_tree_entry(sha1, name + entrylen, result, mode);
432         }
433         return -1;
434 }
435
436 int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
437 {
438         int retval;
439         void *tree;
440         unsigned long size;
441         struct tree_desc t;
442         unsigned char root[20];
443
444         tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
445         if (!tree)
446                 return -1;
447
448         if (name[0] == '\0') {
449                 hashcpy(sha1, root);
450                 free(tree);
451                 return 0;
452         }
453
454         init_tree_desc(&t, tree, size);
455         retval = find_tree_entry(&t, name, sha1, mode);
456         free(tree);
457         return retval;
458 }
459
460 static int match_entry(const struct name_entry *entry, int pathlen,
461                        const char *match, int matchlen,
462                        int *never_interesting)
463 {
464         int m = -1; /* signals that we haven't called strncmp() */
465
466         if (*never_interesting) {
467                 /*
468                  * We have not seen any match that sorts later
469                  * than the current path.
470                  */
471
472                 /*
473                  * Does match sort strictly earlier than path
474                  * with their common parts?
475                  */
476                 m = strncmp(match, entry->path,
477                             (matchlen < pathlen) ? matchlen : pathlen);
478                 if (m < 0)
479                         return 0;
480
481                 /*
482                  * If we come here even once, that means there is at
483                  * least one pathspec that would sort equal to or
484                  * later than the path we are currently looking at.
485                  * In other words, if we have never reached this point
486                  * after iterating all pathspecs, it means all
487                  * pathspecs are either outside of base, or inside the
488                  * base but sorts strictly earlier than the current
489                  * one.  In either case, they will never match the
490                  * subsequent entries.  In such a case, we initialized
491                  * the variable to -1 and that is what will be
492                  * returned, allowing the caller to terminate early.
493                  */
494                 *never_interesting = 0;
495         }
496
497         if (pathlen > matchlen)
498                 return 0;
499
500         if (matchlen > pathlen) {
501                 if (match[pathlen] != '/')
502                         return 0;
503                 if (!S_ISDIR(entry->mode))
504                         return 0;
505         }
506
507         if (m == -1)
508                 /*
509                  * we cheated and did not do strncmp(), so we do
510                  * that here.
511                  */
512                 m = strncmp(match, entry->path, pathlen);
513
514         /*
515          * If common part matched earlier then it is a hit,
516          * because we rejected the case where path is not a
517          * leading directory and is shorter than match.
518          */
519         if (!m)
520                 return 1;
521
522         return 0;
523 }
524
525 static int match_dir_prefix(const char *base, int baselen,
526                             const char *match, int matchlen)
527 {
528         if (strncmp(base, match, matchlen))
529                 return 0;
530
531         /*
532          * If the base is a subdirectory of a path which
533          * was specified, all of them are interesting.
534          */
535         if (!matchlen ||
536             base[matchlen] == '/' ||
537             match[matchlen - 1] == '/')
538                 return 1;
539
540         /* Just a random prefix match */
541         return 0;
542 }
543
544 /*
545  * Is a tree entry interesting given the pathspec we have?
546  *
547  * Pre-condition: either baselen == base_offset (i.e. empty path)
548  * or base[baselen-1] == '/' (i.e. with trailing slash).
549  *
550  * Return:
551  *  - 2 for "yes, and all subsequent entries will be"
552  *  - 1 for yes
553  *  - zero for no
554  *  - negative for "no, and no subsequent entries will be either"
555  */
556 int tree_entry_interesting(const struct name_entry *entry,
557                            struct strbuf *base, int base_offset,
558                            const struct pathspec *ps)
559 {
560         int i;
561         int pathlen, baselen = base->len - base_offset;
562         int never_interesting = ps->has_wildcard ? 0 : -1;
563
564         if (!ps->nr) {
565                 if (!ps->recursive || ps->max_depth == -1)
566                         return 2;
567                 return !!within_depth(base->buf + base_offset, baselen,
568                                       !!S_ISDIR(entry->mode),
569                                       ps->max_depth);
570         }
571
572         pathlen = tree_entry_len(entry->path, entry->sha1);
573
574         for (i = ps->nr - 1; i >= 0; i--) {
575                 const struct pathspec_item *item = ps->items+i;
576                 const char *match = item->match;
577                 const char *base_str = base->buf + base_offset;
578                 int matchlen = item->len;
579
580                 if (baselen >= matchlen) {
581                         /* If it doesn't match, move along... */
582                         if (!match_dir_prefix(base_str, baselen, match, matchlen))
583                                 goto match_wildcards;
584
585                         if (!ps->recursive || ps->max_depth == -1)
586                                 return 2;
587
588                         return !!within_depth(base_str + matchlen + 1,
589                                               baselen - matchlen - 1,
590                                               !!S_ISDIR(entry->mode),
591                                               ps->max_depth);
592                 }
593
594                 /* Does the base match? */
595                 if (!strncmp(base_str, match, baselen)) {
596                         if (match_entry(entry, pathlen,
597                                         match + baselen, matchlen - baselen,
598                                         &never_interesting))
599                                 return 1;
600
601                         if (ps->items[i].use_wildcard) {
602                                 if (!fnmatch(match + baselen, entry->path, 0))
603                                         return 1;
604
605                                 /*
606                                  * Match all directories. We'll try to
607                                  * match files later on.
608                                  */
609                                 if (ps->recursive && S_ISDIR(entry->mode))
610                                         return 1;
611                         }
612
613                         continue;
614                 }
615
616 match_wildcards:
617                 if (!ps->items[i].use_wildcard)
618                         continue;
619
620                 /*
621                  * Concatenate base and entry->path into one and do
622                  * fnmatch() on it.
623                  */
624
625                 strbuf_add(base, entry->path, pathlen);
626
627                 if (!fnmatch(match, base->buf + base_offset, 0)) {
628                         strbuf_setlen(base, base_offset + baselen);
629                         return 1;
630                 }
631                 strbuf_setlen(base, base_offset + baselen);
632
633                 /*
634                  * Match all directories. We'll try to match files
635                  * later on.
636                  */
637                 if (ps->recursive && S_ISDIR(entry->mode))
638                         return 1;
639         }
640         return never_interesting; /* No matches */
641 }