tree_entry_interesting: do basedir compare on wildcard patterns when possible
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "unpack-trees.h"
4 #include "dir.h"
5 #include "tree.h"
6
7 static const char *get_mode(const char *str, unsigned int *modep)
8 {
9         unsigned char c;
10         unsigned int mode = 0;
11
12         if (*str == ' ')
13                 return NULL;
14
15         while ((c = *str++) != ' ') {
16                 if (c < '0' || c > '7')
17                         return NULL;
18                 mode = (mode << 3) + (c - '0');
19         }
20         *modep = mode;
21         return str;
22 }
23
24 static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
25 {
26         const char *path;
27         unsigned int mode, len;
28
29         if (size < 24 || buf[size - 21])
30                 die("corrupt tree file");
31
32         path = get_mode(buf, &mode);
33         if (!path || !*path)
34                 die("corrupt tree file");
35         len = strlen(path) + 1;
36
37         /* Initialize the descriptor entry */
38         desc->entry.path = path;
39         desc->entry.mode = mode;
40         desc->entry.sha1 = (const unsigned char *)(path + len);
41 }
42
43 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
44 {
45         desc->buffer = buffer;
46         desc->size = size;
47         if (size)
48                 decode_tree_entry(desc, buffer, size);
49 }
50
51 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
52 {
53         unsigned long size = 0;
54         void *buf = NULL;
55
56         if (sha1) {
57                 buf = read_object_with_reference(sha1, tree_type, &size, NULL);
58                 if (!buf)
59                         die("unable to read tree %s", sha1_to_hex(sha1));
60         }
61         init_tree_desc(desc, buf, size);
62         return buf;
63 }
64
65 static void entry_clear(struct name_entry *a)
66 {
67         memset(a, 0, sizeof(*a));
68 }
69
70 static void entry_extract(struct tree_desc *t, struct name_entry *a)
71 {
72         *a = t->entry;
73 }
74
75 void update_tree_entry(struct tree_desc *desc)
76 {
77         const void *buf = desc->buffer;
78         const unsigned char *end = desc->entry.sha1 + 20;
79         unsigned long size = desc->size;
80         unsigned long len = end - (const unsigned char *)buf;
81
82         if (size < len)
83                 die("corrupt tree file");
84         buf = end;
85         size -= len;
86         desc->buffer = buf;
87         desc->size = size;
88         if (size)
89                 decode_tree_entry(desc, buf, size);
90 }
91
92 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
93 {
94         if (!desc->size)
95                 return 0;
96
97         *entry = desc->entry;
98         update_tree_entry(desc);
99         return 1;
100 }
101
102 void setup_traverse_info(struct traverse_info *info, const char *base)
103 {
104         int pathlen = strlen(base);
105         static struct traverse_info dummy;
106
107         memset(info, 0, sizeof(*info));
108         if (pathlen && base[pathlen-1] == '/')
109                 pathlen--;
110         info->pathlen = pathlen ? pathlen + 1 : 0;
111         info->name.path = base;
112         info->name.sha1 = (void *)(base + pathlen + 1);
113         if (pathlen)
114                 info->prev = &dummy;
115 }
116
117 char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
118 {
119         int len = tree_entry_len(n);
120         int pathlen = info->pathlen;
121
122         path[pathlen + len] = 0;
123         for (;;) {
124                 memcpy(path + pathlen, n->path, len);
125                 if (!pathlen)
126                         break;
127                 path[--pathlen] = '/';
128                 n = &info->name;
129                 len = tree_entry_len(n);
130                 info = info->prev;
131                 pathlen -= len;
132         }
133         return path;
134 }
135
136 struct tree_desc_skip {
137         struct tree_desc_skip *prev;
138         const void *ptr;
139 };
140
141 struct tree_desc_x {
142         struct tree_desc d;
143         struct tree_desc_skip *skip;
144 };
145
146 static int name_compare(const char *a, int a_len,
147                         const char *b, int b_len)
148 {
149         int len = (a_len < b_len) ? a_len : b_len;
150         int cmp = memcmp(a, b, len);
151         if (cmp)
152                 return cmp;
153         return (a_len - b_len);
154 }
155
156 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
157 {
158         /*
159          * The caller wants to pick *a* from a tree or nothing.
160          * We are looking at *b* in a tree.
161          *
162          * (0) If a and b are the same name, we are trivially happy.
163          *
164          * There are three possibilities where *a* could be hiding
165          * behind *b*.
166          *
167          * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
168          *                                matter what.
169          * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
170          * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
171          *
172          * Otherwise we know *a* won't appear in the tree without
173          * scanning further.
174          */
175
176         int cmp = name_compare(a, a_len, b, b_len);
177
178         /* Most common case first -- reading sync'd trees */
179         if (!cmp)
180                 return cmp;
181
182         if (0 < cmp) {
183                 /* a comes after b; it does not matter if it is case (3)
184                 if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
185                         return 1;
186                 */
187                 return 1; /* keep looking */
188         }
189
190         /* b comes after a; are we looking at case (2)? */
191         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
192                 return 1; /* keep looking */
193
194         return -1; /* a cannot appear in the tree */
195 }
196
197 /*
198  * From the extended tree_desc, extract the first name entry, while
199  * paying attention to the candidate "first" name.  Most importantly,
200  * when looking for an entry, if there are entries that sorts earlier
201  * in the tree object representation than that name, skip them and
202  * process the named entry first.  We will remember that we haven't
203  * processed the first entry yet, and in the later call skip the
204  * entry we processed early when update_extended_entry() is called.
205  *
206  * E.g. if the underlying tree object has these entries:
207  *
208  *    blob    "t-1"
209  *    blob    "t-2"
210  *    tree    "t"
211  *    blob    "t=1"
212  *
213  * and the "first" asks for "t", remember that we still need to
214  * process "t-1" and "t-2" but extract "t".  After processing the
215  * entry "t" from this call, the caller will let us know by calling
216  * update_extended_entry() that we can remember "t" has been processed
217  * already.
218  */
219
220 static void extended_entry_extract(struct tree_desc_x *t,
221                                    struct name_entry *a,
222                                    const char *first,
223                                    int first_len)
224 {
225         const char *path;
226         int len;
227         struct tree_desc probe;
228         struct tree_desc_skip *skip;
229
230         /*
231          * Extract the first entry from the tree_desc, but skip the
232          * ones that we already returned in earlier rounds.
233          */
234         while (1) {
235                 if (!t->d.size) {
236                         entry_clear(a);
237                         break; /* not found */
238                 }
239                 entry_extract(&t->d, a);
240                 for (skip = t->skip; skip; skip = skip->prev)
241                         if (a->path == skip->ptr)
242                                 break; /* found */
243                 if (!skip)
244                         break;
245                 /* We have processed this entry already. */
246                 update_tree_entry(&t->d);
247         }
248
249         if (!first || !a->path)
250                 return;
251
252         /*
253          * The caller wants "first" from this tree, or nothing.
254          */
255         path = a->path;
256         len = tree_entry_len(a);
257         switch (check_entry_match(first, first_len, path, len)) {
258         case -1:
259                 entry_clear(a);
260         case 0:
261                 return;
262         default:
263                 break;
264         }
265
266         /*
267          * We need to look-ahead -- we suspect that a subtree whose
268          * name is "first" may be hiding behind the current entry "path".
269          */
270         probe = t->d;
271         while (probe.size) {
272                 entry_extract(&probe, a);
273                 path = a->path;
274                 len = tree_entry_len(a);
275                 switch (check_entry_match(first, first_len, path, len)) {
276                 case -1:
277                         entry_clear(a);
278                 case 0:
279                         return;
280                 default:
281                         update_tree_entry(&probe);
282                         break;
283                 }
284                 /* keep looking */
285         }
286         entry_clear(a);
287 }
288
289 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
290 {
291         if (t->d.entry.path == a->path) {
292                 update_tree_entry(&t->d);
293         } else {
294                 /* we have returned this entry early */
295                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
296                 skip->ptr = a->path;
297                 skip->prev = t->skip;
298                 t->skip = skip;
299         }
300 }
301
302 static void free_extended_entry(struct tree_desc_x *t)
303 {
304         struct tree_desc_skip *p, *s;
305
306         for (s = t->skip; s; s = p) {
307                 p = s->prev;
308                 free(s);
309         }
310 }
311
312 static inline int prune_traversal(struct name_entry *e,
313                                   struct traverse_info *info,
314                                   struct strbuf *base,
315                                   int still_interesting)
316 {
317         if (!info->pathspec || still_interesting == 2)
318                 return 2;
319         if (still_interesting < 0)
320                 return still_interesting;
321         return tree_entry_interesting(e, base, 0, info->pathspec);
322 }
323
324 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
325 {
326         int ret = 0;
327         int error = 0;
328         struct name_entry *entry = xmalloc(n*sizeof(*entry));
329         int i;
330         struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
331         struct strbuf base = STRBUF_INIT;
332         int interesting = 1;
333
334         for (i = 0; i < n; i++)
335                 tx[i].d = t[i];
336
337         if (info->prev) {
338                 strbuf_grow(&base, info->pathlen);
339                 make_traverse_path(base.buf, info->prev, &info->name);
340                 base.buf[info->pathlen-1] = '/';
341                 strbuf_setlen(&base, info->pathlen);
342         }
343         for (;;) {
344                 unsigned long mask, dirmask;
345                 const char *first = NULL;
346                 int first_len = 0;
347                 struct name_entry *e = NULL;
348                 int len;
349
350                 for (i = 0; i < n; i++) {
351                         e = entry + i;
352                         extended_entry_extract(tx + i, e, NULL, 0);
353                 }
354
355                 /*
356                  * A tree may have "t-2" at the current location even
357                  * though it may have "t" that is a subtree behind it,
358                  * and another tree may return "t".  We want to grab
359                  * all "t" from all trees to match in such a case.
360                  */
361                 for (i = 0; i < n; i++) {
362                         e = entry + i;
363                         if (!e->path)
364                                 continue;
365                         len = tree_entry_len(e);
366                         if (!first) {
367                                 first = e->path;
368                                 first_len = len;
369                                 continue;
370                         }
371                         if (name_compare(e->path, len, first, first_len) < 0) {
372                                 first = e->path;
373                                 first_len = len;
374                         }
375                 }
376
377                 if (first) {
378                         for (i = 0; i < n; i++) {
379                                 e = entry + i;
380                                 extended_entry_extract(tx + i, e, first, first_len);
381                                 /* Cull the ones that are not the earliest */
382                                 if (!e->path)
383                                         continue;
384                                 len = tree_entry_len(e);
385                                 if (name_compare(e->path, len, first, first_len))
386                                         entry_clear(e);
387                         }
388                 }
389
390                 /* Now we have in entry[i] the earliest name from the trees */
391                 mask = 0;
392                 dirmask = 0;
393                 for (i = 0; i < n; i++) {
394                         if (!entry[i].path)
395                                 continue;
396                         mask |= 1ul << i;
397                         if (S_ISDIR(entry[i].mode))
398                                 dirmask |= 1ul << i;
399                         e = &entry[i];
400                 }
401                 if (!mask)
402                         break;
403                 interesting = prune_traversal(e, info, &base, interesting);
404                 if (interesting < 0)
405                         break;
406                 if (interesting) {
407                         ret = info->fn(n, mask, dirmask, entry, info);
408                         if (ret < 0) {
409                                 error = ret;
410                                 if (!info->show_all_errors)
411                                         break;
412                         }
413                         mask &= ret;
414                 }
415                 ret = 0;
416                 for (i = 0; i < n; i++)
417                         if (mask & (1ul << i))
418                                 update_extended_entry(tx + i, entry + i);
419         }
420         free(entry);
421         for (i = 0; i < n; i++)
422                 free_extended_entry(tx + i);
423         free(tx);
424         strbuf_release(&base);
425         return error;
426 }
427
428 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
429 {
430         int namelen = strlen(name);
431         while (t->size) {
432                 const char *entry;
433                 const unsigned char *sha1;
434                 int entrylen, cmp;
435
436                 sha1 = tree_entry_extract(t, &entry, mode);
437                 entrylen = tree_entry_len(&t->entry);
438                 update_tree_entry(t);
439                 if (entrylen > namelen)
440                         continue;
441                 cmp = memcmp(name, entry, entrylen);
442                 if (cmp > 0)
443                         continue;
444                 if (cmp < 0)
445                         break;
446                 if (entrylen == namelen) {
447                         hashcpy(result, sha1);
448                         return 0;
449                 }
450                 if (name[entrylen] != '/')
451                         continue;
452                 if (!S_ISDIR(*mode))
453                         break;
454                 if (++entrylen == namelen) {
455                         hashcpy(result, sha1);
456                         return 0;
457                 }
458                 return get_tree_entry(sha1, name + entrylen, result, mode);
459         }
460         return -1;
461 }
462
463 int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
464 {
465         int retval;
466         void *tree;
467         unsigned long size;
468         unsigned char root[20];
469
470         tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
471         if (!tree)
472                 return -1;
473
474         if (name[0] == '\0') {
475                 hashcpy(sha1, root);
476                 free(tree);
477                 return 0;
478         }
479
480         if (!size) {
481                 retval = -1;
482         } else {
483                 struct tree_desc t;
484                 init_tree_desc(&t, tree, size);
485                 retval = find_tree_entry(&t, name, sha1, mode);
486         }
487         free(tree);
488         return retval;
489 }
490
491 static int match_entry(const struct name_entry *entry, int pathlen,
492                        const char *match, int matchlen,
493                        enum interesting *never_interesting)
494 {
495         int m = -1; /* signals that we haven't called strncmp() */
496
497         if (*never_interesting != entry_not_interesting) {
498                 /*
499                  * We have not seen any match that sorts later
500                  * than the current path.
501                  */
502
503                 /*
504                  * Does match sort strictly earlier than path
505                  * with their common parts?
506                  */
507                 m = strncmp(match, entry->path,
508                             (matchlen < pathlen) ? matchlen : pathlen);
509                 if (m < 0)
510                         return 0;
511
512                 /*
513                  * If we come here even once, that means there is at
514                  * least one pathspec that would sort equal to or
515                  * later than the path we are currently looking at.
516                  * In other words, if we have never reached this point
517                  * after iterating all pathspecs, it means all
518                  * pathspecs are either outside of base, or inside the
519                  * base but sorts strictly earlier than the current
520                  * one.  In either case, they will never match the
521                  * subsequent entries.  In such a case, we initialized
522                  * the variable to -1 and that is what will be
523                  * returned, allowing the caller to terminate early.
524                  */
525                 *never_interesting = entry_not_interesting;
526         }
527
528         if (pathlen > matchlen)
529                 return 0;
530
531         if (matchlen > pathlen) {
532                 if (match[pathlen] != '/')
533                         return 0;
534                 if (!S_ISDIR(entry->mode))
535                         return 0;
536         }
537
538         if (m == -1)
539                 /*
540                  * we cheated and did not do strncmp(), so we do
541                  * that here.
542                  */
543                 m = strncmp(match, entry->path, pathlen);
544
545         /*
546          * If common part matched earlier then it is a hit,
547          * because we rejected the case where path is not a
548          * leading directory and is shorter than match.
549          */
550         if (!m)
551                 return 1;
552
553         return 0;
554 }
555
556 static int match_dir_prefix(const char *base,
557                             const char *match, int matchlen)
558 {
559         if (strncmp(base, match, matchlen))
560                 return 0;
561
562         /*
563          * If the base is a subdirectory of a path which
564          * was specified, all of them are interesting.
565          */
566         if (!matchlen ||
567             base[matchlen] == '/' ||
568             match[matchlen - 1] == '/')
569                 return 1;
570
571         /* Just a random prefix match */
572         return 0;
573 }
574
575 /*
576  * Perform matching on the leading non-wildcard part of
577  * pathspec. item->nowildcard_len must be greater than zero. Return
578  * non-zero if base is matched.
579  */
580 static int match_wildcard_base(const struct pathspec_item *item,
581                                const char *base, int baselen,
582                                int *matched)
583 {
584         const char *match = item->match;
585         /* the wildcard part is not considered in this function */
586         int matchlen = item->nowildcard_len;
587
588         if (baselen) {
589                 int dirlen;
590                 /*
591                  * Return early if base is longer than the
592                  * non-wildcard part but it does not match.
593                  */
594                 if (baselen >= matchlen) {
595                         *matched = matchlen;
596                         return !strncmp(base, match, matchlen);
597                 }
598
599                 dirlen = matchlen;
600                 while (dirlen && match[dirlen - 1] != '/')
601                         dirlen--;
602
603                 /*
604                  * Return early if base is shorter than the
605                  * non-wildcard part but it does not match. Note that
606                  * base ends with '/' so we are sure it really matches
607                  * directory
608                  */
609                 if (strncmp(base, match, baselen))
610                         return 0;
611                 *matched = baselen;
612         } else
613                 *matched = 0;
614         /*
615          * we could have checked entry against the non-wildcard part
616          * that is not in base and does similar never_interesting
617          * optimization as in match_entry. For now just be happy with
618          * base comparison.
619          */
620         return entry_interesting;
621 }
622
623 /*
624  * Is a tree entry interesting given the pathspec we have?
625  *
626  * Pre-condition: either baselen == base_offset (i.e. empty path)
627  * or base[baselen-1] == '/' (i.e. with trailing slash).
628  */
629 enum interesting tree_entry_interesting(const struct name_entry *entry,
630                                         struct strbuf *base, int base_offset,
631                                         const struct pathspec *ps)
632 {
633         int i;
634         int pathlen, baselen = base->len - base_offset;
635         enum interesting never_interesting = ps->has_wildcard ?
636                 entry_not_interesting : all_entries_not_interesting;
637
638         if (!ps->nr) {
639                 if (!ps->recursive || ps->max_depth == -1)
640                         return all_entries_interesting;
641                 return within_depth(base->buf + base_offset, baselen,
642                                     !!S_ISDIR(entry->mode),
643                                     ps->max_depth) ?
644                         entry_interesting : entry_not_interesting;
645         }
646
647         pathlen = tree_entry_len(entry);
648
649         for (i = ps->nr - 1; i >= 0; i--) {
650                 const struct pathspec_item *item = ps->items+i;
651                 const char *match = item->match;
652                 const char *base_str = base->buf + base_offset;
653                 int matchlen = item->len, matched = 0;
654
655                 if (baselen >= matchlen) {
656                         /* If it doesn't match, move along... */
657                         if (!match_dir_prefix(base_str, match, matchlen))
658                                 goto match_wildcards;
659
660                         if (!ps->recursive || ps->max_depth == -1)
661                                 return all_entries_interesting;
662
663                         return within_depth(base_str + matchlen + 1,
664                                             baselen - matchlen - 1,
665                                             !!S_ISDIR(entry->mode),
666                                             ps->max_depth) ?
667                                 entry_interesting : entry_not_interesting;
668                 }
669
670                 /* Either there must be no base, or the base must match. */
671                 if (baselen == 0 || !strncmp(base_str, match, baselen)) {
672                         if (match_entry(entry, pathlen,
673                                         match + baselen, matchlen - baselen,
674                                         &never_interesting))
675                                 return entry_interesting;
676
677                         if (item->nowildcard_len < item->len) {
678                                 if (!git_fnmatch(match + baselen, entry->path,
679                                                  item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
680                                                  item->nowildcard_len - baselen))
681                                         return entry_interesting;
682
683                                 /*
684                                  * Match all directories. We'll try to
685                                  * match files later on.
686                                  */
687                                 if (ps->recursive && S_ISDIR(entry->mode))
688                                         return entry_interesting;
689                         }
690
691                         continue;
692                 }
693
694 match_wildcards:
695                 if (item->nowildcard_len == item->len)
696                         continue;
697
698                 if (item->nowildcard_len &&
699                     !match_wildcard_base(item, base_str, baselen, &matched))
700                         return entry_not_interesting;
701
702                 /*
703                  * Concatenate base and entry->path into one and do
704                  * fnmatch() on it.
705                  *
706                  * While we could avoid concatenation in certain cases
707                  * [1], which saves a memcpy and potentially a
708                  * realloc, it turns out not worth it. Measurement on
709                  * linux-2.6 does not show any clear improvements,
710                  * partly because of the nowildcard_len optimization
711                  * in git_fnmatch(). Avoid micro-optimizations here.
712                  *
713                  * [1] if match_wildcard_base() says the base
714                  * directory is already matched, we only need to match
715                  * the rest, which is shorter so _in theory_ faster.
716                  */
717
718                 strbuf_add(base, entry->path, pathlen);
719
720                 if (!git_fnmatch(match, base->buf + base_offset,
721                                  item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
722                                  item->nowildcard_len)) {
723                         strbuf_setlen(base, base_offset + baselen);
724                         return entry_interesting;
725                 }
726                 strbuf_setlen(base, base_offset + baselen);
727
728                 /*
729                  * Match all directories. We'll try to match files
730                  * later on.
731                  * max_depth is ignored but we may consider support it
732                  * in future, see
733                  * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
734                  */
735                 if (ps->recursive && S_ISDIR(entry->mode))
736                         return entry_interesting;
737         }
738         return never_interesting; /* No matches */
739 }