Merge branch 'bc/unuse-packfile'
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "unpack-trees.h"
4 #include "dir.h"
5 #include "tree.h"
6
7 static const char *get_mode(const char *str, unsigned int *modep)
8 {
9         unsigned char c;
10         unsigned int mode = 0;
11
12         if (*str == ' ')
13                 return NULL;
14
15         while ((c = *str++) != ' ') {
16                 if (c < '0' || c > '7')
17                         return NULL;
18                 mode = (mode << 3) + (c - '0');
19         }
20         *modep = mode;
21         return str;
22 }
23
24 static void decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size)
25 {
26         const char *path;
27         unsigned int mode, len;
28
29         if (size < 24 || buf[size - 21])
30                 die("corrupt tree file");
31
32         path = get_mode(buf, &mode);
33         if (!path || !*path)
34                 die("corrupt tree file");
35         len = strlen(path) + 1;
36
37         /* Initialize the descriptor entry */
38         desc->entry.path = path;
39         desc->entry.mode = mode;
40         desc->entry.sha1 = (const unsigned char *)(path + len);
41 }
42
43 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
44 {
45         desc->buffer = buffer;
46         desc->size = size;
47         if (size)
48                 decode_tree_entry(desc, buffer, size);
49 }
50
51 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
52 {
53         unsigned long size = 0;
54         void *buf = NULL;
55
56         if (sha1) {
57                 buf = read_object_with_reference(sha1, tree_type, &size, NULL);
58                 if (!buf)
59                         die("unable to read tree %s", sha1_to_hex(sha1));
60         }
61         init_tree_desc(desc, buf, size);
62         return buf;
63 }
64
65 static void entry_clear(struct name_entry *a)
66 {
67         memset(a, 0, sizeof(*a));
68 }
69
70 static void entry_extract(struct tree_desc *t, struct name_entry *a)
71 {
72         *a = t->entry;
73 }
74
75 void update_tree_entry(struct tree_desc *desc)
76 {
77         const void *buf = desc->buffer;
78         const unsigned char *end = desc->entry.sha1 + 20;
79         unsigned long size = desc->size;
80         unsigned long len = end - (const unsigned char *)buf;
81
82         if (size < len)
83                 die("corrupt tree file");
84         buf = end;
85         size -= len;
86         desc->buffer = buf;
87         desc->size = size;
88         if (size)
89                 decode_tree_entry(desc, buf, size);
90 }
91
92 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
93 {
94         if (!desc->size)
95                 return 0;
96
97         *entry = desc->entry;
98         update_tree_entry(desc);
99         return 1;
100 }
101
102 void setup_traverse_info(struct traverse_info *info, const char *base)
103 {
104         int pathlen = strlen(base);
105         static struct traverse_info dummy;
106
107         memset(info, 0, sizeof(*info));
108         if (pathlen && base[pathlen-1] == '/')
109                 pathlen--;
110         info->pathlen = pathlen ? pathlen + 1 : 0;
111         info->name.path = base;
112         info->name.sha1 = (void *)(base + pathlen + 1);
113         if (pathlen)
114                 info->prev = &dummy;
115 }
116
117 char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
118 {
119         int len = tree_entry_len(n);
120         int pathlen = info->pathlen;
121
122         path[pathlen + len] = 0;
123         for (;;) {
124                 memcpy(path + pathlen, n->path, len);
125                 if (!pathlen)
126                         break;
127                 path[--pathlen] = '/';
128                 n = &info->name;
129                 len = tree_entry_len(n);
130                 info = info->prev;
131                 pathlen -= len;
132         }
133         return path;
134 }
135
136 struct tree_desc_skip {
137         struct tree_desc_skip *prev;
138         const void *ptr;
139 };
140
141 struct tree_desc_x {
142         struct tree_desc d;
143         struct tree_desc_skip *skip;
144 };
145
146 static int name_compare(const char *a, int a_len,
147                         const char *b, int b_len)
148 {
149         int len = (a_len < b_len) ? a_len : b_len;
150         int cmp = memcmp(a, b, len);
151         if (cmp)
152                 return cmp;
153         return (a_len - b_len);
154 }
155
156 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
157 {
158         /*
159          * The caller wants to pick *a* from a tree or nothing.
160          * We are looking at *b* in a tree.
161          *
162          * (0) If a and b are the same name, we are trivially happy.
163          *
164          * There are three possibilities where *a* could be hiding
165          * behind *b*.
166          *
167          * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
168          *                                matter what.
169          * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
170          * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
171          *
172          * Otherwise we know *a* won't appear in the tree without
173          * scanning further.
174          */
175
176         int cmp = name_compare(a, a_len, b, b_len);
177
178         /* Most common case first -- reading sync'd trees */
179         if (!cmp)
180                 return cmp;
181
182         if (0 < cmp) {
183                 /* a comes after b; it does not matter if it is case (3)
184                 if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
185                         return 1;
186                 */
187                 return 1; /* keep looking */
188         }
189
190         /* b comes after a; are we looking at case (2)? */
191         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
192                 return 1; /* keep looking */
193
194         return -1; /* a cannot appear in the tree */
195 }
196
197 /*
198  * From the extended tree_desc, extract the first name entry, while
199  * paying attention to the candidate "first" name.  Most importantly,
200  * when looking for an entry, if there are entries that sorts earlier
201  * in the tree object representation than that name, skip them and
202  * process the named entry first.  We will remember that we haven't
203  * processed the first entry yet, and in the later call skip the
204  * entry we processed early when update_extended_entry() is called.
205  *
206  * E.g. if the underlying tree object has these entries:
207  *
208  *    blob    "t-1"
209  *    blob    "t-2"
210  *    tree    "t"
211  *    blob    "t=1"
212  *
213  * and the "first" asks for "t", remember that we still need to
214  * process "t-1" and "t-2" but extract "t".  After processing the
215  * entry "t" from this call, the caller will let us know by calling
216  * update_extended_entry() that we can remember "t" has been processed
217  * already.
218  */
219
220 static void extended_entry_extract(struct tree_desc_x *t,
221                                    struct name_entry *a,
222                                    const char *first,
223                                    int first_len)
224 {
225         const char *path;
226         int len;
227         struct tree_desc probe;
228         struct tree_desc_skip *skip;
229
230         /*
231          * Extract the first entry from the tree_desc, but skip the
232          * ones that we already returned in earlier rounds.
233          */
234         while (1) {
235                 if (!t->d.size) {
236                         entry_clear(a);
237                         break; /* not found */
238                 }
239                 entry_extract(&t->d, a);
240                 for (skip = t->skip; skip; skip = skip->prev)
241                         if (a->path == skip->ptr)
242                                 break; /* found */
243                 if (!skip)
244                         break;
245                 /* We have processed this entry already. */
246                 update_tree_entry(&t->d);
247         }
248
249         if (!first || !a->path)
250                 return;
251
252         /*
253          * The caller wants "first" from this tree, or nothing.
254          */
255         path = a->path;
256         len = tree_entry_len(a);
257         switch (check_entry_match(first, first_len, path, len)) {
258         case -1:
259                 entry_clear(a);
260         case 0:
261                 return;
262         default:
263                 break;
264         }
265
266         /*
267          * We need to look-ahead -- we suspect that a subtree whose
268          * name is "first" may be hiding behind the current entry "path".
269          */
270         probe = t->d;
271         while (probe.size) {
272                 entry_extract(&probe, a);
273                 path = a->path;
274                 len = tree_entry_len(a);
275                 switch (check_entry_match(first, first_len, path, len)) {
276                 case -1:
277                         entry_clear(a);
278                 case 0:
279                         return;
280                 default:
281                         update_tree_entry(&probe);
282                         break;
283                 }
284                 /* keep looking */
285         }
286         entry_clear(a);
287 }
288
289 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
290 {
291         if (t->d.entry.path == a->path) {
292                 update_tree_entry(&t->d);
293         } else {
294                 /* we have returned this entry early */
295                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
296                 skip->ptr = a->path;
297                 skip->prev = t->skip;
298                 t->skip = skip;
299         }
300 }
301
302 static void free_extended_entry(struct tree_desc_x *t)
303 {
304         struct tree_desc_skip *p, *s;
305
306         for (s = t->skip; s; s = p) {
307                 p = s->prev;
308                 free(s);
309         }
310 }
311
312 static inline int prune_traversal(struct name_entry *e,
313                                   struct traverse_info *info,
314                                   struct strbuf *base,
315                                   int still_interesting)
316 {
317         if (!info->pathspec || still_interesting == 2)
318                 return 2;
319         if (still_interesting < 0)
320                 return still_interesting;
321         return tree_entry_interesting(e, base, 0, info->pathspec);
322 }
323
324 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
325 {
326         int error = 0;
327         struct name_entry *entry = xmalloc(n*sizeof(*entry));
328         int i;
329         struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
330         struct strbuf base = STRBUF_INIT;
331         int interesting = 1;
332
333         for (i = 0; i < n; i++)
334                 tx[i].d = t[i];
335
336         if (info->prev) {
337                 strbuf_grow(&base, info->pathlen);
338                 make_traverse_path(base.buf, info->prev, &info->name);
339                 base.buf[info->pathlen-1] = '/';
340                 strbuf_setlen(&base, info->pathlen);
341         }
342         for (;;) {
343                 int trees_used;
344                 unsigned long mask, dirmask;
345                 const char *first = NULL;
346                 int first_len = 0;
347                 struct name_entry *e = NULL;
348                 int len;
349
350                 for (i = 0; i < n; i++) {
351                         e = entry + i;
352                         extended_entry_extract(tx + i, e, NULL, 0);
353                 }
354
355                 /*
356                  * A tree may have "t-2" at the current location even
357                  * though it may have "t" that is a subtree behind it,
358                  * and another tree may return "t".  We want to grab
359                  * all "t" from all trees to match in such a case.
360                  */
361                 for (i = 0; i < n; i++) {
362                         e = entry + i;
363                         if (!e->path)
364                                 continue;
365                         len = tree_entry_len(e);
366                         if (!first) {
367                                 first = e->path;
368                                 first_len = len;
369                                 continue;
370                         }
371                         if (name_compare(e->path, len, first, first_len) < 0) {
372                                 first = e->path;
373                                 first_len = len;
374                         }
375                 }
376
377                 if (first) {
378                         for (i = 0; i < n; i++) {
379                                 e = entry + i;
380                                 extended_entry_extract(tx + i, e, first, first_len);
381                                 /* Cull the ones that are not the earliest */
382                                 if (!e->path)
383                                         continue;
384                                 len = tree_entry_len(e);
385                                 if (name_compare(e->path, len, first, first_len))
386                                         entry_clear(e);
387                         }
388                 }
389
390                 /* Now we have in entry[i] the earliest name from the trees */
391                 mask = 0;
392                 dirmask = 0;
393                 for (i = 0; i < n; i++) {
394                         if (!entry[i].path)
395                                 continue;
396                         mask |= 1ul << i;
397                         if (S_ISDIR(entry[i].mode))
398                                 dirmask |= 1ul << i;
399                         e = &entry[i];
400                 }
401                 if (!mask)
402                         break;
403                 interesting = prune_traversal(e, info, &base, interesting);
404                 if (interesting < 0)
405                         break;
406                 if (interesting) {
407                         trees_used = info->fn(n, mask, dirmask, entry, info);
408                         if (trees_used < 0) {
409                                 error = trees_used;
410                                 if (!info->show_all_errors)
411                                         break;
412                         }
413                         mask &= trees_used;
414                 }
415                 for (i = 0; i < n; i++)
416                         if (mask & (1ul << i))
417                                 update_extended_entry(tx + i, entry + i);
418         }
419         free(entry);
420         for (i = 0; i < n; i++)
421                 free_extended_entry(tx + i);
422         free(tx);
423         strbuf_release(&base);
424         return error;
425 }
426
427 static int find_tree_entry(struct tree_desc *t, const char *name, unsigned char *result, unsigned *mode)
428 {
429         int namelen = strlen(name);
430         while (t->size) {
431                 const char *entry;
432                 const unsigned char *sha1;
433                 int entrylen, cmp;
434
435                 sha1 = tree_entry_extract(t, &entry, mode);
436                 entrylen = tree_entry_len(&t->entry);
437                 update_tree_entry(t);
438                 if (entrylen > namelen)
439                         continue;
440                 cmp = memcmp(name, entry, entrylen);
441                 if (cmp > 0)
442                         continue;
443                 if (cmp < 0)
444                         break;
445                 if (entrylen == namelen) {
446                         hashcpy(result, sha1);
447                         return 0;
448                 }
449                 if (name[entrylen] != '/')
450                         continue;
451                 if (!S_ISDIR(*mode))
452                         break;
453                 if (++entrylen == namelen) {
454                         hashcpy(result, sha1);
455                         return 0;
456                 }
457                 return get_tree_entry(sha1, name + entrylen, result, mode);
458         }
459         return -1;
460 }
461
462 int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned char *sha1, unsigned *mode)
463 {
464         int retval;
465         void *tree;
466         unsigned long size;
467         unsigned char root[20];
468
469         tree = read_object_with_reference(tree_sha1, tree_type, &size, root);
470         if (!tree)
471                 return -1;
472
473         if (name[0] == '\0') {
474                 hashcpy(sha1, root);
475                 free(tree);
476                 return 0;
477         }
478
479         if (!size) {
480                 retval = -1;
481         } else {
482                 struct tree_desc t;
483                 init_tree_desc(&t, tree, size);
484                 retval = find_tree_entry(&t, name, sha1, mode);
485         }
486         free(tree);
487         return retval;
488 }
489
490 static int match_entry(const struct name_entry *entry, int pathlen,
491                        const char *match, int matchlen,
492                        enum interesting *never_interesting)
493 {
494         int m = -1; /* signals that we haven't called strncmp() */
495
496         if (*never_interesting != entry_not_interesting) {
497                 /*
498                  * We have not seen any match that sorts later
499                  * than the current path.
500                  */
501
502                 /*
503                  * Does match sort strictly earlier than path
504                  * with their common parts?
505                  */
506                 m = strncmp(match, entry->path,
507                             (matchlen < pathlen) ? matchlen : pathlen);
508                 if (m < 0)
509                         return 0;
510
511                 /*
512                  * If we come here even once, that means there is at
513                  * least one pathspec that would sort equal to or
514                  * later than the path we are currently looking at.
515                  * In other words, if we have never reached this point
516                  * after iterating all pathspecs, it means all
517                  * pathspecs are either outside of base, or inside the
518                  * base but sorts strictly earlier than the current
519                  * one.  In either case, they will never match the
520                  * subsequent entries.  In such a case, we initialized
521                  * the variable to -1 and that is what will be
522                  * returned, allowing the caller to terminate early.
523                  */
524                 *never_interesting = entry_not_interesting;
525         }
526
527         if (pathlen > matchlen)
528                 return 0;
529
530         if (matchlen > pathlen) {
531                 if (match[pathlen] != '/')
532                         return 0;
533                 if (!S_ISDIR(entry->mode))
534                         return 0;
535         }
536
537         if (m == -1)
538                 /*
539                  * we cheated and did not do strncmp(), so we do
540                  * that here.
541                  */
542                 m = strncmp(match, entry->path, pathlen);
543
544         /*
545          * If common part matched earlier then it is a hit,
546          * because we rejected the case where path is not a
547          * leading directory and is shorter than match.
548          */
549         if (!m)
550                 return 1;
551
552         return 0;
553 }
554
555 static int match_dir_prefix(const char *base,
556                             const char *match, int matchlen)
557 {
558         if (strncmp(base, match, matchlen))
559                 return 0;
560
561         /*
562          * If the base is a subdirectory of a path which
563          * was specified, all of them are interesting.
564          */
565         if (!matchlen ||
566             base[matchlen] == '/' ||
567             match[matchlen - 1] == '/')
568                 return 1;
569
570         /* Just a random prefix match */
571         return 0;
572 }
573
574 /*
575  * Perform matching on the leading non-wildcard part of
576  * pathspec. item->nowildcard_len must be greater than zero. Return
577  * non-zero if base is matched.
578  */
579 static int match_wildcard_base(const struct pathspec_item *item,
580                                const char *base, int baselen,
581                                int *matched)
582 {
583         const char *match = item->match;
584         /* the wildcard part is not considered in this function */
585         int matchlen = item->nowildcard_len;
586
587         if (baselen) {
588                 int dirlen;
589                 /*
590                  * Return early if base is longer than the
591                  * non-wildcard part but it does not match.
592                  */
593                 if (baselen >= matchlen) {
594                         *matched = matchlen;
595                         return !strncmp(base, match, matchlen);
596                 }
597
598                 dirlen = matchlen;
599                 while (dirlen && match[dirlen - 1] != '/')
600                         dirlen--;
601
602                 /*
603                  * Return early if base is shorter than the
604                  * non-wildcard part but it does not match. Note that
605                  * base ends with '/' so we are sure it really matches
606                  * directory
607                  */
608                 if (strncmp(base, match, baselen))
609                         return 0;
610                 *matched = baselen;
611         } else
612                 *matched = 0;
613         /*
614          * we could have checked entry against the non-wildcard part
615          * that is not in base and does similar never_interesting
616          * optimization as in match_entry. For now just be happy with
617          * base comparison.
618          */
619         return entry_interesting;
620 }
621
622 /*
623  * Is a tree entry interesting given the pathspec we have?
624  *
625  * Pre-condition: either baselen == base_offset (i.e. empty path)
626  * or base[baselen-1] == '/' (i.e. with trailing slash).
627  */
628 enum interesting tree_entry_interesting(const struct name_entry *entry,
629                                         struct strbuf *base, int base_offset,
630                                         const struct pathspec *ps)
631 {
632         int i;
633         int pathlen, baselen = base->len - base_offset;
634         enum interesting never_interesting = ps->has_wildcard ?
635                 entry_not_interesting : all_entries_not_interesting;
636
637         if (!ps->nr) {
638                 if (!ps->recursive || ps->max_depth == -1)
639                         return all_entries_interesting;
640                 return within_depth(base->buf + base_offset, baselen,
641                                     !!S_ISDIR(entry->mode),
642                                     ps->max_depth) ?
643                         entry_interesting : entry_not_interesting;
644         }
645
646         pathlen = tree_entry_len(entry);
647
648         for (i = ps->nr - 1; i >= 0; i--) {
649                 const struct pathspec_item *item = ps->items+i;
650                 const char *match = item->match;
651                 const char *base_str = base->buf + base_offset;
652                 int matchlen = item->len, matched = 0;
653
654                 if (baselen >= matchlen) {
655                         /* If it doesn't match, move along... */
656                         if (!match_dir_prefix(base_str, match, matchlen))
657                                 goto match_wildcards;
658
659                         if (!ps->recursive || ps->max_depth == -1)
660                                 return all_entries_interesting;
661
662                         return within_depth(base_str + matchlen + 1,
663                                             baselen - matchlen - 1,
664                                             !!S_ISDIR(entry->mode),
665                                             ps->max_depth) ?
666                                 entry_interesting : entry_not_interesting;
667                 }
668
669                 /* Either there must be no base, or the base must match. */
670                 if (baselen == 0 || !strncmp(base_str, match, baselen)) {
671                         if (match_entry(entry, pathlen,
672                                         match + baselen, matchlen - baselen,
673                                         &never_interesting))
674                                 return entry_interesting;
675
676                         if (item->nowildcard_len < item->len) {
677                                 if (!git_fnmatch(match + baselen, entry->path,
678                                                  item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
679                                                  item->nowildcard_len - baselen))
680                                         return entry_interesting;
681
682                                 /*
683                                  * Match all directories. We'll try to
684                                  * match files later on.
685                                  */
686                                 if (ps->recursive && S_ISDIR(entry->mode))
687                                         return entry_interesting;
688                         }
689
690                         continue;
691                 }
692
693 match_wildcards:
694                 if (item->nowildcard_len == item->len)
695                         continue;
696
697                 if (item->nowildcard_len &&
698                     !match_wildcard_base(item, base_str, baselen, &matched))
699                         return entry_not_interesting;
700
701                 /*
702                  * Concatenate base and entry->path into one and do
703                  * fnmatch() on it.
704                  *
705                  * While we could avoid concatenation in certain cases
706                  * [1], which saves a memcpy and potentially a
707                  * realloc, it turns out not worth it. Measurement on
708                  * linux-2.6 does not show any clear improvements,
709                  * partly because of the nowildcard_len optimization
710                  * in git_fnmatch(). Avoid micro-optimizations here.
711                  *
712                  * [1] if match_wildcard_base() says the base
713                  * directory is already matched, we only need to match
714                  * the rest, which is shorter so _in theory_ faster.
715                  */
716
717                 strbuf_add(base, entry->path, pathlen);
718
719                 if (!git_fnmatch(match, base->buf + base_offset,
720                                  item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
721                                  item->nowildcard_len)) {
722                         strbuf_setlen(base, base_offset + baselen);
723                         return entry_interesting;
724                 }
725                 strbuf_setlen(base, base_offset + baselen);
726
727                 /*
728                  * Match all directories. We'll try to match files
729                  * later on.
730                  * max_depth is ignored but we may consider support it
731                  * in future, see
732                  * http://thread.gmane.org/gmane.comp.version-control.git/163757/focus=163840
733                  */
734                 if (ps->recursive && S_ISDIR(entry->mode))
735                         return entry_interesting;
736         }
737         return never_interesting; /* No matches */
738 }