commit-graph: fix buffer read-overflow
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "unpack-trees.h"
4 #include "dir.h"
5 #include "object-store.h"
6 #include "tree.h"
7 #include "pathspec.h"
8
9 static const char *get_mode(const char *str, unsigned int *modep)
10 {
11         unsigned char c;
12         unsigned int mode = 0;
13
14         if (*str == ' ')
15                 return NULL;
16
17         while ((c = *str++) != ' ') {
18                 if (c < '0' || c > '7')
19                         return NULL;
20                 mode = (mode << 3) + (c - '0');
21         }
22         *modep = mode;
23         return str;
24 }
25
26 static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
27 {
28         const char *path;
29         unsigned int mode, len;
30         const unsigned hashsz = the_hash_algo->rawsz;
31
32         if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
33                 strbuf_addstr(err, _("too-short tree object"));
34                 return -1;
35         }
36
37         path = get_mode(buf, &mode);
38         if (!path) {
39                 strbuf_addstr(err, _("malformed mode in tree entry"));
40                 return -1;
41         }
42         if (!*path) {
43                 strbuf_addstr(err, _("empty filename in tree entry"));
44                 return -1;
45         }
46         len = strlen(path) + 1;
47
48         /* Initialize the descriptor entry */
49         desc->entry.path = path;
50         desc->entry.mode = canon_mode(mode);
51         desc->entry.oid  = (const struct object_id *)(path + len);
52
53         return 0;
54 }
55
56 static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
57 {
58         desc->buffer = buffer;
59         desc->size = size;
60         if (size)
61                 return decode_tree_entry(desc, buffer, size, err);
62         return 0;
63 }
64
65 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
66 {
67         struct strbuf err = STRBUF_INIT;
68         if (init_tree_desc_internal(desc, buffer, size, &err))
69                 die("%s", err.buf);
70         strbuf_release(&err);
71 }
72
73 int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
74 {
75         struct strbuf err = STRBUF_INIT;
76         int result = init_tree_desc_internal(desc, buffer, size, &err);
77         if (result)
78                 error("%s", err.buf);
79         strbuf_release(&err);
80         return result;
81 }
82
83 void *fill_tree_descriptor(struct tree_desc *desc, const struct object_id *oid)
84 {
85         unsigned long size = 0;
86         void *buf = NULL;
87
88         if (oid) {
89                 buf = read_object_with_reference(oid, tree_type, &size, NULL);
90                 if (!buf)
91                         die("unable to read tree %s", oid_to_hex(oid));
92         }
93         init_tree_desc(desc, buf, size);
94         return buf;
95 }
96
97 static void entry_clear(struct name_entry *a)
98 {
99         memset(a, 0, sizeof(*a));
100 }
101
102 static void entry_extract(struct tree_desc *t, struct name_entry *a)
103 {
104         *a = t->entry;
105 }
106
107 static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
108 {
109         const void *buf = desc->buffer;
110         const unsigned char *end = desc->entry.oid->hash + the_hash_algo->rawsz;
111         unsigned long size = desc->size;
112         unsigned long len = end - (const unsigned char *)buf;
113
114         if (size < len)
115                 die(_("too-short tree file"));
116         buf = end;
117         size -= len;
118         desc->buffer = buf;
119         desc->size = size;
120         if (size)
121                 return decode_tree_entry(desc, buf, size, err);
122         return 0;
123 }
124
125 void update_tree_entry(struct tree_desc *desc)
126 {
127         struct strbuf err = STRBUF_INIT;
128         if (update_tree_entry_internal(desc, &err))
129                 die("%s", err.buf);
130         strbuf_release(&err);
131 }
132
133 int update_tree_entry_gently(struct tree_desc *desc)
134 {
135         struct strbuf err = STRBUF_INIT;
136         if (update_tree_entry_internal(desc, &err)) {
137                 error("%s", err.buf);
138                 strbuf_release(&err);
139                 /* Stop processing this tree after error */
140                 desc->size = 0;
141                 return -1;
142         }
143         strbuf_release(&err);
144         return 0;
145 }
146
147 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
148 {
149         if (!desc->size)
150                 return 0;
151
152         *entry = desc->entry;
153         update_tree_entry(desc);
154         return 1;
155 }
156
157 int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
158 {
159         if (!desc->size)
160                 return 0;
161
162         *entry = desc->entry;
163         if (update_tree_entry_gently(desc))
164                 return 0;
165         return 1;
166 }
167
168 void setup_traverse_info(struct traverse_info *info, const char *base)
169 {
170         int pathlen = strlen(base);
171         static struct traverse_info dummy;
172
173         memset(info, 0, sizeof(*info));
174         if (pathlen && base[pathlen-1] == '/')
175                 pathlen--;
176         info->pathlen = pathlen ? pathlen + 1 : 0;
177         info->name.path = base;
178         info->name.oid = (void *)(base + pathlen + 1);
179         if (pathlen)
180                 info->prev = &dummy;
181 }
182
183 char *make_traverse_path(char *path, const struct traverse_info *info, const struct name_entry *n)
184 {
185         int len = tree_entry_len(n);
186         int pathlen = info->pathlen;
187
188         path[pathlen + len] = 0;
189         for (;;) {
190                 memcpy(path + pathlen, n->path, len);
191                 if (!pathlen)
192                         break;
193                 path[--pathlen] = '/';
194                 n = &info->name;
195                 len = tree_entry_len(n);
196                 info = info->prev;
197                 pathlen -= len;
198         }
199         return path;
200 }
201
202 struct tree_desc_skip {
203         struct tree_desc_skip *prev;
204         const void *ptr;
205 };
206
207 struct tree_desc_x {
208         struct tree_desc d;
209         struct tree_desc_skip *skip;
210 };
211
212 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
213 {
214         /*
215          * The caller wants to pick *a* from a tree or nothing.
216          * We are looking at *b* in a tree.
217          *
218          * (0) If a and b are the same name, we are trivially happy.
219          *
220          * There are three possibilities where *a* could be hiding
221          * behind *b*.
222          *
223          * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
224          *                                matter what.
225          * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
226          * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
227          *
228          * Otherwise we know *a* won't appear in the tree without
229          * scanning further.
230          */
231
232         int cmp = name_compare(a, a_len, b, b_len);
233
234         /* Most common case first -- reading sync'd trees */
235         if (!cmp)
236                 return cmp;
237
238         if (0 < cmp) {
239                 /* a comes after b; it does not matter if it is case (3)
240                 if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
241                         return 1;
242                 */
243                 return 1; /* keep looking */
244         }
245
246         /* b comes after a; are we looking at case (2)? */
247         if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
248                 return 1; /* keep looking */
249
250         return -1; /* a cannot appear in the tree */
251 }
252
253 /*
254  * From the extended tree_desc, extract the first name entry, while
255  * paying attention to the candidate "first" name.  Most importantly,
256  * when looking for an entry, if there are entries that sorts earlier
257  * in the tree object representation than that name, skip them and
258  * process the named entry first.  We will remember that we haven't
259  * processed the first entry yet, and in the later call skip the
260  * entry we processed early when update_extended_entry() is called.
261  *
262  * E.g. if the underlying tree object has these entries:
263  *
264  *    blob    "t-1"
265  *    blob    "t-2"
266  *    tree    "t"
267  *    blob    "t=1"
268  *
269  * and the "first" asks for "t", remember that we still need to
270  * process "t-1" and "t-2" but extract "t".  After processing the
271  * entry "t" from this call, the caller will let us know by calling
272  * update_extended_entry() that we can remember "t" has been processed
273  * already.
274  */
275
276 static void extended_entry_extract(struct tree_desc_x *t,
277                                    struct name_entry *a,
278                                    const char *first,
279                                    int first_len)
280 {
281         const char *path;
282         int len;
283         struct tree_desc probe;
284         struct tree_desc_skip *skip;
285
286         /*
287          * Extract the first entry from the tree_desc, but skip the
288          * ones that we already returned in earlier rounds.
289          */
290         while (1) {
291                 if (!t->d.size) {
292                         entry_clear(a);
293                         break; /* not found */
294                 }
295                 entry_extract(&t->d, a);
296                 for (skip = t->skip; skip; skip = skip->prev)
297                         if (a->path == skip->ptr)
298                                 break; /* found */
299                 if (!skip)
300                         break;
301                 /* We have processed this entry already. */
302                 update_tree_entry(&t->d);
303         }
304
305         if (!first || !a->path)
306                 return;
307
308         /*
309          * The caller wants "first" from this tree, or nothing.
310          */
311         path = a->path;
312         len = tree_entry_len(a);
313         switch (check_entry_match(first, first_len, path, len)) {
314         case -1:
315                 entry_clear(a);
316         case 0:
317                 return;
318         default:
319                 break;
320         }
321
322         /*
323          * We need to look-ahead -- we suspect that a subtree whose
324          * name is "first" may be hiding behind the current entry "path".
325          */
326         probe = t->d;
327         while (probe.size) {
328                 entry_extract(&probe, a);
329                 path = a->path;
330                 len = tree_entry_len(a);
331                 switch (check_entry_match(first, first_len, path, len)) {
332                 case -1:
333                         entry_clear(a);
334                 case 0:
335                         return;
336                 default:
337                         update_tree_entry(&probe);
338                         break;
339                 }
340                 /* keep looking */
341         }
342         entry_clear(a);
343 }
344
345 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
346 {
347         if (t->d.entry.path == a->path) {
348                 update_tree_entry(&t->d);
349         } else {
350                 /* we have returned this entry early */
351                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
352                 skip->ptr = a->path;
353                 skip->prev = t->skip;
354                 t->skip = skip;
355         }
356 }
357
358 static void free_extended_entry(struct tree_desc_x *t)
359 {
360         struct tree_desc_skip *p, *s;
361
362         for (s = t->skip; s; s = p) {
363                 p = s->prev;
364                 free(s);
365         }
366 }
367
368 static inline int prune_traversal(struct index_state *istate,
369                                   struct name_entry *e,
370                                   struct traverse_info *info,
371                                   struct strbuf *base,
372                                   int still_interesting)
373 {
374         if (!info->pathspec || still_interesting == 2)
375                 return 2;
376         if (still_interesting < 0)
377                 return still_interesting;
378         return tree_entry_interesting(istate, e, base,
379                                       0, info->pathspec);
380 }
381
382 int traverse_trees(struct index_state *istate,
383                    int n, struct tree_desc *t,
384                    struct traverse_info *info)
385 {
386         int error = 0;
387         struct name_entry *entry = xmalloc(n*sizeof(*entry));
388         int i;
389         struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
390         struct strbuf base = STRBUF_INIT;
391         int interesting = 1;
392         char *traverse_path;
393
394         for (i = 0; i < n; i++)
395                 tx[i].d = t[i];
396
397         if (info->prev) {
398                 strbuf_grow(&base, info->pathlen);
399                 make_traverse_path(base.buf, info->prev, &info->name);
400                 base.buf[info->pathlen-1] = '/';
401                 strbuf_setlen(&base, info->pathlen);
402                 traverse_path = xstrndup(base.buf, info->pathlen);
403         } else {
404                 traverse_path = xstrndup(info->name.path, info->pathlen);
405         }
406         info->traverse_path = traverse_path;
407         for (;;) {
408                 int trees_used;
409                 unsigned long mask, dirmask;
410                 const char *first = NULL;
411                 int first_len = 0;
412                 struct name_entry *e = NULL;
413                 int len;
414
415                 for (i = 0; i < n; i++) {
416                         e = entry + i;
417                         extended_entry_extract(tx + i, e, NULL, 0);
418                 }
419
420                 /*
421                  * A tree may have "t-2" at the current location even
422                  * though it may have "t" that is a subtree behind it,
423                  * and another tree may return "t".  We want to grab
424                  * all "t" from all trees to match in such a case.
425                  */
426                 for (i = 0; i < n; i++) {
427                         e = entry + i;
428                         if (!e->path)
429                                 continue;
430                         len = tree_entry_len(e);
431                         if (!first) {
432                                 first = e->path;
433                                 first_len = len;
434                                 continue;
435                         }
436                         if (name_compare(e->path, len, first, first_len) < 0) {
437                                 first = e->path;
438                                 first_len = len;
439                         }
440                 }
441
442                 if (first) {
443                         for (i = 0; i < n; i++) {
444                                 e = entry + i;
445                                 extended_entry_extract(tx + i, e, first, first_len);
446                                 /* Cull the ones that are not the earliest */
447                                 if (!e->path)
448                                         continue;
449                                 len = tree_entry_len(e);
450                                 if (name_compare(e->path, len, first, first_len))
451                                         entry_clear(e);
452                         }
453                 }
454
455                 /* Now we have in entry[i] the earliest name from the trees */
456                 mask = 0;
457                 dirmask = 0;
458                 for (i = 0; i < n; i++) {
459                         if (!entry[i].path)
460                                 continue;
461                         mask |= 1ul << i;
462                         if (S_ISDIR(entry[i].mode))
463                                 dirmask |= 1ul << i;
464                         e = &entry[i];
465                 }
466                 if (!mask)
467                         break;
468                 interesting = prune_traversal(istate, e, info, &base, interesting);
469                 if (interesting < 0)
470                         break;
471                 if (interesting) {
472                         trees_used = info->fn(n, mask, dirmask, entry, info);
473                         if (trees_used < 0) {
474                                 error = trees_used;
475                                 if (!info->show_all_errors)
476                                         break;
477                         }
478                         mask &= trees_used;
479                 }
480                 for (i = 0; i < n; i++)
481                         if (mask & (1ul << i))
482                                 update_extended_entry(tx + i, entry + i);
483         }
484         free(entry);
485         for (i = 0; i < n; i++)
486                 free_extended_entry(tx + i);
487         free(tx);
488         free(traverse_path);
489         info->traverse_path = NULL;
490         strbuf_release(&base);
491         return error;
492 }
493
494 struct dir_state {
495         void *tree;
496         unsigned long size;
497         struct object_id oid;
498 };
499
500 static int find_tree_entry(struct tree_desc *t, const char *name, struct object_id *result, unsigned *mode)
501 {
502         int namelen = strlen(name);
503         while (t->size) {
504                 const char *entry;
505                 const struct object_id *oid;
506                 int entrylen, cmp;
507
508                 oid = tree_entry_extract(t, &entry, mode);
509                 entrylen = tree_entry_len(&t->entry);
510                 update_tree_entry(t);
511                 if (entrylen > namelen)
512                         continue;
513                 cmp = memcmp(name, entry, entrylen);
514                 if (cmp > 0)
515                         continue;
516                 if (cmp < 0)
517                         break;
518                 if (entrylen == namelen) {
519                         oidcpy(result, oid);
520                         return 0;
521                 }
522                 if (name[entrylen] != '/')
523                         continue;
524                 if (!S_ISDIR(*mode))
525                         break;
526                 if (++entrylen == namelen) {
527                         oidcpy(result, oid);
528                         return 0;
529                 }
530                 return get_tree_entry(oid, name + entrylen, result, mode);
531         }
532         return -1;
533 }
534
535 int get_tree_entry(const struct object_id *tree_oid, const char *name, struct object_id *oid, unsigned *mode)
536 {
537         int retval;
538         void *tree;
539         unsigned long size;
540         struct object_id root;
541
542         tree = read_object_with_reference(tree_oid, tree_type, &size, &root);
543         if (!tree)
544                 return -1;
545
546         if (name[0] == '\0') {
547                 oidcpy(oid, &root);
548                 free(tree);
549                 return 0;
550         }
551
552         if (!size) {
553                 retval = -1;
554         } else {
555                 struct tree_desc t;
556                 init_tree_desc(&t, tree, size);
557                 retval = find_tree_entry(&t, name, oid, mode);
558         }
559         free(tree);
560         return retval;
561 }
562
563 /*
564  * This is Linux's built-in max for the number of symlinks to follow.
565  * That limit, of course, does not affect git, but it's a reasonable
566  * choice.
567  */
568 #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
569
570 /**
571  * Find a tree entry by following symlinks in tree_sha (which is
572  * assumed to be the root of the repository).  In the event that a
573  * symlink points outside the repository (e.g. a link to /foo or a
574  * root-level link to ../foo), the portion of the link which is
575  * outside the repository will be returned in result_path, and *mode
576  * will be set to 0.  It is assumed that result_path is uninitialized.
577  * If there are no symlinks, or the end result of the symlink chain
578  * points to an object inside the repository, result will be filled in
579  * with the sha1 of the found object, and *mode will hold the mode of
580  * the object.
581  *
582  * See the code for enum follow_symlink_result for a description of
583  * the return values.
584  */
585 enum follow_symlinks_result get_tree_entry_follow_symlinks(struct object_id *tree_oid, const char *name, struct object_id *result, struct strbuf *result_path, unsigned *mode)
586 {
587         int retval = MISSING_OBJECT;
588         struct dir_state *parents = NULL;
589         size_t parents_alloc = 0;
590         size_t i, parents_nr = 0;
591         struct object_id current_tree_oid;
592         struct strbuf namebuf = STRBUF_INIT;
593         struct tree_desc t;
594         int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
595
596         init_tree_desc(&t, NULL, 0UL);
597         strbuf_addstr(&namebuf, name);
598         oidcpy(&current_tree_oid, tree_oid);
599
600         while (1) {
601                 int find_result;
602                 char *first_slash;
603                 char *remainder = NULL;
604
605                 if (!t.buffer) {
606                         void *tree;
607                         struct object_id root;
608                         unsigned long size;
609                         tree = read_object_with_reference(&current_tree_oid,
610                                                           tree_type, &size,
611                                                           &root);
612                         if (!tree)
613                                 goto done;
614
615                         ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
616                         parents[parents_nr].tree = tree;
617                         parents[parents_nr].size = size;
618                         oidcpy(&parents[parents_nr].oid, &root);
619                         parents_nr++;
620
621                         if (namebuf.buf[0] == '\0') {
622                                 oidcpy(result, &root);
623                                 retval = FOUND;
624                                 goto done;
625                         }
626
627                         if (!size)
628                                 goto done;
629
630                         /* descend */
631                         init_tree_desc(&t, tree, size);
632                 }
633
634                 /* Handle symlinks to e.g. a//b by removing leading slashes */
635                 while (namebuf.buf[0] == '/') {
636                         strbuf_remove(&namebuf, 0, 1);
637                 }
638
639                 /* Split namebuf into a first component and a remainder */
640                 if ((first_slash = strchr(namebuf.buf, '/'))) {
641                         *first_slash = 0;
642                         remainder = first_slash + 1;
643                 }
644
645                 if (!strcmp(namebuf.buf, "..")) {
646                         struct dir_state *parent;
647                         /*
648                          * We could end up with .. in the namebuf if it
649                          * appears in a symlink.
650                          */
651
652                         if (parents_nr == 1) {
653                                 if (remainder)
654                                         *first_slash = '/';
655                                 strbuf_add(result_path, namebuf.buf,
656                                            namebuf.len);
657                                 *mode = 0;
658                                 retval = FOUND;
659                                 goto done;
660                         }
661                         parent = &parents[parents_nr - 1];
662                         free(parent->tree);
663                         parents_nr--;
664                         parent = &parents[parents_nr - 1];
665                         init_tree_desc(&t, parent->tree, parent->size);
666                         strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
667                         continue;
668                 }
669
670                 /* We could end up here via a symlink to dir/.. */
671                 if (namebuf.buf[0] == '\0') {
672                         oidcpy(result, &parents[parents_nr - 1].oid);
673                         retval = FOUND;
674                         goto done;
675                 }
676
677                 /* Look up the first (or only) path component in the tree. */
678                 find_result = find_tree_entry(&t, namebuf.buf,
679                                               &current_tree_oid, mode);
680                 if (find_result) {
681                         goto done;
682                 }
683
684                 if (S_ISDIR(*mode)) {
685                         if (!remainder) {
686                                 oidcpy(result, &current_tree_oid);
687                                 retval = FOUND;
688                                 goto done;
689                         }
690                         /* Descend the tree */
691                         t.buffer = NULL;
692                         strbuf_remove(&namebuf, 0,
693                                       1 + first_slash - namebuf.buf);
694                 } else if (S_ISREG(*mode)) {
695                         if (!remainder) {
696                                 oidcpy(result, &current_tree_oid);
697                                 retval = FOUND;
698                         } else {
699                                 retval = NOT_DIR;
700                         }
701                         goto done;
702                 } else if (S_ISLNK(*mode)) {
703                         /* Follow a symlink */
704                         unsigned long link_len;
705                         size_t len;
706                         char *contents, *contents_start;
707                         struct dir_state *parent;
708                         enum object_type type;
709
710                         if (follows_remaining-- == 0) {
711                                 /* Too many symlinks followed */
712                                 retval = SYMLINK_LOOP;
713                                 goto done;
714                         }
715
716                         /*
717                          * At this point, we have followed at a least
718                          * one symlink, so on error we need to report this.
719                          */
720                         retval = DANGLING_SYMLINK;
721
722                         contents = read_object_file(&current_tree_oid, &type,
723                                                     &link_len);
724
725                         if (!contents)
726                                 goto done;
727
728                         if (contents[0] == '/') {
729                                 strbuf_addstr(result_path, contents);
730                                 free(contents);
731                                 *mode = 0;
732                                 retval = FOUND;
733                                 goto done;
734                         }
735
736                         if (remainder)
737                                 len = first_slash - namebuf.buf;
738                         else
739                                 len = namebuf.len;
740
741                         contents_start = contents;
742
743                         parent = &parents[parents_nr - 1];
744                         init_tree_desc(&t, parent->tree, parent->size);
745                         strbuf_splice(&namebuf, 0, len,
746                                       contents_start, link_len);
747                         if (remainder)
748                                 namebuf.buf[link_len] = '/';
749                         free(contents);
750                 }
751         }
752 done:
753         for (i = 0; i < parents_nr; i++)
754                 free(parents[i].tree);
755         free(parents);
756
757         strbuf_release(&namebuf);
758         return retval;
759 }
760
761 static int match_entry(const struct pathspec_item *item,
762                        const struct name_entry *entry, int pathlen,
763                        const char *match, int matchlen,
764                        enum interesting *never_interesting)
765 {
766         int m = -1; /* signals that we haven't called strncmp() */
767
768         if (item->magic & PATHSPEC_ICASE)
769                 /*
770                  * "Never interesting" trick requires exact
771                  * matching. We could do something clever with inexact
772                  * matching, but it's trickier (and not to forget that
773                  * strcasecmp is locale-dependent, at least in
774                  * glibc). Just disable it for now. It can't be worse
775                  * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
776                  * pattern.
777                  */
778                 *never_interesting = entry_not_interesting;
779         else if (*never_interesting != entry_not_interesting) {
780                 /*
781                  * We have not seen any match that sorts later
782                  * than the current path.
783                  */
784
785                 /*
786                  * Does match sort strictly earlier than path
787                  * with their common parts?
788                  */
789                 m = strncmp(match, entry->path,
790                             (matchlen < pathlen) ? matchlen : pathlen);
791                 if (m < 0)
792                         return 0;
793
794                 /*
795                  * If we come here even once, that means there is at
796                  * least one pathspec that would sort equal to or
797                  * later than the path we are currently looking at.
798                  * In other words, if we have never reached this point
799                  * after iterating all pathspecs, it means all
800                  * pathspecs are either outside of base, or inside the
801                  * base but sorts strictly earlier than the current
802                  * one.  In either case, they will never match the
803                  * subsequent entries.  In such a case, we initialized
804                  * the variable to -1 and that is what will be
805                  * returned, allowing the caller to terminate early.
806                  */
807                 *never_interesting = entry_not_interesting;
808         }
809
810         if (pathlen > matchlen)
811                 return 0;
812
813         if (matchlen > pathlen) {
814                 if (match[pathlen] != '/')
815                         return 0;
816                 if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
817                         return 0;
818         }
819
820         if (m == -1)
821                 /*
822                  * we cheated and did not do strncmp(), so we do
823                  * that here.
824                  */
825                 m = ps_strncmp(item, match, entry->path, pathlen);
826
827         /*
828          * If common part matched earlier then it is a hit,
829          * because we rejected the case where path is not a
830          * leading directory and is shorter than match.
831          */
832         if (!m)
833                 /*
834                  * match_entry does not check if the prefix part is
835                  * matched case-sensitively. If the entry is a
836                  * directory and part of prefix, it'll be rematched
837                  * eventually by basecmp with special treatment for
838                  * the prefix.
839                  */
840                 return 1;
841
842         return 0;
843 }
844
845 /* :(icase)-aware string compare */
846 static int basecmp(const struct pathspec_item *item,
847                    const char *base, const char *match, int len)
848 {
849         if (item->magic & PATHSPEC_ICASE) {
850                 int ret, n = len > item->prefix ? item->prefix : len;
851                 ret = strncmp(base, match, n);
852                 if (ret)
853                         return ret;
854                 base += n;
855                 match += n;
856                 len -= n;
857         }
858         return ps_strncmp(item, base, match, len);
859 }
860
861 static int match_dir_prefix(const struct pathspec_item *item,
862                             const char *base,
863                             const char *match, int matchlen)
864 {
865         if (basecmp(item, base, match, matchlen))
866                 return 0;
867
868         /*
869          * If the base is a subdirectory of a path which
870          * was specified, all of them are interesting.
871          */
872         if (!matchlen ||
873             base[matchlen] == '/' ||
874             match[matchlen - 1] == '/')
875                 return 1;
876
877         /* Just a random prefix match */
878         return 0;
879 }
880
881 /*
882  * Perform matching on the leading non-wildcard part of
883  * pathspec. item->nowildcard_len must be greater than zero. Return
884  * non-zero if base is matched.
885  */
886 static int match_wildcard_base(const struct pathspec_item *item,
887                                const char *base, int baselen,
888                                int *matched)
889 {
890         const char *match = item->match;
891         /* the wildcard part is not considered in this function */
892         int matchlen = item->nowildcard_len;
893
894         if (baselen) {
895                 int dirlen;
896                 /*
897                  * Return early if base is longer than the
898                  * non-wildcard part but it does not match.
899                  */
900                 if (baselen >= matchlen) {
901                         *matched = matchlen;
902                         return !basecmp(item, base, match, matchlen);
903                 }
904
905                 dirlen = matchlen;
906                 while (dirlen && match[dirlen - 1] != '/')
907                         dirlen--;
908
909                 /*
910                  * Return early if base is shorter than the
911                  * non-wildcard part but it does not match. Note that
912                  * base ends with '/' so we are sure it really matches
913                  * directory
914                  */
915                 if (basecmp(item, base, match, baselen))
916                         return 0;
917                 *matched = baselen;
918         } else
919                 *matched = 0;
920         /*
921          * we could have checked entry against the non-wildcard part
922          * that is not in base and does similar never_interesting
923          * optimization as in match_entry. For now just be happy with
924          * base comparison.
925          */
926         return entry_interesting;
927 }
928
929 /*
930  * Is a tree entry interesting given the pathspec we have?
931  *
932  * Pre-condition: either baselen == base_offset (i.e. empty path)
933  * or base[baselen-1] == '/' (i.e. with trailing slash).
934  */
935 static enum interesting do_match(struct index_state *istate,
936                                  const struct name_entry *entry,
937                                  struct strbuf *base, int base_offset,
938                                  const struct pathspec *ps,
939                                  int exclude)
940 {
941         int i;
942         int pathlen, baselen = base->len - base_offset;
943         enum interesting never_interesting = ps->has_wildcard ?
944                 entry_not_interesting : all_entries_not_interesting;
945
946         GUARD_PATHSPEC(ps,
947                        PATHSPEC_FROMTOP |
948                        PATHSPEC_MAXDEPTH |
949                        PATHSPEC_LITERAL |
950                        PATHSPEC_GLOB |
951                        PATHSPEC_ICASE |
952                        PATHSPEC_EXCLUDE |
953                        PATHSPEC_ATTR);
954
955         if (!ps->nr) {
956                 if (!ps->recursive ||
957                     !(ps->magic & PATHSPEC_MAXDEPTH) ||
958                     ps->max_depth == -1)
959                         return all_entries_interesting;
960                 return within_depth(base->buf + base_offset, baselen,
961                                     !!S_ISDIR(entry->mode),
962                                     ps->max_depth) ?
963                         entry_interesting : entry_not_interesting;
964         }
965
966         pathlen = tree_entry_len(entry);
967
968         for (i = ps->nr - 1; i >= 0; i--) {
969                 const struct pathspec_item *item = ps->items+i;
970                 const char *match = item->match;
971                 const char *base_str = base->buf + base_offset;
972                 int matchlen = item->len, matched = 0;
973
974                 if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
975                     ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
976                         continue;
977
978                 if (baselen >= matchlen) {
979                         /* If it doesn't match, move along... */
980                         if (!match_dir_prefix(item, base_str, match, matchlen))
981                                 goto match_wildcards;
982
983                         if (!ps->recursive ||
984                             !(ps->magic & PATHSPEC_MAXDEPTH) ||
985                             ps->max_depth == -1) {
986                                 if (!item->attr_match_nr)
987                                         return all_entries_interesting;
988                                 else
989                                         goto interesting;
990                         }
991
992                         if (within_depth(base_str + matchlen + 1,
993                                          baselen - matchlen - 1,
994                                          !!S_ISDIR(entry->mode),
995                                          ps->max_depth))
996                                 goto interesting;
997                         else
998                                 return entry_not_interesting;
999                 }
1000
1001                 /* Either there must be no base, or the base must match. */
1002                 if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
1003                         if (match_entry(item, entry, pathlen,
1004                                         match + baselen, matchlen - baselen,
1005                                         &never_interesting))
1006                                 goto interesting;
1007
1008                         if (item->nowildcard_len < item->len) {
1009                                 if (!git_fnmatch(item, match + baselen, entry->path,
1010                                                  item->nowildcard_len - baselen))
1011                                         goto interesting;
1012
1013                                 /*
1014                                  * Match all directories. We'll try to
1015                                  * match files later on.
1016                                  */
1017                                 if (ps->recursive && S_ISDIR(entry->mode))
1018                                         return entry_interesting;
1019
1020                                 /*
1021                                  * When matching against submodules with
1022                                  * wildcard characters, ensure that the entry
1023                                  * at least matches up to the first wild
1024                                  * character.  More accurate matching can then
1025                                  * be performed in the submodule itself.
1026                                  */
1027                                 if (ps->recurse_submodules &&
1028                                     S_ISGITLINK(entry->mode) &&
1029                                     !ps_strncmp(item, match + baselen,
1030                                                 entry->path,
1031                                                 item->nowildcard_len - baselen))
1032                                         goto interesting;
1033                         }
1034
1035                         continue;
1036                 }
1037
1038 match_wildcards:
1039                 if (item->nowildcard_len == item->len)
1040                         continue;
1041
1042                 if (item->nowildcard_len &&
1043                     !match_wildcard_base(item, base_str, baselen, &matched))
1044                         continue;
1045
1046                 /*
1047                  * Concatenate base and entry->path into one and do
1048                  * fnmatch() on it.
1049                  *
1050                  * While we could avoid concatenation in certain cases
1051                  * [1], which saves a memcpy and potentially a
1052                  * realloc, it turns out not worth it. Measurement on
1053                  * linux-2.6 does not show any clear improvements,
1054                  * partly because of the nowildcard_len optimization
1055                  * in git_fnmatch(). Avoid micro-optimizations here.
1056                  *
1057                  * [1] if match_wildcard_base() says the base
1058                  * directory is already matched, we only need to match
1059                  * the rest, which is shorter so _in theory_ faster.
1060                  */
1061
1062                 strbuf_add(base, entry->path, pathlen);
1063
1064                 if (!git_fnmatch(item, match, base->buf + base_offset,
1065                                  item->nowildcard_len)) {
1066                         strbuf_setlen(base, base_offset + baselen);
1067                         goto interesting;
1068                 }
1069
1070                 /*
1071                  * When matching against submodules with
1072                  * wildcard characters, ensure that the entry
1073                  * at least matches up to the first wild
1074                  * character.  More accurate matching can then
1075                  * be performed in the submodule itself.
1076                  */
1077                 if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
1078                     !ps_strncmp(item, match, base->buf + base_offset,
1079                                 item->nowildcard_len)) {
1080                         strbuf_setlen(base, base_offset + baselen);
1081                         goto interesting;
1082                 }
1083
1084                 strbuf_setlen(base, base_offset + baselen);
1085
1086                 /*
1087                  * Match all directories. We'll try to match files
1088                  * later on.
1089                  * max_depth is ignored but we may consider support it
1090                  * in future, see
1091                  * https://public-inbox.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1092                  */
1093                 if (ps->recursive && S_ISDIR(entry->mode))
1094                         return entry_interesting;
1095                 continue;
1096 interesting:
1097                 if (item->attr_match_nr) {
1098                         int ret;
1099
1100                         /*
1101                          * Must not return all_entries_not_interesting
1102                          * prematurely. We do not know if all entries do not
1103                          * match some attributes with current attr API.
1104                          */
1105                         never_interesting = entry_not_interesting;
1106
1107                         /*
1108                          * Consider all directories interesting (because some
1109                          * of those files inside may match some attributes
1110                          * even though the parent dir does not)
1111                          *
1112                          * FIXME: attributes _can_ match directories and we
1113                          * can probably return all_entries_interesting or
1114                          * all_entries_not_interesting here if matched.
1115                          */
1116                         if (S_ISDIR(entry->mode))
1117                                 return entry_interesting;
1118
1119                         strbuf_add(base, entry->path, pathlen);
1120                         ret = match_pathspec_attrs(istate, base->buf + base_offset,
1121                                                    base->len - base_offset, item);
1122                         strbuf_setlen(base, base_offset + baselen);
1123                         if (!ret)
1124                                 continue;
1125                 }
1126                 return entry_interesting;
1127         }
1128         return never_interesting; /* No matches */
1129 }
1130
1131 /*
1132  * Is a tree entry interesting given the pathspec we have?
1133  *
1134  * Pre-condition: either baselen == base_offset (i.e. empty path)
1135  * or base[baselen-1] == '/' (i.e. with trailing slash).
1136  */
1137 enum interesting tree_entry_interesting(struct index_state *istate,
1138                                         const struct name_entry *entry,
1139                                         struct strbuf *base, int base_offset,
1140                                         const struct pathspec *ps)
1141 {
1142         enum interesting positive, negative;
1143         positive = do_match(istate, entry, base, base_offset, ps, 0);
1144
1145         /*
1146          * case | entry | positive | negative | result
1147          * -----+-------+----------+----------+-------
1148          *   1  |  file |   -1     |  -1..2   |  -1
1149          *   2  |  file |    0     |  -1..2   |   0
1150          *   3  |  file |    1     |   -1     |   1
1151          *   4  |  file |    1     |    0     |   1
1152          *   5  |  file |    1     |    1     |   0
1153          *   6  |  file |    1     |    2     |   0
1154          *   7  |  file |    2     |   -1     |   2
1155          *   8  |  file |    2     |    0     |   1
1156          *   9  |  file |    2     |    1     |   0
1157          *  10  |  file |    2     |    2     |  -1
1158          * -----+-------+----------+----------+-------
1159          *  11  |  dir  |   -1     |  -1..2   |  -1
1160          *  12  |  dir  |    0     |  -1..2   |   0
1161          *  13  |  dir  |    1     |   -1     |   1
1162          *  14  |  dir  |    1     |    0     |   1
1163          *  15  |  dir  |    1     |    1     |   1 (*)
1164          *  16  |  dir  |    1     |    2     |   0
1165          *  17  |  dir  |    2     |   -1     |   2
1166          *  18  |  dir  |    2     |    0     |   1
1167          *  19  |  dir  |    2     |    1     |   1 (*)
1168          *  20  |  dir  |    2     |    2     |  -1
1169          *
1170          * (*) An exclude pattern interested in a directory does not
1171          * necessarily mean it will exclude all of the directory. In
1172          * wildcard case, it can't decide until looking at individual
1173          * files inside. So don't write such directories off yet.
1174          */
1175
1176         if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1177             positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1178                 return positive;
1179
1180         negative = do_match(istate, entry, base, base_offset, ps, 1);
1181
1182         /* #8, #18 */
1183         if (positive == all_entries_interesting &&
1184             negative == entry_not_interesting)
1185                 return entry_interesting;
1186
1187         /* #3, #4, #7, #13, #14, #17 */
1188         if (negative <= entry_not_interesting)
1189                 return positive;
1190
1191         /* #15, #19 */
1192         if (S_ISDIR(entry->mode) &&
1193             positive >= entry_interesting &&
1194             negative == entry_interesting)
1195                 return entry_interesting;
1196
1197         if ((positive == entry_interesting &&
1198              negative >= entry_interesting) || /* #5, #6, #16 */
1199             (positive == all_entries_interesting &&
1200              negative == entry_interesting)) /* #9 */
1201                 return entry_not_interesting;
1202
1203         return all_entries_not_interesting; /* #10, #20 */
1204 }