wt-status: introduce wt_status_state_free_buffers()
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "dir.h"
4 #include "object-store.h"
5 #include "tree.h"
6 #include "pathspec.h"
7
/*
 * Parse the octal mode field that starts a tree entry.
 *
 * Octal digits are consumed from "str" up to the first space.  On
 * success the parsed value is stored in *modep and a pointer to the
 * byte just past that space is returned.
 *
 * Returns NULL, leaving *modep untouched, when the field is empty
 * (the very first byte is a space) or when any byte before the space
 * is not an octal digit; a NUL byte is rejected by that same test.
 */
static const char *get_mode(const char *str, unsigned int *modep)
{
	unsigned int value = 0;
	const char *p = str;

	/* There must be at least one digit in front of the space. */
	if (*p == ' ')
		return NULL;

	for (; *p != ' '; p++) {
		unsigned char digit = *p;

		if (digit < '0' || digit > '7')
			return NULL;
		value = value * 8 + (digit - '0');
	}

	*modep = value;
	/* Step over the separating space. */
	return p + 1;
}
24
25 static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
26 {
27         const char *path;
28         unsigned int mode, len;
29         const unsigned hashsz = the_hash_algo->rawsz;
30
31         if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
32                 strbuf_addstr(err, _("too-short tree object"));
33                 return -1;
34         }
35
36         path = get_mode(buf, &mode);
37         if (!path) {
38                 strbuf_addstr(err, _("malformed mode in tree entry"));
39                 return -1;
40         }
41         if (!*path) {
42                 strbuf_addstr(err, _("empty filename in tree entry"));
43                 return -1;
44         }
45         len = strlen(path) + 1;
46
47         /* Initialize the descriptor entry */
48         desc->entry.path = path;
49         desc->entry.mode = canon_mode(mode);
50         desc->entry.pathlen = len - 1;
51         hashcpy(desc->entry.oid.hash, (const unsigned char *)path + len);
52
53         return 0;
54 }
55
56 static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
57 {
58         desc->buffer = buffer;
59         desc->size = size;
60         if (size)
61                 return decode_tree_entry(desc, buffer, size, err);
62         return 0;
63 }
64
65 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
66 {
67         struct strbuf err = STRBUF_INIT;
68         if (init_tree_desc_internal(desc, buffer, size, &err))
69                 die("%s", err.buf);
70         strbuf_release(&err);
71 }
72
73 int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
74 {
75         struct strbuf err = STRBUF_INIT;
76         int result = init_tree_desc_internal(desc, buffer, size, &err);
77         if (result)
78                 error("%s", err.buf);
79         strbuf_release(&err);
80         return result;
81 }
82
83 void *fill_tree_descriptor(struct repository *r,
84                            struct tree_desc *desc,
85                            const struct object_id *oid)
86 {
87         unsigned long size = 0;
88         void *buf = NULL;
89
90         if (oid) {
91                 buf = read_object_with_reference(r, oid, tree_type, &size, NULL);
92                 if (!buf)
93                         die("unable to read tree %s", oid_to_hex(oid));
94         }
95         init_tree_desc(desc, buf, size);
96         return buf;
97 }
98
99 static void entry_clear(struct name_entry *a)
100 {
101         memset(a, 0, sizeof(*a));
102 }
103
104 static void entry_extract(struct tree_desc *t, struct name_entry *a)
105 {
106         *a = t->entry;
107 }
108
109 static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
110 {
111         const void *buf = desc->buffer;
112         const unsigned char *end = (const unsigned char *)desc->entry.path + desc->entry.pathlen + 1 + the_hash_algo->rawsz;
113         unsigned long size = desc->size;
114         unsigned long len = end - (const unsigned char *)buf;
115
116         if (size < len)
117                 die(_("too-short tree file"));
118         buf = end;
119         size -= len;
120         desc->buffer = buf;
121         desc->size = size;
122         if (size)
123                 return decode_tree_entry(desc, buf, size, err);
124         return 0;
125 }
126
127 void update_tree_entry(struct tree_desc *desc)
128 {
129         struct strbuf err = STRBUF_INIT;
130         if (update_tree_entry_internal(desc, &err))
131                 die("%s", err.buf);
132         strbuf_release(&err);
133 }
134
135 int update_tree_entry_gently(struct tree_desc *desc)
136 {
137         struct strbuf err = STRBUF_INIT;
138         if (update_tree_entry_internal(desc, &err)) {
139                 error("%s", err.buf);
140                 strbuf_release(&err);
141                 /* Stop processing this tree after error */
142                 desc->size = 0;
143                 return -1;
144         }
145         strbuf_release(&err);
146         return 0;
147 }
148
149 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
150 {
151         if (!desc->size)
152                 return 0;
153
154         *entry = desc->entry;
155         update_tree_entry(desc);
156         return 1;
157 }
158
159 int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
160 {
161         if (!desc->size)
162                 return 0;
163
164         *entry = desc->entry;
165         if (update_tree_entry_gently(desc))
166                 return 0;
167         return 1;
168 }
169
170 void setup_traverse_info(struct traverse_info *info, const char *base)
171 {
172         size_t pathlen = strlen(base);
173         static struct traverse_info dummy;
174
175         memset(info, 0, sizeof(*info));
176         if (pathlen && base[pathlen-1] == '/')
177                 pathlen--;
178         info->pathlen = pathlen ? pathlen + 1 : 0;
179         info->name = base;
180         info->namelen = pathlen;
181         if (pathlen)
182                 info->prev = &dummy;
183 }
184
185 char *make_traverse_path(char *path, size_t pathlen,
186                          const struct traverse_info *info,
187                          const char *name, size_t namelen)
188 {
189         /* Always points to the end of the name we're about to add */
190         size_t pos = st_add(info->pathlen, namelen);
191
192         if (pos >= pathlen)
193                 BUG("too small buffer passed to make_traverse_path");
194
195         path[pos] = 0;
196         for (;;) {
197                 if (pos < namelen)
198                         BUG("traverse_info pathlen does not match strings");
199                 pos -= namelen;
200                 memcpy(path + pos, name, namelen);
201
202                 if (!pos)
203                         break;
204                 path[--pos] = '/';
205
206                 if (!info)
207                         BUG("traverse_info ran out of list items");
208                 name = info->name;
209                 namelen = info->namelen;
210                 info = info->prev;
211         }
212         return path;
213 }
214
215 void strbuf_make_traverse_path(struct strbuf *out,
216                                const struct traverse_info *info,
217                                const char *name, size_t namelen)
218 {
219         size_t len = traverse_path_len(info, namelen);
220
221         strbuf_grow(out, len);
222         make_traverse_path(out->buf + out->len, out->alloc - out->len,
223                            info, name, namelen);
224         strbuf_setlen(out, out->len + len);
225 }
226
/*
 * One node in the list of entries that were handed out ahead of
 * their turn and must therefore be skipped when the walk reaches
 * them.  "ptr" holds the path pointer of such an entry; nodes are
 * chained through "prev", newest first.
 */
struct tree_desc_skip {
	struct tree_desc_skip *prev;
	const void *ptr;
};
231
232 struct tree_desc_x {
233         struct tree_desc d;
234         struct tree_desc_skip *skip;
235 };
236
/*
 * The caller wants to pick the name "a" from a tree, or nothing,
 * and the tree is currently positioned at the name "b".  Decide
 * whether "a" can still be found.
 *
 * Returns
 *    0  when "a" and "b" are the same name;
 *    1  when "a" could still be hiding behind "b" (keep looking);
 *   -1  when "a" cannot appear in the tree.
 *
 * "a" could be hiding behind "b" in these situations:
 *
 *  - "b" sorts before "a" under name_compare(), e.g. a == "t" and
 *    b == "ab", or a == "t-2" and b == "t";
 *
 *  - "b" sorts after "a", but "a" is a proper prefix of "b" and the
 *    byte of "b" following that prefix is smaller than '/', e.g.
 *    a == "t" and b == "t-2" where "t" is a subtree in the tree.
 */
static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
{
	int order = name_compare(a, a_len, b, b_len);

	/* Most common case first -- reading sync'd trees */
	if (order == 0)
		return 0;

	/* "a" comes after "b": it may simply be further down. */
	if (order > 0)
		return 1;

	/* "b" comes after "a": only a subtree named "a" can follow. */
	if (a_len < b_len && b[a_len] < '/' && memcmp(a, b, a_len) == 0)
		return 1;

	return -1;
}
277
278 /*
279  * From the extended tree_desc, extract the first name entry, while
280  * paying attention to the candidate "first" name.  Most importantly,
281  * when looking for an entry, if there are entries that sorts earlier
282  * in the tree object representation than that name, skip them and
283  * process the named entry first.  We will remember that we haven't
284  * processed the first entry yet, and in the later call skip the
285  * entry we processed early when update_extended_entry() is called.
286  *
287  * E.g. if the underlying tree object has these entries:
288  *
289  *    blob    "t-1"
290  *    blob    "t-2"
291  *    tree    "t"
292  *    blob    "t=1"
293  *
294  * and the "first" asks for "t", remember that we still need to
295  * process "t-1" and "t-2" but extract "t".  After processing the
296  * entry "t" from this call, the caller will let us know by calling
297  * update_extended_entry() that we can remember "t" has been processed
298  * already.
299  */
300
301 static void extended_entry_extract(struct tree_desc_x *t,
302                                    struct name_entry *a,
303                                    const char *first,
304                                    int first_len)
305 {
306         const char *path;
307         int len;
308         struct tree_desc probe;
309         struct tree_desc_skip *skip;
310
311         /*
312          * Extract the first entry from the tree_desc, but skip the
313          * ones that we already returned in earlier rounds.
314          */
315         while (1) {
316                 if (!t->d.size) {
317                         entry_clear(a);
318                         break; /* not found */
319                 }
320                 entry_extract(&t->d, a);
321                 for (skip = t->skip; skip; skip = skip->prev)
322                         if (a->path == skip->ptr)
323                                 break; /* found */
324                 if (!skip)
325                         break;
326                 /* We have processed this entry already. */
327                 update_tree_entry(&t->d);
328         }
329
330         if (!first || !a->path)
331                 return;
332
333         /*
334          * The caller wants "first" from this tree, or nothing.
335          */
336         path = a->path;
337         len = tree_entry_len(a);
338         switch (check_entry_match(first, first_len, path, len)) {
339         case -1:
340                 entry_clear(a);
341         case 0:
342                 return;
343         default:
344                 break;
345         }
346
347         /*
348          * We need to look-ahead -- we suspect that a subtree whose
349          * name is "first" may be hiding behind the current entry "path".
350          */
351         probe = t->d;
352         while (probe.size) {
353                 entry_extract(&probe, a);
354                 path = a->path;
355                 len = tree_entry_len(a);
356                 switch (check_entry_match(first, first_len, path, len)) {
357                 case -1:
358                         entry_clear(a);
359                 case 0:
360                         return;
361                 default:
362                         update_tree_entry(&probe);
363                         break;
364                 }
365                 /* keep looking */
366         }
367         entry_clear(a);
368 }
369
370 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
371 {
372         if (t->d.entry.path == a->path) {
373                 update_tree_entry(&t->d);
374         } else {
375                 /* we have returned this entry early */
376                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
377                 skip->ptr = a->path;
378                 skip->prev = t->skip;
379                 t->skip = skip;
380         }
381 }
382
383 static void free_extended_entry(struct tree_desc_x *t)
384 {
385         struct tree_desc_skip *p, *s;
386
387         for (s = t->skip; s; s = p) {
388                 p = s->prev;
389                 free(s);
390         }
391 }
392
393 static inline int prune_traversal(struct index_state *istate,
394                                   struct name_entry *e,
395                                   struct traverse_info *info,
396                                   struct strbuf *base,
397                                   int still_interesting)
398 {
399         if (!info->pathspec || still_interesting == 2)
400                 return 2;
401         if (still_interesting < 0)
402                 return still_interesting;
403         return tree_entry_interesting(istate, e, base,
404                                       0, info->pathspec);
405 }
406
407 int traverse_trees(struct index_state *istate,
408                    int n, struct tree_desc *t,
409                    struct traverse_info *info)
410 {
411         int error = 0;
412         struct name_entry entry[MAX_TRAVERSE_TREES];
413         int i;
414         struct tree_desc_x tx[ARRAY_SIZE(entry)];
415         struct strbuf base = STRBUF_INIT;
416         int interesting = 1;
417         char *traverse_path;
418
419         if (n >= ARRAY_SIZE(entry))
420                 BUG("traverse_trees() called with too many trees (%d)", n);
421
422         for (i = 0; i < n; i++) {
423                 tx[i].d = t[i];
424                 tx[i].skip = NULL;
425         }
426
427         if (info->prev) {
428                 strbuf_make_traverse_path(&base, info->prev,
429                                           info->name, info->namelen);
430                 strbuf_addch(&base, '/');
431                 traverse_path = xstrndup(base.buf, base.len);
432         } else {
433                 traverse_path = xstrndup(info->name, info->pathlen);
434         }
435         info->traverse_path = traverse_path;
436         for (;;) {
437                 int trees_used;
438                 unsigned long mask, dirmask;
439                 const char *first = NULL;
440                 int first_len = 0;
441                 struct name_entry *e = NULL;
442                 int len;
443
444                 for (i = 0; i < n; i++) {
445                         e = entry + i;
446                         extended_entry_extract(tx + i, e, NULL, 0);
447                 }
448
449                 /*
450                  * A tree may have "t-2" at the current location even
451                  * though it may have "t" that is a subtree behind it,
452                  * and another tree may return "t".  We want to grab
453                  * all "t" from all trees to match in such a case.
454                  */
455                 for (i = 0; i < n; i++) {
456                         e = entry + i;
457                         if (!e->path)
458                                 continue;
459                         len = tree_entry_len(e);
460                         if (!first) {
461                                 first = e->path;
462                                 first_len = len;
463                                 continue;
464                         }
465                         if (name_compare(e->path, len, first, first_len) < 0) {
466                                 first = e->path;
467                                 first_len = len;
468                         }
469                 }
470
471                 if (first) {
472                         for (i = 0; i < n; i++) {
473                                 e = entry + i;
474                                 extended_entry_extract(tx + i, e, first, first_len);
475                                 /* Cull the ones that are not the earliest */
476                                 if (!e->path)
477                                         continue;
478                                 len = tree_entry_len(e);
479                                 if (name_compare(e->path, len, first, first_len))
480                                         entry_clear(e);
481                         }
482                 }
483
484                 /* Now we have in entry[i] the earliest name from the trees */
485                 mask = 0;
486                 dirmask = 0;
487                 for (i = 0; i < n; i++) {
488                         if (!entry[i].path)
489                                 continue;
490                         mask |= 1ul << i;
491                         if (S_ISDIR(entry[i].mode))
492                                 dirmask |= 1ul << i;
493                         e = &entry[i];
494                 }
495                 if (!mask)
496                         break;
497                 interesting = prune_traversal(istate, e, info, &base, interesting);
498                 if (interesting < 0)
499                         break;
500                 if (interesting) {
501                         trees_used = info->fn(n, mask, dirmask, entry, info);
502                         if (trees_used < 0) {
503                                 error = trees_used;
504                                 if (!info->show_all_errors)
505                                         break;
506                         }
507                         mask &= trees_used;
508                 }
509                 for (i = 0; i < n; i++)
510                         if (mask & (1ul << i))
511                                 update_extended_entry(tx + i, entry + i);
512         }
513         for (i = 0; i < n; i++)
514                 free_extended_entry(tx + i);
515         free(traverse_path);
516         info->traverse_path = NULL;
517         strbuf_release(&base);
518         return error;
519 }
520
521 struct dir_state {
522         void *tree;
523         unsigned long size;
524         struct object_id oid;
525 };
526
527 static int find_tree_entry(struct repository *r, struct tree_desc *t,
528                            const char *name, struct object_id *result,
529                            unsigned short *mode)
530 {
531         int namelen = strlen(name);
532         while (t->size) {
533                 const char *entry;
534                 struct object_id oid;
535                 int entrylen, cmp;
536
537                 oidcpy(&oid, tree_entry_extract(t, &entry, mode));
538                 entrylen = tree_entry_len(&t->entry);
539                 update_tree_entry(t);
540                 if (entrylen > namelen)
541                         continue;
542                 cmp = memcmp(name, entry, entrylen);
543                 if (cmp > 0)
544                         continue;
545                 if (cmp < 0)
546                         break;
547                 if (entrylen == namelen) {
548                         oidcpy(result, &oid);
549                         return 0;
550                 }
551                 if (name[entrylen] != '/')
552                         continue;
553                 if (!S_ISDIR(*mode))
554                         break;
555                 if (++entrylen == namelen) {
556                         oidcpy(result, &oid);
557                         return 0;
558                 }
559                 return get_tree_entry(r, &oid, name + entrylen, result, mode);
560         }
561         return -1;
562 }
563
564 int get_tree_entry(struct repository *r,
565                    const struct object_id *tree_oid,
566                    const char *name,
567                    struct object_id *oid,
568                    unsigned short *mode)
569 {
570         int retval;
571         void *tree;
572         unsigned long size;
573         struct object_id root;
574
575         tree = read_object_with_reference(r, tree_oid, tree_type, &size, &root);
576         if (!tree)
577                 return -1;
578
579         if (name[0] == '\0') {
580                 oidcpy(oid, &root);
581                 free(tree);
582                 return 0;
583         }
584
585         if (!size) {
586                 retval = -1;
587         } else {
588                 struct tree_desc t;
589                 init_tree_desc(&t, tree, size);
590                 retval = find_tree_entry(r, &t, name, oid, mode);
591         }
592         free(tree);
593         return retval;
594 }
595
596 /*
597  * This is Linux's built-in max for the number of symlinks to follow.
598  * That limit, of course, does not affect git, but it's a reasonable
599  * choice.
600  */
601 #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
602
603 /**
604  * Find a tree entry by following symlinks in tree_sha (which is
605  * assumed to be the root of the repository).  In the event that a
606  * symlink points outside the repository (e.g. a link to /foo or a
607  * root-level link to ../foo), the portion of the link which is
608  * outside the repository will be returned in result_path, and *mode
609  * will be set to 0.  It is assumed that result_path is uninitialized.
610  * If there are no symlinks, or the end result of the symlink chain
611  * points to an object inside the repository, result will be filled in
612  * with the sha1 of the found object, and *mode will hold the mode of
613  * the object.
614  *
615  * See the code for enum get_oid_result for a description of
616  * the return values.
617  */
618 enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
619                 struct object_id *tree_oid, const char *name,
620                 struct object_id *result, struct strbuf *result_path,
621                 unsigned short *mode)
622 {
623         int retval = MISSING_OBJECT;
624         struct dir_state *parents = NULL;
625         size_t parents_alloc = 0;
626         size_t i, parents_nr = 0;
627         struct object_id current_tree_oid;
628         struct strbuf namebuf = STRBUF_INIT;
629         struct tree_desc t;
630         int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
631
632         init_tree_desc(&t, NULL, 0UL);
633         strbuf_addstr(&namebuf, name);
634         oidcpy(&current_tree_oid, tree_oid);
635
636         while (1) {
637                 int find_result;
638                 char *first_slash;
639                 char *remainder = NULL;
640
641                 if (!t.buffer) {
642                         void *tree;
643                         struct object_id root;
644                         unsigned long size;
645                         tree = read_object_with_reference(r,
646                                                           &current_tree_oid,
647                                                           tree_type, &size,
648                                                           &root);
649                         if (!tree)
650                                 goto done;
651
652                         ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
653                         parents[parents_nr].tree = tree;
654                         parents[parents_nr].size = size;
655                         oidcpy(&parents[parents_nr].oid, &root);
656                         parents_nr++;
657
658                         if (namebuf.buf[0] == '\0') {
659                                 oidcpy(result, &root);
660                                 retval = FOUND;
661                                 goto done;
662                         }
663
664                         if (!size)
665                                 goto done;
666
667                         /* descend */
668                         init_tree_desc(&t, tree, size);
669                 }
670
671                 /* Handle symlinks to e.g. a//b by removing leading slashes */
672                 while (namebuf.buf[0] == '/') {
673                         strbuf_remove(&namebuf, 0, 1);
674                 }
675
676                 /* Split namebuf into a first component and a remainder */
677                 if ((first_slash = strchr(namebuf.buf, '/'))) {
678                         *first_slash = 0;
679                         remainder = first_slash + 1;
680                 }
681
682                 if (!strcmp(namebuf.buf, "..")) {
683                         struct dir_state *parent;
684                         /*
685                          * We could end up with .. in the namebuf if it
686                          * appears in a symlink.
687                          */
688
689                         if (parents_nr == 1) {
690                                 if (remainder)
691                                         *first_slash = '/';
692                                 strbuf_add(result_path, namebuf.buf,
693                                            namebuf.len);
694                                 *mode = 0;
695                                 retval = FOUND;
696                                 goto done;
697                         }
698                         parent = &parents[parents_nr - 1];
699                         free(parent->tree);
700                         parents_nr--;
701                         parent = &parents[parents_nr - 1];
702                         init_tree_desc(&t, parent->tree, parent->size);
703                         strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
704                         continue;
705                 }
706
707                 /* We could end up here via a symlink to dir/.. */
708                 if (namebuf.buf[0] == '\0') {
709                         oidcpy(result, &parents[parents_nr - 1].oid);
710                         retval = FOUND;
711                         goto done;
712                 }
713
714                 /* Look up the first (or only) path component in the tree. */
715                 find_result = find_tree_entry(r, &t, namebuf.buf,
716                                               &current_tree_oid, mode);
717                 if (find_result) {
718                         goto done;
719                 }
720
721                 if (S_ISDIR(*mode)) {
722                         if (!remainder) {
723                                 oidcpy(result, &current_tree_oid);
724                                 retval = FOUND;
725                                 goto done;
726                         }
727                         /* Descend the tree */
728                         t.buffer = NULL;
729                         strbuf_remove(&namebuf, 0,
730                                       1 + first_slash - namebuf.buf);
731                 } else if (S_ISREG(*mode)) {
732                         if (!remainder) {
733                                 oidcpy(result, &current_tree_oid);
734                                 retval = FOUND;
735                         } else {
736                                 retval = NOT_DIR;
737                         }
738                         goto done;
739                 } else if (S_ISLNK(*mode)) {
740                         /* Follow a symlink */
741                         unsigned long link_len;
742                         size_t len;
743                         char *contents, *contents_start;
744                         struct dir_state *parent;
745                         enum object_type type;
746
747                         if (follows_remaining-- == 0) {
748                                 /* Too many symlinks followed */
749                                 retval = SYMLINK_LOOP;
750                                 goto done;
751                         }
752
753                         /*
754                          * At this point, we have followed at a least
755                          * one symlink, so on error we need to report this.
756                          */
757                         retval = DANGLING_SYMLINK;
758
759                         contents = repo_read_object_file(r,
760                                                     &current_tree_oid, &type,
761                                                     &link_len);
762
763                         if (!contents)
764                                 goto done;
765
766                         if (contents[0] == '/') {
767                                 strbuf_addstr(result_path, contents);
768                                 free(contents);
769                                 *mode = 0;
770                                 retval = FOUND;
771                                 goto done;
772                         }
773
774                         if (remainder)
775                                 len = first_slash - namebuf.buf;
776                         else
777                                 len = namebuf.len;
778
779                         contents_start = contents;
780
781                         parent = &parents[parents_nr - 1];
782                         init_tree_desc(&t, parent->tree, parent->size);
783                         strbuf_splice(&namebuf, 0, len,
784                                       contents_start, link_len);
785                         if (remainder)
786                                 namebuf.buf[link_len] = '/';
787                         free(contents);
788                 }
789         }
790 done:
791         for (i = 0; i < parents_nr; i++)
792                 free(parents[i].tree);
793         free(parents);
794
795         strbuf_release(&namebuf);
796         return retval;
797 }
798
799 static int match_entry(const struct pathspec_item *item,
800                        const struct name_entry *entry, int pathlen,
801                        const char *match, int matchlen,
802                        enum interesting *never_interesting)
803 {
804         int m = -1; /* signals that we haven't called strncmp() */
805
806         if (item->magic & PATHSPEC_ICASE)
807                 /*
808                  * "Never interesting" trick requires exact
809                  * matching. We could do something clever with inexact
810                  * matching, but it's trickier (and not to forget that
811                  * strcasecmp is locale-dependent, at least in
812                  * glibc). Just disable it for now. It can't be worse
813                  * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
814                  * pattern.
815                  */
816                 *never_interesting = entry_not_interesting;
817         else if (*never_interesting != entry_not_interesting) {
818                 /*
819                  * We have not seen any match that sorts later
820                  * than the current path.
821                  */
822
823                 /*
824                  * Does match sort strictly earlier than path
825                  * with their common parts?
826                  */
827                 m = strncmp(match, entry->path,
828                             (matchlen < pathlen) ? matchlen : pathlen);
829                 if (m < 0)
830                         return 0;
831
832                 /*
833                  * If we come here even once, that means there is at
834                  * least one pathspec that would sort equal to or
835                  * later than the path we are currently looking at.
836                  * In other words, if we have never reached this point
837                  * after iterating all pathspecs, it means all
838                  * pathspecs are either outside of base, or inside the
839                  * base but sorts strictly earlier than the current
840                  * one.  In either case, they will never match the
841                  * subsequent entries.  In such a case, we initialized
842                  * the variable to -1 and that is what will be
843                  * returned, allowing the caller to terminate early.
844                  */
845                 *never_interesting = entry_not_interesting;
846         }
847
848         if (pathlen > matchlen)
849                 return 0;
850
851         if (matchlen > pathlen) {
852                 if (match[pathlen] != '/')
853                         return 0;
854                 /*
855                  * Reject non-directories as partial pathnames, except
856                  * when match is a submodule with a trailing slash and
857                  * nothing else (to handle 'submod/' and 'submod'
858                  * uniformly).
859                  */
860                 if (!S_ISDIR(entry->mode) &&
861                     (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
862                         return 0;
863         }
864
865         if (m == -1)
866                 /*
867                  * we cheated and did not do strncmp(), so we do
868                  * that here.
869                  */
870                 m = ps_strncmp(item, match, entry->path, pathlen);
871
872         /*
873          * If common part matched earlier then it is a hit,
874          * because we rejected the case where path is not a
875          * leading directory and is shorter than match.
876          */
877         if (!m)
878                 /*
879                  * match_entry does not check if the prefix part is
880                  * matched case-sensitively. If the entry is a
881                  * directory and part of prefix, it'll be rematched
882                  * eventually by basecmp with special treatment for
883                  * the prefix.
884                  */
885                 return 1;
886
887         return 0;
888 }
889
890 /* :(icase)-aware string compare */
891 static int basecmp(const struct pathspec_item *item,
892                    const char *base, const char *match, int len)
893 {
894         if (item->magic & PATHSPEC_ICASE) {
895                 int ret, n = len > item->prefix ? item->prefix : len;
896                 ret = strncmp(base, match, n);
897                 if (ret)
898                         return ret;
899                 base += n;
900                 match += n;
901                 len -= n;
902         }
903         return ps_strncmp(item, base, match, len);
904 }
905
/*
 * Check whether "base" lies inside the directory named by the first
 * "matchlen" bytes of "match".  Returns 1 if so, 0 otherwise.
 */
static int match_dir_prefix(const struct pathspec_item *item,
                            const char *base,
                            const char *match, int matchlen)
{
        /* The leading bytes have to agree in the first place. */
        if (basecmp(item, base, match, matchlen) != 0)
                return 0;

        /*
         * When base is a subdirectory of a path that was specified,
         * everything below it is interesting.  That holds for an
         * empty pathspec ...
         */
        if (matchlen == 0)
                return 1;

        /* ... when the match ends right at a directory boundary ... */
        if (base[matchlen] == '/')
                return 1;

        /*
         * ... or when the pathspec itself ends in a slash.  Anything
         * else is just a random prefix match.
         */
        return match[matchlen - 1] == '/';
}
925
926 /*
927  * Perform matching on the leading non-wildcard part of
928  * pathspec. item->nowildcard_len must be greater than zero. Return
929  * non-zero if base is matched.
930  */
931 static int match_wildcard_base(const struct pathspec_item *item,
932                                const char *base, int baselen,
933                                int *matched)
934 {
935         const char *match = item->match;
936         /* the wildcard part is not considered in this function */
937         int matchlen = item->nowildcard_len;
938
939         if (baselen) {
940                 int dirlen;
941                 /*
942                  * Return early if base is longer than the
943                  * non-wildcard part but it does not match.
944                  */
945                 if (baselen >= matchlen) {
946                         *matched = matchlen;
947                         return !basecmp(item, base, match, matchlen);
948                 }
949
950                 dirlen = matchlen;
951                 while (dirlen && match[dirlen - 1] != '/')
952                         dirlen--;
953
954                 /*
955                  * Return early if base is shorter than the
956                  * non-wildcard part but it does not match. Note that
957                  * base ends with '/' so we are sure it really matches
958                  * directory
959                  */
960                 if (basecmp(item, base, match, baselen))
961                         return 0;
962                 *matched = baselen;
963         } else
964                 *matched = 0;
965         /*
966          * we could have checked entry against the non-wildcard part
967          * that is not in base and does similar never_interesting
968          * optimization as in match_entry. For now just be happy with
969          * base comparison.
970          */
971         return entry_interesting;
972 }
973
974 /*
975  * Is a tree entry interesting given the pathspec we have?
976  *
977  * Pre-condition: either baselen == base_offset (i.e. empty path)
978  * or base[baselen-1] == '/' (i.e. with trailing slash).
979  */
980 static enum interesting do_match(struct index_state *istate,
981                                  const struct name_entry *entry,
982                                  struct strbuf *base, int base_offset,
983                                  const struct pathspec *ps,
984                                  int exclude)
985 {
986         int i;
987         int pathlen, baselen = base->len - base_offset;
988         enum interesting never_interesting = ps->has_wildcard ?
989                 entry_not_interesting : all_entries_not_interesting;
990
991         GUARD_PATHSPEC(ps,
992                        PATHSPEC_FROMTOP |
993                        PATHSPEC_MAXDEPTH |
994                        PATHSPEC_LITERAL |
995                        PATHSPEC_GLOB |
996                        PATHSPEC_ICASE |
997                        PATHSPEC_EXCLUDE |
998                        PATHSPEC_ATTR);
999
1000         if (!ps->nr) {
1001                 if (!ps->recursive ||
1002                     !(ps->magic & PATHSPEC_MAXDEPTH) ||
1003                     ps->max_depth == -1)
1004                         return all_entries_interesting;
1005                 return within_depth(base->buf + base_offset, baselen,
1006                                     !!S_ISDIR(entry->mode),
1007                                     ps->max_depth) ?
1008                         entry_interesting : entry_not_interesting;
1009         }
1010
1011         pathlen = tree_entry_len(entry);
1012
1013         for (i = ps->nr - 1; i >= 0; i--) {
1014                 const struct pathspec_item *item = ps->items+i;
1015                 const char *match = item->match;
1016                 const char *base_str = base->buf + base_offset;
1017                 int matchlen = item->len, matched = 0;
1018
1019                 if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
1020                     ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
1021                         continue;
1022
1023                 if (baselen >= matchlen) {
1024                         /* If it doesn't match, move along... */
1025                         if (!match_dir_prefix(item, base_str, match, matchlen))
1026                                 goto match_wildcards;
1027
1028                         if (!ps->recursive ||
1029                             !(ps->magic & PATHSPEC_MAXDEPTH) ||
1030                             ps->max_depth == -1) {
1031                                 if (!item->attr_match_nr)
1032                                         return all_entries_interesting;
1033                                 else
1034                                         goto interesting;
1035                         }
1036
1037                         if (within_depth(base_str + matchlen + 1,
1038                                          baselen - matchlen - 1,
1039                                          !!S_ISDIR(entry->mode),
1040                                          ps->max_depth))
1041                                 goto interesting;
1042                         else
1043                                 return entry_not_interesting;
1044                 }
1045
1046                 /* Either there must be no base, or the base must match. */
1047                 if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
1048                         if (match_entry(item, entry, pathlen,
1049                                         match + baselen, matchlen - baselen,
1050                                         &never_interesting))
1051                                 goto interesting;
1052
1053                         if (item->nowildcard_len < item->len) {
1054                                 if (!git_fnmatch(item, match + baselen, entry->path,
1055                                                  item->nowildcard_len - baselen))
1056                                         goto interesting;
1057
1058                                 /*
1059                                  * Match all directories. We'll try to
1060                                  * match files later on.
1061                                  */
1062                                 if (ps->recursive && S_ISDIR(entry->mode))
1063                                         return entry_interesting;
1064
1065                                 /*
1066                                  * When matching against submodules with
1067                                  * wildcard characters, ensure that the entry
1068                                  * at least matches up to the first wild
1069                                  * character.  More accurate matching can then
1070                                  * be performed in the submodule itself.
1071                                  */
1072                                 if (ps->recurse_submodules &&
1073                                     S_ISGITLINK(entry->mode) &&
1074                                     !ps_strncmp(item, match + baselen,
1075                                                 entry->path,
1076                                                 item->nowildcard_len - baselen))
1077                                         goto interesting;
1078                         }
1079
1080                         continue;
1081                 }
1082
1083 match_wildcards:
1084                 if (item->nowildcard_len == item->len)
1085                         continue;
1086
1087                 if (item->nowildcard_len &&
1088                     !match_wildcard_base(item, base_str, baselen, &matched))
1089                         continue;
1090
1091                 /*
1092                  * Concatenate base and entry->path into one and do
1093                  * fnmatch() on it.
1094                  *
1095                  * While we could avoid concatenation in certain cases
1096                  * [1], which saves a memcpy and potentially a
1097                  * realloc, it turns out not worth it. Measurement on
1098                  * linux-2.6 does not show any clear improvements,
1099                  * partly because of the nowildcard_len optimization
1100                  * in git_fnmatch(). Avoid micro-optimizations here.
1101                  *
1102                  * [1] if match_wildcard_base() says the base
1103                  * directory is already matched, we only need to match
1104                  * the rest, which is shorter so _in theory_ faster.
1105                  */
1106
1107                 strbuf_add(base, entry->path, pathlen);
1108
1109                 if (!git_fnmatch(item, match, base->buf + base_offset,
1110                                  item->nowildcard_len)) {
1111                         strbuf_setlen(base, base_offset + baselen);
1112                         goto interesting;
1113                 }
1114
1115                 /*
1116                  * When matching against submodules with
1117                  * wildcard characters, ensure that the entry
1118                  * at least matches up to the first wild
1119                  * character.  More accurate matching can then
1120                  * be performed in the submodule itself.
1121                  */
1122                 if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
1123                     !ps_strncmp(item, match, base->buf + base_offset,
1124                                 item->nowildcard_len)) {
1125                         strbuf_setlen(base, base_offset + baselen);
1126                         goto interesting;
1127                 }
1128
1129                 strbuf_setlen(base, base_offset + baselen);
1130
1131                 /*
1132                  * Match all directories. We'll try to match files
1133                  * later on.
1134                  * max_depth is ignored but we may consider support it
1135                  * in future, see
1136                  * https://lore.kernel.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1137                  */
1138                 if (ps->recursive && S_ISDIR(entry->mode))
1139                         return entry_interesting;
1140                 continue;
1141 interesting:
1142                 if (item->attr_match_nr) {
1143                         int ret;
1144
1145                         /*
1146                          * Must not return all_entries_not_interesting
1147                          * prematurely. We do not know if all entries do not
1148                          * match some attributes with current attr API.
1149                          */
1150                         never_interesting = entry_not_interesting;
1151
1152                         /*
1153                          * Consider all directories interesting (because some
1154                          * of those files inside may match some attributes
1155                          * even though the parent dir does not)
1156                          *
1157                          * FIXME: attributes _can_ match directories and we
1158                          * can probably return all_entries_interesting or
1159                          * all_entries_not_interesting here if matched.
1160                          */
1161                         if (S_ISDIR(entry->mode))
1162                                 return entry_interesting;
1163
1164                         strbuf_add(base, entry->path, pathlen);
1165                         ret = match_pathspec_attrs(istate, base->buf + base_offset,
1166                                                    base->len - base_offset, item);
1167                         strbuf_setlen(base, base_offset + baselen);
1168                         if (!ret)
1169                                 continue;
1170                 }
1171                 return entry_interesting;
1172         }
1173         return never_interesting; /* No matches */
1174 }
1175
1176 /*
1177  * Is a tree entry interesting given the pathspec we have?
1178  *
1179  * Pre-condition: either baselen == base_offset (i.e. empty path)
1180  * or base[baselen-1] == '/' (i.e. with trailing slash).
1181  */
1182 enum interesting tree_entry_interesting(struct index_state *istate,
1183                                         const struct name_entry *entry,
1184                                         struct strbuf *base, int base_offset,
1185                                         const struct pathspec *ps)
1186 {
1187         enum interesting positive, negative;
1188         positive = do_match(istate, entry, base, base_offset, ps, 0);
1189
1190         /*
1191          * case | entry | positive | negative | result
1192          * -----+-------+----------+----------+-------
1193          *   1  |  file |   -1     |  -1..2   |  -1
1194          *   2  |  file |    0     |  -1..2   |   0
1195          *   3  |  file |    1     |   -1     |   1
1196          *   4  |  file |    1     |    0     |   1
1197          *   5  |  file |    1     |    1     |   0
1198          *   6  |  file |    1     |    2     |   0
1199          *   7  |  file |    2     |   -1     |   2
1200          *   8  |  file |    2     |    0     |   1
1201          *   9  |  file |    2     |    1     |   0
1202          *  10  |  file |    2     |    2     |  -1
1203          * -----+-------+----------+----------+-------
1204          *  11  |  dir  |   -1     |  -1..2   |  -1
1205          *  12  |  dir  |    0     |  -1..2   |   0
1206          *  13  |  dir  |    1     |   -1     |   1
1207          *  14  |  dir  |    1     |    0     |   1
1208          *  15  |  dir  |    1     |    1     |   1 (*)
1209          *  16  |  dir  |    1     |    2     |   0
1210          *  17  |  dir  |    2     |   -1     |   2
1211          *  18  |  dir  |    2     |    0     |   1
1212          *  19  |  dir  |    2     |    1     |   1 (*)
1213          *  20  |  dir  |    2     |    2     |  -1
1214          *
1215          * (*) An exclude pattern interested in a directory does not
1216          * necessarily mean it will exclude all of the directory. In
1217          * wildcard case, it can't decide until looking at individual
1218          * files inside. So don't write such directories off yet.
1219          */
1220
1221         if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1222             positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1223                 return positive;
1224
1225         negative = do_match(istate, entry, base, base_offset, ps, 1);
1226
1227         /* #8, #18 */
1228         if (positive == all_entries_interesting &&
1229             negative == entry_not_interesting)
1230                 return entry_interesting;
1231
1232         /* #3, #4, #7, #13, #14, #17 */
1233         if (negative <= entry_not_interesting)
1234                 return positive;
1235
1236         /* #15, #19 */
1237         if (S_ISDIR(entry->mode) &&
1238             positive >= entry_interesting &&
1239             negative == entry_interesting)
1240                 return entry_interesting;
1241
1242         if ((positive == entry_interesting &&
1243              negative >= entry_interesting) || /* #5, #6, #16 */
1244             (positive == all_entries_interesting &&
1245              negative == entry_interesting)) /* #9 */
1246                 return entry_not_interesting;
1247
1248         return all_entries_not_interesting; /* #10, #20 */
1249 }