line-log: more responsive, incremental 'git log -L'
[git] / tree-walk.c
1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "dir.h"
4 #include "object-store.h"
5 #include "tree.h"
6 #include "pathspec.h"
7
/*
 * Parse the octal mode that starts a tree entry.
 *
 * "str" points at the mode digits, which must be followed by a space.
 * On success the parsed value is stored in *modep and a pointer to the
 * byte after that space is returned.  NULL is returned, and *modep is
 * left untouched, when the mode is empty, when any byte before the
 * space is not one of the digits 0-7 (this includes a NUL), or when
 * the value does not fit in an unsigned int.
 */
static const char *get_mode(const char *str, unsigned int *modep)
{
        /* Largest value that can still take one more octal digit. */
        const unsigned int shift_limit = (unsigned int)-1 >> 3;
        unsigned int mode = 0;
        unsigned char c;

        if (*str == ' ')
                return NULL;

        while ((c = *str++) != ' ') {
                if (c < '0' || c > '7')
                        return NULL;
                /*
                 * Tree data is untrusted input: refuse an overlong mode
                 * instead of letting the value silently wrap around.
                 */
                if (mode > shift_limit)
                        return NULL;
                mode = (mode << 3) + (c - '0');
        }
        *modep = mode;
        return str;
}
24
25 static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
26 {
27         const char *path;
28         unsigned int mode, len;
29         const unsigned hashsz = the_hash_algo->rawsz;
30
31         if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
32                 strbuf_addstr(err, _("too-short tree object"));
33                 return -1;
34         }
35
36         path = get_mode(buf, &mode);
37         if (!path) {
38                 strbuf_addstr(err, _("malformed mode in tree entry"));
39                 return -1;
40         }
41         if (!*path) {
42                 strbuf_addstr(err, _("empty filename in tree entry"));
43                 return -1;
44         }
45         len = strlen(path) + 1;
46
47         /* Initialize the descriptor entry */
48         desc->entry.path = path;
49         desc->entry.mode = canon_mode(mode);
50         desc->entry.pathlen = len - 1;
51         hashcpy(desc->entry.oid.hash, (const unsigned char *)path + len);
52
53         return 0;
54 }
55
56 static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
57 {
58         desc->buffer = buffer;
59         desc->size = size;
60         if (size)
61                 return decode_tree_entry(desc, buffer, size, err);
62         return 0;
63 }
64
65 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
66 {
67         struct strbuf err = STRBUF_INIT;
68         if (init_tree_desc_internal(desc, buffer, size, &err))
69                 die("%s", err.buf);
70         strbuf_release(&err);
71 }
72
73 int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
74 {
75         struct strbuf err = STRBUF_INIT;
76         int result = init_tree_desc_internal(desc, buffer, size, &err);
77         if (result)
78                 error("%s", err.buf);
79         strbuf_release(&err);
80         return result;
81 }
82
83 void *fill_tree_descriptor(struct repository *r,
84                            struct tree_desc *desc,
85                            const struct object_id *oid)
86 {
87         unsigned long size = 0;
88         void *buf = NULL;
89
90         if (oid) {
91                 buf = read_object_with_reference(r, oid, tree_type, &size, NULL);
92                 if (!buf)
93                         die("unable to read tree %s", oid_to_hex(oid));
94         }
95         init_tree_desc(desc, buf, size);
96         return buf;
97 }
98
99 static void entry_clear(struct name_entry *a)
100 {
101         memset(a, 0, sizeof(*a));
102 }
103
104 static void entry_extract(struct tree_desc *t, struct name_entry *a)
105 {
106         *a = t->entry;
107 }
108
109 static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
110 {
111         const void *buf = desc->buffer;
112         const unsigned char *end = (const unsigned char *)desc->entry.path + desc->entry.pathlen + 1 + the_hash_algo->rawsz;
113         unsigned long size = desc->size;
114         unsigned long len = end - (const unsigned char *)buf;
115
116         if (size < len)
117                 die(_("too-short tree file"));
118         buf = end;
119         size -= len;
120         desc->buffer = buf;
121         desc->size = size;
122         if (size)
123                 return decode_tree_entry(desc, buf, size, err);
124         return 0;
125 }
126
127 void update_tree_entry(struct tree_desc *desc)
128 {
129         struct strbuf err = STRBUF_INIT;
130         if (update_tree_entry_internal(desc, &err))
131                 die("%s", err.buf);
132         strbuf_release(&err);
133 }
134
135 int update_tree_entry_gently(struct tree_desc *desc)
136 {
137         struct strbuf err = STRBUF_INIT;
138         if (update_tree_entry_internal(desc, &err)) {
139                 error("%s", err.buf);
140                 strbuf_release(&err);
141                 /* Stop processing this tree after error */
142                 desc->size = 0;
143                 return -1;
144         }
145         strbuf_release(&err);
146         return 0;
147 }
148
149 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
150 {
151         if (!desc->size)
152                 return 0;
153
154         *entry = desc->entry;
155         update_tree_entry(desc);
156         return 1;
157 }
158
159 int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
160 {
161         if (!desc->size)
162                 return 0;
163
164         *entry = desc->entry;
165         if (update_tree_entry_gently(desc))
166                 return 0;
167         return 1;
168 }
169
170 void setup_traverse_info(struct traverse_info *info, const char *base)
171 {
172         size_t pathlen = strlen(base);
173         static struct traverse_info dummy;
174
175         memset(info, 0, sizeof(*info));
176         if (pathlen && base[pathlen-1] == '/')
177                 pathlen--;
178         info->pathlen = pathlen ? pathlen + 1 : 0;
179         info->name = base;
180         info->namelen = pathlen;
181         if (pathlen)
182                 info->prev = &dummy;
183 }
184
185 char *make_traverse_path(char *path, size_t pathlen,
186                          const struct traverse_info *info,
187                          const char *name, size_t namelen)
188 {
189         /* Always points to the end of the name we're about to add */
190         size_t pos = st_add(info->pathlen, namelen);
191
192         if (pos >= pathlen)
193                 BUG("too small buffer passed to make_traverse_path");
194
195         path[pos] = 0;
196         for (;;) {
197                 if (pos < namelen)
198                         BUG("traverse_info pathlen does not match strings");
199                 pos -= namelen;
200                 memcpy(path + pos, name, namelen);
201
202                 if (!pos)
203                         break;
204                 path[--pos] = '/';
205
206                 if (!info)
207                         BUG("traverse_info ran out of list items");
208                 name = info->name;
209                 namelen = info->namelen;
210                 info = info->prev;
211         }
212         return path;
213 }
214
215 void strbuf_make_traverse_path(struct strbuf *out,
216                                const struct traverse_info *info,
217                                const char *name, size_t namelen)
218 {
219         size_t len = traverse_path_len(info, namelen);
220
221         strbuf_grow(out, len);
222         make_traverse_path(out->buf + out->len, out->alloc - out->len,
223                            info, name, namelen);
224         strbuf_setlen(out, out->len + len);
225 }
226
/*
 * Node in a singly linked list of entries that were handed out ahead
 * of their position in the tree; update_extended_entry() pushes nodes
 * and extended_entry_extract() skips entries whose path matches "ptr".
 */
227 struct tree_desc_skip {
228         struct tree_desc_skip *prev; /* previously pushed node, or NULL */
229         const void *ptr; /* path pointer of the already-returned entry */
230 };
231
/*
 * A tree descriptor together with the list of entries that have
 * already been returned out of order and must be skipped later.
 */
232 struct tree_desc_x {
233         struct tree_desc d; /* the underlying descriptor */
234         struct tree_desc_skip *skip; /* head of the skip list, or NULL */
235 };
236
/*
 * Decide whether the wanted name *a* can still show up in a tree whose
 * current entry is *b*.
 *
 * Returns 0 when *a* and *b* are the same name, 1 when *a* may still
 * be found further along (keep looking), and -1 when *a* cannot appear
 * in the tree.
 */
static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
{
        /*
         * *a* may be hiding behind *b* in three situations:
         *
         * (1) *a* == "t",   *b* == "ab"  -- *b* sorts before *a* in any
         *                                   case;
         * (2) *a* == "t",   *b* == "t-2" -- and "t" is a subtree;
         * (3) *a* == "t-2", *b* == "t"   -- and "t-2" is a blob.
         *
         * In every other case scanning further cannot turn up *a*.
         */
        int order = name_compare(a, a_len, b, b_len);

        /* Trees in sync is the common case, so test it first. */
        if (order == 0)
                return 0;

        /*
         * *a* sorts after *b*: keep looking, whether or not this is
         * case (3).
         */
        if (order > 0)
                return 1;

        /* *b* sorts after *a*; only case (2) leaves hope. */
        if (a_len < b_len && b[a_len] < '/' && !memcmp(a, b, a_len))
                return 1;

        return -1;
}
277
278 /*
279  * From the extended tree_desc, extract the first name entry, while
280  * paying attention to the candidate "first" name.  Most importantly,
281  * when looking for an entry, if there are entries that sort earlier
282  * in the tree object representation than that name, skip them and
283  * process the named entry first.  We will remember that we haven't
284  * processed the first entry yet, and in the later call skip the
285  * entry we processed early when update_extended_entry() is called.
286  *
287  * E.g. if the underlying tree object has these entries:
288  *
289  *    blob    "t-1"
290  *    blob    "t-2"
291  *    tree    "t"
292  *    blob    "t=1"
293  *
294  * and the "first" asks for "t", remember that we still need to
295  * process "t-1" and "t-2" but extract "t".  After processing the
296  * entry "t" from this call, the caller will let us know by calling
297  * update_extended_entry() that we can remember "t" has been processed
298  * already.
299  */
300
301 static void extended_entry_extract(struct tree_desc_x *t,
302                                    struct name_entry *a,
303                                    const char *first,
304                                    int first_len)
305 {
306         const char *path;
307         int len;
308         struct tree_desc probe;
309         struct tree_desc_skip *skip;
310
311         /*
312          * Extract the first entry from the tree_desc, but skip the
313          * ones that we already returned in earlier rounds.
314          */
315         while (1) {
316                 if (!t->d.size) {
317                         entry_clear(a);
318                         break; /* not found */
319                 }
320                 entry_extract(&t->d, a);
321                 for (skip = t->skip; skip; skip = skip->prev)
322                         if (a->path == skip->ptr)
323                                 break; /* found */
324                 if (!skip)
325                         break;
326                 /* We have processed this entry already. */
327                 update_tree_entry(&t->d);
328         }
329
330         if (!first || !a->path)
331                 return;
332
333         /*
334          * The caller wants "first" from this tree, or nothing.
335          */
336         path = a->path;
337         len = tree_entry_len(a);
338         switch (check_entry_match(first, first_len, path, len)) {
339         case -1:
340                 entry_clear(a);
341         case 0:
342                 return;
343         default:
344                 break;
345         }
346
347         /*
348          * We need to look-ahead -- we suspect that a subtree whose
349          * name is "first" may be hiding behind the current entry "path".
350          */
351         probe = t->d;
352         while (probe.size) {
353                 entry_extract(&probe, a);
354                 path = a->path;
355                 len = tree_entry_len(a);
356                 switch (check_entry_match(first, first_len, path, len)) {
357                 case -1:
358                         entry_clear(a);
359                 case 0:
360                         return;
361                 default:
362                         update_tree_entry(&probe);
363                         break;
364                 }
365                 /* keep looking */
366         }
367         entry_clear(a);
368 }
369
370 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
371 {
372         if (t->d.entry.path == a->path) {
373                 update_tree_entry(&t->d);
374         } else {
375                 /* we have returned this entry early */
376                 struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
377                 skip->ptr = a->path;
378                 skip->prev = t->skip;
379                 t->skip = skip;
380         }
381 }
382
383 static void free_extended_entry(struct tree_desc_x *t)
384 {
385         struct tree_desc_skip *p, *s;
386
387         for (s = t->skip; s; s = p) {
388                 p = s->prev;
389                 free(s);
390         }
391 }
392
393 static inline int prune_traversal(struct index_state *istate,
394                                   struct name_entry *e,
395                                   struct traverse_info *info,
396                                   struct strbuf *base,
397                                   int still_interesting)
398 {
399         if (!info->pathspec || still_interesting == 2)
400                 return 2;
401         if (still_interesting < 0)
402                 return still_interesting;
403         return tree_entry_interesting(istate, e, base,
404                                       0, info->pathspec);
405 }
406
/*
 * Walk the "n" tree descriptors in "t" in parallel, calling info->fn
 * for the entries that share the earliest remaining name.
 *
 * On each round the earliest name across all trees is picked; entry[i]
 * is filled in for the trees that have that name and cleared for the
 * others, while "mask" and "dirmask" record which trees contributed an
 * entry and which of those entries are directories.  prune_traversal()
 * decides whether the callback is invoked; a negative answer from it
 * ends the walk.
 *
 * A negative return from info->fn is remembered as the result and ends
 * the walk unless info->show_all_errors is set.  The callback's return
 * value is also ANDed into the mask of trees that advance past the
 * entry.  Returns 0, or the last negative value returned by info->fn.
 *
 * info->traverse_path is set for the duration of the walk and reset to
 * NULL before returning.  BUG()s out when "n" is not smaller than the
 * size of the local entry array.
 */
407 int traverse_trees(struct index_state *istate,
408                    int n, struct tree_desc *t,
409                    struct traverse_info *info)
410 {
411         int error = 0;
412         struct name_entry entry[MAX_TRAVERSE_TREES];
413         int i;
414         struct tree_desc_x tx[ARRAY_SIZE(entry)];
415         struct strbuf base = STRBUF_INIT;
416         int interesting = 1;
417         char *traverse_path;
418
419         if (n >= ARRAY_SIZE(entry))
420                 BUG("traverse_trees() called with too many trees (%d)", n);
421
            /* Work on private copies so the caller's descriptors stay put. */
422         for (i = 0; i < n; i++) {
423                 tx[i].d = t[i];
424                 tx[i].skip = NULL;
425         }
426
427         if (info->prev) {
428                 strbuf_make_traverse_path(&base, info->prev,
429                                           info->name, info->namelen);
430                 strbuf_addch(&base, '/');
431                 traverse_path = xstrndup(base.buf, base.len);
432         } else {
433                 traverse_path = xstrndup(info->name, info->pathlen);
434         }
435         info->traverse_path = traverse_path;
436         for (;;) {
437                 int trees_used;
438                 unsigned long mask, dirmask;
439                 const char *first = NULL;
440                 int first_len = 0;
441                 struct name_entry *e = NULL;
442                 int len;
443
                    /* Peek at the next unprocessed entry of every tree. */
444                 for (i = 0; i < n; i++) {
445                         e = entry + i;
446                         extended_entry_extract(tx + i, e, NULL, 0);
447                 }
448
449                 /*
450                  * A tree may have "t-2" at the current location even
451                  * though it may have "t" that is a subtree behind it,
452                  * and another tree may return "t".  We want to grab
453                  * all "t" from all trees to match in such a case.
454                  */
455                 for (i = 0; i < n; i++) {
456                         e = entry + i;
457                         if (!e->path)
458                                 continue;
459                         len = tree_entry_len(e);
460                         if (!first) {
461                                 first = e->path;
462                                 first_len = len;
463                                 continue;
464                         }
465                         if (name_compare(e->path, len, first, first_len) < 0) {
466                                 first = e->path;
467                                 first_len = len;
468                         }
469                 }
470
471                 if (first) {
472                         for (i = 0; i < n; i++) {
473                                 e = entry + i;
474                                 extended_entry_extract(tx + i, e, first, first_len);
475                                 /* Cull the ones that are not the earliest */
476                                 if (!e->path)
477                                         continue;
478                                 len = tree_entry_len(e);
479                                 if (name_compare(e->path, len, first, first_len))
480                                         entry_clear(e);
481                         }
482                 }
483
484                 /* Now we have in entry[i] the earliest name from the trees */
485                 mask = 0;
486                 dirmask = 0;
487                 for (i = 0; i < n; i++) {
488                         if (!entry[i].path)
489                                 continue;
490                         mask |= 1ul << i;
491                         if (S_ISDIR(entry[i].mode))
492                                 dirmask |= 1ul << i;
493                         e = &entry[i];
494                 }
                    /* No tree produced an entry: the walk is complete. */
495                 if (!mask)
496                         break;
497                 interesting = prune_traversal(istate, e, info, &base, interesting);
498                 if (interesting < 0)
499                         break;
500                 if (interesting) {
501                         trees_used = info->fn(n, mask, dirmask, entry, info);
502                         if (trees_used < 0) {
503                                 error = trees_used;
504                                 if (!info->show_all_errors)
505                                         break;
506                         }
507                         mask &= trees_used;
508                 }
                    /* Advance only the trees still selected by the mask. */
509                 for (i = 0; i < n; i++)
510                         if (mask & (1ul << i))
511                                 update_extended_entry(tx + i, entry + i);
512         }
513         for (i = 0; i < n; i++)
514                 free_extended_entry(tx + i);
515         free(traverse_path);
516         info->traverse_path = NULL;
517         strbuf_release(&base);
518         return error;
519 }
520
/*
 * One loaded tree on the stack of parent directories kept by
 * get_tree_entry_follow_symlinks().
 */
521 struct dir_state {
522         void *tree; /* buffer from read_object_with_reference(); freed by the owner of the stack */
523         unsigned long size; /* number of bytes in "tree" */
524         struct object_id oid; /* object name reported for the tree that was read */
525 };
526
527 static int find_tree_entry(struct repository *r, struct tree_desc *t,
528                            const char *name, struct object_id *result,
529                            unsigned short *mode)
530 {
531         int namelen = strlen(name);
532         while (t->size) {
533                 const char *entry;
534                 struct object_id oid;
535                 int entrylen, cmp;
536
537                 oidcpy(&oid, tree_entry_extract(t, &entry, mode));
538                 entrylen = tree_entry_len(&t->entry);
539                 update_tree_entry(t);
540                 if (entrylen > namelen)
541                         continue;
542                 cmp = memcmp(name, entry, entrylen);
543                 if (cmp > 0)
544                         continue;
545                 if (cmp < 0)
546                         break;
547                 if (entrylen == namelen) {
548                         oidcpy(result, &oid);
549                         return 0;
550                 }
551                 if (name[entrylen] != '/')
552                         continue;
553                 if (!S_ISDIR(*mode))
554                         break;
555                 if (++entrylen == namelen) {
556                         oidcpy(result, &oid);
557                         return 0;
558                 }
559                 return get_tree_entry(r, &oid, name + entrylen, result, mode);
560         }
561         return -1;
562 }
563
564 int get_tree_entry(struct repository *r,
565                    const struct object_id *tree_oid,
566                    const char *name,
567                    struct object_id *oid,
568                    unsigned short *mode)
569 {
570         int retval;
571         void *tree;
572         unsigned long size;
573         struct object_id root;
574
575         tree = read_object_with_reference(r, tree_oid, tree_type, &size, &root);
576         if (!tree)
577                 return -1;
578
579         if (name[0] == '\0') {
580                 oidcpy(oid, &root);
581                 free(tree);
582                 return 0;
583         }
584
585         if (!size) {
586                 retval = -1;
587         } else {
588                 struct tree_desc t;
589                 init_tree_desc(&t, tree, size);
590                 retval = find_tree_entry(r, &t, name, oid, mode);
591         }
592         free(tree);
593         return retval;
594 }
595
596 /*
597  * This is Linux's built-in max for the number of symlinks to follow.
598  * That limit, of course, does not affect git, but it's a reasonable
599  * choice.
     *
     * get_tree_entry_follow_symlinks() starts its countdown from this
     * value and reports SYMLINK_LOOP once the countdown is used up.
600  */
601 #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
602
603 /**
604  * Find a tree entry by following symlinks in tree_oid (which is
605  * assumed to be the root of the repository).  In the event that a
606  * symlink points outside the repository (e.g. a link to /foo or a
607  * root-level link to ../foo), the portion of the link which is
608  * outside the repository will be returned in result_path, and *mode
609  * will be set to 0.  It is assumed that result_path is uninitialized.
610  * If there are no symlinks, or the end result of the symlink chain
611  * points to an object inside the repository, result will be filled in
612  * with the object name of the found object, and *mode will hold the
613  * mode of the object.
614  *
615  * See the code for enum get_oid_result for a description of
616  * the return values.
     *
     * As used here: FOUND on success, NOT_DIR when a path component
     * that is a regular file is followed by more components,
     * SYMLINK_LOOP when more than
     * GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS links would be followed,
     * DANGLING_SYMLINK when a failure happens after at least one link
     * has been followed, and MISSING_OBJECT otherwise.
617  */
618 enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
619                 struct object_id *tree_oid, const char *name,
620                 struct object_id *result, struct strbuf *result_path,
621                 unsigned short *mode)
622 {
623         int retval = MISSING_OBJECT;
624         struct dir_state *parents = NULL;
625         size_t parents_alloc = 0;
626         size_t i, parents_nr = 0;
627         struct object_id current_tree_oid;
628         struct strbuf namebuf = STRBUF_INIT;
629         struct tree_desc t;
630         int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
631
632         init_tree_desc(&t, NULL, 0UL);
633         strbuf_addstr(&namebuf, name);
634         oidcpy(&current_tree_oid, tree_oid);
635
636         while (1) {
637                 int find_result;
638                 char *first_slash;
639                 char *remainder = NULL;
640
                    /* A NULL buffer means current_tree_oid still has to be loaded. */
641                 if (!t.buffer) {
642                         void *tree;
643                         struct object_id root;
644                         unsigned long size;
645                         tree = read_object_with_reference(r,
646                                                           &current_tree_oid,
647                                                           tree_type, &size,
648                                                           &root);
649                         if (!tree)
650                                 goto done;
651
                            /* Push the loaded tree; it is freed at "done" or on "..". */
652                         ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
653                         parents[parents_nr].tree = tree;
654                         parents[parents_nr].size = size;
655                         oidcpy(&parents[parents_nr].oid, &root);
656                         parents_nr++;
657
658                         if (namebuf.buf[0] == '\0') {
659                                 oidcpy(result, &root);
660                                 retval = FOUND;
661                                 goto done;
662                         }
663
664                         if (!size)
665                                 goto done;
666
667                         /* descend */
668                         init_tree_desc(&t, tree, size);
669                 }
670
671                 /* Handle symlinks to e.g. a//b by removing leading slashes */
672                 while (namebuf.buf[0] == '/') {
673                         strbuf_remove(&namebuf, 0, 1);
674                 }
675
676                 /* Split namebuf into a first component and a remainder */
677                 if ((first_slash = strchr(namebuf.buf, '/'))) {
678                         *first_slash = 0;
679                         remainder = first_slash + 1;
680                 }
681
682                 if (!strcmp(namebuf.buf, "..")) {
683                         struct dir_state *parent;
684                         /*
685                          * We could end up with .. in the namebuf if it
686                          * appears in a symlink.
687                          */
688
                            /* ".." at the root: hand the rest back as an outside path. */
689                         if (parents_nr == 1) {
690                                 if (remainder)
691                                         *first_slash = '/';
692                                 strbuf_add(result_path, namebuf.buf,
693                                            namebuf.len);
694                                 *mode = 0;
695                                 retval = FOUND;
696                                 goto done;
697                         }
                            /* Pop the current tree and resume in its parent. */
698                         parent = &parents[parents_nr - 1];
699                         free(parent->tree);
700                         parents_nr--;
701                         parent = &parents[parents_nr - 1];
702                         init_tree_desc(&t, parent->tree, parent->size);
                            /* Drop ".." plus the separator that followed it, if any. */
703                         strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
704                         continue;
705                 }
706
707                 /* We could end up here via a symlink to dir/.. */
708                 if (namebuf.buf[0] == '\0') {
709                         oidcpy(result, &parents[parents_nr - 1].oid);
710                         retval = FOUND;
711                         goto done;
712                 }
713
714                 /* Look up the first (or only) path component in the tree. */
715                 find_result = find_tree_entry(r, &t, namebuf.buf,
716                                               &current_tree_oid, mode);
717                 if (find_result) {
718                         goto done;
719                 }
720
721                 if (S_ISDIR(*mode)) {
722                         if (!remainder) {
723                                 oidcpy(result, &current_tree_oid);
724                                 retval = FOUND;
725                                 goto done;
726                         }
727                         /* Descend the tree */
728                         t.buffer = NULL;
729                         strbuf_remove(&namebuf, 0,
730                                       1 + first_slash - namebuf.buf);
731                 } else if (S_ISREG(*mode)) {
732                         if (!remainder) {
733                                 oidcpy(result, &current_tree_oid);
734                                 retval = FOUND;
735                         } else {
736                                 retval = NOT_DIR;
737                         }
738                         goto done;
739                 } else if (S_ISLNK(*mode)) {
740                         /* Follow a symlink */
741                         unsigned long link_len;
742                         size_t len;
743                         char *contents, *contents_start;
744                         struct dir_state *parent;
745                         enum object_type type;
746
747                         if (follows_remaining-- == 0) {
748                                 /* Too many symlinks followed */
749                                 retval = SYMLINK_LOOP;
750                                 goto done;
751                         }
752
753                         /*
754                          * At this point, we have followed at least
755                          * one symlink, so on error we need to report this.
756                          */
757                         retval = DANGLING_SYMLINK;
758
759                         contents = repo_read_object_file(r,
760                                                     &current_tree_oid, &type,
761                                                     &link_len);
762
763                         if (!contents)
764                                 goto done;
765
                            /* An absolute link target is reported as an outside path. */
766                         if (contents[0] == '/') {
767                                 strbuf_addstr(result_path, contents);
768                                 free(contents);
769                                 *mode = 0;
770                                 retval = FOUND;
771                                 goto done;
772                         }
773
                            /* Length of the component that the link target replaces. */
774                         if (remainder)
775                                 len = first_slash - namebuf.buf;
776                         else
777                                 len = namebuf.len;
778
779                         contents_start = contents;
780
                            /* Restart the lookup in the tree that holds the link. */
781                         parent = &parents[parents_nr - 1];
782                         init_tree_desc(&t, parent->tree, parent->size);
783                         strbuf_splice(&namebuf, 0, len,
784                                       contents_start, link_len);
                            /* Put back the separator that the split above overwrote. */
785                         if (remainder)
786                                 namebuf.buf[link_len] = '/';
787                         free(contents);
788                 }
789         }
790 done:
791         for (i = 0; i < parents_nr; i++)
792                 free(parents[i].tree);
793         free(parents);
794
795         strbuf_release(&namebuf);
796         return retval;
797 }
798
799 static int match_entry(const struct pathspec_item *item,
800                        const struct name_entry *entry, int pathlen,
801                        const char *match, int matchlen,
802                        enum interesting *never_interesting)
803 {
804         int m = -1; /* signals that we haven't called strncmp() */
805
806         if (item->magic & PATHSPEC_ICASE)
807                 /*
808                  * "Never interesting" trick requires exact
809                  * matching. We could do something clever with inexact
810                  * matching, but it's trickier (and not to forget that
811                  * strcasecmp is locale-dependent, at least in
812                  * glibc). Just disable it for now. It can't be worse
813                  * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
814                  * pattern.
815                  */
816                 *never_interesting = entry_not_interesting;
817         else if (*never_interesting != entry_not_interesting) {
818                 /*
819                  * We have not seen any match that sorts later
820                  * than the current path.
821                  */
822
823                 /*
824                  * Does match sort strictly earlier than path
825                  * with their common parts?
826                  */
827                 m = strncmp(match, entry->path,
828                             (matchlen < pathlen) ? matchlen : pathlen);
829                 if (m < 0)
830                         return 0;
831
832                 /*
833                  * If we come here even once, that means there is at
834                  * least one pathspec that would sort equal to or
835                  * later than the path we are currently looking at.
836                  * In other words, if we have never reached this point
837                  * after iterating all pathspecs, it means all
838                  * pathspecs are either outside of base, or inside the
839                  * base but sorts strictly earlier than the current
840                  * one.  In either case, they will never match the
841                  * subsequent entries.  In such a case, we initialized
842                  * the variable to -1 and that is what will be
843                  * returned, allowing the caller to terminate early.
844                  */
845                 *never_interesting = entry_not_interesting;
846         }
847
848         if (pathlen > matchlen)
849                 return 0;
850
851         if (matchlen > pathlen) {
852                 if (match[pathlen] != '/')
853                         return 0;
854                 if (!S_ISDIR(entry->mode) && !S_ISGITLINK(entry->mode))
855                         return 0;
856         }
857
858         if (m == -1)
859                 /*
860                  * we cheated and did not do strncmp(), so we do
861                  * that here.
862                  */
863                 m = ps_strncmp(item, match, entry->path, pathlen);
864
865         /*
866          * If common part matched earlier then it is a hit,
867          * because we rejected the case where path is not a
868          * leading directory and is shorter than match.
869          */
870         if (!m)
871                 /*
872                  * match_entry does not check if the prefix part is
873                  * matched case-sensitively. If the entry is a
874                  * directory and part of prefix, it'll be rematched
875                  * eventually by basecmp with special treatment for
876                  * the prefix.
877                  */
878                 return 1;
879
880         return 0;
881 }
882
883 /* :(icase)-aware string compare */
884 static int basecmp(const struct pathspec_item *item,
885                    const char *base, const char *match, int len)
886 {
887         if (item->magic & PATHSPEC_ICASE) {
888                 int ret, n = len > item->prefix ? item->prefix : len;
889                 ret = strncmp(base, match, n);
890                 if (ret)
891                         return ret;
892                 base += n;
893                 match += n;
894                 len -= n;
895         }
896         return ps_strncmp(item, base, match, len);
897 }
898
/*
 * Return 1 if `base` lies at or below the directory named by the first
 * `matchlen` bytes of `match`, 0 otherwise.
 *
 * `base` must be readable up to and including index `matchlen`.
 */
static int match_dir_prefix(const struct pathspec_item *item,
			    const char *base,
			    const char *match, int matchlen)
{
	/* The first matchlen bytes have to agree at all. */
	if (basecmp(item, base, match, matchlen) != 0)
		return 0;

	/* An empty pathspec covers every base. */
	if (matchlen == 0)
		return 1;

	/*
	 * The agreement must end on a directory boundary, either in
	 * base or at the end of match itself; anything else is just a
	 * coincidental common prefix of two different names.
	 */
	return base[matchlen] == '/' || match[matchlen - 1] == '/';
}
918
919 /*
920  * Perform matching on the leading non-wildcard part of
921  * pathspec. item->nowildcard_len must be greater than zero. Return
922  * non-zero if base is matched.
923  */
924 static int match_wildcard_base(const struct pathspec_item *item,
925                                const char *base, int baselen,
926                                int *matched)
927 {
928         const char *match = item->match;
929         /* the wildcard part is not considered in this function */
930         int matchlen = item->nowildcard_len;
931
932         if (baselen) {
933                 int dirlen;
934                 /*
935                  * Return early if base is longer than the
936                  * non-wildcard part but it does not match.
937                  */
938                 if (baselen >= matchlen) {
939                         *matched = matchlen;
940                         return !basecmp(item, base, match, matchlen);
941                 }
942
943                 dirlen = matchlen;
944                 while (dirlen && match[dirlen - 1] != '/')
945                         dirlen--;
946
947                 /*
948                  * Return early if base is shorter than the
949                  * non-wildcard part but it does not match. Note that
950                  * base ends with '/' so we are sure it really matches
951                  * directory
952                  */
953                 if (basecmp(item, base, match, baselen))
954                         return 0;
955                 *matched = baselen;
956         } else
957                 *matched = 0;
958         /*
959          * we could have checked entry against the non-wildcard part
960          * that is not in base and does similar never_interesting
961          * optimization as in match_entry. For now just be happy with
962          * base comparison.
963          */
964         return entry_interesting;
965 }
966
967 /*
968  * Is a tree entry interesting given the pathspec we have?
969  *
970  * Pre-condition: either baselen == base_offset (i.e. empty path)
971  * or base[baselen-1] == '/' (i.e. with trailing slash).
972  */
973 static enum interesting do_match(struct index_state *istate,
974                                  const struct name_entry *entry,
975                                  struct strbuf *base, int base_offset,
976                                  const struct pathspec *ps,
977                                  int exclude)
978 {
979         int i;
980         int pathlen, baselen = base->len - base_offset;
981         enum interesting never_interesting = ps->has_wildcard ?
982                 entry_not_interesting : all_entries_not_interesting;
983
984         GUARD_PATHSPEC(ps,
985                        PATHSPEC_FROMTOP |
986                        PATHSPEC_MAXDEPTH |
987                        PATHSPEC_LITERAL |
988                        PATHSPEC_GLOB |
989                        PATHSPEC_ICASE |
990                        PATHSPEC_EXCLUDE |
991                        PATHSPEC_ATTR);
992
993         if (!ps->nr) {
994                 if (!ps->recursive ||
995                     !(ps->magic & PATHSPEC_MAXDEPTH) ||
996                     ps->max_depth == -1)
997                         return all_entries_interesting;
998                 return within_depth(base->buf + base_offset, baselen,
999                                     !!S_ISDIR(entry->mode),
1000                                     ps->max_depth) ?
1001                         entry_interesting : entry_not_interesting;
1002         }
1003
1004         pathlen = tree_entry_len(entry);
1005
1006         for (i = ps->nr - 1; i >= 0; i--) {
1007                 const struct pathspec_item *item = ps->items+i;
1008                 const char *match = item->match;
1009                 const char *base_str = base->buf + base_offset;
1010                 int matchlen = item->len, matched = 0;
1011
1012                 if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
1013                     ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
1014                         continue;
1015
1016                 if (baselen >= matchlen) {
1017                         /* If it doesn't match, move along... */
1018                         if (!match_dir_prefix(item, base_str, match, matchlen))
1019                                 goto match_wildcards;
1020
1021                         if (!ps->recursive ||
1022                             !(ps->magic & PATHSPEC_MAXDEPTH) ||
1023                             ps->max_depth == -1) {
1024                                 if (!item->attr_match_nr)
1025                                         return all_entries_interesting;
1026                                 else
1027                                         goto interesting;
1028                         }
1029
1030                         if (within_depth(base_str + matchlen + 1,
1031                                          baselen - matchlen - 1,
1032                                          !!S_ISDIR(entry->mode),
1033                                          ps->max_depth))
1034                                 goto interesting;
1035                         else
1036                                 return entry_not_interesting;
1037                 }
1038
1039                 /* Either there must be no base, or the base must match. */
1040                 if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
1041                         if (match_entry(item, entry, pathlen,
1042                                         match + baselen, matchlen - baselen,
1043                                         &never_interesting))
1044                                 goto interesting;
1045
1046                         if (item->nowildcard_len < item->len) {
1047                                 if (!git_fnmatch(item, match + baselen, entry->path,
1048                                                  item->nowildcard_len - baselen))
1049                                         goto interesting;
1050
1051                                 /*
1052                                  * Match all directories. We'll try to
1053                                  * match files later on.
1054                                  */
1055                                 if (ps->recursive && S_ISDIR(entry->mode))
1056                                         return entry_interesting;
1057
1058                                 /*
1059                                  * When matching against submodules with
1060                                  * wildcard characters, ensure that the entry
1061                                  * at least matches up to the first wild
1062                                  * character.  More accurate matching can then
1063                                  * be performed in the submodule itself.
1064                                  */
1065                                 if (ps->recurse_submodules &&
1066                                     S_ISGITLINK(entry->mode) &&
1067                                     !ps_strncmp(item, match + baselen,
1068                                                 entry->path,
1069                                                 item->nowildcard_len - baselen))
1070                                         goto interesting;
1071                         }
1072
1073                         continue;
1074                 }
1075
1076 match_wildcards:
1077                 if (item->nowildcard_len == item->len)
1078                         continue;
1079
1080                 if (item->nowildcard_len &&
1081                     !match_wildcard_base(item, base_str, baselen, &matched))
1082                         continue;
1083
1084                 /*
1085                  * Concatenate base and entry->path into one and do
1086                  * fnmatch() on it.
1087                  *
1088                  * While we could avoid concatenation in certain cases
1089                  * [1], which saves a memcpy and potentially a
1090                  * realloc, it turns out not worth it. Measurement on
1091                  * linux-2.6 does not show any clear improvements,
1092                  * partly because of the nowildcard_len optimization
1093                  * in git_fnmatch(). Avoid micro-optimizations here.
1094                  *
1095                  * [1] if match_wildcard_base() says the base
1096                  * directory is already matched, we only need to match
1097                  * the rest, which is shorter so _in theory_ faster.
1098                  */
1099
1100                 strbuf_add(base, entry->path, pathlen);
1101
1102                 if (!git_fnmatch(item, match, base->buf + base_offset,
1103                                  item->nowildcard_len)) {
1104                         strbuf_setlen(base, base_offset + baselen);
1105                         goto interesting;
1106                 }
1107
1108                 /*
1109                  * When matching against submodules with
1110                  * wildcard characters, ensure that the entry
1111                  * at least matches up to the first wild
1112                  * character.  More accurate matching can then
1113                  * be performed in the submodule itself.
1114                  */
1115                 if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
1116                     !ps_strncmp(item, match, base->buf + base_offset,
1117                                 item->nowildcard_len)) {
1118                         strbuf_setlen(base, base_offset + baselen);
1119                         goto interesting;
1120                 }
1121
1122                 strbuf_setlen(base, base_offset + baselen);
1123
1124                 /*
1125                  * Match all directories. We'll try to match files
1126                  * later on.
1127                  * max_depth is ignored but we may consider support it
1128                  * in future, see
1129                  * https://lore.kernel.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1130                  */
1131                 if (ps->recursive && S_ISDIR(entry->mode))
1132                         return entry_interesting;
1133                 continue;
1134 interesting:
1135                 if (item->attr_match_nr) {
1136                         int ret;
1137
1138                         /*
1139                          * Must not return all_entries_not_interesting
1140                          * prematurely. We do not know if all entries do not
1141                          * match some attributes with current attr API.
1142                          */
1143                         never_interesting = entry_not_interesting;
1144
1145                         /*
1146                          * Consider all directories interesting (because some
1147                          * of those files inside may match some attributes
1148                          * even though the parent dir does not)
1149                          *
1150                          * FIXME: attributes _can_ match directories and we
1151                          * can probably return all_entries_interesting or
1152                          * all_entries_not_interesting here if matched.
1153                          */
1154                         if (S_ISDIR(entry->mode))
1155                                 return entry_interesting;
1156
1157                         strbuf_add(base, entry->path, pathlen);
1158                         ret = match_pathspec_attrs(istate, base->buf + base_offset,
1159                                                    base->len - base_offset, item);
1160                         strbuf_setlen(base, base_offset + baselen);
1161                         if (!ret)
1162                                 continue;
1163                 }
1164                 return entry_interesting;
1165         }
1166         return never_interesting; /* No matches */
1167 }
1168
1169 /*
1170  * Is a tree entry interesting given the pathspec we have?
1171  *
1172  * Pre-condition: either baselen == base_offset (i.e. empty path)
1173  * or base[baselen-1] == '/' (i.e. with trailing slash).
1174  */
1175 enum interesting tree_entry_interesting(struct index_state *istate,
1176                                         const struct name_entry *entry,
1177                                         struct strbuf *base, int base_offset,
1178                                         const struct pathspec *ps)
1179 {
1180         enum interesting positive, negative;
1181         positive = do_match(istate, entry, base, base_offset, ps, 0);
1182
1183         /*
1184          * case | entry | positive | negative | result
1185          * -----+-------+----------+----------+-------
1186          *   1  |  file |   -1     |  -1..2   |  -1
1187          *   2  |  file |    0     |  -1..2   |   0
1188          *   3  |  file |    1     |   -1     |   1
1189          *   4  |  file |    1     |    0     |   1
1190          *   5  |  file |    1     |    1     |   0
1191          *   6  |  file |    1     |    2     |   0
1192          *   7  |  file |    2     |   -1     |   2
1193          *   8  |  file |    2     |    0     |   1
1194          *   9  |  file |    2     |    1     |   0
1195          *  10  |  file |    2     |    2     |  -1
1196          * -----+-------+----------+----------+-------
1197          *  11  |  dir  |   -1     |  -1..2   |  -1
1198          *  12  |  dir  |    0     |  -1..2   |   0
1199          *  13  |  dir  |    1     |   -1     |   1
1200          *  14  |  dir  |    1     |    0     |   1
1201          *  15  |  dir  |    1     |    1     |   1 (*)
1202          *  16  |  dir  |    1     |    2     |   0
1203          *  17  |  dir  |    2     |   -1     |   2
1204          *  18  |  dir  |    2     |    0     |   1
1205          *  19  |  dir  |    2     |    1     |   1 (*)
1206          *  20  |  dir  |    2     |    2     |  -1
1207          *
1208          * (*) An exclude pattern interested in a directory does not
1209          * necessarily mean it will exclude all of the directory. In
1210          * wildcard case, it can't decide until looking at individual
1211          * files inside. So don't write such directories off yet.
1212          */
1213
1214         if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1215             positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1216                 return positive;
1217
1218         negative = do_match(istate, entry, base, base_offset, ps, 1);
1219
1220         /* #8, #18 */
1221         if (positive == all_entries_interesting &&
1222             negative == entry_not_interesting)
1223                 return entry_interesting;
1224
1225         /* #3, #4, #7, #13, #14, #17 */
1226         if (negative <= entry_not_interesting)
1227                 return positive;
1228
1229         /* #15, #19 */
1230         if (S_ISDIR(entry->mode) &&
1231             positive >= entry_interesting &&
1232             negative == entry_interesting)
1233                 return entry_interesting;
1234
1235         if ((positive == entry_interesting &&
1236              negative >= entry_interesting) || /* #5, #6, #16 */
1237             (positive == all_entries_interesting &&
1238              negative == entry_interesting)) /* #9 */
1239                 return entry_not_interesting;
1240
1241         return all_entries_not_interesting; /* #10, #20 */
1242 }