4 * Copyright (c) 2006 Junio C Hamano
11 #include "tree-walk.h"
/*
 * Supply a default for NO_EXTERNAL_GREP when the build has not defined
 * it.  The macro is tested further down with "#if !NO_EXTERNAL_GREP" to
 * decide whether the code that runs an external grep is compiled in.
 * NOTE(review): confirm which configurations get 0 and which get 1.
 */
15 #ifndef NO_EXTERNAL_GREP
17 #define NO_EXTERNAL_GREP 0
19 #define NO_EXTERNAL_GREP 1
/*
 * When non-zero, grep_cache() does not hand the work-tree search over
 * to external_grep().
 * NOTE(review): presumably set by the "--no-ext-grep" option; confirm.
 */
23 static int builtin_grep;
/*
 * Configuration callback passed to git_config() by cmd_grep(); "cb" is
 * the struct grep_opt being filled in.
 *
 * Recognized variables:
 *   color.grep           -> opt->color, via git_config_colorbool()
 *   color.grep.external  -> opt->color_external, via git_config_string()
 *   color.grep.match     -> opt->color_match, via color_parse();
 *                           config_error_nonbool() is the error path
 *                           (presumably for a missing value; confirm)
 *
 * Any other variable is forwarded to git_color_default_config() and
 * that function's result is returned.
 */
25 static int grep_config(const char *var, const char *value, void *cb)
27 struct grep_opt *opt = cb;
29 if (!strcmp(var, "color.grep")) {
30 opt->color = git_config_colorbool(var, value, -1);
33 if (!strcmp(var, "color.grep.external"))
34 return git_config_string(&(opt->color_external), var, value);
35 if (!strcmp(var, "color.grep.match")) {
37 return config_error_nonbool(var);
38 color_parse(value, var, opt->color_match);
41 return git_color_default_config(var, value, cb);
45 * git grep pathspecs are somewhat different from diff-tree pathspecs;
46 * pathname wildcards are allowed.
/*
 * Test the pathname "name" against the NULL-terminated pathspec list
 * "paths".  A NULL or empty list is handled up front (presumably every
 * name matches in that case; confirm).
 *
 * For each pathspec the following tests are applied:
 *   - leading-path match: the pathspec is a prefix of name and either
 *     ends in '/' or is followed in name by '\0' or '/';
 *   - wildcard match: fnmatch(match, name, 0) succeeds;
 *   - only when name ends in '/', i.e. a directory is being considered
 *     for descent: a memcmp() limited to the part of the pathspec that
 *     precedes its first '*', '[' or '?' (the whole pathspec when it
 *     has no such character).
 *
 * Callers use the result as a boolean; grep_tree() neither greps nor
 * descends into an entry for which this returns zero.
 */
48 static int pathspec_matches(const char **paths, const char *name)
51 if (!paths || !*paths)
53 namelen = strlen(name);
54 for (i = 0; paths[i]; i++) {
55 const char *match = paths[i];
56 int matchlen = strlen(match);
57 const char *cp, *meta;
60 ((matchlen <= namelen) &&
61 !strncmp(name, match, matchlen) &&
62 (match[matchlen-1] == '/' ||
63 name[matchlen] == '\0' || name[matchlen] == '/')))
65 if (!fnmatch(match, name, 0))
67 if (name[namelen-1] != '/')
70 /* We are being asked if the directory ("name") is worth
73 * Find the longest leading directory name that does
74 * not have metacharacter in the pathspec; the name
75 * we are looking at must overlap with that directory.
77 for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
79 if (ch == '*' || ch == '[' || ch == '?') {
85 meta = cp; /* fully literal */
87 if (namelen <= meta - match) {
88 /* Looking at "Documentation/" and
89 * the pattern says "Documentation/howto/", or
90 * "Documentation/diff*.txt". The name we
91 * have should match prefix.
93 if (!memcmp(match, name, namelen))
98 if (meta - match < namelen) {
99 /* Looking at "Documentation/howto/" and
100 * the pattern says "Documentation/h*";
101 * match up to "Do.../h"; this avoids descending
102 * into "Documentation/technical/".
104 if (!memcmp(match, name, meta - match))
/*
 * Grep the contents of the object identified by "sha1", using "name"
 * as the label passed to grep_buffer().
 *
 * The object is loaded with read_sha1_file(); failure to read it is
 * reported through error().
 *
 * tree_name_len is the number of leading bytes of name that come before
 * the path proper: grep_tree() passes the length of its "<tree_name>:"
 * prefix, while grep_cache() and grep_object() pass 0.
 *
 * When opt->relative is set and opt->prefix_length is non-zero, the
 * prefix_length bytes of directory prefix are dropped from the label,
 * either by advancing name or by rebuilding it as the first
 * tree_name_len bytes followed by the remainder after the prefix.  The
 * rebuilt name lives in a static PATH_MAX buffer, or in xmalloc()ed
 * memory tracked by to_free when that buffer is too small.
 */
112 static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
116 enum object_type type;
117 char *to_free = NULL;
120 data = read_sha1_file(sha1, &type, &size);
122 error("'%s': unable to read %s", name, sha1_to_hex(sha1));
125 if (opt->relative && opt->prefix_length) {
126 static char name_buf[PATH_MAX];
128 int name_len = strlen(name) - opt->prefix_length + 1;
131 name += opt->prefix_length;
133 if (ARRAY_SIZE(name_buf) <= name_len)
134 cp = to_free = xmalloc(name_len);
137 memcpy(cp, name, tree_name_len);
138 strcpy(cp + tree_name_len,
139 name + tree_name_len + opt->prefix_length);
143 hit = grep_buffer(opt, name, data, size);
/*
 * Grep the work-tree file "filename".
 *
 * The file is examined with lstat(); an lstat() failure is reported
 * with error() and strerror(errno).  An empty file yields 0 (no hit),
 * and there is a separate check for anything that is not a regular
 * file.  Otherwise the file is opened read-only and read completely
 * into an xmalloc()ed buffer of st_size + 1 bytes; reading fewer bytes
 * than st_size is reported as a short read.
 *
 * When opt->relative is set and opt->prefix_length is non-zero, the
 * leading prefix_length bytes are skipped in the name handed to
 * grep_buffer().
 */
149 static int grep_file(struct grep_opt *opt, const char *filename)
156 if (lstat(filename, &st) < 0) {
159 error("'%s': %s", filename, strerror(errno));
163 return 0; /* empty file -- no grep hit */
164 if (!S_ISREG(st.st_mode))
166 sz = xsize_t(st.st_size);
167 i = open(filename, O_RDONLY);
170 data = xmalloc(sz + 1);
171 if (st.st_size != read_in_full(i, data, sz)) {
172 error("'%s': short read %s", filename, strerror(errno));
178 if (opt->relative && opt->prefix_length)
179 filename += opt->prefix_length;
180 i = grep_buffer(opt, filename, data, sz);
185 #if !NO_EXTERNAL_GREP
/*
 * Run the external "grep" program with the given argument vector.
 *
 * The program is started with execvp("grep", argv) and the caller side
 * waits for it with waitpid(), retrying while waitpid() fails.  The
 * wait status is then inspected with WIFEXITED()/WEXITSTATUS().
 * NOTE(review): confirm how each exit status maps to the return value;
 * flush_grep() stores the result in its "status" variable.
 */
186 static int exec_grep(int argc, const char **argv)
196 execvp("grep", (char **) argv);
199 while (waitpid(pid, &status, 0) < 0) {
204 if (WIFEXITED(status)) {
205 if (!WEXITSTATUS(status))
/*
 * Append one argument to the argv[] being built for the external grep.
 * Expands in a scope that provides the array "argv" and its fill index
 * "nr"; dies with "maximum number of args exceeded" once nr has reached
 * MAXARGS.
 */
214 #define push_arg(a) do { \
215 if (nr < MAXARGS) argv[nr++] = (a); \
216 else die("maximum number of args exceeded"); \
220 * If you send a singleton filename to grep, it does not give
221 * the name of the file. GNU grep has "-H" but we would want
222 * that behaviour in a portable way.
224 * So we keep two pathnames in argv buffer unsent to grep in
225 * the main loop if we need to do more than one grep.
/*
 * Run the external grep on the argument vector accumulated so far.
 *
 * argv[0..argc-1] holds the command line and arg0 is the index of the
 * first pathname, so argc - arg0 pathnames are pending.  "kept" is
 * non-NULL when called from the main loop of external_grep() and NULL
 * for the concluding call.
 *
 * "/dev/null" may be appended as an extra file argument so that grep
 * prints file names even for a single path.  In the main-loop case the
 * trailing pathnames are held back from this run and moved to
 * argv[arg0] onwards after exec_grep() returns, ready for the next
 * batch.
 */
227 static int flush_grep(struct grep_opt *opt,
228 int argc, int arg0, const char **argv, int *kept)
231 int count = argc - arg0;
232 const char *kept_0 = NULL;
236 * Because we keep at least 2 paths in the call from
237 * the main loop (i.e. kept != NULL), and MAXARGS is
238 * far greater than 2, this usually is a call to
239 * conclude the grep. However, the user could attempt
240 * to overflow the argv buffer by giving too many
241 * options to leave very small number of real
242 * arguments even for the call in the main loop.
245 die("insanely many options to grep");
248 * If we have two or more paths, we do not have to do
249 * anything special, but we need to push /dev/null to
250 * get "-H" behaviour of GNU grep portably but when we
251 * are not doing "-l" nor "-L" nor "-c".
255 !opt->unmatch_name_only &&
257 argv[argc++] = "/dev/null";
264 * Called because we found many paths and haven't finished
265 * iterating over the cache yet. We keep two paths
266 * for the concluding call. argv[argc-2] and argv[argc-1]
267 * has the last two paths, so save the first one away,
268 * replace it with NULL while sending the list to grep,
269 * and recover them after we are done.
272 kept_0 = argv[argc-2];
277 status = exec_grep(argc, argv);
281 * Then recover them. Now the last arg is beyond the
282 * terminating NULL which is at argc, and the second
283 * from the last is what we saved away in kept_0
285 argv[arg0++] = kept_0;
286 argv[arg0] = argv[argc+1];
/*
 * Append a color escape sequence to "sb" in a rewritten form: every
 * 'm' is replaced by ';', the ESC ('\033') and '[' characters are
 * dropped, and all other characters are copied unchanged.  If anything
 * was appended and the result ends in ';', that trailing ';' is
 * removed.
 *
 * external_grep() uses the result as the value placed in the GREP_COLOR
 * and GREP_COLORS environment variables.
 */
291 static void grep_add_color(struct strbuf *sb, const char *escape_seq)
293 size_t orig_len = sb->len;
295 while (*escape_seq) {
296 if (*escape_seq == 'm')
297 strbuf_addch(sb, ';');
298 else if (*escape_seq != '\033' && *escape_seq != '[')
299 strbuf_addch(sb, *escape_seq);
302 if (sb->len > orig_len && sb->buf[sb->len - 1] == ';')
303 strbuf_setlen(sb, sb->len - 1);
/*
 * Search work-tree files by running the external grep program.
 *
 * A check on opt->extended, and on opt->relative combined with a
 * non-zero opt->prefix_length, comes first (presumably to decline cases
 * the external grep cannot handle; confirm).
 *
 * The grep command line is built in argv[] from the fields of opt:
 * regflags, binary handling, word_regexp, unmatch_name_only,
 * null_following_name and the context lengths.  Context numbers are
 * formatted into the randarg[] scratch buffer with snprintf(), and
 * running out of room there is fatal.  Every pattern in
 * opt->pattern_list is pushed as an argument.
 *
 * For coloring, GREP_COLOR and GREP_COLORS are set in the environment
 * from opt->color_match, and a non-empty opt->color_external is pushed
 * as an extra argument.
 *
 * Pathnames come from the index (active_cache): entries that are not
 * regular files or do not satisfy pathspec_matches() are filtered out,
 * and a name starting with '-' is rewritten with a leading "./".  When
 * argv[] reaches MAXARGS it is flushed through flush_grep() with
 * kept != NULL; one final flush_grep() with kept == NULL concludes the
 * run.  Consecutive index entries with the same name are stepped over
 * together.
 */
306 static int external_grep(struct grep_opt *opt, const char **paths, int cached)
308 int i, nr, argc, hit, len, status;
309 const char *argv[MAXARGS+1];
310 char randarg[ARGBUF];
311 char *argptr = randarg;
314 if (opt->extended || (opt->relative && opt->prefix_length))
324 if (opt->regflags & REG_EXTENDED)
326 if (opt->regflags & REG_ICASE)
328 if (opt->binary == GREP_BINARY_NOMATCH)
330 if (opt->word_regexp)
334 if (opt->unmatch_name_only)
336 if (opt->null_following_name)
337 /* in GNU grep git's "-z" translates to "-Z" */
341 if (opt->post_context || opt->pre_context) {
342 if (opt->post_context != opt->pre_context) {
343 if (opt->pre_context) {
345 len += snprintf(argptr, sizeof(randarg)-len,
346 "%u", opt->pre_context) + 1;
347 if (sizeof(randarg) <= len)
348 die("maximum length of args exceeded");
352 if (opt->post_context) {
354 len += snprintf(argptr, sizeof(randarg)-len,
355 "%u", opt->post_context) + 1;
356 if (sizeof(randarg) <= len)
357 die("maximum length of args exceeded");
364 len += snprintf(argptr, sizeof(randarg)-len,
365 "%u", opt->post_context) + 1;
366 if (sizeof(randarg) <= len)
367 die("maximum length of args exceeded");
372 for (p = opt->pattern_list; p; p = p->next) {
374 push_arg(p->pattern);
377 struct strbuf sb = STRBUF_INIT;
379 grep_add_color(&sb, opt->color_match);
380 setenv("GREP_COLOR", sb.buf, 1);
383 strbuf_addstr(&sb, "mt=");
384 grep_add_color(&sb, opt->color_match);
385 strbuf_addstr(&sb, ":sl=:cx=:fn=:ln=:bn=:se=");
386 setenv("GREP_COLORS", sb.buf, 1);
390 if (opt->color_external && strlen(opt->color_external) > 0)
391 push_arg(opt->color_external);
396 for (i = 0; i < active_nr; i++) {
397 struct cache_entry *ce = active_cache[i];
400 if (!S_ISREG(ce->ce_mode))
402 if (!pathspec_matches(paths, ce->name))
405 if (name[0] == '-') {
406 int len = ce_namelen(ce);
407 name = xmalloc(len + 3);
408 memcpy(name, "./", 2);
409 memcpy(name + 2, ce->name, len + 1);
412 if (MAXARGS <= argc) {
413 status = flush_grep(opt, argc, nr, argv, &kept);
421 } while (i < active_nr &&
422 !strcmp(ce->name, active_cache[i]->name));
423 i--; /* compensate for loop control */
427 status = flush_grep(opt, argc, nr, argv, NULL);
/*
 * Grep the paths recorded in the index (active_cache) that satisfy
 * pathspec_matches().
 *
 * When external grep support is compiled in, "cached" is zero and
 * builtin_grep is zero, the search is first handed to external_grep().
 *
 * In the built-in loop only regular-file entries are considered.  An
 * entry is searched from its blob with grep_sha1() when "cached" is set
 * or the entry has CE_VALID; otherwise the work-tree file is searched
 * with grep_file().  Results are OR-ed into "hit".  Consecutive entries
 * with the same name are stepped over together.
 *
 * The patterns in opt are released with free_grep_patterns() at the
 * end.
 */
435 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
441 #if !NO_EXTERNAL_GREP
443 * Use the external "grep" command for the case where
444 * we grep through the checked-out files. It tends to
445 * be a lot more optimized
447 if (!cached && !builtin_grep) {
448 hit = external_grep(opt, paths, cached);
454 for (nr = 0; nr < active_nr; nr++) {
455 struct cache_entry *ce = active_cache[nr];
456 if (!S_ISREG(ce->ce_mode))
458 if (!pathspec_matches(paths, ce->name))
461 * If CE_VALID is on, we assume worktree file and its cache entry
462 * are identical, even if worktree file has been modified, so use
463 * cache version instead
465 if (cached || (ce->ce_flags & CE_VALID)) {
468 hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
471 hit |= grep_file(opt, ce->name);
475 } while (nr < active_nr &&
476 !strcmp(ce->name, active_cache[nr]->name));
477 nr--; /* compensate for loop control */
480 free_grep_patterns(opt);
/*
 * Grep the entries of "tree", recursing into subtrees.
 *
 * "tree_name" is the label of the top-level tree and "base" is the path
 * of this tree inside it.  A path buffer is assembled from tree_name,
 * a ':' separator and base; tn_len is then the length of the
 * "<tree_name>:" part, so pathbuf.buf + tn_len ("down") is the path
 * without the tree label.
 *
 * For each entry its name is appended to the buffer, with a trailing
 * '/' for directories so that pathspec_matches() can decide whether the
 * directory is worth descending into.  If the pathspec accepts "down":
 *   - a regular file is searched with grep_sha1(), passing tn_len so
 *     the label part of the name can be told apart from the path;
 *   - a directory is read with read_sha1_file() (dying if that fails)
 *     and searched by a recursive call with "down" as the new base.
 *
 * Results are OR-ed into "hit" and the path buffer is released before
 * returning.
 */
484 static int grep_tree(struct grep_opt *opt, const char **paths,
485 struct tree_desc *tree,
486 const char *tree_name, const char *base)
490 struct name_entry entry;
492 int tn_len = strlen(tree_name);
493 struct strbuf pathbuf;
495 strbuf_init(&pathbuf, PATH_MAX + tn_len);
498 strbuf_add(&pathbuf, tree_name, tn_len);
499 strbuf_addch(&pathbuf, ':');
500 tn_len = pathbuf.len;
502 strbuf_addstr(&pathbuf, base);
505 while (tree_entry(tree, &entry)) {
506 int te_len = tree_entry_len(entry.path, entry.sha1);
508 strbuf_add(&pathbuf, entry.path, te_len);
510 if (S_ISDIR(entry.mode))
511 /* Match "abc/" against pathspec to
512 * decide if we want to descend into "abc"
515 strbuf_addch(&pathbuf, '/');
517 down = pathbuf.buf + tn_len;
518 if (!pathspec_matches(paths, down))
520 else if (S_ISREG(entry.mode))
521 hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
522 else if (S_ISDIR(entry.mode)) {
523 enum object_type type;
524 struct tree_desc sub;
528 data = read_sha1_file(entry.sha1, &type, &size);
530 die("unable to read tree (%s)",
531 sha1_to_hex(entry.sha1));
532 init_tree_desc(&sub, data, size);
533 hit |= grep_tree(opt, paths, &sub, tree_name, down);
537 strbuf_release(&pathbuf);
/*
 * Grep a single object given on the command line; "name" is the label
 * used for it in the output.
 *
 *   - OBJ_BLOB: searched directly with grep_sha1().
 *   - OBJ_COMMIT or OBJ_TREE: resolved to tree data with
 *     read_object_with_reference(), dying with "unable to read tree"
 *     on failure, then searched with grep_tree() using an empty base
 *     path.
 *   - any other type: die() with "unable to grep from object of
 *     type <type>".
 */
541 static int grep_object(struct grep_opt *opt, const char **paths,
542 struct object *obj, const char *name)
544 if (obj->type == OBJ_BLOB)
545 return grep_sha1(opt, obj->sha1, name, 0);
546 if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
547 struct tree_desc tree;
551 data = read_object_with_reference(obj->sha1, tree_type,
554 die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
555 init_tree_desc(&tree, data, size);
556 hit = grep_tree(opt, paths, &tree, name, "");
560 die("unable to grep from object of type %s", typename(obj->type));
/*
 * Messages used by cmd_grep(): the usage line passed to usage(), and
 * the format strings passed to die() for a malformed or missing
 * context length and for an option that lacks its argument.
 */
563 static const char builtin_grep_usage[] =
564 "git grep <option>* [-e] <pattern> <rev>* [[--] <path>...]";
566 static const char emsg_invalid_context_len[] =
567 "%s: invalid context length argument";
568 static const char emsg_missing_context_len[] =
569 "missing context length argument";
570 static const char emsg_missing_argument[] =
571 "option requires an argument -%s";
/*
 * Entry point of "git grep".
 *
 * Setup: a zeroed struct grep_opt is initialized with the length of
 * "prefix" (0 when prefix is NULL or empty), regflags = REG_NEWLINE and
 * a default match color of GIT_COLOR_RED GIT_COLOR_BOLD.  Configuration
 * is then read through grep_config().
 *
 * Command line: each argument is compared against the known options.
 * Flag options update fields of opt; -A/-B/-C and -<digit> set the
 * context lengths after the number is validated with strtoul_ui();
 * "-f" reads patterns from the named file line by line, stripping the
 * trailing newline; "-e", "--not", "--and", "(" and ")" append to the
 * pattern list with append_grep_pattern(), and "--or" is a no-op.  The
 * first unrecognized non-option is taken as the pattern when none has
 * been given yet.  The command dies with "no pattern given." when the
 * list is still empty, and with "cannot mix --fixed-strings and regexp"
 * when opt.fixed is set and regflags differs from REG_NEWLINE.
 *
 * After compile_grep_patterns(), arguments that get_sha1() resolves are
 * parsed and collected in "list" as objects to search.  The remaining
 * arguments become the pathspec via get_pathspec(); when no "--" was
 * seen each of them is checked with verify_filename().  With relative
 * output and a non-empty prefix, every pathspec must start with the
 * prefix, otherwise the command dies.
 *
 * Searching: one path returns !grep_cache(...), i.e. 0 when that search
 * had a hit.  Objects in "list" are peeled with deref_tag() and
 * searched with grep_object().  "both --cached and trees are given." is
 * fatal (presumably when --cached is combined with objects; confirm).
 */
573 int cmd_grep(int argc, const char **argv, const char *prefix)
577 int seen_dashdash = 0;
579 struct object_array list = { 0, 0, NULL };
580 const char **paths = NULL;
583 memset(&opt, 0, sizeof(opt));
584 opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
587 opt.pattern_tail = &opt.pattern_list;
588 opt.regflags = REG_NEWLINE;
590 strcpy(opt.color_match, GIT_COLOR_RED GIT_COLOR_BOLD);
592 git_config(grep_config, &opt);
594 opt.color = git_use_color_default;
597 * If there is no -- then the paths must exist in the working
598 * tree. If there is no explicit pattern specified with -e or
599 * -f, we take the first unrecognized non option to be the
600 * pattern, but then what follows it must be zero or more
601 * valid refs up to the -- (if exists), and then existing
602 * paths. If there is an explicit pattern, then the first
603 * unrecognized non option is the beginning of the refs list
604 * that continues up to the -- (if exists), and then paths.
608 const char *arg = argv[1];
610 if (!strcmp("--cached", arg)) {
614 if (!strcmp("--no-ext-grep", arg)) {
618 if (!strcmp("-a", arg) ||
619 !strcmp("--text", arg)) {
620 opt.binary = GREP_BINARY_TEXT;
623 if (!strcmp("-i", arg) ||
624 !strcmp("--ignore-case", arg)) {
625 opt.regflags |= REG_ICASE;
628 if (!strcmp("-I", arg)) {
629 opt.binary = GREP_BINARY_NOMATCH;
632 if (!strcmp("-v", arg) ||
633 !strcmp("--invert-match", arg)) {
637 if (!strcmp("-E", arg) ||
638 !strcmp("--extended-regexp", arg)) {
639 opt.regflags |= REG_EXTENDED;
642 if (!strcmp("-F", arg) ||
643 !strcmp("--fixed-strings", arg)) {
647 if (!strcmp("-G", arg) ||
648 !strcmp("--basic-regexp", arg)) {
649 opt.regflags &= ~REG_EXTENDED;
652 if (!strcmp("-n", arg)) {
656 if (!strcmp("-h", arg)) {
660 if (!strcmp("-H", arg)) {
664 if (!strcmp("-l", arg) ||
665 !strcmp("--name-only", arg) ||
666 !strcmp("--files-with-matches", arg)) {
670 if (!strcmp("-L", arg) ||
671 !strcmp("--files-without-match", arg)) {
672 opt.unmatch_name_only = 1;
675 if (!strcmp("-z", arg) ||
676 !strcmp("--null", arg)) {
677 opt.null_following_name = 1;
680 if (!strcmp("-c", arg) ||
681 !strcmp("--count", arg)) {
685 if (!strcmp("-w", arg) ||
686 !strcmp("--word-regexp", arg)) {
690 if (!prefixcmp(arg, "-A") ||
691 !prefixcmp(arg, "-B") ||
692 !prefixcmp(arg, "-C") ||
693 (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
697 case 'A': case 'B': case 'C':
700 die(emsg_missing_context_len);
711 if (strtoul_ui(scan, 10, &num))
712 die(emsg_invalid_context_len, scan);
715 opt.post_context = num;
719 opt.post_context = num;
721 opt.pre_context = num;
726 if (!strcmp("-f", arg)) {
731 die(emsg_missing_argument, arg);
732 patterns = fopen(argv[1], "r");
734 die("'%s': %s", argv[1], strerror(errno));
735 while (fgets(buf, sizeof(buf), patterns)) {
736 int len = strlen(buf);
737 if (len && buf[len-1] == '\n')
739 /* ignore empty line like grep does */
742 append_grep_pattern(&opt, xstrdup(buf),
751 if (!strcmp("--not", arg)) {
752 append_grep_pattern(&opt, arg, "command line", 0,
756 if (!strcmp("--and", arg)) {
757 append_grep_pattern(&opt, arg, "command line", 0,
761 if (!strcmp("--or", arg))
762 continue; /* no-op */
763 if (!strcmp("(", arg)) {
764 append_grep_pattern(&opt, arg, "command line", 0,
768 if (!strcmp(")", arg)) {
769 append_grep_pattern(&opt, arg, "command line", 0,
773 if (!strcmp("--all-match", arg)) {
777 if (!strcmp("-e", arg)) {
779 append_grep_pattern(&opt, argv[1],
786 die(emsg_missing_argument, arg);
788 if (!strcmp("--full-name", arg)) {
792 if (!strcmp("--color", arg)) {
796 if (!strcmp("--no-color", arg)) {
800 if (!strcmp("--", arg)) {
801 /* later processing wants to have this at argv[1] */
807 usage(builtin_grep_usage);
809 /* First unrecognized non-option token */
810 if (!opt.pattern_list) {
811 append_grep_pattern(&opt, arg, "command line", 0,
816 /* We are looking at the first path or rev;
817 * it is found at argv[1] after leaving the
825 if (opt.color && !opt.color_external)
827 if (!opt.pattern_list)
828 die("no pattern given.");
829 if ((opt.regflags != REG_NEWLINE) && opt.fixed)
830 die("cannot mix --fixed-strings and regexp");
831 compile_grep_patterns(&opt);
833 /* Check revs and then paths */
834 for (i = 1; i < argc; i++) {
835 const char *arg = argv[i];
836 unsigned char sha1[20];
838 if (!get_sha1(arg, sha1)) {
839 struct object *object = parse_object(sha1);
841 die("bad object %s", arg);
842 add_object_array(object, arg, &list);
845 if (!strcmp(arg, "--")) {
852 /* The rest are paths */
853 if (!seen_dashdash) {
855 for (j = i; j < argc; j++)
856 verify_filename(prefix, argv[j]);
860 paths = get_pathspec(prefix, argv + i);
861 if (opt.prefix_length && opt.relative) {
862 /* Make sure we do not get outside of paths */
863 for (i = 0; paths[i]; i++)
864 if (strncmp(prefix, paths[i], opt.prefix_length))
865 die("git grep: cannot generate relative filenames containing '..'");
869 paths = xcalloc(2, sizeof(const char *));
877 return !grep_cache(&opt, paths, cached);
881 die("both --cached and trees are given.");
883 for (i = 0; i < list.nr; i++) {
884 struct object *real_obj;
885 real_obj = deref_tag(list.objects[i].item, NULL, 0);
886 if (grep_object(&opt, paths, real_obj, list.objects[i].name))
889 free_grep_patterns(&opt);