4 * Copyright (c) 2006 Junio C Hamano
11 #include "tree-walk.h"
// Give NO_EXTERNAL_GREP a value when the build has not defined it.
// NOTE(review): the conditions that choose between 0 and 1 are not visible
// in this excerpt.
15 #ifndef NO_EXTERNAL_GREP
17 #define NO_EXTERNAL_GREP 0
19 #define NO_EXTERNAL_GREP 1
// When nonzero, grep_cache() does not hand the search to external_grep().
23 static int builtin_grep;
// Configuration callback that cmd_grep() hands to git_config(); cb points at
// the struct grep_opt whose colour settings are filled in.
//   grep.color / color.grep             -> opt->color via git_config_colorbool()
//   grep.color.match / color.grep.match -> opt->color_match via color_parse()
// The last statement forwards the key to git_color_default_config() and
// returns its result.
25 static int grep_config(const char *var, const char *value, void *cb)
27 struct grep_opt *opt = cb;
29 if (!strcmp(var, "grep.color") || !strcmp(var, "color.grep")) {
30 opt->color = git_config_colorbool(var, value, -1);
33 if (!strcmp(var, "grep.color.match") ||
34 !strcmp(var, "color.grep.match")) {
// NOTE(review): the guard for this error return is not visible in this
// excerpt; presumably it covers a key given without a value -- confirm.
36 return config_error_nonbool(var);
37 color_parse(value, var, opt->color_match);
40 return git_color_default_config(var, value, cb);
44 * git grep pathspecs are somewhat different from diff-tree pathspecs;
45 * pathname wildcards are allowed.
// Test name against each entry of the NULL-terminated pathspec array paths.
// Three checks are visible for every pathspec: a leading-path comparison,
// an fnmatch() glob match against the whole name, and, for a name that ends
// in '/', a comparison against the literal leading part of the pathspec.
// NOTE(review): the return statements are not visible in this excerpt.
// grep_tree() only greps or descends into an entry when the result is nonzero.
47 static int pathspec_matches(const char **paths, const char *name)
// NOTE(review): the result for a missing or empty pathspec list is not
// visible here; presumably everything matches -- confirm.
50 if (!paths || !*paths)
52 namelen = strlen(name);
53 for (i = 0; paths[i]; i++) {
54 const char *match = paths[i];
55 int matchlen = strlen(match);
56 const char *cp, *meta;
// Leading-path match: the pathspec is a prefix of name and that prefix ends
// with '/', or name ends or continues with '/' right after it.
59 ((matchlen <= namelen) &&
60 !strncmp(name, match, matchlen) &&
61 (match[matchlen-1] == '/' ||
62 name[matchlen] == '\0' || name[matchlen] == '/')))
// Shell-glob match of the whole name against the pathspec.
64 if (!fnmatch(match, name, 0))
// The checks that follow concern a name whose last character is '/'.
66 if (name[namelen-1] != '/')
69 /* We are being asked if the directory ("name") is worth
72 * Find the longest leading directory name that does
73 * not have metacharacter in the pathspec; the name
74 * we are looking at must overlap with that directory.
76 for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
78 if (ch == '*' || ch == '[' || ch == '?') {
84 meta = cp; /* fully literal */
86 if (namelen <= meta - match) {
87 /* Looking at "Documentation/" and
88 * the pattern says "Documentation/howto/", or
89 * "Documentation/diff*.txt". The name we
90 * have should match prefix.
92 if (!memcmp(match, name, namelen))
97 if (meta - match < namelen) {
98 /* Looking at "Documentation/howto/" and
99 * the pattern says "Documentation/h*";
100 * match up to "Do.../h"; this avoids descending
101 * into "Documentation/technical/".
103 if (!memcmp(match, name, meta - match))
// Grep the contents of the object identified by sha1 and report it as name.
//   opt           - grep options; relative and prefix_length control how the
//                   reported name is shortened
//   sha1          - object name given to read_sha1_file()
//   name          - label handed to grep_buffer()
//   tree_name_len - number of leading bytes of name that are kept as-is when
//                   the prefix is removed; grep_tree() passes the length of
//                   its tree name part, the other callers pass 0
// A failed read is reported with error().
// NOTE(review): the return statements are not visible in this excerpt;
// hit receives the result of grep_buffer().
111 static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
115 enum object_type type;
116 char *to_free = NULL;
119 data = read_sha1_file(sha1, &type, &size);
121 error("'%s': unable to read %s", name, sha1_to_hex(sha1));
124 if (opt->relative && opt->prefix_length) {
125 static char name_buf[PATH_MAX];
127 int name_len = strlen(name) - opt->prefix_length + 1;
// NOTE(review): the condition that selects between this plain pointer bump
// and the copy below is not visible; presumably it depends on tree_name_len.
130 name += opt->prefix_length;
// Fall back to a heap buffer when the static name_buf is too small.
132 if (ARRAY_SIZE(name_buf) <= name_len)
133 cp = to_free = xmalloc(name_len);
// Keep the first tree_name_len bytes, then append what follows the prefix.
136 memcpy(cp, name, tree_name_len);
137 strcpy(cp + tree_name_len,
138 name + tree_name_len + opt->prefix_length);
142 hit = grep_buffer(opt, name, data, size);
// Grep a file from the working tree: lstat() it, read st_size bytes into a
// freshly allocated buffer with read_in_full(), then run grep_buffer().
//   opt      - grep options; relative and prefix_length shorten the name
//              that is reported
//   filename - path handed to lstat() and open()
// lstat() failures and short reads are reported with error(); an empty file
// yields 0 (no hit).
// NOTE(review): the remaining return statements and the cleanup of the
// descriptor and buffer are not visible in this excerpt.
148 static int grep_file(struct grep_opt *opt, const char *filename)
155 if (lstat(filename, &st) < 0) {
158 error("'%s': %s", filename, strerror(errno));
162 return 0; /* empty file -- no grep hit */
163 if (!S_ISREG(st.st_mode))
165 sz = xsize_t(st.st_size);
166 i = open(filename, O_RDONLY);
// One byte more than the file size is allocated.
// NOTE(review): presumably room for a terminating NUL -- the store is not
// visible here.
169 data = xmalloc(sz + 1);
170 if (st.st_size != read_in_full(i, data, sz)) {
171 error("'%s': short read %s", filename, strerror(errno));
// Report the name without its leading opt->prefix_length bytes when relative
// output is requested.
177 if (opt->relative && opt->prefix_length)
178 filename += opt->prefix_length;
179 i = grep_buffer(opt, filename, data, sz);
// The code from here on is compiled only when NO_EXTERNAL_GREP is 0.
184 #if !NO_EXTERNAL_GREP
// Run the program "grep" (looked up by execvp) with argv and wait for the
// process identified by pid with waitpid(), then inspect its exit status.
// NOTE(review): the code that creates pid, the use of argc and the return
// statements are not visible in this excerpt.
185 static int exec_grep(int argc, const char **argv)
195 execvp("grep", (char **) argv);
198 while (waitpid(pid, &status, 0) < 0) {
203 if (WIFEXITED(status)) {
204 if (!WEXITSTATUS(status))
// Append a to argv[] at index nr and advance nr; once nr has reached MAXARGS
// the macro dies instead. It relies on nr and argv being in scope at the
// place where it is expanded.
213 #define push_arg(a) do { \
214 if (nr < MAXARGS) argv[nr++] = (a); \
215 else die("maximum number of args exceeded"); \
219 * If you send a singleton filename to grep, it does not give
220 * the name of the file. GNU grep has "-H" but we would want
221 * that behaviour in a portable way.
223 * So we keep two pathnames in argv buffer unsent to grep in
224 * the main loop if we need to do more than one grep.
// Run the external grep on the arguments collected so far.
//   opt  - grep options, consulted to decide whether /dev/null is appended
//   argc - number of argv slots in use
//   arg0 - index in argv where the paths start; argc - arg0 paths are pending
//   argv - argument vector passed on to exec_grep()
//   kept - non-NULL when called from the main loop of external_grep() with
//          more paths still to come; the last two paths are then held back
//          and moved to argv[arg0] and argv[arg0 + 1] after the run.
//          NULL for the concluding call.
// NOTE(review): the return statement is not visible in this excerpt; status
// receives the result of exec_grep().
226 static int flush_grep(struct grep_opt *opt,
227 int argc, int arg0, const char **argv, int *kept)
230 int count = argc - arg0;
231 const char *kept_0 = NULL;
235 * Because we keep at least 2 paths in the call from
236 * the main loop (i.e. kept != NULL), and MAXARGS is
237 * far greater than 2, this usually is a call to
238 * conclude the grep. However, the user could attempt
239 * to overflow the argv buffer by giving too many
240 * options to leave very small number of real
241 * arguments even for the call in the main loop.
244 die("insanely many options to grep");
247 * If we have two or more paths, we do not have to do
248 * anything special, but we need to push /dev/null to
249 * get "-H" behaviour of GNU grep portably but when we
250 * are not doing "-l" nor "-L" nor "-c".
254 !opt->unmatch_name_only &&
256 argv[argc++] = "/dev/null";
263 * Called because we found many paths and haven't finished
264 * iterating over the cache yet. We keep two paths
265 * for the concluding call. argv[argc-2] and argv[argc-1]
266 * has the last two paths, so save the first one away,
267 * replace it with NULL while sending the list to grep,
268 * and recover them after we are done.
271 kept_0 = argv[argc-2];
276 status = exec_grep(argc, argv);
280 * Then recover them. Now the last arg is beyond the
281 * terminating NULL which is at argc, and the second
282 * from the last is what we saved away in kept_0
284 argv[arg0++] = kept_0;
285 argv[arg0] = argv[argc+1];
// Search working-tree files by running the external grep program.
// The visible code translates the settings in opt into command line
// arguments, pushes every pattern from opt->pattern_list, then walks
// active_cache and collects the names of regular-file entries that pass
// pathspec_matches(). The collected arguments are handed to flush_grep()
// when argc reaches MAXARGS and once more after the loop.
//   opt    - grep options to translate
//   paths  - pathspec array used to filter index entries
//   cached - not referenced in the visible lines
// NOTE(review): most of the pushed argument strings and all return
// statements are not visible in this excerpt.
290 static int external_grep(struct grep_opt *opt, const char **paths, int cached)
292 int i, nr, argc, hit, len, status;
293 const char *argv[MAXARGS+1];
294 char randarg[ARGBUF];
295 char *argptr = randarg;
// NOTE(review): the action taken for this combination is not visible;
// presumably the external grep is declined for it -- confirm.
298 if (opt->extended || (opt->relative && opt->prefix_length))
308 if (opt->regflags & REG_EXTENDED)
310 if (opt->regflags & REG_ICASE)
312 if (opt->binary == GREP_BINARY_NOMATCH)
314 if (opt->word_regexp)
318 if (opt->unmatch_name_only)
320 if (opt->null_following_name)
321 /* in GNU grep git's "-z" translates to "-Z" */
// Context lengths are formatted as decimal text into randarg; every snprintf
// is followed by a check that the accumulated length still fits.
325 if (opt->post_context || opt->pre_context) {
326 if (opt->post_context != opt->pre_context) {
327 if (opt->pre_context) {
329 len += snprintf(argptr, sizeof(randarg)-len,
330 "%u", opt->pre_context) + 1;
331 if (sizeof(randarg) <= len)
332 die("maximum length of args exceeded");
336 if (opt->post_context) {
338 len += snprintf(argptr, sizeof(randarg)-len,
339 "%u", opt->post_context) + 1;
340 if (sizeof(randarg) <= len)
341 die("maximum length of args exceeded");
348 len += snprintf(argptr, sizeof(randarg)-len,
349 "%u", opt->post_context) + 1;
350 if (sizeof(randarg) <= len)
351 die("maximum length of args exceeded");
356 for (p = opt->pattern_list; p; p = p->next) {
358 push_arg(p->pattern);
363 for (i = 0; i < active_nr; i++) {
364 struct cache_entry *ce = active_cache[i];
367 if (!S_ISREG(ce->ce_mode))
369 if (!pathspec_matches(paths, ce->name))
// A name that starts with '-' is copied into a new buffer behind a "./"
// prefix; presumably so that grep does not read it as an option.
372 if (name[0] == '-') {
373 int len = ce_namelen(ce);
374 name = xmalloc(len + 3);
375 memcpy(name, "./", 2);
376 memcpy(name + 2, ce->name, len + 1);
379 if (MAXARGS <= argc) {
380 status = flush_grep(opt, argc, nr, argv, &kept);
// The loop keeps going while the entry at index i has the same name as ce.
388 } while (i < active_nr &&
389 !strcmp(ce->name, active_cache[i]->name));
390 i--; /* compensate for loop control */
394 status = flush_grep(opt, argc, nr, argv, NULL);
// Grep the regular-file index entries that pass pathspec_matches().
//   opt    - grep options; its patterns are released with
//            free_grep_patterns() at the end
//   paths  - pathspec array used to filter index entries
//   cached - nonzero: grep the blob recorded in the index (grep_sha1);
//            zero: grep the working-tree file (grep_file), except for
//            entries flagged CE_VALID, which also use the index blob
// When external grep support is compiled in and neither cached nor
// builtin_grep is set, external_grep() is tried first.
// NOTE(review): the return statements are not visible in this excerpt;
// cmd_grep() returns the logical negation of this function's result.
402 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
408 #if !NO_EXTERNAL_GREP
410 * Use the external "grep" command for the case where
411 * we grep through the checked-out files. It tends to
412 * be a lot more optimized
414 if (!cached && !builtin_grep) {
415 hit = external_grep(opt, paths, cached);
421 for (nr = 0; nr < active_nr; nr++) {
422 struct cache_entry *ce = active_cache[nr];
423 if (!S_ISREG(ce->ce_mode))
425 if (!pathspec_matches(paths, ce->name))
428 * If CE_VALID is on, we assume worktree file and its cache entry
429 * are identical, even if worktree file has been modified, so use
430 * cache version instead
432 if (cached || (ce->ce_flags & CE_VALID)) {
435 hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
438 hit |= grep_file(opt, ce->name);
// The loop keeps going while the entry at index nr has the same name as ce.
442 } while (nr < active_nr &&
443 !strcmp(ce->name, active_cache[nr]->name));
444 nr--; /* compensate for loop control */
447 free_grep_patterns(opt);
// Walk the entries of tree, grep the regular files that pass
// pathspec_matches() and recurse into the directories that pass it.
//   opt       - grep options
//   paths     - pathspec array; entries are tested by their path without
//               the tree name part
//   tree      - tree descriptor iterated with tree_entry()
//   tree_name - label placed in front of the path, followed by ':'
//   base      - path of this tree; the recursive call passes the path of
//               the subdirectory being entered
// Results of grep_sha1() and of the recursive calls are OR-ed into hit.
// NOTE(review): the return statement is not visible in this excerpt.
451 static int grep_tree(struct grep_opt *opt, const char **paths,
452 struct tree_desc *tree,
453 const char *tree_name, const char *base)
457 struct name_entry entry;
459 int tn_len = strlen(tree_name);
460 struct strbuf pathbuf;
462 strbuf_init(&pathbuf, PATH_MAX + tn_len);
// After tree_name and ':' are appended, tn_len is set to the buffer length,
// which is the offset where the path part starts.
// NOTE(review): a guard around these three statements may exist but is not
// visible here.
465 strbuf_add(&pathbuf, tree_name, tn_len);
466 strbuf_addch(&pathbuf, ':');
467 tn_len = pathbuf.len;
469 strbuf_addstr(&pathbuf, base);
472 while (tree_entry(tree, &entry)) {
473 int te_len = tree_entry_len(entry.path, entry.sha1);
475 strbuf_add(&pathbuf, entry.path, te_len);
477 if (S_ISDIR(entry.mode))
478 /* Match "abc/" against pathspec to
479 * decide if we want to descend into "abc"
482 strbuf_addch(&pathbuf, '/');
484 down = pathbuf.buf + tn_len;
485 if (!pathspec_matches(paths, down))
487 else if (S_ISREG(entry.mode))
488 hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
489 else if (S_ISDIR(entry.mode)) {
490 enum object_type type;
491 struct tree_desc sub;
495 data = read_sha1_file(entry.sha1, &type, &size);
497 die("unable to read tree (%s)",
498 sha1_to_hex(entry.sha1));
499 init_tree_desc(&sub, data, size);
500 hit |= grep_tree(opt, paths, &sub, tree_name, down);
504 strbuf_release(&pathbuf);
// Grep a single object according to its type.
//   OBJ_BLOB              - returns grep_sha1() on the blob, labelled name
//   OBJ_COMMIT / OBJ_TREE - reads the tree through
//                           read_object_with_reference() and runs
//                           grep_tree() with name as the tree name and an
//                           empty base
//   any other type        - dies
//   paths - pathspec array passed on to grep_tree()
// NOTE(review): the return for the commit and tree case is not visible in
// this excerpt; hit receives the result of grep_tree().
508 static int grep_object(struct grep_opt *opt, const char **paths,
509 struct object *obj, const char *name)
511 if (obj->type == OBJ_BLOB)
512 return grep_sha1(opt, obj->sha1, name, 0);
513 if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
514 struct tree_desc tree;
518 data = read_object_with_reference(obj->sha1, tree_type,
521 die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
522 init_tree_desc(&tree, data, size);
523 hit = grep_tree(opt, paths, &tree, name, "");
527 die("unable to grep from object of type %s", typename(obj->type));
// Usage line printed by cmd_grep() through usage().
530 static const char builtin_grep_usage[] =
531 "git grep <option>* [-e] <pattern> <rev>* [[--] <path>...]";
// Message formats for the die() calls in cmd_grep(). The first and the last
// one take a single string argument.
533 static const char emsg_invalid_context_len[] =
534 "%s: invalid context length argument";
535 static const char emsg_missing_context_len[] =
536 "missing context length argument";
537 static const char emsg_missing_argument[] =
538 "option requires an argument -%s";
// Entry point of the grep command.
// Visible steps: zero and seed a struct grep_opt (prefix length, pattern
// list tail, REG_NEWLINE, default match colour), load configuration through
// grep_config(), parse the command line, compile the patterns, resolve the
// leading arguments as revisions, turn the rest into pathspecs, then search
// with grep_cache() or with grep_object() for each collected object.
//   argc, argv - command line
//   prefix     - may be NULL or empty; its length is stored in
//                opt.prefix_length
// The grep_cache() path returns the logical negation of its result.
// NOTE(review): the return for the revision path is not visible in this
// excerpt.
540 int cmd_grep(int argc, const char **argv, const char *prefix)
544 int seen_dashdash = 0;
546 struct object_array list = { 0, 0, NULL };
547 const char **paths = NULL;
550 memset(&opt, 0, sizeof(opt));
551 opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
554 opt.pattern_tail = &opt.pattern_list;
555 opt.regflags = REG_NEWLINE;
557 strcpy(opt.color_match, GIT_COLOR_RED GIT_COLOR_BOLD);
559 git_config(grep_config, &opt);
// NOTE(review): the condition guarding this fallback to
// git_use_color_default is not visible in this excerpt.
561 opt.color = git_use_color_default;
564 * If there is no -- then the paths must exist in the working
565 * tree. If there is no explicit pattern specified with -e or
566 * -f, we take the first unrecognized non option to be the
567 * pattern, but then what follows it must be zero or more
568 * valid refs up to the -- (if exists), and then existing
569 * paths. If there is an explicit pattern, then the first
570 * unrecognized non option is the beginning of the refs list
571 * that continues up to the -- (if exists), and then paths.
// Option parsing: the argument under inspection is always read from argv[1].
// NOTE(review): the loop header and the code that advances argv are not
// visible in this excerpt.
575 const char *arg = argv[1];
577 if (!strcmp("--cached", arg)) {
581 if (!strcmp("--no-ext-grep", arg)) {
585 if (!strcmp("-a", arg) ||
586 !strcmp("--text", arg)) {
587 opt.binary = GREP_BINARY_TEXT;
590 if (!strcmp("-i", arg) ||
591 !strcmp("--ignore-case", arg)) {
592 opt.regflags |= REG_ICASE;
595 if (!strcmp("-I", arg)) {
596 opt.binary = GREP_BINARY_NOMATCH;
599 if (!strcmp("-v", arg) ||
600 !strcmp("--invert-match", arg)) {
604 if (!strcmp("-E", arg) ||
605 !strcmp("--extended-regexp", arg)) {
606 opt.regflags |= REG_EXTENDED;
609 if (!strcmp("-F", arg) ||
610 !strcmp("--fixed-strings", arg)) {
614 if (!strcmp("-G", arg) ||
615 !strcmp("--basic-regexp", arg)) {
616 opt.regflags &= ~REG_EXTENDED;
619 if (!strcmp("-n", arg)) {
623 if (!strcmp("-h", arg)) {
627 if (!strcmp("-H", arg)) {
631 if (!strcmp("-l", arg) ||
632 !strcmp("--name-only", arg) ||
633 !strcmp("--files-with-matches", arg)) {
637 if (!strcmp("-L", arg) ||
638 !strcmp("--files-without-match", arg)) {
639 opt.unmatch_name_only = 1;
642 if (!strcmp("-z", arg) ||
643 !strcmp("--null", arg)) {
644 opt.null_following_name = 1;
647 if (!strcmp("-c", arg) ||
648 !strcmp("--count", arg)) {
652 if (!strcmp("-w", arg) ||
653 !strcmp("--word-regexp", arg)) {
// Context options: -A, -B, -C and a dash followed by a digit 1-9. The number
// is parsed with strtoul_ui() and stored in opt.post_context and/or
// opt.pre_context.
657 if (!prefixcmp(arg, "-A") ||
658 !prefixcmp(arg, "-B") ||
659 !prefixcmp(arg, "-C") ||
660 (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
664 case 'A': case 'B': case 'C':
667 die(emsg_missing_context_len);
678 if (strtoul_ui(scan, 10, &num))
679 die(emsg_invalid_context_len, scan);
682 opt.post_context = num;
686 opt.post_context = num;
688 opt.pre_context = num;
// -f: open the file named by argv[1] and read patterns from it line by line
// with fgets(); a copy of each line is added with append_grep_pattern().
693 if (!strcmp("-f", arg)) {
698 die(emsg_missing_argument, arg);
699 patterns = fopen(argv[1], "r");
701 die("'%s': %s", argv[1], strerror(errno));
702 while (fgets(buf, sizeof(buf), patterns)) {
703 int len = strlen(buf);
704 if (len && buf[len-1] == '\n')
706 /* ignore empty line like grep does */
709 append_grep_pattern(&opt, xstrdup(buf),
// --not, --and, ( and ) are themselves passed to append_grep_pattern();
// --or is accepted and skipped.
718 if (!strcmp("--not", arg)) {
719 append_grep_pattern(&opt, arg, "command line", 0,
723 if (!strcmp("--and", arg)) {
724 append_grep_pattern(&opt, arg, "command line", 0,
728 if (!strcmp("--or", arg))
729 continue; /* no-op */
730 if (!strcmp("(", arg)) {
731 append_grep_pattern(&opt, arg, "command line", 0,
735 if (!strcmp(")", arg)) {
736 append_grep_pattern(&opt, arg, "command line", 0,
740 if (!strcmp("--all-match", arg)) {
744 if (!strcmp("-e", arg)) {
746 append_grep_pattern(&opt, argv[1],
753 die(emsg_missing_argument, arg);
755 if (!strcmp("--full-name", arg)) {
759 if (!strcmp("--color", arg)) {
763 if (!strcmp("--no-color", arg)) {
767 if (!strcmp("--", arg)) {
768 /* later processing wants to have this at argv[1] */
774 usage(builtin_grep_usage);
776 /* First unrecognized non-option token */
777 if (!opt.pattern_list) {
778 append_grep_pattern(&opt, arg, "command line", 0,
783 /* We are looking at the first path or rev;
784 * it is found at argv[1] after leaving the
792 if (!opt.pattern_list)
793 die("no pattern given.");
794 if ((opt.regflags != REG_NEWLINE) && opt.fixed)
795 die("cannot mix --fixed-strings and regexp");
796 compile_grep_patterns(&opt);
798 /* Check revs and then paths */
// Arguments that get_sha1() can resolve are parsed into objects and
// collected in list.
799 for (i = 1; i < argc; i++) {
800 const char *arg = argv[i];
801 unsigned char sha1[20];
803 if (!get_sha1(arg, sha1)) {
804 struct object *object = parse_object(sha1);
806 die("bad object %s", arg);
807 add_object_array(object, arg, &list);
810 if (!strcmp(arg, "--")) {
817 /* The rest are paths */
// Without a "--" separator, each remaining argument is passed to
// verify_filename().
818 if (!seen_dashdash) {
820 for (j = i; j < argc; j++)
821 verify_filename(prefix, argv[j]);
825 paths = get_pathspec(prefix, argv + i);
826 if (opt.prefix_length && opt.relative) {
827 /* Make sure we do not get outside of paths */
828 for (i = 0; paths[i]; i++)
829 if (strncmp(prefix, paths[i], opt.prefix_length))
830 die("git grep: cannot generate relative filenames containing '..'");
// NOTE(review): this allocates a zeroed two-slot pathspec array; the
// condition leading here and what is stored in the array are not visible.
834 paths = xcalloc(2, sizeof(const char *));
842 return !grep_cache(&opt, paths, cached);
846 die("both --cached and trees are given.");
// Each collected object is dereferenced with deref_tag() before it is
// handed to grep_object() together with the name it was given as.
848 for (i = 0; i < list.nr; i++) {
849 struct object *real_obj;
850 real_obj = deref_tag(list.objects[i].item, NULL, 0);
851 if (grep_object(&opt, paths, real_obj, list.objects[i].name))
854 free_grep_patterns(&opt);