3 #include "xdiff-interface.h"
5 void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
7 struct grep_pat *p = xcalloc(1, sizeof(*p));
11 p->token = GREP_PATTERN_HEAD;
13 *opt->pattern_tail = p;
14 opt->pattern_tail = &p->next;
18 void append_grep_pattern(struct grep_opt *opt, const char *pat,
19 const char *origin, int no, enum grep_pat_token t)
21 struct grep_pat *p = xcalloc(1, sizeof(*p));
26 *opt->pattern_tail = p;
27 opt->pattern_tail = &p->next;
31 static int isregexspecial(int c)
33 return c == '\0' || is_glob_special(c) ||
34 c == '$' || c == '(' || c == ')' || c == '+' ||
35 c == '.' || c == '^' || c == '{' || c == '|';
38 static int is_fixed(const char *s)
40 while (!isregexspecial(*s))
45 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
49 if (opt->fixed || is_fixed(p->pattern))
51 if (opt->regflags & REG_ICASE)
56 err = regcomp(&p->regexp, p->pattern, opt->regflags);
61 sprintf(where, "In '%s' at %d, ",
64 sprintf(where, "%s, ", p->origin);
67 regerror(err, &p->regexp, errbuf, 1024);
69 die("%s'%s': %s", where, p->pattern, errbuf);
73 static struct grep_expr *compile_pattern_or(struct grep_pat **);
74 static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
81 case GREP_PATTERN: /* atom */
82 case GREP_PATTERN_HEAD:
83 case GREP_PATTERN_BODY:
84 x = xcalloc(1, sizeof (struct grep_expr));
85 x->node = GREP_NODE_ATOM;
91 x = compile_pattern_or(list);
94 if (!*list || (*list)->token != GREP_CLOSE_PAREN)
95 die("unmatched parenthesis");
96 *list = (*list)->next;
103 static struct grep_expr *compile_pattern_not(struct grep_pat **list)
112 die("--not not followed by pattern expression");
114 x = xcalloc(1, sizeof (struct grep_expr));
115 x->node = GREP_NODE_NOT;
116 x->u.unary = compile_pattern_not(list);
118 die("--not followed by non pattern expression");
121 return compile_pattern_atom(list);
125 static struct grep_expr *compile_pattern_and(struct grep_pat **list)
128 struct grep_expr *x, *y, *z;
130 x = compile_pattern_not(list);
132 if (p && p->token == GREP_AND) {
134 die("--and not followed by pattern expression");
136 y = compile_pattern_and(list);
138 die("--and not followed by pattern expression");
139 z = xcalloc(1, sizeof (struct grep_expr));
140 z->node = GREP_NODE_AND;
141 z->u.binary.left = x;
142 z->u.binary.right = y;
148 static struct grep_expr *compile_pattern_or(struct grep_pat **list)
151 struct grep_expr *x, *y, *z;
153 x = compile_pattern_and(list);
155 if (x && p && p->token != GREP_CLOSE_PAREN) {
156 y = compile_pattern_or(list);
158 die("not a pattern expression %s", p->pattern);
159 z = xcalloc(1, sizeof (struct grep_expr));
160 z->node = GREP_NODE_OR;
161 z->u.binary.left = x;
162 z->u.binary.right = y;
168 static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
170 return compile_pattern_or(list);
173 void compile_grep_patterns(struct grep_opt *opt)
180 for (p = opt->pattern_list; p; p = p->next) {
182 case GREP_PATTERN: /* atom */
183 case GREP_PATTERN_HEAD:
184 case GREP_PATTERN_BODY:
185 compile_regexp(p, opt);
196 /* Then bundle them up in an expression.
197 * A classic recursive descent parser would do.
199 p = opt->pattern_list;
200 opt->pattern_expression = compile_pattern_expr(&p);
202 die("incomplete pattern expression: %s", p->pattern);
205 static void free_pattern_expr(struct grep_expr *x)
211 free_pattern_expr(x->u.unary);
215 free_pattern_expr(x->u.binary.left);
216 free_pattern_expr(x->u.binary.right);
222 void free_grep_patterns(struct grep_opt *opt)
224 struct grep_pat *p, *n;
226 for (p = opt->pattern_list; p; p = n) {
229 case GREP_PATTERN: /* atom */
230 case GREP_PATTERN_HEAD:
231 case GREP_PATTERN_BODY:
242 free_pattern_expr(opt->pattern_expression);
245 static char *end_of_line(char *cp, unsigned long *left)
247 unsigned long l = *left;
248 while (l && *cp != '\n') {
256 static int word_char(char ch)
258 return isalnum(ch) || ch == '_';
261 static void show_line(struct grep_opt *opt, const char *bol, const char *eol,
262 const char *name, unsigned lno, char sign)
264 if (opt->null_following_name)
267 printf("%s%c", name, sign);
269 printf("%d%c", lno, sign);
270 printf("%.*s\n", (int)(eol-bol), bol);
273 static void show_name(struct grep_opt *opt, const char *name)
275 printf("%s%c", name, opt->null_following_name ? '\0' : '\n');
278 static int fixmatch(const char *pattern, char *line, regmatch_t *match)
280 char *hit = strstr(line, pattern);
282 match->rm_so = match->rm_eo = -1;
286 match->rm_so = hit - line;
287 match->rm_eo = match->rm_so + strlen(pattern);
292 static int strip_timestamp(char *bol, char **eol_p)
297 while (bol < --eol) {
313 { "committer ", 10 },
316 static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
320 regmatch_t pmatch[10];
322 if ((p->token != GREP_PATTERN) &&
323 ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
326 if (p->token == GREP_PATTERN_HEAD) {
329 assert(p->field < ARRAY_SIZE(header_field));
330 field = header_field[p->field].field;
331 len = header_field[p->field].len;
332 if (strncmp(bol, field, len))
335 saved_ch = strip_timestamp(bol, &eol);
340 regex_t *exp = &p->regexp;
341 hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
345 hit = !fixmatch(p->pattern, bol, pmatch);
348 if (hit && opt->word_regexp) {
349 if ((pmatch[0].rm_so < 0) ||
350 (eol - bol) <= pmatch[0].rm_so ||
351 (pmatch[0].rm_eo < 0) ||
352 (eol - bol) < pmatch[0].rm_eo)
353 die("regexp returned nonsense");
355 /* Match beginning must be either beginning of the
356 * line, or at word boundary (i.e. the last char must
357 * not be a word char). Similarly, match end must be
358 * either end of the line, or at word boundary
359 * (i.e. the next char must not be a word char).
361 if ( ((pmatch[0].rm_so == 0) ||
362 !word_char(bol[pmatch[0].rm_so-1])) &&
363 ((pmatch[0].rm_eo == (eol-bol)) ||
364 !word_char(bol[pmatch[0].rm_eo])) )
369 if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
370 /* There could be more than one match on the
371 * line, and the first match might not be
372 * strict word match. But later ones could be!
373 * Forward to the next possible start, i.e. the
374 * next position following a non-word char.
376 bol = pmatch[0].rm_so + bol + 1;
377 while (word_char(bol[-1]) && bol < eol)
383 if (p->token == GREP_PATTERN_HEAD && saved_ch)
388 static int match_expr_eval(struct grep_opt *o,
390 char *bol, char *eol,
391 enum grep_context ctx,
398 h = match_one_pattern(o, x->u.atom, bol, eol, ctx);
401 h = !match_expr_eval(o, x->u.unary, bol, eol, ctx, 0);
405 return (match_expr_eval(o, x->u.binary.left,
407 match_expr_eval(o, x->u.binary.right,
409 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
410 h &= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 0);
414 return (match_expr_eval(o, x->u.binary.left,
416 match_expr_eval(o, x->u.binary.right,
418 h = match_expr_eval(o, x->u.binary.left, bol, eol, ctx, 0);
419 x->u.binary.left->hit |= h;
420 h |= match_expr_eval(o, x->u.binary.right, bol, eol, ctx, 1);
423 die("Unexpected node type (internal error) %d", x->node);
430 static int match_expr(struct grep_opt *opt, char *bol, char *eol,
431 enum grep_context ctx, int collect_hits)
433 struct grep_expr *x = opt->pattern_expression;
434 return match_expr_eval(opt, x, bol, eol, ctx, collect_hits);
437 static int match_line(struct grep_opt *opt, char *bol, char *eol,
438 enum grep_context ctx, int collect_hits)
442 return match_expr(opt, bol, eol, ctx, collect_hits);
444 /* we do not call with collect_hits without being extended */
445 for (p = opt->pattern_list; p; p = p->next) {
446 if (match_one_pattern(opt, p, bol, eol, ctx))
452 static int grep_buffer_1(struct grep_opt *opt, const char *name,
453 char *buf, unsigned long size, int collect_hits)
456 unsigned long left = size;
458 struct pre_context_line {
461 } *prev = NULL, *pcl;
462 unsigned last_hit = 0;
463 unsigned last_shown = 0;
464 int binary_match_only = 0;
465 const char *hunk_mark = "";
467 enum grep_context ctx = GREP_CONTEXT_HEAD;
469 if (buffer_is_binary(buf, size)) {
470 switch (opt->binary) {
471 case GREP_BINARY_DEFAULT:
472 binary_match_only = 1;
474 case GREP_BINARY_NOMATCH:
475 return 0; /* Assume unmatch */
482 if (opt->pre_context)
483 prev = xcalloc(opt->pre_context, sizeof(*prev));
484 if (opt->pre_context || opt->post_context)
491 eol = end_of_line(bol, &left);
495 if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
496 ctx = GREP_CONTEXT_BODY;
498 hit = match_line(opt, bol, eol, ctx, collect_hits);
504 /* "grep -v -e foo -e bla" should list lines
505 * that do not have either, so inversion should
510 if (opt->unmatch_name_only) {
517 if (opt->status_only)
519 if (binary_match_only) {
520 printf("Binary file %s matches\n", name);
523 if (opt->name_only) {
524 show_name(opt, name);
527 /* Hit at this line. If we haven't shown the
528 * pre-context lines, we would need to show them.
529 * When asked to do "count", this still show
530 * the context which is nonsense, but the user
531 * deserves to get that ;-).
533 if (opt->pre_context) {
535 if (opt->pre_context < lno)
536 from = lno - opt->pre_context;
539 if (from <= last_shown)
540 from = last_shown + 1;
541 if (last_shown && from != last_shown + 1)
542 fputs(hunk_mark, stdout);
544 pcl = &prev[lno-from-1];
545 show_line(opt, pcl->bol, pcl->eol,
551 if (last_shown && lno != last_shown + 1)
552 fputs(hunk_mark, stdout);
554 show_line(opt, bol, eol, name, lno, ':');
555 last_shown = last_hit = lno;
558 lno <= last_hit + opt->post_context) {
559 /* If the last hit is within the post context,
560 * we need to show this line.
562 if (last_shown && lno != last_shown + 1)
563 fputs(hunk_mark, stdout);
564 show_line(opt, bol, eol, name, lno, '-');
567 if (opt->pre_context) {
568 memmove(prev+1, prev,
569 (opt->pre_context-1) * sizeof(*prev));
586 if (opt->status_only)
588 if (opt->unmatch_name_only) {
589 /* We did not see any hit, so we want to show this */
590 show_name(opt, name);
595 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
596 * which feels mostly useless but sometimes useful. Maybe
597 * make it another option? For now suppress them.
599 if (opt->count && count)
600 printf("%s%c%u\n", name,
601 opt->null_following_name ? '\0' : ':', count);
605 static void clr_hit_marker(struct grep_expr *x)
607 /* All-hit markers are meaningful only at the very top level
612 if (x->node != GREP_NODE_OR)
614 x->u.binary.left->hit = 0;
615 x = x->u.binary.right;
619 static int chk_hit_marker(struct grep_expr *x)
621 /* Top level nodes have hit markers. See if they all are hits */
623 if (x->node != GREP_NODE_OR)
625 if (!x->u.binary.left->hit)
627 x = x->u.binary.right;
631 int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
634 * we do not have to do the two-pass grep when we do not check
635 * buffer-wide "all-match".
638 return grep_buffer_1(opt, name, buf, size, 0);
640 /* Otherwise the toplevel "or" terms hit a bit differently.
641 * We first clear hit markers from them.
643 clr_hit_marker(opt->pattern_expression);
644 grep_buffer_1(opt, name, buf, size, 1);
646 if (!chk_hit_marker(opt->pattern_expression))
649 return grep_buffer_1(opt, name, buf, size, 0);