Add streaming filter API
[git] / diffcore-pickaxe.c
1 /*
2  * Copyright (C) 2005 Junio C Hamano
3  * Copyright (C) 2010 Google Inc.
4  */
5 #include "cache.h"
6 #include "diff.h"
7 #include "diffcore.h"
8 #include "xdiff-interface.h"
9
/*
 * State shared between diff_grep() and the diffgrep_consume() callback
 * while the lines of a textual diff are being scanned.
 */
struct diffgrep_cb {
	/* Compiled pattern every added/removed line is matched against. */
	regex_t *regexp;
	/* Set to non-zero once some added/removed line has matched. */
	int hit;
};
14
15 static void diffgrep_consume(void *priv, char *line, unsigned long len)
16 {
17         struct diffgrep_cb *data = priv;
18         regmatch_t regmatch;
19         int hold;
20
21         if (line[0] != '+' && line[0] != '-')
22                 return;
23         if (data->hit)
24                 /*
25                  * NEEDSWORK: we should have a way to terminate the
26                  * caller early.
27                  */
28                 return;
29         /* Yuck -- line ought to be "const char *"! */
30         hold = line[len];
31         line[len] = '\0';
32         data->hit = !regexec(data->regexp, line + 1, 1, &regmatch, 0);
33         line[len] = hold;
34 }
35
36 static void fill_one(struct diff_filespec *one,
37                      mmfile_t *mf, struct userdiff_driver **textconv)
38 {
39         if (DIFF_FILE_VALID(one)) {
40                 *textconv = get_textconv(one);
41                 mf->size = fill_textconv(*textconv, one, &mf->ptr);
42         } else {
43                 memset(mf, 0, sizeof(*mf));
44         }
45 }
46
47 static int diff_grep(struct diff_filepair *p, regex_t *regexp, struct diff_options *o)
48 {
49         regmatch_t regmatch;
50         struct userdiff_driver *textconv_one = NULL;
51         struct userdiff_driver *textconv_two = NULL;
52         mmfile_t mf1, mf2;
53         int hit;
54
55         if (diff_unmodified_pair(p))
56                 return 0;
57
58         fill_one(p->one, &mf1, &textconv_one);
59         fill_one(p->two, &mf2, &textconv_two);
60
61         if (!mf1.ptr) {
62                 if (!mf2.ptr)
63                         return 0; /* ignore unmerged */
64                 /* created "two" -- does it have what we are looking for? */
65                 hit = !regexec(regexp, p->two->data, 1, &regmatch, 0);
66         } else if (!mf2.ptr) {
67                 /* removed "one" -- did it have what we are looking for? */
68                 hit = !regexec(regexp, p->one->data, 1, &regmatch, 0);
69         } else {
70                 /*
71                  * We have both sides; need to run textual diff and see if
72                  * the pattern appears on added/deleted lines.
73                  */
74                 struct diffgrep_cb ecbdata;
75                 xpparam_t xpp;
76                 xdemitconf_t xecfg;
77
78                 memset(&xpp, 0, sizeof(xpp));
79                 memset(&xecfg, 0, sizeof(xecfg));
80                 ecbdata.regexp = regexp;
81                 ecbdata.hit = 0;
82                 xecfg.ctxlen = o->context;
83                 xecfg.interhunkctxlen = o->interhunkcontext;
84                 xdi_diff_outf(&mf1, &mf2, diffgrep_consume, &ecbdata,
85                               &xpp, &xecfg);
86                 hit = ecbdata.hit;
87         }
88         if (textconv_one)
89                 free(mf1.ptr);
90         if (textconv_two)
91                 free(mf2.ptr);
92         return hit;
93 }
94
95 static void diffcore_pickaxe_grep(struct diff_options *o)
96 {
97         struct diff_queue_struct *q = &diff_queued_diff;
98         int i, has_changes, err;
99         regex_t regex;
100         struct diff_queue_struct outq;
101         outq.queue = NULL;
102         outq.nr = outq.alloc = 0;
103
104         err = regcomp(&regex, o->pickaxe, REG_EXTENDED | REG_NEWLINE);
105         if (err) {
106                 char errbuf[1024];
107                 regerror(err, &regex, errbuf, 1024);
108                 regfree(&regex);
109                 die("invalid log-grep regex: %s", errbuf);
110         }
111
112         if (o->pickaxe_opts & DIFF_PICKAXE_ALL) {
113                 /* Showing the whole changeset if needle exists */
114                 for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
115                         struct diff_filepair *p = q->queue[i];
116                         if (diff_grep(p, &regex, o))
117                                 has_changes++;
118                 }
119                 if (has_changes)
120                         return; /* do not munge the queue */
121
122                 /*
123                  * Otherwise we will clear the whole queue by copying
124                  * the empty outq at the end of this function, but
125                  * first clear the current entries in the queue.
126                  */
127                 for (i = 0; i < q->nr; i++)
128                         diff_free_filepair(q->queue[i]);
129         } else {
130                 /* Showing only the filepairs that has the needle */
131                 for (i = 0; i < q->nr; i++) {
132                         struct diff_filepair *p = q->queue[i];
133                         if (diff_grep(p, &regex, o))
134                                 diff_q(&outq, p);
135                         else
136                                 diff_free_filepair(p);
137                 }
138         }
139
140         regfree(&regex);
141
142         free(q->queue);
143         *q = outq;
144         return;
145 }
146
147 static unsigned int contains(struct diff_filespec *one,
148                              const char *needle, unsigned long len,
149                              regex_t *regexp)
150 {
151         unsigned int cnt;
152         unsigned long sz;
153         const char *data;
154         if (diff_populate_filespec(one, 0))
155                 return 0;
156         if (!len)
157                 return 0;
158
159         sz = one->size;
160         data = one->data;
161         cnt = 0;
162
163         if (regexp) {
164                 regmatch_t regmatch;
165                 int flags = 0;
166
167                 assert(data[sz] == '\0');
168                 while (*data && !regexec(regexp, data, 1, &regmatch, flags)) {
169                         flags |= REG_NOTBOL;
170                         data += regmatch.rm_eo;
171                         if (*data && regmatch.rm_so == regmatch.rm_eo)
172                                 data++;
173                         cnt++;
174                 }
175
176         } else { /* Classic exact string match */
177                 while (sz) {
178                         const char *found = memmem(data, sz, needle, len);
179                         if (!found)
180                                 break;
181                         sz -= found - data + len;
182                         data = found + len;
183                         cnt++;
184                 }
185         }
186         diff_free_filespec_data(one);
187         return cnt;
188 }
189
190 static void diffcore_pickaxe_count(struct diff_options *o)
191 {
192         const char *needle = o->pickaxe;
193         int opts = o->pickaxe_opts;
194         struct diff_queue_struct *q = &diff_queued_diff;
195         unsigned long len = strlen(needle);
196         int i, has_changes;
197         regex_t regex, *regexp = NULL;
198         struct diff_queue_struct outq;
199         DIFF_QUEUE_CLEAR(&outq);
200
201         if (opts & DIFF_PICKAXE_REGEX) {
202                 int err;
203                 err = regcomp(&regex, needle, REG_EXTENDED | REG_NEWLINE);
204                 if (err) {
205                         /* The POSIX.2 people are surely sick */
206                         char errbuf[1024];
207                         regerror(err, &regex, errbuf, 1024);
208                         regfree(&regex);
209                         die("invalid pickaxe regex: %s", errbuf);
210                 }
211                 regexp = &regex;
212         }
213
214         if (opts & DIFF_PICKAXE_ALL) {
215                 /* Showing the whole changeset if needle exists */
216                 for (i = has_changes = 0; !has_changes && i < q->nr; i++) {
217                         struct diff_filepair *p = q->queue[i];
218                         if (!DIFF_FILE_VALID(p->one)) {
219                                 if (!DIFF_FILE_VALID(p->two))
220                                         continue; /* ignore unmerged */
221                                 /* created */
222                                 if (contains(p->two, needle, len, regexp))
223                                         has_changes++;
224                         }
225                         else if (!DIFF_FILE_VALID(p->two)) {
226                                 if (contains(p->one, needle, len, regexp))
227                                         has_changes++;
228                         }
229                         else if (!diff_unmodified_pair(p) &&
230                                  contains(p->one, needle, len, regexp) !=
231                                  contains(p->two, needle, len, regexp))
232                                 has_changes++;
233                 }
234                 if (has_changes)
235                         return; /* not munge the queue */
236
237                 /* otherwise we will clear the whole queue
238                  * by copying the empty outq at the end of this
239                  * function, but first clear the current entries
240                  * in the queue.
241                  */
242                 for (i = 0; i < q->nr; i++)
243                         diff_free_filepair(q->queue[i]);
244         }
245         else
246                 /* Showing only the filepairs that has the needle */
247                 for (i = 0; i < q->nr; i++) {
248                         struct diff_filepair *p = q->queue[i];
249                         has_changes = 0;
250                         if (!DIFF_FILE_VALID(p->one)) {
251                                 if (!DIFF_FILE_VALID(p->two))
252                                         ; /* ignore unmerged */
253                                 /* created */
254                                 else if (contains(p->two, needle, len, regexp))
255                                         has_changes = 1;
256                         }
257                         else if (!DIFF_FILE_VALID(p->two)) {
258                                 if (contains(p->one, needle, len, regexp))
259                                         has_changes = 1;
260                         }
261                         else if (!diff_unmodified_pair(p) &&
262                                  contains(p->one, needle, len, regexp) !=
263                                  contains(p->two, needle, len, regexp))
264                                 has_changes = 1;
265
266                         if (has_changes)
267                                 diff_q(&outq, p);
268                         else
269                                 diff_free_filepair(p);
270                 }
271
272         if (opts & DIFF_PICKAXE_REGEX)
273                 regfree(&regex);
274
275         free(q->queue);
276         *q = outq;
277         return;
278 }
279
280 void diffcore_pickaxe(struct diff_options *o)
281 {
282         /* Might want to warn when both S and G are on; I don't care... */
283         if (o->pickaxe_opts & DIFF_PICKAXE_KIND_G)
284                 diffcore_pickaxe_grep(o);
285         else
286                 diffcore_pickaxe_count(o);
287 }