list-objects-filter: correct usage of ALLOC_GROW
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
15
/* Remember to update object flag allocation in object.h */
/*
 * FILTER_SHOWN_BUT_REVISIT -- object flag bit placed on a tree object
 * once it has been shown, while still allowing the traversal to come
 * back to it (until the tree is finally marked SEEN).  It lets the
 * filter silently de-duplicate calls to show() in the caller.
 *
 * This bit is subtly different from "revision.h:SHOWN" and from
 * "sha1-name.c:ONELINE_SEEN", and also from the non-de-dup usage
 * in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
27
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal.
 *
 * omits: optional set that receives the OID of every omitted blob;
 *        left NULL when the omitted OIDs are not being collected.
 */
struct filter_blobs_none_data {
	struct oidset *omits;
};
35
36 static enum list_objects_filter_result filter_blobs_none(
37         enum list_objects_filter_situation filter_situation,
38         struct object *obj,
39         const char *pathname,
40         const char *filename,
41         void *filter_data_)
42 {
43         struct filter_blobs_none_data *filter_data = filter_data_;
44
45         switch (filter_situation) {
46         default:
47                 BUG("unknown filter_situation: %d", filter_situation);
48
49         case LOFS_BEGIN_TREE:
50                 assert(obj->type == OBJ_TREE);
51                 /* always include all tree objects */
52                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
53
54         case LOFS_END_TREE:
55                 assert(obj->type == OBJ_TREE);
56                 return LOFR_ZERO;
57
58         case LOFS_BLOB:
59                 assert(obj->type == OBJ_BLOB);
60                 assert((obj->flags & SEEN) == 0);
61
62                 if (filter_data->omits)
63                         oidset_insert(filter_data->omits, &obj->oid);
64                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
65         }
66 }
67
68 static void *filter_blobs_none__init(
69         struct oidset *omitted,
70         struct list_objects_filter_options *filter_options,
71         filter_object_fn *filter_fn,
72         filter_free_fn *filter_free_fn)
73 {
74         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
75         d->omits = omitted;
76
77         *filter_fn = filter_blobs_none;
78         *filter_free_fn = free;
79         return d;
80 }
81
/*
 * State for the list-objects filter that omits ALL trees and blobs
 * from the traversal.
 *
 * omits: optional set that receives the OID of every omitted object;
 *        left NULL when the omitted OIDs are not being collected.
 */
struct filter_trees_none_data {
	struct oidset *omits;
};
89
90 static enum list_objects_filter_result filter_trees_none(
91         enum list_objects_filter_situation filter_situation,
92         struct object *obj,
93         const char *pathname,
94         const char *filename,
95         void *filter_data_)
96 {
97         struct filter_trees_none_data *filter_data = filter_data_;
98
99         switch (filter_situation) {
100         default:
101                 BUG("unknown filter_situation: %d", filter_situation);
102
103         case LOFS_BEGIN_TREE:
104         case LOFS_BLOB:
105                 if (filter_data->omits) {
106                         oidset_insert(filter_data->omits, &obj->oid);
107                         /* _MARK_SEEN but not _DO_SHOW (hard omit) */
108                         return LOFR_MARK_SEEN;
109                 } else {
110                         /*
111                          * Not collecting omits so no need to to traverse tree.
112                          */
113                         return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
114                 }
115
116         case LOFS_END_TREE:
117                 assert(obj->type == OBJ_TREE);
118                 return LOFR_ZERO;
119
120         }
121 }
122
123 static void* filter_trees_none__init(
124         struct oidset *omitted,
125         struct list_objects_filter_options *filter_options,
126         filter_object_fn *filter_fn,
127         filter_free_fn *filter_free_fn)
128 {
129         struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
130         d->omits = omitted;
131
132         *filter_fn = filter_trees_none;
133         *filter_free_fn = free;
134         return d;
135 }
136
/*
 * State for the list-objects filter that omits large blobs.
 *
 * omits:     optional set that receives the OID of every omitted
 *            blob; left NULL when they are not being collected.
 * max_bytes: size threshold; a locally available blob whose size is
 *            not below this value is omitted.
 */
struct filter_blobs_limit_data {
	struct oidset *omits;
	unsigned long max_bytes;
};
145
146 static enum list_objects_filter_result filter_blobs_limit(
147         enum list_objects_filter_situation filter_situation,
148         struct object *obj,
149         const char *pathname,
150         const char *filename,
151         void *filter_data_)
152 {
153         struct filter_blobs_limit_data *filter_data = filter_data_;
154         unsigned long object_length;
155         enum object_type t;
156
157         switch (filter_situation) {
158         default:
159                 BUG("unknown filter_situation: %d", filter_situation);
160
161         case LOFS_BEGIN_TREE:
162                 assert(obj->type == OBJ_TREE);
163                 /* always include all tree objects */
164                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
165
166         case LOFS_END_TREE:
167                 assert(obj->type == OBJ_TREE);
168                 return LOFR_ZERO;
169
170         case LOFS_BLOB:
171                 assert(obj->type == OBJ_BLOB);
172                 assert((obj->flags & SEEN) == 0);
173
174                 t = oid_object_info(the_repository, &obj->oid, &object_length);
175                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
176                         /*
177                          * We DO NOT have the blob locally, so we cannot
178                          * apply the size filter criteria.  Be conservative
179                          * and force show it (and let the caller deal with
180                          * the ambiguity).
181                          */
182                         goto include_it;
183                 }
184
185                 if (object_length < filter_data->max_bytes)
186                         goto include_it;
187
188                 if (filter_data->omits)
189                         oidset_insert(filter_data->omits, &obj->oid);
190                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
191         }
192
193 include_it:
194         if (filter_data->omits)
195                 oidset_remove(filter_data->omits, &obj->oid);
196         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
197 }
198
199 static void *filter_blobs_limit__init(
200         struct oidset *omitted,
201         struct list_objects_filter_options *filter_options,
202         filter_object_fn *filter_fn,
203         filter_free_fn *filter_free_fn)
204 {
205         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
206         d->omits = omitted;
207         d->max_bytes = filter_options->blob_limit_value;
208
209         *filter_fn = filter_blobs_limit;
210         *filter_free_fn = free;
211         return d;
212 }
213
/*
 * A filter driven by a sparse-checkout specification to only
 * include blobs that a sparse checkout would populate.
 *
 * The sparse-checkout spec can be loaded from a blob with the
 * given OID or from a local pathname.  An OID is allowed because
 * the repo may be bare, or the filtering may be happening on the
 * server.
 *
 * struct frame is one entry of the per-directory stack kept while
 * the sparse filter walks the trees.
 */
struct frame {
	/*
	 * The default include/exclude value inherited while recursing
	 * into directories, derived from pattern matching of this
	 * directory itself or of a containing directory.
	 */
	int defval;

	/*
	 * 1 when this directory (recursively) contains at least one
	 * provisionally omitted object.
	 *
	 * 0 when everything (recursively) inside this directory has
	 * been explicitly included (SHOWN) in the result, so the
	 * directory may be short-cut later in the traversal.
	 */
	unsigned int child_prov_omit : 1;
};
242
243 struct filter_sparse_data {
244         struct oidset *omits;
245         struct exclude_list el;
246
247         size_t nr, alloc;
248         struct frame *array_frame;
249 };
250
251 static enum list_objects_filter_result filter_sparse(
252         enum list_objects_filter_situation filter_situation,
253         struct object *obj,
254         const char *pathname,
255         const char *filename,
256         void *filter_data_)
257 {
258         struct filter_sparse_data *filter_data = filter_data_;
259         int val, dtype;
260         struct frame *frame;
261
262         switch (filter_situation) {
263         default:
264                 BUG("unknown filter_situation: %d", filter_situation);
265
266         case LOFS_BEGIN_TREE:
267                 assert(obj->type == OBJ_TREE);
268                 dtype = DT_DIR;
269                 val = is_excluded_from_list(pathname, strlen(pathname),
270                                             filename, &dtype, &filter_data->el,
271                                             &the_index);
272                 if (val < 0)
273                         val = filter_data->array_frame[filter_data->nr - 1].defval;
274
275                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
276                            filter_data->alloc);
277                 filter_data->array_frame[filter_data->nr].defval = val;
278                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
279                 filter_data->nr++;
280
281                 /*
282                  * A directory with this tree OID may appear in multiple
283                  * places in the tree. (Think of a directory move or copy,
284                  * with no other changes, so the OID is the same, but the
285                  * full pathnames of objects within this directory are new
286                  * and may match is_excluded() patterns differently.)
287                  * So we cannot mark this directory as SEEN (yet), since
288                  * that will prevent process_tree() from revisiting this
289                  * tree object with other pathname prefixes.
290                  *
291                  * Only _DO_SHOW the tree object the first time we visit
292                  * this tree object.
293                  *
294                  * We always show all tree objects.  A future optimization
295                  * may want to attempt to narrow this.
296                  */
297                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
298                         return LOFR_ZERO;
299                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
300                 return LOFR_DO_SHOW;
301
302         case LOFS_END_TREE:
303                 assert(obj->type == OBJ_TREE);
304                 assert(filter_data->nr > 1);
305
306                 frame = &filter_data->array_frame[--filter_data->nr];
307
308                 /*
309                  * Tell our parent directory if any of our children were
310                  * provisionally omitted.
311                  */
312                 filter_data->array_frame[filter_data->nr - 1].child_prov_omit |=
313                         frame->child_prov_omit;
314
315                 /*
316                  * If there are NO provisionally omitted child objects (ALL child
317                  * objects in this folder were INCLUDED), then we can mark the
318                  * folder as SEEN (so we will not have to revisit it again).
319                  */
320                 if (!frame->child_prov_omit)
321                         return LOFR_MARK_SEEN;
322                 return LOFR_ZERO;
323
324         case LOFS_BLOB:
325                 assert(obj->type == OBJ_BLOB);
326                 assert((obj->flags & SEEN) == 0);
327
328                 frame = &filter_data->array_frame[filter_data->nr - 1];
329
330                 dtype = DT_REG;
331                 val = is_excluded_from_list(pathname, strlen(pathname),
332                                             filename, &dtype, &filter_data->el,
333                                             &the_index);
334                 if (val < 0)
335                         val = frame->defval;
336                 if (val > 0) {
337                         if (filter_data->omits)
338                                 oidset_remove(filter_data->omits, &obj->oid);
339                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
340                 }
341
342                 /*
343                  * Provisionally omit it.  We've already established that
344                  * this pathname is not in the sparse-checkout specification
345                  * with the CURRENT pathname, so we *WANT* to omit this blob.
346                  *
347                  * However, a pathname elsewhere in the tree may also
348                  * reference this same blob, so we cannot reject it yet.
349                  * Leave the LOFR_ bits unset so that if the blob appears
350                  * again in the traversal, we will be asked again.
351                  */
352                 if (filter_data->omits)
353                         oidset_insert(filter_data->omits, &obj->oid);
354
355                 /*
356                  * Remember that at least 1 blob in this tree was
357                  * provisionally omitted.  This prevents us from short
358                  * cutting the tree in future iterations.
359                  */
360                 frame->child_prov_omit = 1;
361                 return LOFR_ZERO;
362         }
363 }
364
365
366 static void filter_sparse_free(void *filter_data)
367 {
368         struct filter_sparse_data *d = filter_data;
369         free(d->array_frame);
370         free(d);
371 }
372
373 static void *filter_sparse_oid__init(
374         struct oidset *omitted,
375         struct list_objects_filter_options *filter_options,
376         filter_object_fn *filter_fn,
377         filter_free_fn *filter_free_fn)
378 {
379         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
380         d->omits = omitted;
381         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
382                                            NULL, 0, &d->el) < 0)
383                 die("could not load filter specification");
384
385         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
386         d->array_frame[d->nr].defval = 0; /* default to include */
387         d->array_frame[d->nr].child_prov_omit = 0;
388         d->nr++;
389
390         *filter_fn = filter_sparse;
391         *filter_free_fn = filter_sparse_free;
392         return d;
393 }
394
395 static void *filter_sparse_path__init(
396         struct oidset *omitted,
397         struct list_objects_filter_options *filter_options,
398         filter_object_fn *filter_fn,
399         filter_free_fn *filter_free_fn)
400 {
401         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
402         d->omits = omitted;
403         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
404                                            NULL, 0, &d->el, NULL) < 0)
405                 die("could not load filter specification");
406
407         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
408         d->array_frame[d->nr].defval = 0; /* default to include */
409         d->array_frame[d->nr].child_prov_omit = 0;
410         d->nr++;
411
412         *filter_fn = filter_sparse;
413         *filter_free_fn = filter_sparse_free;
414         return d;
415 }
416
417 typedef void *(*filter_init_fn)(
418         struct oidset *omitted,
419         struct list_objects_filter_options *filter_options,
420         filter_object_fn *filter_fn,
421         filter_free_fn *filter_free_fn);
422
423 /*
424  * Must match "enum list_objects_filter_choice".
425  */
426 static filter_init_fn s_filters[] = {
427         NULL,
428         filter_blobs_none__init,
429         filter_blobs_limit__init,
430         filter_trees_none__init,
431         filter_sparse_oid__init,
432         filter_sparse_path__init,
433 };
434
435 void *list_objects_filter__init(
436         struct oidset *omitted,
437         struct list_objects_filter_options *filter_options,
438         filter_object_fn *filter_fn,
439         filter_free_fn *filter_free_fn)
440 {
441         filter_init_fn init_fn;
442
443         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
444
445         if (filter_options->choice >= LOFC__COUNT)
446                 BUG("invalid list-objects filter choice: %d",
447                     filter_options->choice);
448
449         init_fn = s_filters[filter_options->choice];
450         if (init_fn)
451                 return init_fn(omitted, filter_options,
452                                filter_fn, filter_free_fn);
453         *filter_fn = NULL;
454         *filter_free_fn = NULL;
455         return NULL;
456 }