list-objects-filter: implement filter tree:0
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
15
16 /* Remember to update object flag allocation in object.h */
17 /*
18  * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects
19  * that have been shown, but should be revisited if they appear
20  * in the traversal (until we mark it SEEN).  This is a way to
21  * let us silently de-dup calls to show() in the caller.  This
22  * is subtly different from the "revision.h:SHOWN" and the
23  * "sha1-name.c:ONELINE_SEEN" bits.  And also different from
24  * the non-de-dup usage in pack-bitmap.c
25  */
26 #define FILTER_SHOWN_BUT_REVISIT (1<<21)
27
28 /*
29  * A filter for list-objects to omit ALL blobs from the traversal.
30  * And to OPTIONALLY collect a list of the omitted OIDs.
31  */
32 struct filter_blobs_none_data {
33         struct oidset *omits;
34 };
35
36 static enum list_objects_filter_result filter_blobs_none(
37         enum list_objects_filter_situation filter_situation,
38         struct object *obj,
39         const char *pathname,
40         const char *filename,
41         void *filter_data_)
42 {
43         struct filter_blobs_none_data *filter_data = filter_data_;
44
45         switch (filter_situation) {
46         default:
47                 BUG("unknown filter_situation: %d", filter_situation);
48
49         case LOFS_BEGIN_TREE:
50                 assert(obj->type == OBJ_TREE);
51                 /* always include all tree objects */
52                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
53
54         case LOFS_END_TREE:
55                 assert(obj->type == OBJ_TREE);
56                 return LOFR_ZERO;
57
58         case LOFS_BLOB:
59                 assert(obj->type == OBJ_BLOB);
60                 assert((obj->flags & SEEN) == 0);
61
62                 if (filter_data->omits)
63                         oidset_insert(filter_data->omits, &obj->oid);
64                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
65         }
66 }
67
68 static void *filter_blobs_none__init(
69         struct oidset *omitted,
70         struct list_objects_filter_options *filter_options,
71         filter_object_fn *filter_fn,
72         filter_free_fn *filter_free_fn)
73 {
74         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
75         d->omits = omitted;
76
77         *filter_fn = filter_blobs_none;
78         *filter_free_fn = free;
79         return d;
80 }
81
82 /*
83  * A filter for list-objects to omit ALL trees and blobs from the traversal.
84  * Can OPTIONALLY collect a list of the omitted OIDs.
85  */
86 struct filter_trees_none_data {
87         struct oidset *omits;
88 };
89
90 static enum list_objects_filter_result filter_trees_none(
91         enum list_objects_filter_situation filter_situation,
92         struct object *obj,
93         const char *pathname,
94         const char *filename,
95         void *filter_data_)
96 {
97         struct filter_trees_none_data *filter_data = filter_data_;
98
99         switch (filter_situation) {
100         default:
101                 BUG("unknown filter_situation: %d", filter_situation);
102
103         case LOFS_BEGIN_TREE:
104         case LOFS_BLOB:
105                 if (filter_data->omits)
106                         oidset_insert(filter_data->omits, &obj->oid);
107                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
108
109         case LOFS_END_TREE:
110                 assert(obj->type == OBJ_TREE);
111                 return LOFR_ZERO;
112
113         }
114 }
115
116 static void* filter_trees_none__init(
117         struct oidset *omitted,
118         struct list_objects_filter_options *filter_options,
119         filter_object_fn *filter_fn,
120         filter_free_fn *filter_free_fn)
121 {
122         struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
123         d->omits = omitted;
124
125         *filter_fn = filter_trees_none;
126         *filter_free_fn = free;
127         return d;
128 }
129
130 /*
131  * A filter for list-objects to omit large blobs.
132  * And to OPTIONALLY collect a list of the omitted OIDs.
133  */
134 struct filter_blobs_limit_data {
135         struct oidset *omits;
136         unsigned long max_bytes;
137 };
138
139 static enum list_objects_filter_result filter_blobs_limit(
140         enum list_objects_filter_situation filter_situation,
141         struct object *obj,
142         const char *pathname,
143         const char *filename,
144         void *filter_data_)
145 {
146         struct filter_blobs_limit_data *filter_data = filter_data_;
147         unsigned long object_length;
148         enum object_type t;
149
150         switch (filter_situation) {
151         default:
152                 BUG("unknown filter_situation: %d", filter_situation);
153
154         case LOFS_BEGIN_TREE:
155                 assert(obj->type == OBJ_TREE);
156                 /* always include all tree objects */
157                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
158
159         case LOFS_END_TREE:
160                 assert(obj->type == OBJ_TREE);
161                 return LOFR_ZERO;
162
163         case LOFS_BLOB:
164                 assert(obj->type == OBJ_BLOB);
165                 assert((obj->flags & SEEN) == 0);
166
167                 t = oid_object_info(the_repository, &obj->oid, &object_length);
168                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
169                         /*
170                          * We DO NOT have the blob locally, so we cannot
171                          * apply the size filter criteria.  Be conservative
172                          * and force show it (and let the caller deal with
173                          * the ambiguity).
174                          */
175                         goto include_it;
176                 }
177
178                 if (object_length < filter_data->max_bytes)
179                         goto include_it;
180
181                 if (filter_data->omits)
182                         oidset_insert(filter_data->omits, &obj->oid);
183                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
184         }
185
186 include_it:
187         if (filter_data->omits)
188                 oidset_remove(filter_data->omits, &obj->oid);
189         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
190 }
191
192 static void *filter_blobs_limit__init(
193         struct oidset *omitted,
194         struct list_objects_filter_options *filter_options,
195         filter_object_fn *filter_fn,
196         filter_free_fn *filter_free_fn)
197 {
198         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
199         d->omits = omitted;
200         d->max_bytes = filter_options->blob_limit_value;
201
202         *filter_fn = filter_blobs_limit;
203         *filter_free_fn = free;
204         return d;
205 }
206
207 /*
208  * A filter driven by a sparse-checkout specification to only
209  * include blobs that a sparse checkout would populate.
210  *
211  * The sparse-checkout spec can be loaded from a blob with the
212  * given OID or from a local pathname.  We allow an OID because
213  * the repo may be bare or we may be doing the filtering on the
214  * server.
215  */
216 struct frame {
217         /*
218          * defval is the usual default include/exclude value that
219          * should be inherited as we recurse into directories based
220          * upon pattern matching of the directory itself or of a
221          * containing directory.
222          */
223         int defval;
224
225         /*
226          * 1 if the directory (recursively) contains any provisionally
227          * omitted objects.
228          *
229          * 0 if everything (recursively) contained in this directory
230          * has been explicitly included (SHOWN) in the result and
231          * the directory may be short-cut later in the traversal.
232          */
233         unsigned child_prov_omit : 1;
234 };
235
236 struct filter_sparse_data {
237         struct oidset *omits;
238         struct exclude_list el;
239
240         size_t nr, alloc;
241         struct frame *array_frame;
242 };
243
244 static enum list_objects_filter_result filter_sparse(
245         enum list_objects_filter_situation filter_situation,
246         struct object *obj,
247         const char *pathname,
248         const char *filename,
249         void *filter_data_)
250 {
251         struct filter_sparse_data *filter_data = filter_data_;
252         int val, dtype;
253         struct frame *frame;
254
255         switch (filter_situation) {
256         default:
257                 BUG("unknown filter_situation: %d", filter_situation);
258
259         case LOFS_BEGIN_TREE:
260                 assert(obj->type == OBJ_TREE);
261                 dtype = DT_DIR;
262                 val = is_excluded_from_list(pathname, strlen(pathname),
263                                             filename, &dtype, &filter_data->el,
264                                             &the_index);
265                 if (val < 0)
266                         val = filter_data->array_frame[filter_data->nr].defval;
267
268                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
269                            filter_data->alloc);
270                 filter_data->nr++;
271                 filter_data->array_frame[filter_data->nr].defval = val;
272                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
273
274                 /*
275                  * A directory with this tree OID may appear in multiple
276                  * places in the tree. (Think of a directory move or copy,
277                  * with no other changes, so the OID is the same, but the
278                  * full pathnames of objects within this directory are new
279                  * and may match is_excluded() patterns differently.)
280                  * So we cannot mark this directory as SEEN (yet), since
281                  * that will prevent process_tree() from revisiting this
282                  * tree object with other pathname prefixes.
283                  *
284                  * Only _DO_SHOW the tree object the first time we visit
285                  * this tree object.
286                  *
287                  * We always show all tree objects.  A future optimization
288                  * may want to attempt to narrow this.
289                  */
290                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
291                         return LOFR_ZERO;
292                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
293                 return LOFR_DO_SHOW;
294
295         case LOFS_END_TREE:
296                 assert(obj->type == OBJ_TREE);
297                 assert(filter_data->nr > 0);
298
299                 frame = &filter_data->array_frame[filter_data->nr];
300                 filter_data->nr--;
301
302                 /*
303                  * Tell our parent directory if any of our children were
304                  * provisionally omitted.
305                  */
306                 filter_data->array_frame[filter_data->nr].child_prov_omit |=
307                         frame->child_prov_omit;
308
309                 /*
310                  * If there are NO provisionally omitted child objects (ALL child
311                  * objects in this folder were INCLUDED), then we can mark the
312                  * folder as SEEN (so we will not have to revisit it again).
313                  */
314                 if (!frame->child_prov_omit)
315                         return LOFR_MARK_SEEN;
316                 return LOFR_ZERO;
317
318         case LOFS_BLOB:
319                 assert(obj->type == OBJ_BLOB);
320                 assert((obj->flags & SEEN) == 0);
321
322                 frame = &filter_data->array_frame[filter_data->nr];
323
324                 dtype = DT_REG;
325                 val = is_excluded_from_list(pathname, strlen(pathname),
326                                             filename, &dtype, &filter_data->el,
327                                             &the_index);
328                 if (val < 0)
329                         val = frame->defval;
330                 if (val > 0) {
331                         if (filter_data->omits)
332                                 oidset_remove(filter_data->omits, &obj->oid);
333                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
334                 }
335
336                 /*
337                  * Provisionally omit it.  We've already established that
338                  * this pathname is not in the sparse-checkout specification
339                  * with the CURRENT pathname, so we *WANT* to omit this blob.
340                  *
341                  * However, a pathname elsewhere in the tree may also
342                  * reference this same blob, so we cannot reject it yet.
343                  * Leave the LOFR_ bits unset so that if the blob appears
344                  * again in the traversal, we will be asked again.
345                  */
346                 if (filter_data->omits)
347                         oidset_insert(filter_data->omits, &obj->oid);
348
349                 /*
350                  * Remember that at least 1 blob in this tree was
351                  * provisionally omitted.  This prevents us from short
352                  * cutting the tree in future iterations.
353                  */
354                 frame->child_prov_omit = 1;
355                 return LOFR_ZERO;
356         }
357 }
358
359
360 static void filter_sparse_free(void *filter_data)
361 {
362         struct filter_sparse_data *d = filter_data;
363         /* TODO free contents of 'd' */
364         free(d);
365 }
366
367 static void *filter_sparse_oid__init(
368         struct oidset *omitted,
369         struct list_objects_filter_options *filter_options,
370         filter_object_fn *filter_fn,
371         filter_free_fn *filter_free_fn)
372 {
373         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
374         d->omits = omitted;
375         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
376                                            NULL, 0, &d->el) < 0)
377                 die("could not load filter specification");
378
379         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
380         d->array_frame[d->nr].defval = 0; /* default to include */
381         d->array_frame[d->nr].child_prov_omit = 0;
382
383         *filter_fn = filter_sparse;
384         *filter_free_fn = filter_sparse_free;
385         return d;
386 }
387
388 static void *filter_sparse_path__init(
389         struct oidset *omitted,
390         struct list_objects_filter_options *filter_options,
391         filter_object_fn *filter_fn,
392         filter_free_fn *filter_free_fn)
393 {
394         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
395         d->omits = omitted;
396         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
397                                            NULL, 0, &d->el, NULL) < 0)
398                 die("could not load filter specification");
399
400         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
401         d->array_frame[d->nr].defval = 0; /* default to include */
402         d->array_frame[d->nr].child_prov_omit = 0;
403
404         *filter_fn = filter_sparse;
405         *filter_free_fn = filter_sparse_free;
406         return d;
407 }
408
409 typedef void *(*filter_init_fn)(
410         struct oidset *omitted,
411         struct list_objects_filter_options *filter_options,
412         filter_object_fn *filter_fn,
413         filter_free_fn *filter_free_fn);
414
415 /*
416  * Must match "enum list_objects_filter_choice".
417  */
418 static filter_init_fn s_filters[] = {
419         NULL,
420         filter_blobs_none__init,
421         filter_blobs_limit__init,
422         filter_trees_none__init,
423         filter_sparse_oid__init,
424         filter_sparse_path__init,
425 };
426
427 void *list_objects_filter__init(
428         struct oidset *omitted,
429         struct list_objects_filter_options *filter_options,
430         filter_object_fn *filter_fn,
431         filter_free_fn *filter_free_fn)
432 {
433         filter_init_fn init_fn;
434
435         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
436
437         if (filter_options->choice >= LOFC__COUNT)
438                 BUG("invalid list-objects filter choice: %d",
439                     filter_options->choice);
440
441         init_fn = s_filters[filter_options->choice];
442         if (init_fn)
443                 return init_fn(omitted, filter_options,
444                                filter_fn, filter_free_fn);
445         *filter_fn = NULL;
446         *filter_free_fn = NULL;
447         return NULL;
448 }