object-store: provide helpers for loose_objects_cache
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
15
/* Remember to update object flag allocation in object.h */
/*
 * FILTER_SHOWN_BUT_REVISIT -- set on a tree object once it has been
 * shown, while still allowing the traversal to come back to it
 * (until it is finally marked SEEN).  This lets the filter silently
 * de-dup calls to show() in the caller.  It is subtly different
 * from the "revision.h:SHOWN" and "sha1-name.c:ONELINE_SEEN" bits,
 * and also from the non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
27
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal and OPTIONALLY collects the OIDs it omitted.
 */
struct filter_blobs_none_data {
	/* where omitted blob OIDs are recorded; NULL when not collecting */
	struct oidset *omits;
};
35
36 static enum list_objects_filter_result filter_blobs_none(
37         enum list_objects_filter_situation filter_situation,
38         struct object *obj,
39         const char *pathname,
40         const char *filename,
41         void *filter_data_)
42 {
43         struct filter_blobs_none_data *filter_data = filter_data_;
44
45         switch (filter_situation) {
46         default:
47                 BUG("unknown filter_situation: %d", filter_situation);
48
49         case LOFS_BEGIN_TREE:
50                 assert(obj->type == OBJ_TREE);
51                 /* always include all tree objects */
52                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
53
54         case LOFS_END_TREE:
55                 assert(obj->type == OBJ_TREE);
56                 return LOFR_ZERO;
57
58         case LOFS_BLOB:
59                 assert(obj->type == OBJ_BLOB);
60                 assert((obj->flags & SEEN) == 0);
61
62                 if (filter_data->omits)
63                         oidset_insert(filter_data->omits, &obj->oid);
64                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
65         }
66 }
67
68 static void *filter_blobs_none__init(
69         struct oidset *omitted,
70         struct list_objects_filter_options *filter_options,
71         filter_object_fn *filter_fn,
72         filter_free_fn *filter_free_fn)
73 {
74         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
75         d->omits = omitted;
76
77         *filter_fn = filter_blobs_none;
78         *filter_free_fn = free;
79         return d;
80 }
81
/*
 * State for the list-objects filter that omits ALL trees and blobs
 * from the traversal and can OPTIONALLY collect the omitted OIDs.
 */
struct filter_trees_none_data {
	/* where omitted OIDs are recorded; NULL when not collecting */
	struct oidset *omits;
};
89
90 static enum list_objects_filter_result filter_trees_none(
91         enum list_objects_filter_situation filter_situation,
92         struct object *obj,
93         const char *pathname,
94         const char *filename,
95         void *filter_data_)
96 {
97         struct filter_trees_none_data *filter_data = filter_data_;
98
99         switch (filter_situation) {
100         default:
101                 BUG("unknown filter_situation: %d", filter_situation);
102
103         case LOFS_BEGIN_TREE:
104         case LOFS_BLOB:
105                 if (filter_data->omits) {
106                         oidset_insert(filter_data->omits, &obj->oid);
107                         /* _MARK_SEEN but not _DO_SHOW (hard omit) */
108                         return LOFR_MARK_SEEN;
109                 } else {
110                         /*
111                          * Not collecting omits so no need to to traverse tree.
112                          */
113                         return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
114                 }
115
116         case LOFS_END_TREE:
117                 assert(obj->type == OBJ_TREE);
118                 return LOFR_ZERO;
119
120         }
121 }
122
123 static void* filter_trees_none__init(
124         struct oidset *omitted,
125         struct list_objects_filter_options *filter_options,
126         filter_object_fn *filter_fn,
127         filter_free_fn *filter_free_fn)
128 {
129         struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
130         d->omits = omitted;
131
132         *filter_fn = filter_trees_none;
133         *filter_free_fn = free;
134         return d;
135 }
136
/*
 * State for the list-objects filter that omits large blobs and
 * OPTIONALLY collects the OIDs it omitted.
 */
struct filter_blobs_limit_data {
	/* where omitted blob OIDs are recorded; NULL when not collecting */
	struct oidset *omits;
	/* blobs whose size is at least this many bytes are omitted */
	unsigned long max_bytes;
};
145
146 static enum list_objects_filter_result filter_blobs_limit(
147         enum list_objects_filter_situation filter_situation,
148         struct object *obj,
149         const char *pathname,
150         const char *filename,
151         void *filter_data_)
152 {
153         struct filter_blobs_limit_data *filter_data = filter_data_;
154         unsigned long object_length;
155         enum object_type t;
156
157         switch (filter_situation) {
158         default:
159                 BUG("unknown filter_situation: %d", filter_situation);
160
161         case LOFS_BEGIN_TREE:
162                 assert(obj->type == OBJ_TREE);
163                 /* always include all tree objects */
164                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
165
166         case LOFS_END_TREE:
167                 assert(obj->type == OBJ_TREE);
168                 return LOFR_ZERO;
169
170         case LOFS_BLOB:
171                 assert(obj->type == OBJ_BLOB);
172                 assert((obj->flags & SEEN) == 0);
173
174                 t = oid_object_info(the_repository, &obj->oid, &object_length);
175                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
176                         /*
177                          * We DO NOT have the blob locally, so we cannot
178                          * apply the size filter criteria.  Be conservative
179                          * and force show it (and let the caller deal with
180                          * the ambiguity).
181                          */
182                         goto include_it;
183                 }
184
185                 if (object_length < filter_data->max_bytes)
186                         goto include_it;
187
188                 if (filter_data->omits)
189                         oidset_insert(filter_data->omits, &obj->oid);
190                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
191         }
192
193 include_it:
194         if (filter_data->omits)
195                 oidset_remove(filter_data->omits, &obj->oid);
196         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
197 }
198
199 static void *filter_blobs_limit__init(
200         struct oidset *omitted,
201         struct list_objects_filter_options *filter_options,
202         filter_object_fn *filter_fn,
203         filter_free_fn *filter_free_fn)
204 {
205         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
206         d->omits = omitted;
207         d->max_bytes = filter_options->blob_limit_value;
208
209         *filter_fn = filter_blobs_limit;
210         *filter_free_fn = free;
211         return d;
212 }
213
/*
 * A filter driven by a sparse-checkout specification to only
 * include blobs that a sparse checkout would populate.
 *
 * The sparse-checkout spec can be loaded from a blob with the
 * given OID or from a local pathname.  We allow an OID because
 * the repo may be bare or we may be doing the filtering on the
 * server.
 *
 * One struct frame is kept per directory on the path currently
 * being traversed.
 */
struct frame {
	/*
	 * The include/exclude value entries of this directory inherit
	 * when no pattern decides for them.  It is derived from
	 * pattern matching of the directory itself or, failing that,
	 * inherited from the containing directory.
	 */
	int defval;

	/*
	 * 1 when something (recursively) inside this directory was
	 * provisionally omitted.
	 *
	 * 0 when everything (recursively) inside it was explicitly
	 * included (SHOWN) in the result, in which case the directory
	 * may be short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
242
243 struct filter_sparse_data {
244         struct oidset *omits;
245         struct exclude_list el;
246
247         size_t nr, alloc;
248         struct frame *array_frame;
249 };
250
251 static enum list_objects_filter_result filter_sparse(
252         enum list_objects_filter_situation filter_situation,
253         struct object *obj,
254         const char *pathname,
255         const char *filename,
256         void *filter_data_)
257 {
258         struct filter_sparse_data *filter_data = filter_data_;
259         int val, dtype;
260         struct frame *frame;
261
262         switch (filter_situation) {
263         default:
264                 BUG("unknown filter_situation: %d", filter_situation);
265
266         case LOFS_BEGIN_TREE:
267                 assert(obj->type == OBJ_TREE);
268                 dtype = DT_DIR;
269                 val = is_excluded_from_list(pathname, strlen(pathname),
270                                             filename, &dtype, &filter_data->el,
271                                             &the_index);
272                 if (val < 0)
273                         val = filter_data->array_frame[filter_data->nr].defval;
274
275                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
276                            filter_data->alloc);
277                 filter_data->nr++;
278                 filter_data->array_frame[filter_data->nr].defval = val;
279                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
280
281                 /*
282                  * A directory with this tree OID may appear in multiple
283                  * places in the tree. (Think of a directory move or copy,
284                  * with no other changes, so the OID is the same, but the
285                  * full pathnames of objects within this directory are new
286                  * and may match is_excluded() patterns differently.)
287                  * So we cannot mark this directory as SEEN (yet), since
288                  * that will prevent process_tree() from revisiting this
289                  * tree object with other pathname prefixes.
290                  *
291                  * Only _DO_SHOW the tree object the first time we visit
292                  * this tree object.
293                  *
294                  * We always show all tree objects.  A future optimization
295                  * may want to attempt to narrow this.
296                  */
297                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
298                         return LOFR_ZERO;
299                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
300                 return LOFR_DO_SHOW;
301
302         case LOFS_END_TREE:
303                 assert(obj->type == OBJ_TREE);
304                 assert(filter_data->nr > 0);
305
306                 frame = &filter_data->array_frame[filter_data->nr];
307                 filter_data->nr--;
308
309                 /*
310                  * Tell our parent directory if any of our children were
311                  * provisionally omitted.
312                  */
313                 filter_data->array_frame[filter_data->nr].child_prov_omit |=
314                         frame->child_prov_omit;
315
316                 /*
317                  * If there are NO provisionally omitted child objects (ALL child
318                  * objects in this folder were INCLUDED), then we can mark the
319                  * folder as SEEN (so we will not have to revisit it again).
320                  */
321                 if (!frame->child_prov_omit)
322                         return LOFR_MARK_SEEN;
323                 return LOFR_ZERO;
324
325         case LOFS_BLOB:
326                 assert(obj->type == OBJ_BLOB);
327                 assert((obj->flags & SEEN) == 0);
328
329                 frame = &filter_data->array_frame[filter_data->nr];
330
331                 dtype = DT_REG;
332                 val = is_excluded_from_list(pathname, strlen(pathname),
333                                             filename, &dtype, &filter_data->el,
334                                             &the_index);
335                 if (val < 0)
336                         val = frame->defval;
337                 if (val > 0) {
338                         if (filter_data->omits)
339                                 oidset_remove(filter_data->omits, &obj->oid);
340                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
341                 }
342
343                 /*
344                  * Provisionally omit it.  We've already established that
345                  * this pathname is not in the sparse-checkout specification
346                  * with the CURRENT pathname, so we *WANT* to omit this blob.
347                  *
348                  * However, a pathname elsewhere in the tree may also
349                  * reference this same blob, so we cannot reject it yet.
350                  * Leave the LOFR_ bits unset so that if the blob appears
351                  * again in the traversal, we will be asked again.
352                  */
353                 if (filter_data->omits)
354                         oidset_insert(filter_data->omits, &obj->oid);
355
356                 /*
357                  * Remember that at least 1 blob in this tree was
358                  * provisionally omitted.  This prevents us from short
359                  * cutting the tree in future iterations.
360                  */
361                 frame->child_prov_omit = 1;
362                 return LOFR_ZERO;
363         }
364 }
365
366
367 static void filter_sparse_free(void *filter_data)
368 {
369         struct filter_sparse_data *d = filter_data;
370         /* TODO free contents of 'd' */
371         free(d);
372 }
373
374 static void *filter_sparse_oid__init(
375         struct oidset *omitted,
376         struct list_objects_filter_options *filter_options,
377         filter_object_fn *filter_fn,
378         filter_free_fn *filter_free_fn)
379 {
380         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
381         d->omits = omitted;
382         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
383                                            NULL, 0, &d->el) < 0)
384                 die("could not load filter specification");
385
386         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
387         d->array_frame[d->nr].defval = 0; /* default to include */
388         d->array_frame[d->nr].child_prov_omit = 0;
389
390         *filter_fn = filter_sparse;
391         *filter_free_fn = filter_sparse_free;
392         return d;
393 }
394
395 static void *filter_sparse_path__init(
396         struct oidset *omitted,
397         struct list_objects_filter_options *filter_options,
398         filter_object_fn *filter_fn,
399         filter_free_fn *filter_free_fn)
400 {
401         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
402         d->omits = omitted;
403         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
404                                            NULL, 0, &d->el, NULL) < 0)
405                 die("could not load filter specification");
406
407         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
408         d->array_frame[d->nr].defval = 0; /* default to include */
409         d->array_frame[d->nr].child_prov_omit = 0;
410
411         *filter_fn = filter_sparse;
412         *filter_free_fn = filter_sparse_free;
413         return d;
414 }
415
416 typedef void *(*filter_init_fn)(
417         struct oidset *omitted,
418         struct list_objects_filter_options *filter_options,
419         filter_object_fn *filter_fn,
420         filter_free_fn *filter_free_fn);
421
422 /*
423  * Must match "enum list_objects_filter_choice".
424  */
425 static filter_init_fn s_filters[] = {
426         NULL,
427         filter_blobs_none__init,
428         filter_blobs_limit__init,
429         filter_trees_none__init,
430         filter_sparse_oid__init,
431         filter_sparse_path__init,
432 };
433
434 void *list_objects_filter__init(
435         struct oidset *omitted,
436         struct list_objects_filter_options *filter_options,
437         filter_object_fn *filter_fn,
438         filter_free_fn *filter_free_fn)
439 {
440         filter_init_fn init_fn;
441
442         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
443
444         if (filter_options->choice >= LOFC__COUNT)
445                 BUG("invalid list-objects filter choice: %d",
446                     filter_options->choice);
447
448         init_fn = s_filters[filter_options->choice];
449         if (init_fn)
450                 return init_fn(omitted, filter_options,
451                                filter_fn, filter_free_fn);
452         *filter_fn = NULL;
453         *filter_free_fn = NULL;
454         return NULL;
455 }