Merge branch 'sd/stash-wo-user-name'
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
15
/*
 * Object flag bit used privately by this file.  Remember to update
 * the object flag allocation in object.h whenever it changes.
 *
 * FILTER_SHOWN_BUT_REVISIT -- set on a tree object once it has been
 * shown, while still allowing the tree to be revisited if it turns
 * up again in the traversal (until we mark it SEEN).  It lets us
 * silently de-dup calls to show() in the caller.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "sha1-name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
27
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal.
 */
struct filter_blobs_none_data {
	/*
	 * OPTIONAL collection of the omitted OIDs.  The filter only
	 * records omitted blobs when this is non-NULL.
	 */
	struct oidset *omits;
};
35
36 static enum list_objects_filter_result filter_blobs_none(
37         struct repository *r,
38         enum list_objects_filter_situation filter_situation,
39         struct object *obj,
40         const char *pathname,
41         const char *filename,
42         void *filter_data_)
43 {
44         struct filter_blobs_none_data *filter_data = filter_data_;
45
46         switch (filter_situation) {
47         default:
48                 BUG("unknown filter_situation: %d", filter_situation);
49
50         case LOFS_BEGIN_TREE:
51                 assert(obj->type == OBJ_TREE);
52                 /* always include all tree objects */
53                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
54
55         case LOFS_END_TREE:
56                 assert(obj->type == OBJ_TREE);
57                 return LOFR_ZERO;
58
59         case LOFS_BLOB:
60                 assert(obj->type == OBJ_BLOB);
61                 assert((obj->flags & SEEN) == 0);
62
63                 if (filter_data->omits)
64                         oidset_insert(filter_data->omits, &obj->oid);
65                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
66         }
67 }
68
69 static void *filter_blobs_none__init(
70         struct oidset *omitted,
71         struct list_objects_filter_options *filter_options,
72         filter_object_fn *filter_fn,
73         filter_free_fn *filter_free_fn)
74 {
75         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
76         d->omits = omitted;
77
78         *filter_fn = filter_blobs_none;
79         *filter_free_fn = free;
80         return d;
81 }
82
/*
 * State for the list-objects filter that omits ALL trees and blobs
 * from the traversal.
 */
struct filter_trees_none_data {
	/*
	 * OPTIONAL collection of the omitted OIDs.  When this is NULL
	 * the filter does not record anything and asks the caller to
	 * skip trees instead of walking them.
	 */
	struct oidset *omits;
};
90
91 static enum list_objects_filter_result filter_trees_none(
92         struct repository *r,
93         enum list_objects_filter_situation filter_situation,
94         struct object *obj,
95         const char *pathname,
96         const char *filename,
97         void *filter_data_)
98 {
99         struct filter_trees_none_data *filter_data = filter_data_;
100
101         switch (filter_situation) {
102         default:
103                 BUG("unknown filter_situation: %d", filter_situation);
104
105         case LOFS_BEGIN_TREE:
106         case LOFS_BLOB:
107                 if (filter_data->omits) {
108                         oidset_insert(filter_data->omits, &obj->oid);
109                         /* _MARK_SEEN but not _DO_SHOW (hard omit) */
110                         return LOFR_MARK_SEEN;
111                 } else {
112                         /*
113                          * Not collecting omits so no need to to traverse tree.
114                          */
115                         return LOFR_SKIP_TREE | LOFR_MARK_SEEN;
116                 }
117
118         case LOFS_END_TREE:
119                 assert(obj->type == OBJ_TREE);
120                 return LOFR_ZERO;
121
122         }
123 }
124
125 static void* filter_trees_none__init(
126         struct oidset *omitted,
127         struct list_objects_filter_options *filter_options,
128         filter_object_fn *filter_fn,
129         filter_free_fn *filter_free_fn)
130 {
131         struct filter_trees_none_data *d = xcalloc(1, sizeof(*d));
132         d->omits = omitted;
133
134         *filter_fn = filter_trees_none;
135         *filter_free_fn = free;
136         return d;
137 }
138
/*
 * State for the list-objects filter that omits large blobs.
 */
struct filter_blobs_limit_data {
	/* OPTIONAL collection of the omitted OIDs; NULL when unwanted. */
	struct oidset *omits;

	/*
	 * Size threshold in bytes: a blob is shown only when its size
	 * is strictly less than this value.
	 */
	unsigned long max_bytes;
};
147
148 static enum list_objects_filter_result filter_blobs_limit(
149         struct repository *r,
150         enum list_objects_filter_situation filter_situation,
151         struct object *obj,
152         const char *pathname,
153         const char *filename,
154         void *filter_data_)
155 {
156         struct filter_blobs_limit_data *filter_data = filter_data_;
157         unsigned long object_length;
158         enum object_type t;
159
160         switch (filter_situation) {
161         default:
162                 BUG("unknown filter_situation: %d", filter_situation);
163
164         case LOFS_BEGIN_TREE:
165                 assert(obj->type == OBJ_TREE);
166                 /* always include all tree objects */
167                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
168
169         case LOFS_END_TREE:
170                 assert(obj->type == OBJ_TREE);
171                 return LOFR_ZERO;
172
173         case LOFS_BLOB:
174                 assert(obj->type == OBJ_BLOB);
175                 assert((obj->flags & SEEN) == 0);
176
177                 t = oid_object_info(r, &obj->oid, &object_length);
178                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
179                         /*
180                          * We DO NOT have the blob locally, so we cannot
181                          * apply the size filter criteria.  Be conservative
182                          * and force show it (and let the caller deal with
183                          * the ambiguity).
184                          */
185                         goto include_it;
186                 }
187
188                 if (object_length < filter_data->max_bytes)
189                         goto include_it;
190
191                 if (filter_data->omits)
192                         oidset_insert(filter_data->omits, &obj->oid);
193                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
194         }
195
196 include_it:
197         if (filter_data->omits)
198                 oidset_remove(filter_data->omits, &obj->oid);
199         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
200 }
201
202 static void *filter_blobs_limit__init(
203         struct oidset *omitted,
204         struct list_objects_filter_options *filter_options,
205         filter_object_fn *filter_fn,
206         filter_free_fn *filter_free_fn)
207 {
208         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
209         d->omits = omitted;
210         d->max_bytes = filter_options->blob_limit_value;
211
212         *filter_fn = filter_blobs_limit;
213         *filter_free_fn = free;
214         return d;
215 }
216
/*
 * A filter driven by a sparse-checkout specification to only
 * include blobs that a sparse checkout would populate.
 *
 * The sparse-checkout spec can be loaded from a blob with the
 * given OID or from a local pathname.  We allow an OID because
 * the repo may be bare or we may be doing the filtering on the
 * server.
 *
 * One `struct frame` is kept per directory currently being walked.
 */
struct frame {
	/*
	 * The usual default include/exclude value.  It is inherited as
	 * we recurse into directories, based upon pattern matching of
	 * the directory itself or of a containing directory.
	 */
	int defval;

	/*
	 * 1: the directory (recursively) contains at least one
	 *    provisionally omitted object.
	 *
	 * 0: everything (recursively) contained in this directory has
	 *    been explicitly included (SHOWN) in the result, so the
	 *    directory may be short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
245
246 struct filter_sparse_data {
247         struct oidset *omits;
248         struct exclude_list el;
249
250         size_t nr, alloc;
251         struct frame *array_frame;
252 };
253
254 static enum list_objects_filter_result filter_sparse(
255         struct repository *r,
256         enum list_objects_filter_situation filter_situation,
257         struct object *obj,
258         const char *pathname,
259         const char *filename,
260         void *filter_data_)
261 {
262         struct filter_sparse_data *filter_data = filter_data_;
263         int val, dtype;
264         struct frame *frame;
265
266         switch (filter_situation) {
267         default:
268                 BUG("unknown filter_situation: %d", filter_situation);
269
270         case LOFS_BEGIN_TREE:
271                 assert(obj->type == OBJ_TREE);
272                 dtype = DT_DIR;
273                 val = is_excluded_from_list(pathname, strlen(pathname),
274                                             filename, &dtype, &filter_data->el,
275                                             r->index);
276                 if (val < 0)
277                         val = filter_data->array_frame[filter_data->nr].defval;
278
279                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
280                            filter_data->alloc);
281                 filter_data->nr++;
282                 filter_data->array_frame[filter_data->nr].defval = val;
283                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
284
285                 /*
286                  * A directory with this tree OID may appear in multiple
287                  * places in the tree. (Think of a directory move or copy,
288                  * with no other changes, so the OID is the same, but the
289                  * full pathnames of objects within this directory are new
290                  * and may match is_excluded() patterns differently.)
291                  * So we cannot mark this directory as SEEN (yet), since
292                  * that will prevent process_tree() from revisiting this
293                  * tree object with other pathname prefixes.
294                  *
295                  * Only _DO_SHOW the tree object the first time we visit
296                  * this tree object.
297                  *
298                  * We always show all tree objects.  A future optimization
299                  * may want to attempt to narrow this.
300                  */
301                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
302                         return LOFR_ZERO;
303                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
304                 return LOFR_DO_SHOW;
305
306         case LOFS_END_TREE:
307                 assert(obj->type == OBJ_TREE);
308                 assert(filter_data->nr > 0);
309
310                 frame = &filter_data->array_frame[filter_data->nr];
311                 filter_data->nr--;
312
313                 /*
314                  * Tell our parent directory if any of our children were
315                  * provisionally omitted.
316                  */
317                 filter_data->array_frame[filter_data->nr].child_prov_omit |=
318                         frame->child_prov_omit;
319
320                 /*
321                  * If there are NO provisionally omitted child objects (ALL child
322                  * objects in this folder were INCLUDED), then we can mark the
323                  * folder as SEEN (so we will not have to revisit it again).
324                  */
325                 if (!frame->child_prov_omit)
326                         return LOFR_MARK_SEEN;
327                 return LOFR_ZERO;
328
329         case LOFS_BLOB:
330                 assert(obj->type == OBJ_BLOB);
331                 assert((obj->flags & SEEN) == 0);
332
333                 frame = &filter_data->array_frame[filter_data->nr];
334
335                 dtype = DT_REG;
336                 val = is_excluded_from_list(pathname, strlen(pathname),
337                                             filename, &dtype, &filter_data->el,
338                                             r->index);
339                 if (val < 0)
340                         val = frame->defval;
341                 if (val > 0) {
342                         if (filter_data->omits)
343                                 oidset_remove(filter_data->omits, &obj->oid);
344                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
345                 }
346
347                 /*
348                  * Provisionally omit it.  We've already established that
349                  * this pathname is not in the sparse-checkout specification
350                  * with the CURRENT pathname, so we *WANT* to omit this blob.
351                  *
352                  * However, a pathname elsewhere in the tree may also
353                  * reference this same blob, so we cannot reject it yet.
354                  * Leave the LOFR_ bits unset so that if the blob appears
355                  * again in the traversal, we will be asked again.
356                  */
357                 if (filter_data->omits)
358                         oidset_insert(filter_data->omits, &obj->oid);
359
360                 /*
361                  * Remember that at least 1 blob in this tree was
362                  * provisionally omitted.  This prevents us from short
363                  * cutting the tree in future iterations.
364                  */
365                 frame->child_prov_omit = 1;
366                 return LOFR_ZERO;
367         }
368 }
369
370
371 static void filter_sparse_free(void *filter_data)
372 {
373         struct filter_sparse_data *d = filter_data;
374         /* TODO free contents of 'd' */
375         free(d);
376 }
377
378 static void *filter_sparse_oid__init(
379         struct oidset *omitted,
380         struct list_objects_filter_options *filter_options,
381         filter_object_fn *filter_fn,
382         filter_free_fn *filter_free_fn)
383 {
384         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
385         d->omits = omitted;
386         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
387                                            NULL, 0, &d->el) < 0)
388                 die("could not load filter specification");
389
390         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
391         d->array_frame[d->nr].defval = 0; /* default to include */
392         d->array_frame[d->nr].child_prov_omit = 0;
393
394         *filter_fn = filter_sparse;
395         *filter_free_fn = filter_sparse_free;
396         return d;
397 }
398
399 static void *filter_sparse_path__init(
400         struct oidset *omitted,
401         struct list_objects_filter_options *filter_options,
402         filter_object_fn *filter_fn,
403         filter_free_fn *filter_free_fn)
404 {
405         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
406         d->omits = omitted;
407         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
408                                            NULL, 0, &d->el, NULL) < 0)
409                 die("could not load filter specification");
410
411         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
412         d->array_frame[d->nr].defval = 0; /* default to include */
413         d->array_frame[d->nr].child_prov_omit = 0;
414
415         *filter_fn = filter_sparse;
416         *filter_free_fn = filter_sparse_free;
417         return d;
418 }
419
420 typedef void *(*filter_init_fn)(
421         struct oidset *omitted,
422         struct list_objects_filter_options *filter_options,
423         filter_object_fn *filter_fn,
424         filter_free_fn *filter_free_fn);
425
426 /*
427  * Must match "enum list_objects_filter_choice".
428  */
429 static filter_init_fn s_filters[] = {
430         NULL,
431         filter_blobs_none__init,
432         filter_blobs_limit__init,
433         filter_trees_none__init,
434         filter_sparse_oid__init,
435         filter_sparse_path__init,
436 };
437
438 void *list_objects_filter__init(
439         struct oidset *omitted,
440         struct list_objects_filter_options *filter_options,
441         filter_object_fn *filter_fn,
442         filter_free_fn *filter_free_fn)
443 {
444         filter_init_fn init_fn;
445
446         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
447
448         if (filter_options->choice >= LOFC__COUNT)
449                 BUG("invalid list-objects filter choice: %d",
450                     filter_options->choice);
451
452         init_fn = s_filters[filter_options->choice];
453         if (init_fn)
454                 return init_fn(omitted, filter_options,
455                                filter_fn, filter_free_fn);
456         *filter_fn = NULL;
457         *filter_free_fn = NULL;
458         return NULL;
459 }