list-objects-filter: use BUG rather than die
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14 #include "object-store.h"
15
/*
 * Object flag bit private to this file.  Remember to update the
 * object flag allocation in object.h when it changes.
 *
 * FILTER_SHOWN_BUT_REVISIT is set on a tree object that has already
 * been shown, but that must still be revisited whenever it turns up
 * again in the traversal (until it is finally marked SEEN).  It lets
 * the filter silently de-dup calls to show() in the caller.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "sha1-name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
27
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal.
 */
struct filter_blobs_none_data {
	/*
	 * Optional set collecting the OIDs of the omitted blobs; when
	 * NULL nothing is recorded.
	 */
	struct oidset *omits;
};
35
36 static enum list_objects_filter_result filter_blobs_none(
37         enum list_objects_filter_situation filter_situation,
38         struct object *obj,
39         const char *pathname,
40         const char *filename,
41         void *filter_data_)
42 {
43         struct filter_blobs_none_data *filter_data = filter_data_;
44
45         switch (filter_situation) {
46         default:
47                 BUG("unknown filter_situation: %d", filter_situation);
48
49         case LOFS_BEGIN_TREE:
50                 assert(obj->type == OBJ_TREE);
51                 /* always include all tree objects */
52                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
53
54         case LOFS_END_TREE:
55                 assert(obj->type == OBJ_TREE);
56                 return LOFR_ZERO;
57
58         case LOFS_BLOB:
59                 assert(obj->type == OBJ_BLOB);
60                 assert((obj->flags & SEEN) == 0);
61
62                 if (filter_data->omits)
63                         oidset_insert(filter_data->omits, &obj->oid);
64                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
65         }
66 }
67
68 static void *filter_blobs_none__init(
69         struct oidset *omitted,
70         struct list_objects_filter_options *filter_options,
71         filter_object_fn *filter_fn,
72         filter_free_fn *filter_free_fn)
73 {
74         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
75         d->omits = omitted;
76
77         *filter_fn = filter_blobs_none;
78         *filter_free_fn = free;
79         return d;
80 }
81
/*
 * State for the list-objects filter that omits large blobs.
 */
struct filter_blobs_limit_data {
	/*
	 * Optional set collecting the OIDs of the omitted blobs; when
	 * NULL nothing is recorded.
	 */
	struct oidset *omits;

	/* blobs of at least this many bytes are omitted */
	unsigned long max_bytes;
};
90
91 static enum list_objects_filter_result filter_blobs_limit(
92         enum list_objects_filter_situation filter_situation,
93         struct object *obj,
94         const char *pathname,
95         const char *filename,
96         void *filter_data_)
97 {
98         struct filter_blobs_limit_data *filter_data = filter_data_;
99         unsigned long object_length;
100         enum object_type t;
101
102         switch (filter_situation) {
103         default:
104                 BUG("unknown filter_situation: %d", filter_situation);
105
106         case LOFS_BEGIN_TREE:
107                 assert(obj->type == OBJ_TREE);
108                 /* always include all tree objects */
109                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
110
111         case LOFS_END_TREE:
112                 assert(obj->type == OBJ_TREE);
113                 return LOFR_ZERO;
114
115         case LOFS_BLOB:
116                 assert(obj->type == OBJ_BLOB);
117                 assert((obj->flags & SEEN) == 0);
118
119                 t = oid_object_info(the_repository, &obj->oid, &object_length);
120                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
121                         /*
122                          * We DO NOT have the blob locally, so we cannot
123                          * apply the size filter criteria.  Be conservative
124                          * and force show it (and let the caller deal with
125                          * the ambiguity).
126                          */
127                         goto include_it;
128                 }
129
130                 if (object_length < filter_data->max_bytes)
131                         goto include_it;
132
133                 if (filter_data->omits)
134                         oidset_insert(filter_data->omits, &obj->oid);
135                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
136         }
137
138 include_it:
139         if (filter_data->omits)
140                 oidset_remove(filter_data->omits, &obj->oid);
141         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
142 }
143
144 static void *filter_blobs_limit__init(
145         struct oidset *omitted,
146         struct list_objects_filter_options *filter_options,
147         filter_object_fn *filter_fn,
148         filter_free_fn *filter_free_fn)
149 {
150         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
151         d->omits = omitted;
152         d->max_bytes = filter_options->blob_limit_value;
153
154         *filter_fn = filter_blobs_limit;
155         *filter_free_fn = free;
156         return d;
157 }
158
/*
 * A filter driven by a sparse-checkout specification, so that only
 * the blobs a sparse checkout would populate are included.
 *
 * The specification can be loaded either from a blob with a given
 * OID or from a local pathname.  An OID is allowed because the repo
 * may be bare, or the filtering may be happening on the server.
 *
 * One "struct frame" is kept per directory that is currently being
 * traversed.
 */
struct frame {
	/*
	 * The usual default include/exclude value to inherit while
	 * recursing into directories, derived from pattern matching of
	 * the directory itself or of a containing directory.
	 */
	int defval;

	/*
	 * 1 when the directory (recursively) contains any provisionally
	 * omitted objects.
	 *
	 * 0 when everything (recursively) contained in the directory
	 * has been explicitly included (SHOWN) in the result, so that
	 * the directory may be short-cut later in the traversal.
	 */
	unsigned int child_prov_omit:1;
};
187
188 struct filter_sparse_data {
189         struct oidset *omits;
190         struct exclude_list el;
191
192         size_t nr, alloc;
193         struct frame *array_frame;
194 };
195
196 static enum list_objects_filter_result filter_sparse(
197         enum list_objects_filter_situation filter_situation,
198         struct object *obj,
199         const char *pathname,
200         const char *filename,
201         void *filter_data_)
202 {
203         struct filter_sparse_data *filter_data = filter_data_;
204         int val, dtype;
205         struct frame *frame;
206
207         switch (filter_situation) {
208         default:
209                 BUG("unknown filter_situation: %d", filter_situation);
210
211         case LOFS_BEGIN_TREE:
212                 assert(obj->type == OBJ_TREE);
213                 dtype = DT_DIR;
214                 val = is_excluded_from_list(pathname, strlen(pathname),
215                                             filename, &dtype, &filter_data->el,
216                                             &the_index);
217                 if (val < 0)
218                         val = filter_data->array_frame[filter_data->nr].defval;
219
220                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
221                            filter_data->alloc);
222                 filter_data->nr++;
223                 filter_data->array_frame[filter_data->nr].defval = val;
224                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
225
226                 /*
227                  * A directory with this tree OID may appear in multiple
228                  * places in the tree. (Think of a directory move or copy,
229                  * with no other changes, so the OID is the same, but the
230                  * full pathnames of objects within this directory are new
231                  * and may match is_excluded() patterns differently.)
232                  * So we cannot mark this directory as SEEN (yet), since
233                  * that will prevent process_tree() from revisiting this
234                  * tree object with other pathname prefixes.
235                  *
236                  * Only _DO_SHOW the tree object the first time we visit
237                  * this tree object.
238                  *
239                  * We always show all tree objects.  A future optimization
240                  * may want to attempt to narrow this.
241                  */
242                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
243                         return LOFR_ZERO;
244                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
245                 return LOFR_DO_SHOW;
246
247         case LOFS_END_TREE:
248                 assert(obj->type == OBJ_TREE);
249                 assert(filter_data->nr > 0);
250
251                 frame = &filter_data->array_frame[filter_data->nr];
252                 filter_data->nr--;
253
254                 /*
255                  * Tell our parent directory if any of our children were
256                  * provisionally omitted.
257                  */
258                 filter_data->array_frame[filter_data->nr].child_prov_omit |=
259                         frame->child_prov_omit;
260
261                 /*
262                  * If there are NO provisionally omitted child objects (ALL child
263                  * objects in this folder were INCLUDED), then we can mark the
264                  * folder as SEEN (so we will not have to revisit it again).
265                  */
266                 if (!frame->child_prov_omit)
267                         return LOFR_MARK_SEEN;
268                 return LOFR_ZERO;
269
270         case LOFS_BLOB:
271                 assert(obj->type == OBJ_BLOB);
272                 assert((obj->flags & SEEN) == 0);
273
274                 frame = &filter_data->array_frame[filter_data->nr];
275
276                 dtype = DT_REG;
277                 val = is_excluded_from_list(pathname, strlen(pathname),
278                                             filename, &dtype, &filter_data->el,
279                                             &the_index);
280                 if (val < 0)
281                         val = frame->defval;
282                 if (val > 0) {
283                         if (filter_data->omits)
284                                 oidset_remove(filter_data->omits, &obj->oid);
285                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
286                 }
287
288                 /*
289                  * Provisionally omit it.  We've already established that
290                  * this pathname is not in the sparse-checkout specification
291                  * with the CURRENT pathname, so we *WANT* to omit this blob.
292                  *
293                  * However, a pathname elsewhere in the tree may also
294                  * reference this same blob, so we cannot reject it yet.
295                  * Leave the LOFR_ bits unset so that if the blob appears
296                  * again in the traversal, we will be asked again.
297                  */
298                 if (filter_data->omits)
299                         oidset_insert(filter_data->omits, &obj->oid);
300
301                 /*
302                  * Remember that at least 1 blob in this tree was
303                  * provisionally omitted.  This prevents us from short
304                  * cutting the tree in future iterations.
305                  */
306                 frame->child_prov_omit = 1;
307                 return LOFR_ZERO;
308         }
309 }
310
311
312 static void filter_sparse_free(void *filter_data)
313 {
314         struct filter_sparse_data *d = filter_data;
315         /* TODO free contents of 'd' */
316         free(d);
317 }
318
319 static void *filter_sparse_oid__init(
320         struct oidset *omitted,
321         struct list_objects_filter_options *filter_options,
322         filter_object_fn *filter_fn,
323         filter_free_fn *filter_free_fn)
324 {
325         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
326         d->omits = omitted;
327         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
328                                            NULL, 0, &d->el) < 0)
329                 die("could not load filter specification");
330
331         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
332         d->array_frame[d->nr].defval = 0; /* default to include */
333         d->array_frame[d->nr].child_prov_omit = 0;
334
335         *filter_fn = filter_sparse;
336         *filter_free_fn = filter_sparse_free;
337         return d;
338 }
339
340 static void *filter_sparse_path__init(
341         struct oidset *omitted,
342         struct list_objects_filter_options *filter_options,
343         filter_object_fn *filter_fn,
344         filter_free_fn *filter_free_fn)
345 {
346         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
347         d->omits = omitted;
348         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
349                                            NULL, 0, &d->el, NULL) < 0)
350                 die("could not load filter specification");
351
352         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
353         d->array_frame[d->nr].defval = 0; /* default to include */
354         d->array_frame[d->nr].child_prov_omit = 0;
355
356         *filter_fn = filter_sparse;
357         *filter_free_fn = filter_sparse_free;
358         return d;
359 }
360
361 typedef void *(*filter_init_fn)(
362         struct oidset *omitted,
363         struct list_objects_filter_options *filter_options,
364         filter_object_fn *filter_fn,
365         filter_free_fn *filter_free_fn);
366
367 /*
368  * Must match "enum list_objects_filter_choice".
369  */
370 static filter_init_fn s_filters[] = {
371         NULL,
372         filter_blobs_none__init,
373         filter_blobs_limit__init,
374         filter_sparse_oid__init,
375         filter_sparse_path__init,
376 };
377
378 void *list_objects_filter__init(
379         struct oidset *omitted,
380         struct list_objects_filter_options *filter_options,
381         filter_object_fn *filter_fn,
382         filter_free_fn *filter_free_fn)
383 {
384         filter_init_fn init_fn;
385
386         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
387
388         if (filter_options->choice >= LOFC__COUNT)
389                 BUG("invalid list-objects filter choice: %d",
390                     filter_options->choice);
391
392         init_fn = s_filters[filter_options->choice];
393         if (init_fn)
394                 return init_fn(omitted, filter_options,
395                                filter_fn, filter_free_fn);
396         *filter_fn = NULL;
397         *filter_free_fn = NULL;
398         return NULL;
399 }