Merge branch 'jk/test-hashmap-updates' into jch
[git] / list-objects-filter.c
1 #include "cache.h"
2 #include "dir.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "tree.h"
6 #include "blob.h"
7 #include "diff.h"
8 #include "tree-walk.h"
9 #include "revision.h"
10 #include "list-objects.h"
11 #include "list-objects-filter.h"
12 #include "list-objects-filter-options.h"
13 #include "oidset.h"
14
/* Remember to update object flag allocation in object.h */
/*
 * FILTER_SHOWN_BUT_REVISIT -- flag bit placed on a tree object once
 * it has been shown, while still allowing the traversal to revisit
 * it if it turns up again (until the tree is finally marked SEEN).
 * It lets the filter silently de-duplicate the caller's show() calls.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "sha1_name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
26
/*
 * State for the list-objects filter that omits ALL blobs from the
 * traversal.
 */
struct filter_blobs_none_data {
	/*
	 * Optional set collecting the OIDs of omitted blobs; nothing is
	 * recorded when this is NULL.
	 */
	struct oidset *omits;
};
34
35 static enum list_objects_filter_result filter_blobs_none(
36         enum list_objects_filter_situation filter_situation,
37         struct object *obj,
38         const char *pathname,
39         const char *filename,
40         void *filter_data_)
41 {
42         struct filter_blobs_none_data *filter_data = filter_data_;
43
44         switch (filter_situation) {
45         default:
46                 die("unknown filter_situation");
47                 return LOFR_ZERO;
48
49         case LOFS_BEGIN_TREE:
50                 assert(obj->type == OBJ_TREE);
51                 /* always include all tree objects */
52                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
53
54         case LOFS_END_TREE:
55                 assert(obj->type == OBJ_TREE);
56                 return LOFR_ZERO;
57
58         case LOFS_BLOB:
59                 assert(obj->type == OBJ_BLOB);
60                 assert((obj->flags & SEEN) == 0);
61
62                 if (filter_data->omits)
63                         oidset_insert(filter_data->omits, &obj->oid);
64                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
65         }
66 }
67
68 static void *filter_blobs_none__init(
69         struct oidset *omitted,
70         struct list_objects_filter_options *filter_options,
71         filter_object_fn *filter_fn,
72         filter_free_fn *filter_free_fn)
73 {
74         struct filter_blobs_none_data *d = xcalloc(1, sizeof(*d));
75         d->omits = omitted;
76
77         *filter_fn = filter_blobs_none;
78         *filter_free_fn = free;
79         return d;
80 }
81
/*
 * State for the list-objects filter that omits large blobs.
 */
struct filter_blobs_limit_data {
	/*
	 * Optional set collecting the OIDs of omitted blobs; nothing is
	 * recorded when this is NULL.
	 */
	struct oidset *omits;

	/* Blobs whose size is below this many bytes are shown. */
	unsigned long max_bytes;
};
90
91 static enum list_objects_filter_result filter_blobs_limit(
92         enum list_objects_filter_situation filter_situation,
93         struct object *obj,
94         const char *pathname,
95         const char *filename,
96         void *filter_data_)
97 {
98         struct filter_blobs_limit_data *filter_data = filter_data_;
99         unsigned long object_length;
100         enum object_type t;
101
102         switch (filter_situation) {
103         default:
104                 die("unknown filter_situation");
105                 return LOFR_ZERO;
106
107         case LOFS_BEGIN_TREE:
108                 assert(obj->type == OBJ_TREE);
109                 /* always include all tree objects */
110                 return LOFR_MARK_SEEN | LOFR_DO_SHOW;
111
112         case LOFS_END_TREE:
113                 assert(obj->type == OBJ_TREE);
114                 return LOFR_ZERO;
115
116         case LOFS_BLOB:
117                 assert(obj->type == OBJ_BLOB);
118                 assert((obj->flags & SEEN) == 0);
119
120                 t = sha1_object_info(obj->oid.hash, &object_length);
121                 if (t != OBJ_BLOB) { /* probably OBJ_NONE */
122                         /*
123                          * We DO NOT have the blob locally, so we cannot
124                          * apply the size filter criteria.  Be conservative
125                          * and force show it (and let the caller deal with
126                          * the ambiguity).
127                          */
128                         goto include_it;
129                 }
130
131                 if (object_length < filter_data->max_bytes)
132                         goto include_it;
133
134                 if (filter_data->omits)
135                         oidset_insert(filter_data->omits, &obj->oid);
136                 return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */
137         }
138
139 include_it:
140         if (filter_data->omits)
141                 oidset_remove(filter_data->omits, &obj->oid);
142         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
143 }
144
145 static void *filter_blobs_limit__init(
146         struct oidset *omitted,
147         struct list_objects_filter_options *filter_options,
148         filter_object_fn *filter_fn,
149         filter_free_fn *filter_free_fn)
150 {
151         struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));
152         d->omits = omitted;
153         d->max_bytes = filter_options->blob_limit_value;
154
155         *filter_fn = filter_blobs_limit;
156         *filter_free_fn = free;
157         return d;
158 }
159
/*
 * A filter driven by a sparse-checkout specification to only
 * include blobs that a sparse checkout would populate.
 *
 * The sparse-checkout spec can be loaded from a blob with the
 * given OID or from a local pathname.  We allow an OID because
 * the repo may be bare or we may be doing the filtering on the
 * server.
 *
 * One "struct frame" is kept per directory level that is currently
 * being traversed.
 */
struct frame {
	/*
	 * The include/exclude value that entries below this directory
	 * inherit when no pattern decides them.  It is derived from
	 * pattern matching of the directory itself or, failing that,
	 * of a containing directory.
	 */
	int defval;

	/*
	 * 1 when this directory (recursively) holds at least one
	 * provisionally omitted object.
	 *
	 * 0 when everything (recursively) inside this directory has
	 * been explicitly included (SHOWN) in the result, so that the
	 * directory may be short-cut later in the traversal.
	 */
	unsigned child_prov_omit : 1;
};
188
189 struct filter_sparse_data {
190         struct oidset *omits;
191         struct exclude_list el;
192
193         size_t nr, alloc;
194         struct frame *array_frame;
195 };
196
197 static enum list_objects_filter_result filter_sparse(
198         enum list_objects_filter_situation filter_situation,
199         struct object *obj,
200         const char *pathname,
201         const char *filename,
202         void *filter_data_)
203 {
204         struct filter_sparse_data *filter_data = filter_data_;
205         int val, dtype;
206         struct frame *frame;
207
208         switch (filter_situation) {
209         default:
210                 die("unknown filter_situation");
211                 return LOFR_ZERO;
212
213         case LOFS_BEGIN_TREE:
214                 assert(obj->type == OBJ_TREE);
215                 dtype = DT_DIR;
216                 val = is_excluded_from_list(pathname, strlen(pathname),
217                                             filename, &dtype, &filter_data->el,
218                                             &the_index);
219                 if (val < 0)
220                         val = filter_data->array_frame[filter_data->nr].defval;
221
222                 ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,
223                            filter_data->alloc);
224                 filter_data->nr++;
225                 filter_data->array_frame[filter_data->nr].defval = val;
226                 filter_data->array_frame[filter_data->nr].child_prov_omit = 0;
227
228                 /*
229                  * A directory with this tree OID may appear in multiple
230                  * places in the tree. (Think of a directory move or copy,
231                  * with no other changes, so the OID is the same, but the
232                  * full pathnames of objects within this directory are new
233                  * and may match is_excluded() patterns differently.)
234                  * So we cannot mark this directory as SEEN (yet), since
235                  * that will prevent process_tree() from revisiting this
236                  * tree object with other pathname prefixes.
237                  *
238                  * Only _DO_SHOW the tree object the first time we visit
239                  * this tree object.
240                  *
241                  * We always show all tree objects.  A future optimization
242                  * may want to attempt to narrow this.
243                  */
244                 if (obj->flags & FILTER_SHOWN_BUT_REVISIT)
245                         return LOFR_ZERO;
246                 obj->flags |= FILTER_SHOWN_BUT_REVISIT;
247                 return LOFR_DO_SHOW;
248
249         case LOFS_END_TREE:
250                 assert(obj->type == OBJ_TREE);
251                 assert(filter_data->nr > 0);
252
253                 frame = &filter_data->array_frame[filter_data->nr];
254                 filter_data->nr--;
255
256                 /*
257                  * Tell our parent directory if any of our children were
258                  * provisionally omitted.
259                  */
260                 filter_data->array_frame[filter_data->nr].child_prov_omit |=
261                         frame->child_prov_omit;
262
263                 /*
264                  * If there are NO provisionally omitted child objects (ALL child
265                  * objects in this folder were INCLUDED), then we can mark the
266                  * folder as SEEN (so we will not have to revisit it again).
267                  */
268                 if (!frame->child_prov_omit)
269                         return LOFR_MARK_SEEN;
270                 return LOFR_ZERO;
271
272         case LOFS_BLOB:
273                 assert(obj->type == OBJ_BLOB);
274                 assert((obj->flags & SEEN) == 0);
275
276                 frame = &filter_data->array_frame[filter_data->nr];
277
278                 dtype = DT_REG;
279                 val = is_excluded_from_list(pathname, strlen(pathname),
280                                             filename, &dtype, &filter_data->el,
281                                             &the_index);
282                 if (val < 0)
283                         val = frame->defval;
284                 if (val > 0) {
285                         if (filter_data->omits)
286                                 oidset_remove(filter_data->omits, &obj->oid);
287                         return LOFR_MARK_SEEN | LOFR_DO_SHOW;
288                 }
289
290                 /*
291                  * Provisionally omit it.  We've already established that
292                  * this pathname is not in the sparse-checkout specification
293                  * with the CURRENT pathname, so we *WANT* to omit this blob.
294                  *
295                  * However, a pathname elsewhere in the tree may also
296                  * reference this same blob, so we cannot reject it yet.
297                  * Leave the LOFR_ bits unset so that if the blob appears
298                  * again in the traversal, we will be asked again.
299                  */
300                 if (filter_data->omits)
301                         oidset_insert(filter_data->omits, &obj->oid);
302
303                 /*
304                  * Remember that at least 1 blob in this tree was
305                  * provisionally omitted.  This prevents us from short
306                  * cutting the tree in future iterations.
307                  */
308                 frame->child_prov_omit = 1;
309                 return LOFR_ZERO;
310         }
311 }
312
313
314 static void filter_sparse_free(void *filter_data)
315 {
316         struct filter_sparse_data *d = filter_data;
317         /* TODO free contents of 'd' */
318         free(d);
319 }
320
321 static void *filter_sparse_oid__init(
322         struct oidset *omitted,
323         struct list_objects_filter_options *filter_options,
324         filter_object_fn *filter_fn,
325         filter_free_fn *filter_free_fn)
326 {
327         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
328         d->omits = omitted;
329         if (add_excludes_from_blob_to_list(filter_options->sparse_oid_value,
330                                            NULL, 0, &d->el) < 0)
331                 die("could not load filter specification");
332
333         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
334         d->array_frame[d->nr].defval = 0; /* default to include */
335         d->array_frame[d->nr].child_prov_omit = 0;
336
337         *filter_fn = filter_sparse;
338         *filter_free_fn = filter_sparse_free;
339         return d;
340 }
341
342 static void *filter_sparse_path__init(
343         struct oidset *omitted,
344         struct list_objects_filter_options *filter_options,
345         filter_object_fn *filter_fn,
346         filter_free_fn *filter_free_fn)
347 {
348         struct filter_sparse_data *d = xcalloc(1, sizeof(*d));
349         d->omits = omitted;
350         if (add_excludes_from_file_to_list(filter_options->sparse_path_value,
351                                            NULL, 0, &d->el, NULL) < 0)
352                 die("could not load filter specification");
353
354         ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);
355         d->array_frame[d->nr].defval = 0; /* default to include */
356         d->array_frame[d->nr].child_prov_omit = 0;
357
358         *filter_fn = filter_sparse;
359         *filter_free_fn = filter_sparse_free;
360         return d;
361 }
362
363 typedef void *(*filter_init_fn)(
364         struct oidset *omitted,
365         struct list_objects_filter_options *filter_options,
366         filter_object_fn *filter_fn,
367         filter_free_fn *filter_free_fn);
368
369 /*
370  * Must match "enum list_objects_filter_choice".
371  */
372 static filter_init_fn s_filters[] = {
373         NULL,
374         filter_blobs_none__init,
375         filter_blobs_limit__init,
376         filter_sparse_oid__init,
377         filter_sparse_path__init,
378 };
379
380 void *list_objects_filter__init(
381         struct oidset *omitted,
382         struct list_objects_filter_options *filter_options,
383         filter_object_fn *filter_fn,
384         filter_free_fn *filter_free_fn)
385 {
386         filter_init_fn init_fn;
387
388         assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);
389
390         if (filter_options->choice >= LOFC__COUNT)
391                 die("invalid list-objects filter choice: %d",
392                     filter_options->choice);
393
394         init_fn = s_filters[filter_options->choice];
395         if (init_fn)
396                 return init_fn(omitted, filter_options,
397                                filter_fn, filter_free_fn);
398         *filter_fn = NULL;
399         *filter_free_fn = NULL;
400         return NULL;
401 }