remote-mediawiki: fix duplicate revisions being imported
[git] / list-objects.c
1 #include "cache.h"
2 #include "tag.h"
3 #include "commit.h"
4 #include "tree.h"
5 #include "blob.h"
6 #include "diff.h"
7 #include "tree-walk.h"
8 #include "revision.h"
9 #include "list-objects.h"
10 #include "list-objects-filter.h"
11 #include "list-objects-filter-options.h"
12 #include "packfile.h"
13 #include "object-store.h"
14 #include "trace.h"
15
16 struct traversal_context {
17         struct rev_info *revs;
18         show_object_fn show_object;
19         show_commit_fn show_commit;
20         void *show_data;
21         struct filter *filter;
22 };
23
24 static void process_blob(struct traversal_context *ctx,
25                          struct blob *blob,
26                          struct strbuf *path,
27                          const char *name)
28 {
29         struct object *obj = &blob->object;
30         size_t pathlen;
31         enum list_objects_filter_result r;
32
33         if (!ctx->revs->blob_objects)
34                 return;
35         if (!obj)
36                 die("bad blob object");
37         if (obj->flags & (UNINTERESTING | SEEN))
38                 return;
39
40         /*
41          * Pre-filter known-missing objects when explicitly requested.
42          * Otherwise, a missing object error message may be reported
43          * later (depending on other filtering criteria).
44          *
45          * Note that this "--exclude-promisor-objects" pre-filtering
46          * may cause the actual filter to report an incomplete list
47          * of missing objects.
48          */
49         if (ctx->revs->exclude_promisor_objects &&
50             !has_object_file(&obj->oid) &&
51             is_promisor_object(&obj->oid))
52                 return;
53
54         pathlen = path->len;
55         strbuf_addstr(path, name);
56         r = list_objects_filter__filter_object(ctx->revs->repo,
57                                                LOFS_BLOB, obj,
58                                                path->buf, &path->buf[pathlen],
59                                                ctx->filter);
60         if (r & LOFR_MARK_SEEN)
61                 obj->flags |= SEEN;
62         if (r & LOFR_DO_SHOW)
63                 ctx->show_object(obj, path->buf, ctx->show_data);
64         strbuf_setlen(path, pathlen);
65 }
66
67 /*
68  * Processing a gitlink entry currently does nothing, since
69  * we do not recurse into the subproject.
70  *
71  * We *could* eventually add a flag that actually does that,
72  * which would involve:
73  *  - is the subproject actually checked out?
74  *  - if so, see if the subproject has already been added
75  *    to the alternates list, and add it if not.
76  *  - process the commit (or tag) the gitlink points to
77  *    recursively.
78  *
79  * However, it's unclear whether there is really ever any
80  * reason to see superprojects and subprojects as such a
81  * "unified" object pool (potentially resulting in a totally
82  * humongous pack - avoiding which was the whole point of
83  * having gitlinks in the first place!).
84  *
85  * So for now, there is just a note that we *could* follow
86  * the link, and how to do it. Whether it necessarily makes
87  * any sense what-so-ever to ever do that is another issue.
88  */
static void process_gitlink(struct traversal_context *ctx,
                            const unsigned char *sha1,
                            struct strbuf *path,
                            const char *name)
{
        /*
         * Deliberately a no-op: the arguments are accepted so that a
         * gitlink entry is dispatched like any other tree entry, but
         * none of them is looked at.
         */
        (void)ctx;
        (void)sha1;
        (void)path;
        (void)name;
}
96
/*
 * Declared ahead of its definition: process_tree_contents() calls
 * process_tree() for every subtree, and process_tree() in turn calls
 * process_tree_contents().
 */
static void process_tree(struct traversal_context *ctx, struct tree *tree,
                         struct strbuf *base, const char *name);
101
102 static void process_tree_contents(struct traversal_context *ctx,
103                                   struct tree *tree,
104                                   struct strbuf *base)
105 {
106         struct tree_desc desc;
107         struct name_entry entry;
108         enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ?
109                 all_entries_interesting : entry_not_interesting;
110
111         init_tree_desc(&desc, tree->buffer, tree->size);
112
113         while (tree_entry(&desc, &entry)) {
114                 if (match != all_entries_interesting) {
115                         match = tree_entry_interesting(ctx->revs->repo->index,
116                                                        &entry, base, 0,
117                                                        &ctx->revs->diffopt.pathspec);
118                         if (match == all_entries_not_interesting)
119                                 break;
120                         if (match == entry_not_interesting)
121                                 continue;
122                 }
123
124                 if (S_ISDIR(entry.mode)) {
125                         struct tree *t = lookup_tree(ctx->revs->repo, &entry.oid);
126                         if (!t) {
127                                 die(_("entry '%s' in tree %s has tree mode, "
128                                       "but is not a tree"),
129                                     entry.path, oid_to_hex(&tree->object.oid));
130                         }
131                         t->object.flags |= NOT_USER_GIVEN;
132                         process_tree(ctx, t, base, entry.path);
133                 }
134                 else if (S_ISGITLINK(entry.mode))
135                         process_gitlink(ctx, entry.oid.hash,
136                                         base, entry.path);
137                 else {
138                         struct blob *b = lookup_blob(ctx->revs->repo, &entry.oid);
139                         if (!b) {
140                                 die(_("entry '%s' in tree %s has blob mode, "
141                                       "but is not a blob"),
142                                     entry.path, oid_to_hex(&tree->object.oid));
143                         }
144                         b->object.flags |= NOT_USER_GIVEN;
145                         process_blob(ctx, b, base, entry.path);
146                 }
147         }
148 }
149
150 static void process_tree(struct traversal_context *ctx,
151                          struct tree *tree,
152                          struct strbuf *base,
153                          const char *name)
154 {
155         struct object *obj = &tree->object;
156         struct rev_info *revs = ctx->revs;
157         int baselen = base->len;
158         enum list_objects_filter_result r;
159         int failed_parse;
160
161         if (!revs->tree_objects)
162                 return;
163         if (!obj)
164                 die("bad tree object");
165         if (obj->flags & (UNINTERESTING | SEEN))
166                 return;
167
168         failed_parse = parse_tree_gently(tree, 1);
169         if (failed_parse) {
170                 if (revs->ignore_missing_links)
171                         return;
172
173                 /*
174                  * Pre-filter known-missing tree objects when explicitly
175                  * requested.  This may cause the actual filter to report
176                  * an incomplete list of missing objects.
177                  */
178                 if (revs->exclude_promisor_objects &&
179                     is_promisor_object(&obj->oid))
180                         return;
181
182                 if (!revs->do_not_die_on_missing_tree)
183                         die("bad tree object %s", oid_to_hex(&obj->oid));
184         }
185
186         strbuf_addstr(base, name);
187         r = list_objects_filter__filter_object(ctx->revs->repo,
188                                                LOFS_BEGIN_TREE, obj,
189                                                base->buf, &base->buf[baselen],
190                                                ctx->filter);
191         if (r & LOFR_MARK_SEEN)
192                 obj->flags |= SEEN;
193         if (r & LOFR_DO_SHOW)
194                 ctx->show_object(obj, base->buf, ctx->show_data);
195         if (base->len)
196                 strbuf_addch(base, '/');
197
198         if (r & LOFR_SKIP_TREE)
199                 trace_printf("Skipping contents of tree %s...\n", base->buf);
200         else if (!failed_parse)
201                 process_tree_contents(ctx, tree, base);
202
203         r = list_objects_filter__filter_object(ctx->revs->repo,
204                                                LOFS_END_TREE, obj,
205                                                base->buf, &base->buf[baselen],
206                                                ctx->filter);
207         if (r & LOFR_MARK_SEEN)
208                 obj->flags |= SEEN;
209         if (r & LOFR_DO_SHOW)
210                 ctx->show_object(obj, base->buf, ctx->show_data);
211
212         strbuf_setlen(base, baselen);
213         free_tree_buffer(tree);
214 }
215
216 static void mark_edge_parents_uninteresting(struct commit *commit,
217                                             struct rev_info *revs,
218                                             show_edge_fn show_edge)
219 {
220         struct commit_list *parents;
221
222         for (parents = commit->parents; parents; parents = parents->next) {
223                 struct commit *parent = parents->item;
224                 if (!(parent->object.flags & UNINTERESTING))
225                         continue;
226                 mark_tree_uninteresting(revs->repo, get_commit_tree(parent));
227                 if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
228                         parent->object.flags |= SHOWN;
229                         show_edge(parent);
230                 }
231         }
232 }
233
234 static void add_edge_parents(struct commit *commit,
235                              struct rev_info *revs,
236                              show_edge_fn show_edge,
237                              struct oidset *set)
238 {
239         struct commit_list *parents;
240
241         for (parents = commit->parents; parents; parents = parents->next) {
242                 struct commit *parent = parents->item;
243                 struct tree *tree = get_commit_tree(parent);
244
245                 if (!tree)
246                         continue;
247
248                 oidset_insert(set, &tree->object.oid);
249
250                 if (!(parent->object.flags & UNINTERESTING))
251                         continue;
252                 tree->object.flags |= UNINTERESTING;
253
254                 if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
255                         parent->object.flags |= SHOWN;
256                         show_edge(parent);
257                 }
258         }
259 }
260
261 void mark_edges_uninteresting(struct rev_info *revs,
262                               show_edge_fn show_edge,
263                               int sparse)
264 {
265         struct commit_list *list;
266         int i;
267
268         if (sparse) {
269                 struct oidset set;
270                 oidset_init(&set, 16);
271
272                 for (list = revs->commits; list; list = list->next) {
273                         struct commit *commit = list->item;
274                         struct tree *tree = get_commit_tree(commit);
275
276                         if (commit->object.flags & UNINTERESTING)
277                                 tree->object.flags |= UNINTERESTING;
278
279                         oidset_insert(&set, &tree->object.oid);
280                         add_edge_parents(commit, revs, show_edge, &set);
281                 }
282
283                 mark_trees_uninteresting_sparse(revs->repo, &set);
284                 oidset_clear(&set);
285         } else {
286                 for (list = revs->commits; list; list = list->next) {
287                         struct commit *commit = list->item;
288                         if (commit->object.flags & UNINTERESTING) {
289                                 mark_tree_uninteresting(revs->repo,
290                                                         get_commit_tree(commit));
291                                 if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
292                                         commit->object.flags |= SHOWN;
293                                         show_edge(commit);
294                                 }
295                                 continue;
296                         }
297                         mark_edge_parents_uninteresting(commit, revs, show_edge);
298                 }
299         }
300
301         if (revs->edge_hint_aggressive) {
302                 for (i = 0; i < revs->cmdline.nr; i++) {
303                         struct object *obj = revs->cmdline.rev[i].item;
304                         struct commit *commit = (struct commit *)obj;
305                         if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING))
306                                 continue;
307                         mark_tree_uninteresting(revs->repo,
308                                                 get_commit_tree(commit));
309                         if (!(obj->flags & SHOWN)) {
310                                 obj->flags |= SHOWN;
311                                 show_edge(commit);
312                         }
313                 }
314         }
315 }
316
317 static void add_pending_tree(struct rev_info *revs, struct tree *tree)
318 {
319         add_pending_object(revs, &tree->object, "");
320 }
321
322 static void traverse_trees_and_blobs(struct traversal_context *ctx,
323                                      struct strbuf *base)
324 {
325         int i;
326
327         assert(base->len == 0);
328
329         for (i = 0; i < ctx->revs->pending.nr; i++) {
330                 struct object_array_entry *pending = ctx->revs->pending.objects + i;
331                 struct object *obj = pending->item;
332                 const char *name = pending->name;
333                 const char *path = pending->path;
334                 if (obj->flags & (UNINTERESTING | SEEN))
335                         continue;
336                 if (obj->type == OBJ_TAG) {
337                         obj->flags |= SEEN;
338                         ctx->show_object(obj, name, ctx->show_data);
339                         continue;
340                 }
341                 if (!path)
342                         path = "";
343                 if (obj->type == OBJ_TREE) {
344                         process_tree(ctx, (struct tree *)obj, base, path);
345                         continue;
346                 }
347                 if (obj->type == OBJ_BLOB) {
348                         process_blob(ctx, (struct blob *)obj, base, path);
349                         continue;
350                 }
351                 die("unknown pending object %s (%s)",
352                     oid_to_hex(&obj->oid), name);
353         }
354         object_array_clear(&ctx->revs->pending);
355 }
356
357 static void do_traverse(struct traversal_context *ctx)
358 {
359         struct commit *commit;
360         struct strbuf csp; /* callee's scratch pad */
361         strbuf_init(&csp, PATH_MAX);
362
363         while ((commit = get_revision(ctx->revs)) != NULL) {
364                 /*
365                  * an uninteresting boundary commit may not have its tree
366                  * parsed yet, but we are not going to show them anyway
367                  */
368                 if (!ctx->revs->tree_objects)
369                         ; /* do not bother loading tree */
370                 else if (get_commit_tree(commit)) {
371                         struct tree *tree = get_commit_tree(commit);
372                         tree->object.flags |= NOT_USER_GIVEN;
373                         add_pending_tree(ctx->revs, tree);
374                 } else if (commit->object.parsed) {
375                         die(_("unable to load root tree for commit %s"),
376                               oid_to_hex(&commit->object.oid));
377                 }
378                 ctx->show_commit(commit, ctx->show_data);
379
380                 if (ctx->revs->tree_blobs_in_commit_order)
381                         /*
382                          * NEEDSWORK: Adding the tree and then flushing it here
383                          * needs a reallocation for each commit. Can we pass the
384                          * tree directory without allocation churn?
385                          */
386                         traverse_trees_and_blobs(ctx, &csp);
387         }
388         traverse_trees_and_blobs(ctx, &csp);
389         strbuf_release(&csp);
390 }
391
392 void traverse_commit_list(struct rev_info *revs,
393                           show_commit_fn show_commit,
394                           show_object_fn show_object,
395                           void *show_data)
396 {
397         struct traversal_context ctx;
398         ctx.revs = revs;
399         ctx.show_commit = show_commit;
400         ctx.show_object = show_object;
401         ctx.show_data = show_data;
402         ctx.filter = NULL;
403         do_traverse(&ctx);
404 }
405
406 void traverse_commit_list_filtered(
407         struct list_objects_filter_options *filter_options,
408         struct rev_info *revs,
409         show_commit_fn show_commit,
410         show_object_fn show_object,
411         void *show_data,
412         struct oidset *omitted)
413 {
414         struct traversal_context ctx;
415
416         ctx.revs = revs;
417         ctx.show_object = show_object;
418         ctx.show_commit = show_commit;
419         ctx.show_data = show_data;
420         ctx.filter = list_objects_filter__init(omitted, filter_options);
421         do_traverse(&ctx);
422         list_objects_filter__free(ctx.filter);
423 }