bloom: parse commit before computing filters
[git] / reachable.c
1 #include "cache.h"
2 #include "refs.h"
3 #include "tag.h"
4 #include "commit.h"
5 #include "blob.h"
6 #include "diff.h"
7 #include "revision.h"
8 #include "reachable.h"
9 #include "cache-tree.h"
10 #include "progress.h"
11 #include "list-objects.h"
12 #include "packfile.h"
13 #include "worktree.h"
14 #include "object-store.h"
15 #include "pack-bitmap.h"
16
/*
 * Bookkeeping for progress reporting during the connectivity walk:
 * the progress meter to update and the running count shown on it.
 */
struct connectivity_progress {
        struct progress *progress;      /* meter handed to display_progress() */
        unsigned long count;            /* bumped once per update_progress() call */
};
21
22 static void update_progress(struct connectivity_progress *cp)
23 {
24         cp->count++;
25         if ((cp->count & 1023) == 0)
26                 display_progress(cp->progress, cp->count);
27 }
28
29 static int add_one_ref(const char *path, const struct object_id *oid,
30                        int flag, void *cb_data)
31 {
32         struct rev_info *revs = (struct rev_info *)cb_data;
33         struct object *object;
34
35         if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) {
36                 warning("symbolic ref is dangling: %s", path);
37                 return 0;
38         }
39
40         object = parse_object_or_die(oid, path);
41         add_pending_object(revs, object, "");
42
43         return 0;
44 }
45
/*
 * Object callback for the traversal. No flagging is needed here because
 * the traversal has already marked the object as SEEN; all that is left
 * is to advance the progress counter passed in through data.
 */
static void mark_object(struct object *obj, const char *name, void *data)
{
        struct connectivity_progress *cp = data;

        update_progress(cp);
}
54
55 static void mark_commit(struct commit *c, void *data)
56 {
57         mark_object(&c->object, NULL, data);
58 }
59
60 struct recent_data {
61         struct rev_info *revs;
62         timestamp_t timestamp;
63 };
64
65 static void add_recent_object(const struct object_id *oid,
66                               timestamp_t mtime,
67                               struct recent_data *data)
68 {
69         struct object *obj;
70         enum object_type type;
71
72         if (mtime <= data->timestamp)
73                 return;
74
75         /*
76          * We do not want to call parse_object here, because
77          * inflating blobs and trees could be very expensive.
78          * However, we do need to know the correct type for
79          * later processing, and the revision machinery expects
80          * commits and tags to have been parsed.
81          */
82         type = oid_object_info(the_repository, oid, NULL);
83         if (type < 0)
84                 die("unable to get object info for %s", oid_to_hex(oid));
85
86         switch (type) {
87         case OBJ_TAG:
88         case OBJ_COMMIT:
89                 obj = parse_object_or_die(oid, NULL);
90                 break;
91         case OBJ_TREE:
92                 obj = (struct object *)lookup_tree(the_repository, oid);
93                 break;
94         case OBJ_BLOB:
95                 obj = (struct object *)lookup_blob(the_repository, oid);
96                 break;
97         default:
98                 die("unknown object type for %s: %s",
99                     oid_to_hex(oid), type_name(type));
100         }
101
102         if (!obj)
103                 die("unable to lookup %s", oid_to_hex(oid));
104
105         add_pending_object(data->revs, obj, "");
106 }
107
108 static int add_recent_loose(const struct object_id *oid,
109                             const char *path, void *data)
110 {
111         struct stat st;
112         struct object *obj = lookup_object(the_repository, oid);
113
114         if (obj && obj->flags & SEEN)
115                 return 0;
116
117         if (stat(path, &st) < 0) {
118                 /*
119                  * It's OK if an object went away during our iteration; this
120                  * could be due to a simultaneous repack. But anything else
121                  * we should abort, since we might then fail to mark objects
122                  * which should not be pruned.
123                  */
124                 if (errno == ENOENT)
125                         return 0;
126                 return error_errno("unable to stat %s", oid_to_hex(oid));
127         }
128
129         add_recent_object(oid, st.st_mtime, data);
130         return 0;
131 }
132
133 static int add_recent_packed(const struct object_id *oid,
134                              struct packed_git *p, uint32_t pos,
135                              void *data)
136 {
137         struct object *obj = lookup_object(the_repository, oid);
138
139         if (obj && obj->flags & SEEN)
140                 return 0;
141         add_recent_object(oid, p->mtime, data);
142         return 0;
143 }
144
145 int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
146                                            timestamp_t timestamp)
147 {
148         struct recent_data data;
149         int r;
150
151         data.revs = revs;
152         data.timestamp = timestamp;
153
154         r = for_each_loose_object(add_recent_loose, &data,
155                                   FOR_EACH_OBJECT_LOCAL_ONLY);
156         if (r)
157                 return r;
158         return for_each_packed_object(add_recent_packed, &data,
159                                       FOR_EACH_OBJECT_LOCAL_ONLY);
160 }
161
162 static void *lookup_object_by_type(struct repository *r,
163                                    const struct object_id *oid,
164                                    enum object_type type)
165 {
166         switch (type) {
167         case OBJ_COMMIT:
168                 return lookup_commit(r, oid);
169         case OBJ_TREE:
170                 return lookup_tree(r, oid);
171         case OBJ_TAG:
172                 return lookup_tag(r, oid);
173         case OBJ_BLOB:
174                 return lookup_blob(r, oid);
175         default:
176                 die("BUG: unknown object type %d", type);
177         }
178 }
179
180 static int mark_object_seen(const struct object_id *oid,
181                              enum object_type type,
182                              int exclude,
183                              uint32_t name_hash,
184                              struct packed_git *found_pack,
185                              off_t found_offset)
186 {
187         struct object *obj = lookup_object_by_type(the_repository, oid, type);
188         if (!obj)
189                 die("unable to create object '%s'", oid_to_hex(oid));
190
191         obj->flags |= SEEN;
192         return 0;
193 }
194
195 void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
196                             timestamp_t mark_recent, struct progress *progress)
197 {
198         struct connectivity_progress cp;
199         struct bitmap_index *bitmap_git;
200
201         /*
202          * Set up revision parsing, and mark us as being interested
203          * in all object types, not just commits.
204          */
205         revs->tag_objects = 1;
206         revs->blob_objects = 1;
207         revs->tree_objects = 1;
208
209         /* Add all refs from the index file */
210         add_index_objects_to_pending(revs, 0);
211
212         /* Add all external refs */
213         for_each_ref(add_one_ref, revs);
214
215         /* detached HEAD is not included in the list above */
216         head_ref(add_one_ref, revs);
217         other_head_refs(add_one_ref, revs);
218
219         /* Add all reflog info */
220         if (mark_reflog)
221                 add_reflogs_to_pending(revs, 0);
222
223         cp.progress = progress;
224         cp.count = 0;
225
226         bitmap_git = prepare_bitmap_walk(revs, NULL);
227         if (bitmap_git) {
228                 traverse_bitmap_commit_list(bitmap_git, revs, mark_object_seen);
229                 free_bitmap_index(bitmap_git);
230                 return;
231         }
232
233         /*
234          * Set up the revision walk - this will move all commits
235          * from the pending list to the commit walking list.
236          */
237         if (prepare_revision_walk(revs))
238                 die("revision walk setup failed");
239         traverse_commit_list(revs, mark_commit, mark_object, &cp);
240
241         if (mark_recent) {
242                 revs->ignore_missing_links = 1;
243                 if (add_unseen_recent_objects_to_traversal(revs, mark_recent))
244                         die("unable to mark recent objects");
245                 if (prepare_revision_walk(revs))
246                         die("revision walk setup failed");
247                 traverse_commit_list(revs, mark_commit, mark_object, &cp);
248         }
249
250         display_progress(cp.progress, cp.count);
251 }