index-pack: reduce object_entry size to save memory
[git] / object.c
1 #include "cache.h"
2 #include "object.h"
3 #include "blob.h"
4 #include "tree.h"
5 #include "commit.h"
6 #include "tag.h"
7
/*
 * The global object table: an open-addressing hash of object pointers.
 * obj_hash_size is the number of slots (kept a power of 2, see
 * grow_object_hash()); nr_objs counts the objects inserted so far.
 */
static struct object **obj_hash;
static int nr_objs;
static int obj_hash_size;
10
11 unsigned int get_max_object_index(void)
12 {
13         return obj_hash_size;
14 }
15
16 struct object *get_indexed_object(unsigned int idx)
17 {
18         return obj_hash[idx];
19 }
20
/*
 * Printable names of the object types, indexed by type number:
 * 0 = OBJ_NONE (no name), 1 = OBJ_COMMIT, 2 = OBJ_TREE,
 * 3 = OBJ_BLOB, 4 = OBJ_TAG.
 */
static const char *object_type_strings[] = {
        NULL, "commit", "tree", "blob", "tag",
};
28
29 const char *typename(unsigned int type)
30 {
31         if (type >= ARRAY_SIZE(object_type_strings))
32                 return NULL;
33         return object_type_strings[type];
34 }
35
36 int type_from_string_gently(const char *str, ssize_t len, int gentle)
37 {
38         int i;
39
40         if (len < 0)
41                 len = strlen(str);
42
43         for (i = 1; i < ARRAY_SIZE(object_type_strings); i++)
44                 if (!strncmp(str, object_type_strings[i], len))
45                         return i;
46
47         if (gentle)
48                 return -1;
49
50         die("invalid object type \"%s\"", str);
51 }
52
/*
 * Map the given sha1 to a bucket index in the range 0..n-1.  The
 * reduction is done with a bit mask, so n has to be a power of 2.
 * The resulting value is *not* consistent across computer
 * architectures.
 */
static unsigned int hash_obj(const unsigned char *sha1, unsigned int n)
{
        const unsigned int mask = n - 1;

        return sha1hash(sha1) & mask;
}
62
63 /*
64  * Insert obj into the hash table hash, which has length size (which
65  * must be a power of 2).  On collisions, simply overflow to the next
66  * empty bucket.
67  */
68 static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
69 {
70         unsigned int j = hash_obj(obj->sha1, size);
71
72         while (hash[j]) {
73                 j++;
74                 if (j >= size)
75                         j = 0;
76         }
77         hash[j] = obj;
78 }
79
80 /*
81  * Look up the record for the given sha1 in the hash map stored in
82  * obj_hash.  Return NULL if it was not found.
83  */
84 struct object *lookup_object(const unsigned char *sha1)
85 {
86         unsigned int i, first;
87         struct object *obj;
88
89         if (!obj_hash)
90                 return NULL;
91
92         first = i = hash_obj(sha1, obj_hash_size);
93         while ((obj = obj_hash[i]) != NULL) {
94                 if (!hashcmp(sha1, obj->sha1))
95                         break;
96                 i++;
97                 if (i == obj_hash_size)
98                         i = 0;
99         }
100         if (obj && i != first) {
101                 /*
102                  * Move object to where we started to look for it so
103                  * that we do not need to walk the hash table the next
104                  * time we look for it.
105                  */
106                 struct object *tmp = obj_hash[i];
107                 obj_hash[i] = obj_hash[first];
108                 obj_hash[first] = tmp;
109         }
110         return obj;
111 }
112
113 /*
114  * Increase the size of the hash map stored in obj_hash to the next
115  * power of 2 (but at least 32).  Copy the existing values to the new
116  * hash map.
117  */
118 static void grow_object_hash(void)
119 {
120         int i;
121         /*
122          * Note that this size must always be power-of-2 to match hash_obj
123          * above.
124          */
125         int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
126         struct object **new_hash;
127
128         new_hash = xcalloc(new_hash_size, sizeof(struct object *));
129         for (i = 0; i < obj_hash_size; i++) {
130                 struct object *obj = obj_hash[i];
131                 if (!obj)
132                         continue;
133                 insert_obj_hash(obj, new_hash, new_hash_size);
134         }
135         free(obj_hash);
136         obj_hash = new_hash;
137         obj_hash_size = new_hash_size;
138 }
139
140 void *create_object(const unsigned char *sha1, void *o)
141 {
142         struct object *obj = o;
143
144         obj->parsed = 0;
145         obj->used = 0;
146         obj->flags = 0;
147         hashcpy(obj->sha1, sha1);
148
149         if (obj_hash_size - 1 <= nr_objs * 2)
150                 grow_object_hash();
151
152         insert_obj_hash(obj, obj_hash, obj_hash_size);
153         nr_objs++;
154         return obj;
155 }
156
157 void *object_as_type(struct object *obj, enum object_type type, int quiet)
158 {
159         if (obj->type == type)
160                 return obj;
161         else if (obj->type == OBJ_NONE) {
162                 if (type == OBJ_COMMIT)
163                         ((struct commit *)obj)->index = alloc_commit_index();
164                 obj->type = type;
165                 return obj;
166         }
167         else {
168                 if (!quiet)
169                         error("object %s is a %s, not a %s",
170                               sha1_to_hex(obj->sha1),
171                               typename(obj->type), typename(type));
172                 return NULL;
173         }
174 }
175
/*
 * Return the object registered for sha1, creating and registering a
 * new node from alloc_object_node() when none exists yet.
 */
struct object *lookup_unknown_object(const unsigned char *sha1)
{
        struct object *known = lookup_object(sha1);

        if (known)
                return known;
        return create_object(sha1, alloc_object_node());
}
183
184 struct object *parse_object_buffer(const unsigned char *sha1, enum object_type type, unsigned long size, void *buffer, int *eaten_p)
185 {
186         struct object *obj;
187         *eaten_p = 0;
188
189         obj = NULL;
190         if (type == OBJ_BLOB) {
191                 struct blob *blob = lookup_blob(sha1);
192                 if (blob) {
193                         if (parse_blob_buffer(blob, buffer, size))
194                                 return NULL;
195                         obj = &blob->object;
196                 }
197         } else if (type == OBJ_TREE) {
198                 struct tree *tree = lookup_tree(sha1);
199                 if (tree) {
200                         obj = &tree->object;
201                         if (!tree->buffer)
202                                 tree->object.parsed = 0;
203                         if (!tree->object.parsed) {
204                                 if (parse_tree_buffer(tree, buffer, size))
205                                         return NULL;
206                                 *eaten_p = 1;
207                         }
208                 }
209         } else if (type == OBJ_COMMIT) {
210                 struct commit *commit = lookup_commit(sha1);
211                 if (commit) {
212                         if (parse_commit_buffer(commit, buffer, size))
213                                 return NULL;
214                         if (!get_cached_commit_buffer(commit, NULL)) {
215                                 set_commit_buffer(commit, buffer, size);
216                                 *eaten_p = 1;
217                         }
218                         obj = &commit->object;
219                 }
220         } else if (type == OBJ_TAG) {
221                 struct tag *tag = lookup_tag(sha1);
222                 if (tag) {
223                         if (parse_tag_buffer(tag, buffer, size))
224                                return NULL;
225                         obj = &tag->object;
226                 }
227         } else {
228                 warning("object %s has unknown type id %d", sha1_to_hex(sha1), type);
229                 obj = NULL;
230         }
231         return obj;
232 }
233
/*
 * Like parse_object(), but never returns NULL: if the object cannot be
 * parsed, die() with a message that mentions name, or the hex sha1
 * when name is NULL.
 */
struct object *parse_object_or_die(const unsigned char *sha1,
                                   const char *name)
{
        struct object *o = parse_object(sha1);

        if (!o)
                die(_("unable to parse object: %s"), name ? name : sha1_to_hex(sha1));
        return o;
}
243
244 struct object *parse_object(const unsigned char *sha1)
245 {
246         unsigned long size;
247         enum object_type type;
248         int eaten;
249         const unsigned char *repl = lookup_replace_object(sha1);
250         void *buffer;
251         struct object *obj;
252
253         obj = lookup_object(sha1);
254         if (obj && obj->parsed)
255                 return obj;
256
257         if ((obj && obj->type == OBJ_BLOB) ||
258             (!obj && has_sha1_file(sha1) &&
259              sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
260                 if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
261                         error("sha1 mismatch %s", sha1_to_hex(repl));
262                         return NULL;
263                 }
264                 parse_blob_buffer(lookup_blob(sha1), NULL, 0);
265                 return lookup_object(sha1);
266         }
267
268         buffer = read_sha1_file(sha1, &type, &size);
269         if (buffer) {
270                 if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {
271                         free(buffer);
272                         error("sha1 mismatch %s", sha1_to_hex(repl));
273                         return NULL;
274                 }
275
276                 obj = parse_object_buffer(sha1, type, size, buffer, &eaten);
277                 if (!eaten)
278                         free(buffer);
279                 return obj;
280         }
281         return NULL;
282 }
283
284 struct object_list *object_list_insert(struct object *item,
285                                        struct object_list **list_p)
286 {
287         struct object_list *new_list = xmalloc(sizeof(struct object_list));
288         new_list->item = item;
289         new_list->next = *list_p;
290         *list_p = new_list;
291         return new_list;
292 }
293
294 int object_list_contains(struct object_list *list, struct object *obj)
295 {
296         while (list) {
297                 if (list->item == obj)
298                         return 1;
299                 list = list->next;
300         }
301         return 0;
302 }
303
/*
 * Shared empty string.  object_array_entry::name points here for
 * entries whose name is "", which avoids a malloc/free pair; the
 * address doubles as a marker that the name must not be freed.
 */
static char object_array_slopbuf[1] = "";
309
310 void add_object_array_with_path(struct object *obj, const char *name,
311                                 struct object_array *array,
312                                 unsigned mode, const char *path)
313 {
314         unsigned nr = array->nr;
315         unsigned alloc = array->alloc;
316         struct object_array_entry *objects = array->objects;
317         struct object_array_entry *entry;
318
319         if (nr >= alloc) {
320                 alloc = (alloc + 32) * 2;
321                 REALLOC_ARRAY(objects, alloc);
322                 array->alloc = alloc;
323                 array->objects = objects;
324         }
325         entry = &objects[nr];
326         entry->item = obj;
327         if (!name)
328                 entry->name = NULL;
329         else if (!*name)
330                 /* Use our own empty string instead of allocating one: */
331                 entry->name = object_array_slopbuf;
332         else
333                 entry->name = xstrdup(name);
334         entry->mode = mode;
335         if (path)
336                 entry->path = xstrdup(path);
337         else
338                 entry->path = NULL;
339         array->nr = ++nr;
340 }
341
342 void add_object_array(struct object *obj, const char *name, struct object_array *array)
343 {
344         add_object_array_with_path(obj, name, array, S_IFINVALID, NULL);
345 }
346
347 /*
348  * Free all memory associated with an entry; the result is
349  * in an unspecified state and should not be examined.
350  */
351 static void object_array_release_entry(struct object_array_entry *ent)
352 {
353         if (ent->name != object_array_slopbuf)
354                 free(ent->name);
355         free(ent->path);
356 }
357
358 void object_array_filter(struct object_array *array,
359                          object_array_each_func_t want, void *cb_data)
360 {
361         unsigned nr = array->nr, src, dst;
362         struct object_array_entry *objects = array->objects;
363
364         for (src = dst = 0; src < nr; src++) {
365                 if (want(&objects[src], cb_data)) {
366                         if (src != dst)
367                                 objects[dst] = objects[src];
368                         dst++;
369                 } else {
370                         object_array_release_entry(&objects[src]);
371                 }
372         }
373         array->nr = dst;
374 }
375
376 void object_array_clear(struct object_array *array)
377 {
378         int i;
379         for (i = 0; i < array->nr; i++)
380                 object_array_release_entry(&array->objects[i]);
381         free(array->objects);
382         array->objects = NULL;
383         array->nr = array->alloc = 0;
384 }
385
386 /*
387  * Return true iff array already contains an entry with name.
388  */
389 static int contains_name(struct object_array *array, const char *name)
390 {
391         unsigned nr = array->nr, i;
392         struct object_array_entry *object = array->objects;
393
394         for (i = 0; i < nr; i++, object++)
395                 if (!strcmp(object->name, name))
396                         return 1;
397         return 0;
398 }
399
400 void object_array_remove_duplicates(struct object_array *array)
401 {
402         unsigned nr = array->nr, src;
403         struct object_array_entry *objects = array->objects;
404
405         array->nr = 0;
406         for (src = 0; src < nr; src++) {
407                 if (!contains_name(array, objects[src].name)) {
408                         if (src != array->nr)
409                                 objects[array->nr] = objects[src];
410                         array->nr++;
411                 } else {
412                         object_array_release_entry(&objects[src]);
413                 }
414         }
415 }
416
417 void clear_object_flags(unsigned flags)
418 {
419         int i;
420
421         for (i=0; i < obj_hash_size; i++) {
422                 struct object *obj = obj_hash[i];
423                 if (obj)
424                         obj->flags &= ~flags;
425         }
426 }