midx: read pack names into array
[git] / midx.c
1 #include "cache.h"
2 #include "csum-file.h"
3 #include "dir.h"
4 #include "lockfile.h"
5 #include "packfile.h"
6 #include "object-store.h"
7 #include "midx.h"
8
9 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
10 #define MIDX_VERSION 1
11 #define MIDX_BYTE_FILE_VERSION 4
12 #define MIDX_BYTE_HASH_VERSION 5
13 #define MIDX_BYTE_NUM_CHUNKS 6
14 #define MIDX_BYTE_NUM_PACKS 8
15 #define MIDX_HASH_VERSION 1
16 #define MIDX_HEADER_SIZE 12
17 #define MIDX_HASH_LEN 20
18 #define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
19
20 #define MIDX_MAX_CHUNKS 1
21 #define MIDX_CHUNK_ALIGNMENT 4
22 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
23 #define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
24
25 static char *get_midx_filename(const char *object_dir)
26 {
27         return xstrfmt("%s/pack/multi-pack-index", object_dir);
28 }
29
30 struct multi_pack_index *load_multi_pack_index(const char *object_dir)
31 {
32         struct multi_pack_index *m = NULL;
33         int fd;
34         struct stat st;
35         size_t midx_size;
36         void *midx_map = NULL;
37         uint32_t hash_version;
38         char *midx_name = get_midx_filename(object_dir);
39         uint32_t i;
40         const char *cur_pack_name;
41
42         fd = git_open(midx_name);
43
44         if (fd < 0)
45                 goto cleanup_fail;
46         if (fstat(fd, &st)) {
47                 error_errno(_("failed to read %s"), midx_name);
48                 goto cleanup_fail;
49         }
50
51         midx_size = xsize_t(st.st_size);
52
53         if (midx_size < MIDX_MIN_SIZE) {
54                 error(_("multi-pack-index file %s is too small"), midx_name);
55                 goto cleanup_fail;
56         }
57
58         FREE_AND_NULL(midx_name);
59
60         midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
61
62         FLEX_ALLOC_MEM(m, object_dir, object_dir, strlen(object_dir));
63         m->fd = fd;
64         m->data = midx_map;
65         m->data_len = midx_size;
66
67         m->signature = get_be32(m->data);
68         if (m->signature != MIDX_SIGNATURE) {
69                 error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
70                       m->signature, MIDX_SIGNATURE);
71                 goto cleanup_fail;
72         }
73
74         m->version = m->data[MIDX_BYTE_FILE_VERSION];
75         if (m->version != MIDX_VERSION) {
76                 error(_("multi-pack-index version %d not recognized"),
77                       m->version);
78                 goto cleanup_fail;
79         }
80
81         hash_version = m->data[MIDX_BYTE_HASH_VERSION];
82         if (hash_version != MIDX_HASH_VERSION) {
83                 error(_("hash version %u does not match"), hash_version);
84                 goto cleanup_fail;
85         }
86         m->hash_len = MIDX_HASH_LEN;
87
88         m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
89
90         m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
91
92         for (i = 0; i < m->num_chunks; i++) {
93                 uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
94                                              MIDX_CHUNKLOOKUP_WIDTH * i);
95                 uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
96                                                  MIDX_CHUNKLOOKUP_WIDTH * i);
97
98                 switch (chunk_id) {
99                         case MIDX_CHUNKID_PACKNAMES:
100                                 m->chunk_pack_names = m->data + chunk_offset;
101                                 break;
102
103                         case 0:
104                                 die(_("terminating multi-pack-index chunk id appears earlier than expected"));
105                                 break;
106
107                         default:
108                                 /*
109                                  * Do nothing on unrecognized chunks, allowing future
110                                  * extensions to add optional chunks.
111                                  */
112                                 break;
113                 }
114         }
115
116         if (!m->chunk_pack_names)
117                 die(_("multi-pack-index missing required pack-name chunk"));
118
119         m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));
120
121         cur_pack_name = (const char *)m->chunk_pack_names;
122         for (i = 0; i < m->num_packs; i++) {
123                 m->pack_names[i] = cur_pack_name;
124
125                 cur_pack_name += strlen(cur_pack_name) + 1;
126
127                 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) {
128                         error(_("multi-pack-index pack names out of order: '%s' before '%s'"),
129                               m->pack_names[i - 1],
130                               m->pack_names[i]);
131                         goto cleanup_fail;
132                 }
133         }
134
135         return m;
136
137 cleanup_fail:
138         free(m);
139         free(midx_name);
140         if (midx_map)
141                 munmap(midx_map, midx_size);
142         if (0 <= fd)
143                 close(fd);
144         return NULL;
145 }
146
147 static size_t write_midx_header(struct hashfile *f,
148                                 unsigned char num_chunks,
149                                 uint32_t num_packs)
150 {
151         unsigned char byte_values[4];
152
153         hashwrite_be32(f, MIDX_SIGNATURE);
154         byte_values[0] = MIDX_VERSION;
155         byte_values[1] = MIDX_HASH_VERSION;
156         byte_values[2] = num_chunks;
157         byte_values[3] = 0; /* unused */
158         hashwrite(f, byte_values, sizeof(byte_values));
159         hashwrite_be32(f, num_packs);
160
161         return MIDX_HEADER_SIZE;
162 }
163
164 struct pack_list {
165         struct packed_git **list;
166         char **names;
167         uint32_t nr;
168         uint32_t alloc_list;
169         uint32_t alloc_names;
170         size_t pack_name_concat_len;
171 };
172
173 static void add_pack_to_midx(const char *full_path, size_t full_path_len,
174                              const char *file_name, void *data)
175 {
176         struct pack_list *packs = (struct pack_list *)data;
177
178         if (ends_with(file_name, ".idx")) {
179                 ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
180                 ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
181
182                 packs->list[packs->nr] = add_packed_git(full_path,
183                                                         full_path_len,
184                                                         0);
185                 if (!packs->list[packs->nr]) {
186                         warning(_("failed to add packfile '%s'"),
187                                 full_path);
188                         return;
189                 }
190
191                 packs->names[packs->nr] = xstrdup(file_name);
192                 packs->pack_name_concat_len += strlen(file_name) + 1;
193                 packs->nr++;
194         }
195 }
196
197 struct pack_pair {
198         uint32_t pack_int_id;
199         char *pack_name;
200 };
201
202 static int pack_pair_compare(const void *_a, const void *_b)
203 {
204         struct pack_pair *a = (struct pack_pair *)_a;
205         struct pack_pair *b = (struct pack_pair *)_b;
206         return strcmp(a->pack_name, b->pack_name);
207 }
208
209 static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
210 {
211         uint32_t i;
212         struct pack_pair *pairs;
213
214         ALLOC_ARRAY(pairs, nr_packs);
215
216         for (i = 0; i < nr_packs; i++) {
217                 pairs[i].pack_int_id = i;
218                 pairs[i].pack_name = pack_names[i];
219         }
220
221         QSORT(pairs, nr_packs, pack_pair_compare);
222
223         for (i = 0; i < nr_packs; i++) {
224                 pack_names[i] = pairs[i].pack_name;
225                 perm[pairs[i].pack_int_id] = i;
226         }
227
228         free(pairs);
229 }
230
231 static size_t write_midx_pack_names(struct hashfile *f,
232                                     char **pack_names,
233                                     uint32_t num_packs)
234 {
235         uint32_t i;
236         unsigned char padding[MIDX_CHUNK_ALIGNMENT];
237         size_t written = 0;
238
239         for (i = 0; i < num_packs; i++) {
240                 size_t writelen = strlen(pack_names[i]) + 1;
241
242                 if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
243                         BUG("incorrect pack-file order: %s before %s",
244                             pack_names[i - 1],
245                             pack_names[i]);
246
247                 hashwrite(f, pack_names[i], writelen);
248                 written += writelen;
249         }
250
251         /* add padding to be aligned */
252         i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
253         if (i < MIDX_CHUNK_ALIGNMENT) {
254                 memset(padding, 0, sizeof(padding));
255                 hashwrite(f, padding, i);
256                 written += i;
257         }
258
259         return written;
260 }
261
262 int write_midx_file(const char *object_dir)
263 {
264         unsigned char cur_chunk, num_chunks = 0;
265         char *midx_name;
266         uint32_t i;
267         struct hashfile *f = NULL;
268         struct lock_file lk;
269         struct pack_list packs;
270         uint32_t *pack_perm = NULL;
271         uint64_t written = 0;
272         uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
273         uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
274
275         midx_name = get_midx_filename(object_dir);
276         if (safe_create_leading_directories(midx_name)) {
277                 UNLEAK(midx_name);
278                 die_errno(_("unable to create leading directories of %s"),
279                           midx_name);
280         }
281
282         packs.nr = 0;
283         packs.alloc_list = 16;
284         packs.alloc_names = 16;
285         packs.list = NULL;
286         packs.pack_name_concat_len = 0;
287         ALLOC_ARRAY(packs.list, packs.alloc_list);
288         ALLOC_ARRAY(packs.names, packs.alloc_names);
289
290         for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs);
291
292         if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
293                 packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
294                                               (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
295
296         ALLOC_ARRAY(pack_perm, packs.nr);
297         sort_packs_by_name(packs.names, packs.nr, pack_perm);
298
299         hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
300         f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
301         FREE_AND_NULL(midx_name);
302
303         cur_chunk = 0;
304         num_chunks = 1;
305
306         written = write_midx_header(f, num_chunks, packs.nr);
307
308         chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
309         chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
310
311         cur_chunk++;
312         chunk_ids[cur_chunk] = 0;
313         chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
314
315         for (i = 0; i <= num_chunks; i++) {
316                 if (i && chunk_offsets[i] < chunk_offsets[i - 1])
317                         BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
318                             chunk_offsets[i - 1],
319                             chunk_offsets[i]);
320
321                 if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
322                         BUG("chunk offset %"PRIu64" is not properly aligned",
323                             chunk_offsets[i]);
324
325                 hashwrite_be32(f, chunk_ids[i]);
326                 hashwrite_be32(f, chunk_offsets[i] >> 32);
327                 hashwrite_be32(f, chunk_offsets[i]);
328
329                 written += MIDX_CHUNKLOOKUP_WIDTH;
330         }
331
332         for (i = 0; i < num_chunks; i++) {
333                 if (written != chunk_offsets[i])
334                         BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
335                             chunk_offsets[i],
336                             written,
337                             chunk_ids[i]);
338
339                 switch (chunk_ids[i]) {
340                         case MIDX_CHUNKID_PACKNAMES:
341                                 written += write_midx_pack_names(f, packs.names, packs.nr);
342                                 break;
343
344                         default:
345                                 BUG("trying to write unknown chunk id %"PRIx32,
346                                     chunk_ids[i]);
347                 }
348         }
349
350         if (written != chunk_offsets[num_chunks])
351                 BUG("incorrect final offset %"PRIu64" != %"PRIu64,
352                     written,
353                     chunk_offsets[num_chunks]);
354
355         finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
356         commit_lock_file(&lk);
357
358         for (i = 0; i < packs.nr; i++) {
359                 if (packs.list[i]) {
360                         close_pack(packs.list[i]);
361                         free(packs.list[i]);
362                 }
363                 free(packs.names[i]);
364         }
365
366         free(packs.list);
367         free(packs.names);
368         return 0;
369 }