Git 2.16.2
[git] / packfile.c
1 #include "cache.h"
2 #include "mru.h"
3 #include "pack.h"
4 #include "dir.h"
5 #include "mergesort.h"
6 #include "packfile.h"
7 #include "delta.h"
8 #include "list.h"
9 #include "streaming.h"
10 #include "sha1-lookup.h"
11
12 char *odb_pack_name(struct strbuf *buf,
13                     const unsigned char *sha1,
14                     const char *ext)
15 {
16         strbuf_reset(buf);
17         strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
18                     sha1_to_hex(sha1), ext);
19         return buf->buf;
20 }
21
22 char *sha1_pack_name(const unsigned char *sha1)
23 {
24         static struct strbuf buf = STRBUF_INIT;
25         return odb_pack_name(&buf, sha1, "pack");
26 }
27
28 char *sha1_pack_index_name(const unsigned char *sha1)
29 {
30         static struct strbuf buf = STRBUF_INIT;
31         return odb_pack_name(&buf, sha1, "idx");
32 }
33
34 static unsigned int pack_used_ctr;
35 static unsigned int pack_mmap_calls;
36 static unsigned int peak_pack_open_windows;
37 static unsigned int pack_open_windows;
38 static unsigned int pack_open_fds;
39 static unsigned int pack_max_fds;
40 static size_t peak_pack_mapped;
41 static size_t pack_mapped;
42 struct packed_git *packed_git;
43 struct mru packed_git_mru;
44
45 #define SZ_FMT PRIuMAX
46 static inline uintmax_t sz_fmt(size_t s) { return s; }
47
48 void pack_report(void)
49 {
50         fprintf(stderr,
51                 "pack_report: getpagesize()            = %10" SZ_FMT "\n"
52                 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
53                 "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
54                 sz_fmt(getpagesize()),
55                 sz_fmt(packed_git_window_size),
56                 sz_fmt(packed_git_limit));
57         fprintf(stderr,
58                 "pack_report: pack_used_ctr            = %10u\n"
59                 "pack_report: pack_mmap_calls          = %10u\n"
60                 "pack_report: pack_open_windows        = %10u / %10u\n"
61                 "pack_report: pack_mapped              = "
62                         "%10" SZ_FMT " / %10" SZ_FMT "\n",
63                 pack_used_ctr,
64                 pack_mmap_calls,
65                 pack_open_windows, peak_pack_open_windows,
66                 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
67 }
68
69 /*
70  * Open and mmap the index file at path, perform a couple of
71  * consistency checks, then record its information to p.  Return 0 on
72  * success.
73  */
74 static int check_packed_git_idx(const char *path, struct packed_git *p)
75 {
76         void *idx_map;
77         struct pack_idx_header *hdr;
78         size_t idx_size;
79         uint32_t version, nr, i, *index;
80         int fd = git_open(path);
81         struct stat st;
82
83         if (fd < 0)
84                 return -1;
85         if (fstat(fd, &st)) {
86                 close(fd);
87                 return -1;
88         }
89         idx_size = xsize_t(st.st_size);
90         if (idx_size < 4 * 256 + 20 + 20) {
91                 close(fd);
92                 return error("index file %s is too small", path);
93         }
94         idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
95         close(fd);
96
97         hdr = idx_map;
98         if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
99                 version = ntohl(hdr->idx_version);
100                 if (version < 2 || version > 2) {
101                         munmap(idx_map, idx_size);
102                         return error("index file %s is version %"PRIu32
103                                      " and is not supported by this binary"
104                                      " (try upgrading GIT to a newer version)",
105                                      path, version);
106                 }
107         } else
108                 version = 1;
109
110         nr = 0;
111         index = idx_map;
112         if (version > 1)
113                 index += 2;  /* skip index header */
114         for (i = 0; i < 256; i++) {
115                 uint32_t n = ntohl(index[i]);
116                 if (n < nr) {
117                         munmap(idx_map, idx_size);
118                         return error("non-monotonic index %s", path);
119                 }
120                 nr = n;
121         }
122
123         if (version == 1) {
124                 /*
125                  * Total size:
126                  *  - 256 index entries 4 bytes each
127                  *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
128                  *  - 20-byte SHA1 of the packfile
129                  *  - 20-byte SHA1 file checksum
130                  */
131                 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
132                         munmap(idx_map, idx_size);
133                         return error("wrong index v1 file size in %s", path);
134                 }
135         } else if (version == 2) {
136                 /*
137                  * Minimum size:
138                  *  - 8 bytes of header
139                  *  - 256 index entries 4 bytes each
140                  *  - 20-byte sha1 entry * nr
141                  *  - 4-byte crc entry * nr
142                  *  - 4-byte offset entry * nr
143                  *  - 20-byte SHA1 of the packfile
144                  *  - 20-byte SHA1 file checksum
145                  * And after the 4-byte offset table might be a
146                  * variable sized table containing 8-byte entries
147                  * for offsets larger than 2^31.
148                  */
149                 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
150                 unsigned long max_size = min_size;
151                 if (nr)
152                         max_size += (nr - 1)*8;
153                 if (idx_size < min_size || idx_size > max_size) {
154                         munmap(idx_map, idx_size);
155                         return error("wrong index v2 file size in %s", path);
156                 }
157                 if (idx_size != min_size &&
158                     /*
159                      * make sure we can deal with large pack offsets.
160                      * 31-bit signed offset won't be enough, neither
161                      * 32-bit unsigned one will be.
162                      */
163                     (sizeof(off_t) <= 4)) {
164                         munmap(idx_map, idx_size);
165                         return error("pack too large for current definition of off_t in %s", path);
166                 }
167         }
168
169         p->index_version = version;
170         p->index_data = idx_map;
171         p->index_size = idx_size;
172         p->num_objects = nr;
173         return 0;
174 }
175
176 int open_pack_index(struct packed_git *p)
177 {
178         char *idx_name;
179         size_t len;
180         int ret;
181
182         if (p->index_data)
183                 return 0;
184
185         if (!strip_suffix(p->pack_name, ".pack", &len))
186                 die("BUG: pack_name does not end in .pack");
187         idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
188         ret = check_packed_git_idx(idx_name, p);
189         free(idx_name);
190         return ret;
191 }
192
193 static struct packed_git *alloc_packed_git(int extra)
194 {
195         struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
196         memset(p, 0, sizeof(*p));
197         p->pack_fd = -1;
198         return p;
199 }
200
201 struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
202 {
203         const char *path = sha1_pack_name(sha1);
204         size_t alloc = st_add(strlen(path), 1);
205         struct packed_git *p = alloc_packed_git(alloc);
206
207         memcpy(p->pack_name, path, alloc); /* includes NUL */
208         hashcpy(p->sha1, sha1);
209         if (check_packed_git_idx(idx_path, p)) {
210                 free(p);
211                 return NULL;
212         }
213
214         return p;
215 }
216
217 static void scan_windows(struct packed_git *p,
218         struct packed_git **lru_p,
219         struct pack_window **lru_w,
220         struct pack_window **lru_l)
221 {
222         struct pack_window *w, *w_l;
223
224         for (w_l = NULL, w = p->windows; w; w = w->next) {
225                 if (!w->inuse_cnt) {
226                         if (!*lru_w || w->last_used < (*lru_w)->last_used) {
227                                 *lru_p = p;
228                                 *lru_w = w;
229                                 *lru_l = w_l;
230                         }
231                 }
232                 w_l = w;
233         }
234 }
235
236 static int unuse_one_window(struct packed_git *current)
237 {
238         struct packed_git *p, *lru_p = NULL;
239         struct pack_window *lru_w = NULL, *lru_l = NULL;
240
241         if (current)
242                 scan_windows(current, &lru_p, &lru_w, &lru_l);
243         for (p = packed_git; p; p = p->next)
244                 scan_windows(p, &lru_p, &lru_w, &lru_l);
245         if (lru_p) {
246                 munmap(lru_w->base, lru_w->len);
247                 pack_mapped -= lru_w->len;
248                 if (lru_l)
249                         lru_l->next = lru_w->next;
250                 else
251                         lru_p->windows = lru_w->next;
252                 free(lru_w);
253                 pack_open_windows--;
254                 return 1;
255         }
256         return 0;
257 }
258
259 void release_pack_memory(size_t need)
260 {
261         size_t cur = pack_mapped;
262         while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
263                 ; /* nothing */
264 }
265
266 void close_pack_windows(struct packed_git *p)
267 {
268         while (p->windows) {
269                 struct pack_window *w = p->windows;
270
271                 if (w->inuse_cnt)
272                         die("pack '%s' still has open windows to it",
273                             p->pack_name);
274                 munmap(w->base, w->len);
275                 pack_mapped -= w->len;
276                 pack_open_windows--;
277                 p->windows = w->next;
278                 free(w);
279         }
280 }
281
282 static int close_pack_fd(struct packed_git *p)
283 {
284         if (p->pack_fd < 0)
285                 return 0;
286
287         close(p->pack_fd);
288         pack_open_fds--;
289         p->pack_fd = -1;
290
291         return 1;
292 }
293
294 void close_pack_index(struct packed_git *p)
295 {
296         if (p->index_data) {
297                 munmap((void *)p->index_data, p->index_size);
298                 p->index_data = NULL;
299         }
300 }
301
302 static void close_pack(struct packed_git *p)
303 {
304         close_pack_windows(p);
305         close_pack_fd(p);
306         close_pack_index(p);
307 }
308
309 void close_all_packs(void)
310 {
311         struct packed_git *p;
312
313         for (p = packed_git; p; p = p->next)
314                 if (p->do_not_close)
315                         die("BUG: want to close pack marked 'do-not-close'");
316                 else
317                         close_pack(p);
318 }
319
320 /*
321  * The LRU pack is the one with the oldest MRU window, preferring packs
322  * with no used windows, or the oldest mtime if it has no windows allocated.
323  */
324 static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
325 {
326         struct pack_window *w, *this_mru_w;
327         int has_windows_inuse = 0;
328
329         /*
330          * Reject this pack if it has windows and the previously selected
331          * one does not.  If this pack does not have windows, reject
332          * it if the pack file is newer than the previously selected one.
333          */
334         if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
335                 return;
336
337         for (w = this_mru_w = p->windows; w; w = w->next) {
338                 /*
339                  * Reject this pack if any of its windows are in use,
340                  * but the previously selected pack did not have any
341                  * inuse windows.  Otherwise, record that this pack
342                  * has windows in use.
343                  */
344                 if (w->inuse_cnt) {
345                         if (*accept_windows_inuse)
346                                 has_windows_inuse = 1;
347                         else
348                                 return;
349                 }
350
351                 if (w->last_used > this_mru_w->last_used)
352                         this_mru_w = w;
353
354                 /*
355                  * Reject this pack if it has windows that have been
356                  * used more recently than the previously selected pack.
357                  * If the previously selected pack had windows inuse and
358                  * we have not encountered a window in this pack that is
359                  * inuse, skip this check since we prefer a pack with no
360                  * inuse windows to one that has inuse windows.
361                  */
362                 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
363                     this_mru_w->last_used > (*mru_w)->last_used)
364                         return;
365         }
366
367         /*
368          * Select this pack.
369          */
370         *mru_w = this_mru_w;
371         *lru_p = p;
372         *accept_windows_inuse = has_windows_inuse;
373 }
374
375 static int close_one_pack(void)
376 {
377         struct packed_git *p, *lru_p = NULL;
378         struct pack_window *mru_w = NULL;
379         int accept_windows_inuse = 1;
380
381         for (p = packed_git; p; p = p->next) {
382                 if (p->pack_fd == -1)
383                         continue;
384                 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
385         }
386
387         if (lru_p)
388                 return close_pack_fd(lru_p);
389
390         return 0;
391 }
392
393 static unsigned int get_max_fd_limit(void)
394 {
395 #ifdef RLIMIT_NOFILE
396         {
397                 struct rlimit lim;
398
399                 if (!getrlimit(RLIMIT_NOFILE, &lim))
400                         return lim.rlim_cur;
401         }
402 #endif
403
404 #ifdef _SC_OPEN_MAX
405         {
406                 long open_max = sysconf(_SC_OPEN_MAX);
407                 if (0 < open_max)
408                         return open_max;
409                 /*
410                  * Otherwise, we got -1 for one of the two
411                  * reasons:
412                  *
413                  * (1) sysconf() did not understand _SC_OPEN_MAX
414                  *     and signaled an error with -1; or
415                  * (2) sysconf() said there is no limit.
416                  *
417                  * We _could_ clear errno before calling sysconf() to
418                  * tell these two cases apart and return a huge number
419                  * in the latter case to let the caller cap it to a
420                  * value that is not so selfish, but letting the
421                  * fallback OPEN_MAX codepath take care of these cases
422                  * is a lot simpler.
423                  */
424         }
425 #endif
426
427 #ifdef OPEN_MAX
428         return OPEN_MAX;
429 #else
430         return 1; /* see the caller ;-) */
431 #endif
432 }
433
434 /*
435  * Do not call this directly as this leaks p->pack_fd on error return;
436  * call open_packed_git() instead.
437  */
438 static int open_packed_git_1(struct packed_git *p)
439 {
440         struct stat st;
441         struct pack_header hdr;
442         unsigned char sha1[20];
443         unsigned char *idx_sha1;
444         long fd_flag;
445         ssize_t read_result;
446
447         if (!p->index_data && open_pack_index(p))
448                 return error("packfile %s index unavailable", p->pack_name);
449
450         if (!pack_max_fds) {
451                 unsigned int max_fds = get_max_fd_limit();
452
453                 /* Save 3 for stdin/stdout/stderr, 22 for work */
454                 if (25 < max_fds)
455                         pack_max_fds = max_fds - 25;
456                 else
457                         pack_max_fds = 1;
458         }
459
460         while (pack_max_fds <= pack_open_fds && close_one_pack())
461                 ; /* nothing */
462
463         p->pack_fd = git_open(p->pack_name);
464         if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
465                 return -1;
466         pack_open_fds++;
467
468         /* If we created the struct before we had the pack we lack size. */
469         if (!p->pack_size) {
470                 if (!S_ISREG(st.st_mode))
471                         return error("packfile %s not a regular file", p->pack_name);
472                 p->pack_size = st.st_size;
473         } else if (p->pack_size != st.st_size)
474                 return error("packfile %s size changed", p->pack_name);
475
476         /* We leave these file descriptors open with sliding mmap;
477          * there is no point keeping them open across exec(), though.
478          */
479         fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
480         if (fd_flag < 0)
481                 return error("cannot determine file descriptor flags");
482         fd_flag |= FD_CLOEXEC;
483         if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
484                 return error("cannot set FD_CLOEXEC");
485
486         /* Verify we recognize this pack file format. */
487         read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
488         if (read_result < 0)
489                 return error_errno("error reading from %s", p->pack_name);
490         if (read_result != sizeof(hdr))
491                 return error("file %s is far too short to be a packfile", p->pack_name);
492         if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
493                 return error("file %s is not a GIT packfile", p->pack_name);
494         if (!pack_version_ok(hdr.hdr_version))
495                 return error("packfile %s is version %"PRIu32" and not"
496                         " supported (try upgrading GIT to a newer version)",
497                         p->pack_name, ntohl(hdr.hdr_version));
498
499         /* Verify the pack matches its index. */
500         if (p->num_objects != ntohl(hdr.hdr_entries))
501                 return error("packfile %s claims to have %"PRIu32" objects"
502                              " while index indicates %"PRIu32" objects",
503                              p->pack_name, ntohl(hdr.hdr_entries),
504                              p->num_objects);
505         if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
506                 return error("end of packfile %s is unavailable", p->pack_name);
507         read_result = read_in_full(p->pack_fd, sha1, sizeof(sha1));
508         if (read_result < 0)
509                 return error_errno("error reading from %s", p->pack_name);
510         if (read_result != sizeof(sha1))
511                 return error("packfile %s signature is unavailable", p->pack_name);
512         idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
513         if (hashcmp(sha1, idx_sha1))
514                 return error("packfile %s does not match index", p->pack_name);
515         return 0;
516 }
517
518 static int open_packed_git(struct packed_git *p)
519 {
520         if (!open_packed_git_1(p))
521                 return 0;
522         close_pack_fd(p);
523         return -1;
524 }
525
526 static int in_window(struct pack_window *win, off_t offset)
527 {
528         /* We must promise at least 20 bytes (one hash) after the
529          * offset is available from this window, otherwise the offset
530          * is not actually in this window and a different window (which
531          * has that one hash excess) must be used.  This is to support
532          * the object header and delta base parsing routines below.
533          */
534         off_t win_off = win->offset;
535         return win_off <= offset
536                 && (offset + 20) <= (win_off + win->len);
537 }
538
539 unsigned char *use_pack(struct packed_git *p,
540                 struct pack_window **w_cursor,
541                 off_t offset,
542                 unsigned long *left)
543 {
544         struct pack_window *win = *w_cursor;
545
546         /* Since packfiles end in a hash of their content and it's
547          * pointless to ask for an offset into the middle of that
548          * hash, and the in_window function above wouldn't match
549          * don't allow an offset too close to the end of the file.
550          */
551         if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
552                 die("packfile %s cannot be accessed", p->pack_name);
553         if (offset > (p->pack_size - 20))
554                 die("offset beyond end of packfile (truncated pack?)");
555         if (offset < 0)
556                 die(_("offset before end of packfile (broken .idx?)"));
557
558         if (!win || !in_window(win, offset)) {
559                 if (win)
560                         win->inuse_cnt--;
561                 for (win = p->windows; win; win = win->next) {
562                         if (in_window(win, offset))
563                                 break;
564                 }
565                 if (!win) {
566                         size_t window_align = packed_git_window_size / 2;
567                         off_t len;
568
569                         if (p->pack_fd == -1 && open_packed_git(p))
570                                 die("packfile %s cannot be accessed", p->pack_name);
571
572                         win = xcalloc(1, sizeof(*win));
573                         win->offset = (offset / window_align) * window_align;
574                         len = p->pack_size - win->offset;
575                         if (len > packed_git_window_size)
576                                 len = packed_git_window_size;
577                         win->len = (size_t)len;
578                         pack_mapped += win->len;
579                         while (packed_git_limit < pack_mapped
580                                 && unuse_one_window(p))
581                                 ; /* nothing */
582                         win->base = xmmap(NULL, win->len,
583                                 PROT_READ, MAP_PRIVATE,
584                                 p->pack_fd, win->offset);
585                         if (win->base == MAP_FAILED)
586                                 die_errno("packfile %s cannot be mapped",
587                                           p->pack_name);
588                         if (!win->offset && win->len == p->pack_size
589                                 && !p->do_not_close)
590                                 close_pack_fd(p);
591                         pack_mmap_calls++;
592                         pack_open_windows++;
593                         if (pack_mapped > peak_pack_mapped)
594                                 peak_pack_mapped = pack_mapped;
595                         if (pack_open_windows > peak_pack_open_windows)
596                                 peak_pack_open_windows = pack_open_windows;
597                         win->next = p->windows;
598                         p->windows = win;
599                 }
600         }
601         if (win != *w_cursor) {
602                 win->last_used = pack_used_ctr++;
603                 win->inuse_cnt++;
604                 *w_cursor = win;
605         }
606         offset -= win->offset;
607         if (left)
608                 *left = win->len - xsize_t(offset);
609         return win->base + offset;
610 }
611
612 void unuse_pack(struct pack_window **w_cursor)
613 {
614         struct pack_window *w = *w_cursor;
615         if (w) {
616                 w->inuse_cnt--;
617                 *w_cursor = NULL;
618         }
619 }
620
621 static void try_to_free_pack_memory(size_t size)
622 {
623         release_pack_memory(size);
624 }
625
626 struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
627 {
628         static int have_set_try_to_free_routine;
629         struct stat st;
630         size_t alloc;
631         struct packed_git *p;
632
633         if (!have_set_try_to_free_routine) {
634                 have_set_try_to_free_routine = 1;
635                 set_try_to_free_routine(try_to_free_pack_memory);
636         }
637
638         /*
639          * Make sure a corresponding .pack file exists and that
640          * the index looks sane.
641          */
642         if (!strip_suffix_mem(path, &path_len, ".idx"))
643                 return NULL;
644
645         /*
646          * ".pack" is long enough to hold any suffix we're adding (and
647          * the use xsnprintf double-checks that)
648          */
649         alloc = st_add3(path_len, strlen(".pack"), 1);
650         p = alloc_packed_git(alloc);
651         memcpy(p->pack_name, path, path_len);
652
653         xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
654         if (!access(p->pack_name, F_OK))
655                 p->pack_keep = 1;
656
657         xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
658         if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
659                 free(p);
660                 return NULL;
661         }
662
663         /* ok, it looks sane as far as we can check without
664          * actually mapping the pack file.
665          */
666         p->pack_size = st.st_size;
667         p->pack_local = local;
668         p->mtime = st.st_mtime;
669         if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
670                 hashclr(p->sha1);
671         return p;
672 }
673
674 void install_packed_git(struct packed_git *pack)
675 {
676         if (pack->pack_fd != -1)
677                 pack_open_fds++;
678
679         pack->next = packed_git;
680         packed_git = pack;
681 }
682
683 void (*report_garbage)(unsigned seen_bits, const char *path);
684
685 static void report_helper(const struct string_list *list,
686                           int seen_bits, int first, int last)
687 {
688         if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
689                 return;
690
691         for (; first < last; first++)
692                 report_garbage(seen_bits, list->items[first].string);
693 }
694
695 static void report_pack_garbage(struct string_list *list)
696 {
697         int i, baselen = -1, first = 0, seen_bits = 0;
698
699         if (!report_garbage)
700                 return;
701
702         string_list_sort(list);
703
704         for (i = 0; i < list->nr; i++) {
705                 const char *path = list->items[i].string;
706                 if (baselen != -1 &&
707                     strncmp(path, list->items[first].string, baselen)) {
708                         report_helper(list, seen_bits, first, i);
709                         baselen = -1;
710                         seen_bits = 0;
711                 }
712                 if (baselen == -1) {
713                         const char *dot = strrchr(path, '.');
714                         if (!dot) {
715                                 report_garbage(PACKDIR_FILE_GARBAGE, path);
716                                 continue;
717                         }
718                         baselen = dot - path + 1;
719                         first = i;
720                 }
721                 if (!strcmp(path + baselen, "pack"))
722                         seen_bits |= 1;
723                 else if (!strcmp(path + baselen, "idx"))
724                         seen_bits |= 2;
725         }
726         report_helper(list, seen_bits, first, list->nr);
727 }
728
729 static void prepare_packed_git_one(char *objdir, int local)
730 {
731         struct strbuf path = STRBUF_INIT;
732         size_t dirnamelen;
733         DIR *dir;
734         struct dirent *de;
735         struct string_list garbage = STRING_LIST_INIT_DUP;
736
737         strbuf_addstr(&path, objdir);
738         strbuf_addstr(&path, "/pack");
739         dir = opendir(path.buf);
740         if (!dir) {
741                 if (errno != ENOENT)
742                         error_errno("unable to open object pack directory: %s",
743                                     path.buf);
744                 strbuf_release(&path);
745                 return;
746         }
747         strbuf_addch(&path, '/');
748         dirnamelen = path.len;
749         while ((de = readdir(dir)) != NULL) {
750                 struct packed_git *p;
751                 size_t base_len;
752
753                 if (is_dot_or_dotdot(de->d_name))
754                         continue;
755
756                 strbuf_setlen(&path, dirnamelen);
757                 strbuf_addstr(&path, de->d_name);
758
759                 base_len = path.len;
760                 if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
761                         /* Don't reopen a pack we already have. */
762                         for (p = packed_git; p; p = p->next) {
763                                 size_t len;
764                                 if (strip_suffix(p->pack_name, ".pack", &len) &&
765                                     len == base_len &&
766                                     !memcmp(p->pack_name, path.buf, len))
767                                         break;
768                         }
769                         if (p == NULL &&
770                             /*
771                              * See if it really is a valid .idx file with
772                              * corresponding .pack file that we can map.
773                              */
774                             (p = add_packed_git(path.buf, path.len, local)) != NULL)
775                                 install_packed_git(p);
776                 }
777
778                 if (!report_garbage)
779                         continue;
780
781                 if (ends_with(de->d_name, ".idx") ||
782                     ends_with(de->d_name, ".pack") ||
783                     ends_with(de->d_name, ".bitmap") ||
784                     ends_with(de->d_name, ".keep"))
785                         string_list_append(&garbage, path.buf);
786                 else
787                         report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
788         }
789         closedir(dir);
790         report_pack_garbage(&garbage);
791         string_list_clear(&garbage, 0);
792         strbuf_release(&path);
793 }
794
795 static int approximate_object_count_valid;
796
797 /*
798  * Give a fast, rough count of the number of objects in the repository. This
799  * ignores loose objects completely. If you have a lot of them, then either
800  * you should repack because your performance will be awful, or they are
801  * all unreachable objects about to be pruned, in which case they're not really
802  * interesting as a measure of repo size in the first place.
803  */
804 unsigned long approximate_object_count(void)
805 {
806         static unsigned long count;
807         if (!approximate_object_count_valid) {
808                 struct packed_git *p;
809
810                 prepare_packed_git();
811                 count = 0;
812                 for (p = packed_git; p; p = p->next) {
813                         if (open_pack_index(p))
814                                 continue;
815                         count += p->num_objects;
816                 }
817         }
818         return count;
819 }
820
821 static void *get_next_packed_git(const void *p)
822 {
823         return ((const struct packed_git *)p)->next;
824 }
825
826 static void set_next_packed_git(void *p, void *next)
827 {
828         ((struct packed_git *)p)->next = next;
829 }
830
831 static int sort_pack(const void *a_, const void *b_)
832 {
833         const struct packed_git *a = a_;
834         const struct packed_git *b = b_;
835         int st;
836
837         /*
838          * Local packs tend to contain objects specific to our
839          * variant of the project than remote ones.  In addition,
840          * remote ones could be on a network mounted filesystem.
841          * Favor local ones for these reasons.
842          */
843         st = a->pack_local - b->pack_local;
844         if (st)
845                 return -st;
846
847         /*
848          * Younger packs tend to contain more recent objects,
849          * and more recent objects tend to get accessed more
850          * often.
851          */
852         if (a->mtime < b->mtime)
853                 return 1;
854         else if (a->mtime == b->mtime)
855                 return 0;
856         return -1;
857 }
858
859 static void rearrange_packed_git(void)
860 {
861         packed_git = llist_mergesort(packed_git, get_next_packed_git,
862                                      set_next_packed_git, sort_pack);
863 }
864
865 static void prepare_packed_git_mru(void)
866 {
867         struct packed_git *p;
868
869         mru_clear(&packed_git_mru);
870         for (p = packed_git; p; p = p->next)
871                 mru_append(&packed_git_mru, p);
872 }
873
874 static int prepare_packed_git_run_once = 0;
875 void prepare_packed_git(void)
876 {
877         struct alternate_object_database *alt;
878
879         if (prepare_packed_git_run_once)
880                 return;
881         prepare_packed_git_one(get_object_directory(), 1);
882         prepare_alt_odb();
883         for (alt = alt_odb_list; alt; alt = alt->next)
884                 prepare_packed_git_one(alt->path, 0);
885         rearrange_packed_git();
886         prepare_packed_git_mru();
887         prepare_packed_git_run_once = 1;
888 }
889
890 void reprepare_packed_git(void)
891 {
892         approximate_object_count_valid = 0;
893         prepare_packed_git_run_once = 0;
894         prepare_packed_git();
895 }
896
897 unsigned long unpack_object_header_buffer(const unsigned char *buf,
898                 unsigned long len, enum object_type *type, unsigned long *sizep)
899 {
900         unsigned shift;
901         unsigned long size, c;
902         unsigned long used = 0;
903
904         c = buf[used++];
905         *type = (c >> 4) & 7;
906         size = c & 15;
907         shift = 4;
908         while (c & 0x80) {
909                 if (len <= used || bitsizeof(long) <= shift) {
910                         error("bad object header");
911                         size = used = 0;
912                         break;
913                 }
914                 c = buf[used++];
915                 size += (c & 0x7f) << shift;
916                 shift += 7;
917         }
918         *sizep = size;
919         return used;
920 }
921
922 unsigned long get_size_from_delta(struct packed_git *p,
923                                   struct pack_window **w_curs,
924                                   off_t curpos)
925 {
926         const unsigned char *data;
927         unsigned char delta_head[20], *in;
928         git_zstream stream;
929         int st;
930
931         memset(&stream, 0, sizeof(stream));
932         stream.next_out = delta_head;
933         stream.avail_out = sizeof(delta_head);
934
935         git_inflate_init(&stream);
936         do {
937                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
938                 stream.next_in = in;
939                 st = git_inflate(&stream, Z_FINISH);
940                 curpos += stream.next_in - in;
941         } while ((st == Z_OK || st == Z_BUF_ERROR) &&
942                  stream.total_out < sizeof(delta_head));
943         git_inflate_end(&stream);
944         if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
945                 error("delta data unpack-initial failed");
946                 return 0;
947         }
948
949         /* Examine the initial part of the delta to figure out
950          * the result size.
951          */
952         data = delta_head;
953
954         /* ignore base size */
955         get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
956
957         /* Read the result size */
958         return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
959 }
960
961 int unpack_object_header(struct packed_git *p,
962                          struct pack_window **w_curs,
963                          off_t *curpos,
964                          unsigned long *sizep)
965 {
966         unsigned char *base;
967         unsigned long left;
968         unsigned long used;
969         enum object_type type;
970
971         /* use_pack() assures us we have [base, base + 20) available
972          * as a range that we can look at.  (Its actually the hash
973          * size that is assured.)  With our object header encoding
974          * the maximum deflated object size is 2^137, which is just
975          * insane, so we know won't exceed what we have been given.
976          */
977         base = use_pack(p, w_curs, *curpos, &left);
978         used = unpack_object_header_buffer(base, left, &type, sizep);
979         if (!used) {
980                 type = OBJ_BAD;
981         } else
982                 *curpos += used;
983
984         return type;
985 }
986
987 void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
988 {
989         unsigned i;
990         for (i = 0; i < p->num_bad_objects; i++)
991                 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
992                         return;
993         p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
994                                       st_mult(GIT_MAX_RAWSZ,
995                                               st_add(p->num_bad_objects, 1)));
996         hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
997         p->num_bad_objects++;
998 }
999
1000 const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1001 {
1002         struct packed_git *p;
1003         unsigned i;
1004
1005         for (p = packed_git; p; p = p->next)
1006                 for (i = 0; i < p->num_bad_objects; i++)
1007                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1008                                 return p;
1009         return NULL;
1010 }
1011
1012 static off_t get_delta_base(struct packed_git *p,
1013                                     struct pack_window **w_curs,
1014                                     off_t *curpos,
1015                                     enum object_type type,
1016                                     off_t delta_obj_offset)
1017 {
1018         unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1019         off_t base_offset;
1020
1021         /* use_pack() assured us we have [base_info, base_info + 20)
1022          * as a range that we can look at without walking off the
1023          * end of the mapped window.  Its actually the hash size
1024          * that is assured.  An OFS_DELTA longer than the hash size
1025          * is stupid, as then a REF_DELTA would be smaller to store.
1026          */
1027         if (type == OBJ_OFS_DELTA) {
1028                 unsigned used = 0;
1029                 unsigned char c = base_info[used++];
1030                 base_offset = c & 127;
1031                 while (c & 128) {
1032                         base_offset += 1;
1033                         if (!base_offset || MSB(base_offset, 7))
1034                                 return 0;  /* overflow */
1035                         c = base_info[used++];
1036                         base_offset = (base_offset << 7) + (c & 127);
1037                 }
1038                 base_offset = delta_obj_offset - base_offset;
1039                 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1040                         return 0;  /* out of bound */
1041                 *curpos += used;
1042         } else if (type == OBJ_REF_DELTA) {
1043                 /* The base entry _must_ be in the same pack */
1044                 base_offset = find_pack_entry_one(base_info, p);
1045                 *curpos += 20;
1046         } else
1047                 die("I am totally screwed");
1048         return base_offset;
1049 }
1050
1051 /*
1052  * Like get_delta_base above, but we return the sha1 instead of the pack
1053  * offset. This means it is cheaper for REF deltas (we do not have to do
1054  * the final object lookup), but more expensive for OFS deltas (we
1055  * have to load the revidx to convert the offset back into a sha1).
1056  */
1057 static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1058                                                 struct pack_window **w_curs,
1059                                                 off_t curpos,
1060                                                 enum object_type type,
1061                                                 off_t delta_obj_offset)
1062 {
1063         if (type == OBJ_REF_DELTA) {
1064                 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1065                 return base;
1066         } else if (type == OBJ_OFS_DELTA) {
1067                 struct revindex_entry *revidx;
1068                 off_t base_offset = get_delta_base(p, w_curs, &curpos,
1069                                                    type, delta_obj_offset);
1070
1071                 if (!base_offset)
1072                         return NULL;
1073
1074                 revidx = find_pack_revindex(p, base_offset);
1075                 if (!revidx)
1076                         return NULL;
1077
1078                 return nth_packed_object_sha1(p, revidx->nr);
1079         } else
1080                 return NULL;
1081 }
1082
1083 static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1084 {
1085         int type;
1086         struct revindex_entry *revidx;
1087         const unsigned char *sha1;
1088         revidx = find_pack_revindex(p, obj_offset);
1089         if (!revidx)
1090                 return OBJ_BAD;
1091         sha1 = nth_packed_object_sha1(p, revidx->nr);
1092         mark_bad_packed_object(p, sha1);
1093         type = sha1_object_info(sha1, NULL);
1094         if (type <= OBJ_NONE)
1095                 return OBJ_BAD;
1096         return type;
1097 }
1098
1099 #define POI_STACK_PREALLOC 64
1100
1101 static enum object_type packed_to_object_type(struct packed_git *p,
1102                                               off_t obj_offset,
1103                                               enum object_type type,
1104                                               struct pack_window **w_curs,
1105                                               off_t curpos)
1106 {
1107         off_t small_poi_stack[POI_STACK_PREALLOC];
1108         off_t *poi_stack = small_poi_stack;
1109         int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1110
1111         while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1112                 off_t base_offset;
1113                 unsigned long size;
1114                 /* Push the object we're going to leave behind */
1115                 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1116                         poi_stack_alloc = alloc_nr(poi_stack_nr);
1117                         ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1118                         memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1119                 } else {
1120                         ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1121                 }
1122                 poi_stack[poi_stack_nr++] = obj_offset;
1123                 /* If parsing the base offset fails, just unwind */
1124                 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1125                 if (!base_offset)
1126                         goto unwind;
1127                 curpos = obj_offset = base_offset;
1128                 type = unpack_object_header(p, w_curs, &curpos, &size);
1129                 if (type <= OBJ_NONE) {
1130                         /* If getting the base itself fails, we first
1131                          * retry the base, otherwise unwind */
1132                         type = retry_bad_packed_offset(p, base_offset);
1133                         if (type > OBJ_NONE)
1134                                 goto out;
1135                         goto unwind;
1136                 }
1137         }
1138
1139         switch (type) {
1140         case OBJ_BAD:
1141         case OBJ_COMMIT:
1142         case OBJ_TREE:
1143         case OBJ_BLOB:
1144         case OBJ_TAG:
1145                 break;
1146         default:
1147                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1148                       type, (uintmax_t)obj_offset, p->pack_name);
1149                 type = OBJ_BAD;
1150         }
1151
1152 out:
1153         if (poi_stack != small_poi_stack)
1154                 free(poi_stack);
1155         return type;
1156
1157 unwind:
1158         while (poi_stack_nr) {
1159                 obj_offset = poi_stack[--poi_stack_nr];
1160                 type = retry_bad_packed_offset(p, obj_offset);
1161                 if (type > OBJ_NONE)
1162                         goto out;
1163         }
1164         type = OBJ_BAD;
1165         goto out;
1166 }
1167
1168 static struct hashmap delta_base_cache;
1169 static size_t delta_base_cached;
1170
1171 static LIST_HEAD(delta_base_cache_lru);
1172
1173 struct delta_base_cache_key {
1174         struct packed_git *p;
1175         off_t base_offset;
1176 };
1177
1178 struct delta_base_cache_entry {
1179         struct hashmap hash;
1180         struct delta_base_cache_key key;
1181         struct list_head lru;
1182         void *data;
1183         unsigned long size;
1184         enum object_type type;
1185 };
1186
1187 static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1188 {
1189         unsigned int hash;
1190
1191         hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1192         hash += (hash >> 8) + (hash >> 16);
1193         return hash;
1194 }
1195
1196 static struct delta_base_cache_entry *
1197 get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1198 {
1199         struct hashmap_entry entry;
1200         struct delta_base_cache_key key;
1201
1202         if (!delta_base_cache.cmpfn)
1203                 return NULL;
1204
1205         hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1206         key.p = p;
1207         key.base_offset = base_offset;
1208         return hashmap_get(&delta_base_cache, &entry, &key);
1209 }
1210
1211 static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1212                                    const struct delta_base_cache_key *b)
1213 {
1214         return a->p == b->p && a->base_offset == b->base_offset;
1215 }
1216
1217 static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1218                                      const void *va, const void *vb,
1219                                      const void *vkey)
1220 {
1221         const struct delta_base_cache_entry *a = va, *b = vb;
1222         const struct delta_base_cache_key *key = vkey;
1223         if (key)
1224                 return !delta_base_cache_key_eq(&a->key, key);
1225         else
1226                 return !delta_base_cache_key_eq(&a->key, &b->key);
1227 }
1228
1229 static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1230 {
1231         return !!get_delta_base_cache_entry(p, base_offset);
1232 }
1233
1234 /*
1235  * Remove the entry from the cache, but do _not_ free the associated
1236  * entry data. The caller takes ownership of the "data" buffer, and
1237  * should copy out any fields it wants before detaching.
1238  */
1239 static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1240 {
1241         hashmap_remove(&delta_base_cache, ent, &ent->key);
1242         list_del(&ent->lru);
1243         delta_base_cached -= ent->size;
1244         free(ent);
1245 }
1246
1247 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1248         unsigned long *base_size, enum object_type *type)
1249 {
1250         struct delta_base_cache_entry *ent;
1251
1252         ent = get_delta_base_cache_entry(p, base_offset);
1253         if (!ent)
1254                 return unpack_entry(p, base_offset, type, base_size);
1255
1256         if (type)
1257                 *type = ent->type;
1258         if (base_size)
1259                 *base_size = ent->size;
1260         return xmemdupz(ent->data, ent->size);
1261 }
1262
1263 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1264 {
1265         free(ent->data);
1266         detach_delta_base_cache_entry(ent);
1267 }
1268
1269 void clear_delta_base_cache(void)
1270 {
1271         struct list_head *lru, *tmp;
1272         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1273                 struct delta_base_cache_entry *entry =
1274                         list_entry(lru, struct delta_base_cache_entry, lru);
1275                 release_delta_base_cache(entry);
1276         }
1277 }
1278
1279 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1280         void *base, unsigned long base_size, enum object_type type)
1281 {
1282         struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1283         struct list_head *lru, *tmp;
1284
1285         delta_base_cached += base_size;
1286
1287         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1288                 struct delta_base_cache_entry *f =
1289                         list_entry(lru, struct delta_base_cache_entry, lru);
1290                 if (delta_base_cached <= delta_base_cache_limit)
1291                         break;
1292                 release_delta_base_cache(f);
1293         }
1294
1295         ent->key.p = p;
1296         ent->key.base_offset = base_offset;
1297         ent->type = type;
1298         ent->data = base;
1299         ent->size = base_size;
1300         list_add_tail(&ent->lru, &delta_base_cache_lru);
1301
1302         if (!delta_base_cache.cmpfn)
1303                 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1304         hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1305         hashmap_add(&delta_base_cache, ent);
1306 }
1307
1308 int packed_object_info(struct packed_git *p, off_t obj_offset,
1309                        struct object_info *oi)
1310 {
1311         struct pack_window *w_curs = NULL;
1312         unsigned long size;
1313         off_t curpos = obj_offset;
1314         enum object_type type;
1315
1316         /*
1317          * We always get the representation type, but only convert it to
1318          * a "real" type later if the caller is interested.
1319          */
1320         if (oi->contentp) {
1321                 *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1322                                                       &type);
1323                 if (!*oi->contentp)
1324                         type = OBJ_BAD;
1325         } else {
1326                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1327         }
1328
1329         if (!oi->contentp && oi->sizep) {
1330                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1331                         off_t tmp_pos = curpos;
1332                         off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1333                                                            type, obj_offset);
1334                         if (!base_offset) {
1335                                 type = OBJ_BAD;
1336                                 goto out;
1337                         }
1338                         *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1339                         if (*oi->sizep == 0) {
1340                                 type = OBJ_BAD;
1341                                 goto out;
1342                         }
1343                 } else {
1344                         *oi->sizep = size;
1345                 }
1346         }
1347
1348         if (oi->disk_sizep) {
1349                 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1350                 *oi->disk_sizep = revidx[1].offset - obj_offset;
1351         }
1352
1353         if (oi->typep || oi->typename) {
1354                 enum object_type ptot;
1355                 ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1356                                              curpos);
1357                 if (oi->typep)
1358                         *oi->typep = ptot;
1359                 if (oi->typename) {
1360                         const char *tn = typename(ptot);
1361                         if (tn)
1362                                 strbuf_addstr(oi->typename, tn);
1363                 }
1364                 if (ptot < 0) {
1365                         type = OBJ_BAD;
1366                         goto out;
1367                 }
1368         }
1369
1370         if (oi->delta_base_sha1) {
1371                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1372                         const unsigned char *base;
1373
1374                         base = get_delta_base_sha1(p, &w_curs, curpos,
1375                                                    type, obj_offset);
1376                         if (!base) {
1377                                 type = OBJ_BAD;
1378                                 goto out;
1379                         }
1380
1381                         hashcpy(oi->delta_base_sha1, base);
1382                 } else
1383                         hashclr(oi->delta_base_sha1);
1384         }
1385
1386         oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1387                                                           OI_PACKED;
1388
1389 out:
1390         unuse_pack(&w_curs);
1391         return type;
1392 }
1393
1394 static void *unpack_compressed_entry(struct packed_git *p,
1395                                     struct pack_window **w_curs,
1396                                     off_t curpos,
1397                                     unsigned long size)
1398 {
1399         int st;
1400         git_zstream stream;
1401         unsigned char *buffer, *in;
1402
1403         buffer = xmallocz_gently(size);
1404         if (!buffer)
1405                 return NULL;
1406         memset(&stream, 0, sizeof(stream));
1407         stream.next_out = buffer;
1408         stream.avail_out = size + 1;
1409
1410         git_inflate_init(&stream);
1411         do {
1412                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1413                 stream.next_in = in;
1414                 st = git_inflate(&stream, Z_FINISH);
1415                 if (!stream.avail_out)
1416                         break; /* the payload is larger than it should be */
1417                 curpos += stream.next_in - in;
1418         } while (st == Z_OK || st == Z_BUF_ERROR);
1419         git_inflate_end(&stream);
1420         if ((st != Z_STREAM_END) || stream.total_out != size) {
1421                 free(buffer);
1422                 return NULL;
1423         }
1424
1425         return buffer;
1426 }
1427
1428 static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1429 {
1430         static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1431         trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1432                          p->pack_name, (uintmax_t)obj_offset);
1433 }
1434
1435 int do_check_packed_object_crc;
1436
1437 #define UNPACK_ENTRY_STACK_PREALLOC 64
1438 struct unpack_entry_stack_ent {
1439         off_t obj_offset;
1440         off_t curpos;
1441         unsigned long size;
1442 };
1443
1444 static void *read_object(const unsigned char *sha1, enum object_type *type,
1445                          unsigned long *size)
1446 {
1447         struct object_info oi = OBJECT_INFO_INIT;
1448         void *content;
1449         oi.typep = type;
1450         oi.sizep = size;
1451         oi.contentp = &content;
1452
1453         if (sha1_object_info_extended(sha1, &oi, 0) < 0)
1454                 return NULL;
1455         return content;
1456 }
1457
1458 void *unpack_entry(struct packed_git *p, off_t obj_offset,
1459                    enum object_type *final_type, unsigned long *final_size)
1460 {
1461         struct pack_window *w_curs = NULL;
1462         off_t curpos = obj_offset;
1463         void *data = NULL;
1464         unsigned long size;
1465         enum object_type type;
1466         struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1467         struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1468         int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1469         int base_from_cache = 0;
1470
1471         write_pack_access_log(p, obj_offset);
1472
1473         /* PHASE 1: drill down to the innermost base object */
1474         for (;;) {
1475                 off_t base_offset;
1476                 int i;
1477                 struct delta_base_cache_entry *ent;
1478
1479                 ent = get_delta_base_cache_entry(p, curpos);
1480                 if (ent) {
1481                         type = ent->type;
1482                         data = ent->data;
1483                         size = ent->size;
1484                         detach_delta_base_cache_entry(ent);
1485                         base_from_cache = 1;
1486                         break;
1487                 }
1488
1489                 if (do_check_packed_object_crc && p->index_version > 1) {
1490                         struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1491                         off_t len = revidx[1].offset - obj_offset;
1492                         if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1493                                 const unsigned char *sha1 =
1494                                         nth_packed_object_sha1(p, revidx->nr);
1495                                 error("bad packed object CRC for %s",
1496                                       sha1_to_hex(sha1));
1497                                 mark_bad_packed_object(p, sha1);
1498                                 data = NULL;
1499                                 goto out;
1500                         }
1501                 }
1502
1503                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1504                 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1505                         break;
1506
1507                 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1508                 if (!base_offset) {
1509                         error("failed to validate delta base reference "
1510                               "at offset %"PRIuMAX" from %s",
1511                               (uintmax_t)curpos, p->pack_name);
1512                         /* bail to phase 2, in hopes of recovery */
1513                         data = NULL;
1514                         break;
1515                 }
1516
1517                 /* push object, proceed to base */
1518                 if (delta_stack_nr >= delta_stack_alloc
1519                     && delta_stack == small_delta_stack) {
1520                         delta_stack_alloc = alloc_nr(delta_stack_nr);
1521                         ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1522                         memcpy(delta_stack, small_delta_stack,
1523                                sizeof(*delta_stack)*delta_stack_nr);
1524                 } else {
1525                         ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1526                 }
1527                 i = delta_stack_nr++;
1528                 delta_stack[i].obj_offset = obj_offset;
1529                 delta_stack[i].curpos = curpos;
1530                 delta_stack[i].size = size;
1531
1532                 curpos = obj_offset = base_offset;
1533         }
1534
1535         /* PHASE 2: handle the base */
1536         switch (type) {
1537         case OBJ_OFS_DELTA:
1538         case OBJ_REF_DELTA:
1539                 if (data)
1540                         die("BUG: unpack_entry: left loop at a valid delta");
1541                 break;
1542         case OBJ_COMMIT:
1543         case OBJ_TREE:
1544         case OBJ_BLOB:
1545         case OBJ_TAG:
1546                 if (!base_from_cache)
1547                         data = unpack_compressed_entry(p, &w_curs, curpos, size);
1548                 break;
1549         default:
1550                 data = NULL;
1551                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1552                       type, (uintmax_t)obj_offset, p->pack_name);
1553         }
1554
1555         /* PHASE 3: apply deltas in order */
1556
1557         /* invariants:
1558          *   'data' holds the base data, or NULL if there was corruption
1559          */
1560         while (delta_stack_nr) {
1561                 void *delta_data;
1562                 void *base = data;
1563                 void *external_base = NULL;
1564                 unsigned long delta_size, base_size = size;
1565                 int i;
1566
1567                 data = NULL;
1568
1569                 if (base)
1570                         add_delta_base_cache(p, obj_offset, base, base_size, type);
1571
1572                 if (!base) {
1573                         /*
1574                          * We're probably in deep shit, but let's try to fetch
1575                          * the required base anyway from another pack or loose.
1576                          * This is costly but should happen only in the presence
1577                          * of a corrupted pack, and is better than failing outright.
1578                          */
1579                         struct revindex_entry *revidx;
1580                         const unsigned char *base_sha1;
1581                         revidx = find_pack_revindex(p, obj_offset);
1582                         if (revidx) {
1583                                 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1584                                 error("failed to read delta base object %s"
1585                                       " at offset %"PRIuMAX" from %s",
1586                                       sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1587                                       p->pack_name);
1588                                 mark_bad_packed_object(p, base_sha1);
1589                                 base = read_object(base_sha1, &type, &base_size);
1590                                 external_base = base;
1591                         }
1592                 }
1593
1594                 i = --delta_stack_nr;
1595                 obj_offset = delta_stack[i].obj_offset;
1596                 curpos = delta_stack[i].curpos;
1597                 delta_size = delta_stack[i].size;
1598
1599                 if (!base)
1600                         continue;
1601
1602                 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1603
1604                 if (!delta_data) {
1605                         error("failed to unpack compressed delta "
1606                               "at offset %"PRIuMAX" from %s",
1607                               (uintmax_t)curpos, p->pack_name);
1608                         data = NULL;
1609                         free(external_base);
1610                         continue;
1611                 }
1612
1613                 data = patch_delta(base, base_size,
1614                                    delta_data, delta_size,
1615                                    &size);
1616
1617                 /*
1618                  * We could not apply the delta; warn the user, but keep going.
1619                  * Our failure will be noticed either in the next iteration of
1620                  * the loop, or if this is the final delta, in the caller when
1621                  * we return NULL. Those code paths will take care of making
1622                  * a more explicit warning and retrying with another copy of
1623                  * the object.
1624                  */
1625                 if (!data)
1626                         error("failed to apply delta");
1627
1628                 free(delta_data);
1629                 free(external_base);
1630         }
1631
1632         if (final_type)
1633                 *final_type = type;
1634         if (final_size)
1635                 *final_size = size;
1636
1637 out:
1638         unuse_pack(&w_curs);
1639
1640         if (delta_stack != small_delta_stack)
1641                 free(delta_stack);
1642
1643         return data;
1644 }
1645
1646 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1647                                             uint32_t n)
1648 {
1649         const unsigned char *index = p->index_data;
1650         if (!index) {
1651                 if (open_pack_index(p))
1652                         return NULL;
1653                 index = p->index_data;
1654         }
1655         if (n >= p->num_objects)
1656                 return NULL;
1657         index += 4 * 256;
1658         if (p->index_version == 1) {
1659                 return index + 24 * n + 4;
1660         } else {
1661                 index += 8;
1662                 return index + 20 * n;
1663         }
1664 }
1665
1666 const struct object_id *nth_packed_object_oid(struct object_id *oid,
1667                                               struct packed_git *p,
1668                                               uint32_t n)
1669 {
1670         const unsigned char *hash = nth_packed_object_sha1(p, n);
1671         if (!hash)
1672                 return NULL;
1673         hashcpy(oid->hash, hash);
1674         return oid;
1675 }
1676
1677 void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1678 {
1679         const unsigned char *ptr = vptr;
1680         const unsigned char *start = p->index_data;
1681         const unsigned char *end = start + p->index_size;
1682         if (ptr < start)
1683                 die(_("offset before start of pack index for %s (corrupt index?)"),
1684                     p->pack_name);
1685         /* No need to check for underflow; .idx files must be at least 8 bytes */
1686         if (ptr >= end - 8)
1687                 die(_("offset beyond end of pack index for %s (truncated index?)"),
1688                     p->pack_name);
1689 }
1690
1691 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1692 {
1693         const unsigned char *index = p->index_data;
1694         index += 4 * 256;
1695         if (p->index_version == 1) {
1696                 return ntohl(*((uint32_t *)(index + 24 * n)));
1697         } else {
1698                 uint32_t off;
1699                 index += 8 + p->num_objects * (20 + 4);
1700                 off = ntohl(*((uint32_t *)(index + 4 * n)));
1701                 if (!(off & 0x80000000))
1702                         return off;
1703                 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1704                 check_pack_index_ptr(p, index);
1705                 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1706                                    ntohl(*((uint32_t *)(index + 4)));
1707         }
1708 }
1709
1710 off_t find_pack_entry_one(const unsigned char *sha1,
1711                                   struct packed_git *p)
1712 {
1713         const uint32_t *level1_ofs = p->index_data;
1714         const unsigned char *index = p->index_data;
1715         unsigned hi, lo, stride;
1716         static int debug_lookup = -1;
1717
1718         if (debug_lookup < 0)
1719                 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1720
1721         if (!index) {
1722                 if (open_pack_index(p))
1723                         return 0;
1724                 level1_ofs = p->index_data;
1725                 index = p->index_data;
1726         }
1727         if (p->index_version > 1) {
1728                 level1_ofs += 2;
1729                 index += 8;
1730         }
1731         index += 4 * 256;
1732         hi = ntohl(level1_ofs[*sha1]);
1733         lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1734         if (p->index_version > 1) {
1735                 stride = 20;
1736         } else {
1737                 stride = 24;
1738                 index += 4;
1739         }
1740
1741         if (debug_lookup)
1742                 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1743                        sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
1744
1745         while (lo < hi) {
1746                 unsigned mi = lo + (hi - lo) / 2;
1747                 int cmp = hashcmp(index + mi * stride, sha1);
1748
1749                 if (debug_lookup)
1750                         printf("lo %u hi %u rg %u mi %u\n",
1751                                lo, hi, hi - lo, mi);
1752                 if (!cmp)
1753                         return nth_packed_object_offset(p, mi);
1754                 if (cmp > 0)
1755                         hi = mi;
1756                 else
1757                         lo = mi+1;
1758         }
1759         return 0;
1760 }
1761
1762 int is_pack_valid(struct packed_git *p)
1763 {
1764         /* An already open pack is known to be valid. */
1765         if (p->pack_fd != -1)
1766                 return 1;
1767
1768         /* If the pack has one window completely covering the
1769          * file size, the pack is known to be valid even if
1770          * the descriptor is not currently open.
1771          */
1772         if (p->windows) {
1773                 struct pack_window *w = p->windows;
1774
1775                 if (!w->offset && w->len == p->pack_size)
1776                         return 1;
1777         }
1778
1779         /* Force the pack to open to prove its valid. */
1780         return !open_packed_git(p);
1781 }
1782
1783 struct packed_git *find_sha1_pack(const unsigned char *sha1,
1784                                   struct packed_git *packs)
1785 {
1786         struct packed_git *p;
1787
1788         for (p = packs; p; p = p->next) {
1789                 if (find_pack_entry_one(sha1, p))
1790                         return p;
1791         }
1792         return NULL;
1793
1794 }
1795
1796 static int fill_pack_entry(const unsigned char *sha1,
1797                            struct pack_entry *e,
1798                            struct packed_git *p)
1799 {
1800         off_t offset;
1801
1802         if (p->num_bad_objects) {
1803                 unsigned i;
1804                 for (i = 0; i < p->num_bad_objects; i++)
1805                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1806                                 return 0;
1807         }
1808
1809         offset = find_pack_entry_one(sha1, p);
1810         if (!offset)
1811                 return 0;
1812
1813         /*
1814          * We are about to tell the caller where they can locate the
1815          * requested object.  We better make sure the packfile is
1816          * still here and can be accessed before supplying that
1817          * answer, as it may have been deleted since the index was
1818          * loaded!
1819          */
1820         if (!is_pack_valid(p))
1821                 return 0;
1822         e->offset = offset;
1823         e->p = p;
1824         hashcpy(e->sha1, sha1);
1825         return 1;
1826 }
1827
1828 /*
1829  * Iff a pack file contains the object named by sha1, return true and
1830  * store its location to e.
1831  */
1832 int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1833 {
1834         struct mru_entry *p;
1835
1836         prepare_packed_git();
1837         if (!packed_git)
1838                 return 0;
1839
1840         for (p = packed_git_mru.head; p; p = p->next) {
1841                 if (fill_pack_entry(sha1, e, p->item)) {
1842                         mru_mark(&packed_git_mru, p);
1843                         return 1;
1844                 }
1845         }
1846         return 0;
1847 }
1848
1849 int has_sha1_pack(const unsigned char *sha1)
1850 {
1851         struct pack_entry e;
1852         return find_pack_entry(sha1, &e);
1853 }
1854
1855 int has_pack_index(const unsigned char *sha1)
1856 {
1857         struct stat st;
1858         if (stat(sha1_pack_index_name(sha1), &st))
1859                 return 0;
1860         return 1;
1861 }
1862
1863 static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1864 {
1865         uint32_t i;
1866         int r = 0;
1867
1868         for (i = 0; i < p->num_objects; i++) {
1869                 struct object_id oid;
1870
1871                 if (!nth_packed_object_oid(&oid, p, i))
1872                         return error("unable to get sha1 of object %u in %s",
1873                                      i, p->pack_name);
1874
1875                 r = cb(&oid, p, i, data);
1876                 if (r)
1877                         break;
1878         }
1879         return r;
1880 }
1881
1882 int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1883 {
1884         struct packed_git *p;
1885         int r = 0;
1886         int pack_errors = 0;
1887
1888         prepare_packed_git();
1889         for (p = packed_git; p; p = p->next) {
1890                 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1891                         continue;
1892                 if (open_pack_index(p)) {
1893                         pack_errors = 1;
1894                         continue;
1895                 }
1896                 r = for_each_object_in_pack(p, cb, data);
1897                 if (r)
1898                         break;
1899         }
1900         return r ? r : pack_errors;
1901 }