distinguish error versus short read from read_in_full()
[git] / packfile.c
1 #include "cache.h"
2 #include "mru.h"
3 #include "pack.h"
4 #include "dir.h"
5 #include "mergesort.h"
6 #include "packfile.h"
7 #include "delta.h"
8 #include "list.h"
9 #include "streaming.h"
10 #include "sha1-lookup.h"
11
12 char *odb_pack_name(struct strbuf *buf,
13                     const unsigned char *sha1,
14                     const char *ext)
15 {
16         strbuf_reset(buf);
17         strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
18                     sha1_to_hex(sha1), ext);
19         return buf->buf;
20 }
21
22 char *sha1_pack_name(const unsigned char *sha1)
23 {
24         static struct strbuf buf = STRBUF_INIT;
25         return odb_pack_name(&buf, sha1, "pack");
26 }
27
28 char *sha1_pack_index_name(const unsigned char *sha1)
29 {
30         static struct strbuf buf = STRBUF_INIT;
31         return odb_pack_name(&buf, sha1, "idx");
32 }
33
34 static unsigned int pack_used_ctr;
35 static unsigned int pack_mmap_calls;
36 static unsigned int peak_pack_open_windows;
37 static unsigned int pack_open_windows;
38 static unsigned int pack_open_fds;
39 static unsigned int pack_max_fds;
40 static size_t peak_pack_mapped;
41 static size_t pack_mapped;
42 struct packed_git *packed_git;
43
44 static struct mru packed_git_mru_storage;
45 struct mru *packed_git_mru = &packed_git_mru_storage;
46
47 #define SZ_FMT PRIuMAX
48 static inline uintmax_t sz_fmt(size_t s) { return s; }
49
50 void pack_report(void)
51 {
52         fprintf(stderr,
53                 "pack_report: getpagesize()            = %10" SZ_FMT "\n"
54                 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
55                 "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
56                 sz_fmt(getpagesize()),
57                 sz_fmt(packed_git_window_size),
58                 sz_fmt(packed_git_limit));
59         fprintf(stderr,
60                 "pack_report: pack_used_ctr            = %10u\n"
61                 "pack_report: pack_mmap_calls          = %10u\n"
62                 "pack_report: pack_open_windows        = %10u / %10u\n"
63                 "pack_report: pack_mapped              = "
64                         "%10" SZ_FMT " / %10" SZ_FMT "\n",
65                 pack_used_ctr,
66                 pack_mmap_calls,
67                 pack_open_windows, peak_pack_open_windows,
68                 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
69 }
70
71 /*
72  * Open and mmap the index file at path, perform a couple of
73  * consistency checks, then record its information to p.  Return 0 on
74  * success.
75  */
76 static int check_packed_git_idx(const char *path, struct packed_git *p)
77 {
78         void *idx_map;
79         struct pack_idx_header *hdr;
80         size_t idx_size;
81         uint32_t version, nr, i, *index;
82         int fd = git_open(path);
83         struct stat st;
84
85         if (fd < 0)
86                 return -1;
87         if (fstat(fd, &st)) {
88                 close(fd);
89                 return -1;
90         }
91         idx_size = xsize_t(st.st_size);
92         if (idx_size < 4 * 256 + 20 + 20) {
93                 close(fd);
94                 return error("index file %s is too small", path);
95         }
96         idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
97         close(fd);
98
99         hdr = idx_map;
100         if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
101                 version = ntohl(hdr->idx_version);
102                 if (version < 2 || version > 2) {
103                         munmap(idx_map, idx_size);
104                         return error("index file %s is version %"PRIu32
105                                      " and is not supported by this binary"
106                                      " (try upgrading GIT to a newer version)",
107                                      path, version);
108                 }
109         } else
110                 version = 1;
111
112         nr = 0;
113         index = idx_map;
114         if (version > 1)
115                 index += 2;  /* skip index header */
116         for (i = 0; i < 256; i++) {
117                 uint32_t n = ntohl(index[i]);
118                 if (n < nr) {
119                         munmap(idx_map, idx_size);
120                         return error("non-monotonic index %s", path);
121                 }
122                 nr = n;
123         }
124
125         if (version == 1) {
126                 /*
127                  * Total size:
128                  *  - 256 index entries 4 bytes each
129                  *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
130                  *  - 20-byte SHA1 of the packfile
131                  *  - 20-byte SHA1 file checksum
132                  */
133                 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
134                         munmap(idx_map, idx_size);
135                         return error("wrong index v1 file size in %s", path);
136                 }
137         } else if (version == 2) {
138                 /*
139                  * Minimum size:
140                  *  - 8 bytes of header
141                  *  - 256 index entries 4 bytes each
142                  *  - 20-byte sha1 entry * nr
143                  *  - 4-byte crc entry * nr
144                  *  - 4-byte offset entry * nr
145                  *  - 20-byte SHA1 of the packfile
146                  *  - 20-byte SHA1 file checksum
147                  * And after the 4-byte offset table might be a
148                  * variable sized table containing 8-byte entries
149                  * for offsets larger than 2^31.
150                  */
151                 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
152                 unsigned long max_size = min_size;
153                 if (nr)
154                         max_size += (nr - 1)*8;
155                 if (idx_size < min_size || idx_size > max_size) {
156                         munmap(idx_map, idx_size);
157                         return error("wrong index v2 file size in %s", path);
158                 }
159                 if (idx_size != min_size &&
160                     /*
161                      * make sure we can deal with large pack offsets.
162                      * 31-bit signed offset won't be enough, neither
163                      * 32-bit unsigned one will be.
164                      */
165                     (sizeof(off_t) <= 4)) {
166                         munmap(idx_map, idx_size);
167                         return error("pack too large for current definition of off_t in %s", path);
168                 }
169         }
170
171         p->index_version = version;
172         p->index_data = idx_map;
173         p->index_size = idx_size;
174         p->num_objects = nr;
175         return 0;
176 }
177
178 int open_pack_index(struct packed_git *p)
179 {
180         char *idx_name;
181         size_t len;
182         int ret;
183
184         if (p->index_data)
185                 return 0;
186
187         if (!strip_suffix(p->pack_name, ".pack", &len))
188                 die("BUG: pack_name does not end in .pack");
189         idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
190         ret = check_packed_git_idx(idx_name, p);
191         free(idx_name);
192         return ret;
193 }
194
195 static struct packed_git *alloc_packed_git(int extra)
196 {
197         struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
198         memset(p, 0, sizeof(*p));
199         p->pack_fd = -1;
200         return p;
201 }
202
203 struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
204 {
205         const char *path = sha1_pack_name(sha1);
206         size_t alloc = st_add(strlen(path), 1);
207         struct packed_git *p = alloc_packed_git(alloc);
208
209         memcpy(p->pack_name, path, alloc); /* includes NUL */
210         hashcpy(p->sha1, sha1);
211         if (check_packed_git_idx(idx_path, p)) {
212                 free(p);
213                 return NULL;
214         }
215
216         return p;
217 }
218
219 static void scan_windows(struct packed_git *p,
220         struct packed_git **lru_p,
221         struct pack_window **lru_w,
222         struct pack_window **lru_l)
223 {
224         struct pack_window *w, *w_l;
225
226         for (w_l = NULL, w = p->windows; w; w = w->next) {
227                 if (!w->inuse_cnt) {
228                         if (!*lru_w || w->last_used < (*lru_w)->last_used) {
229                                 *lru_p = p;
230                                 *lru_w = w;
231                                 *lru_l = w_l;
232                         }
233                 }
234                 w_l = w;
235         }
236 }
237
238 static int unuse_one_window(struct packed_git *current)
239 {
240         struct packed_git *p, *lru_p = NULL;
241         struct pack_window *lru_w = NULL, *lru_l = NULL;
242
243         if (current)
244                 scan_windows(current, &lru_p, &lru_w, &lru_l);
245         for (p = packed_git; p; p = p->next)
246                 scan_windows(p, &lru_p, &lru_w, &lru_l);
247         if (lru_p) {
248                 munmap(lru_w->base, lru_w->len);
249                 pack_mapped -= lru_w->len;
250                 if (lru_l)
251                         lru_l->next = lru_w->next;
252                 else
253                         lru_p->windows = lru_w->next;
254                 free(lru_w);
255                 pack_open_windows--;
256                 return 1;
257         }
258         return 0;
259 }
260
261 void release_pack_memory(size_t need)
262 {
263         size_t cur = pack_mapped;
264         while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
265                 ; /* nothing */
266 }
267
268 void close_pack_windows(struct packed_git *p)
269 {
270         while (p->windows) {
271                 struct pack_window *w = p->windows;
272
273                 if (w->inuse_cnt)
274                         die("pack '%s' still has open windows to it",
275                             p->pack_name);
276                 munmap(w->base, w->len);
277                 pack_mapped -= w->len;
278                 pack_open_windows--;
279                 p->windows = w->next;
280                 free(w);
281         }
282 }
283
284 static int close_pack_fd(struct packed_git *p)
285 {
286         if (p->pack_fd < 0)
287                 return 0;
288
289         close(p->pack_fd);
290         pack_open_fds--;
291         p->pack_fd = -1;
292
293         return 1;
294 }
295
296 void close_pack_index(struct packed_git *p)
297 {
298         if (p->index_data) {
299                 munmap((void *)p->index_data, p->index_size);
300                 p->index_data = NULL;
301         }
302 }
303
304 static void close_pack(struct packed_git *p)
305 {
306         close_pack_windows(p);
307         close_pack_fd(p);
308         close_pack_index(p);
309 }
310
311 void close_all_packs(void)
312 {
313         struct packed_git *p;
314
315         for (p = packed_git; p; p = p->next)
316                 if (p->do_not_close)
317                         die("BUG: want to close pack marked 'do-not-close'");
318                 else
319                         close_pack(p);
320 }
321
322 /*
323  * The LRU pack is the one with the oldest MRU window, preferring packs
324  * with no used windows, or the oldest mtime if it has no windows allocated.
325  */
326 static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
327 {
328         struct pack_window *w, *this_mru_w;
329         int has_windows_inuse = 0;
330
331         /*
332          * Reject this pack if it has windows and the previously selected
333          * one does not.  If this pack does not have windows, reject
334          * it if the pack file is newer than the previously selected one.
335          */
336         if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
337                 return;
338
339         for (w = this_mru_w = p->windows; w; w = w->next) {
340                 /*
341                  * Reject this pack if any of its windows are in use,
342                  * but the previously selected pack did not have any
343                  * inuse windows.  Otherwise, record that this pack
344                  * has windows in use.
345                  */
346                 if (w->inuse_cnt) {
347                         if (*accept_windows_inuse)
348                                 has_windows_inuse = 1;
349                         else
350                                 return;
351                 }
352
353                 if (w->last_used > this_mru_w->last_used)
354                         this_mru_w = w;
355
356                 /*
357                  * Reject this pack if it has windows that have been
358                  * used more recently than the previously selected pack.
359                  * If the previously selected pack had windows inuse and
360                  * we have not encountered a window in this pack that is
361                  * inuse, skip this check since we prefer a pack with no
362                  * inuse windows to one that has inuse windows.
363                  */
364                 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
365                     this_mru_w->last_used > (*mru_w)->last_used)
366                         return;
367         }
368
369         /*
370          * Select this pack.
371          */
372         *mru_w = this_mru_w;
373         *lru_p = p;
374         *accept_windows_inuse = has_windows_inuse;
375 }
376
377 static int close_one_pack(void)
378 {
379         struct packed_git *p, *lru_p = NULL;
380         struct pack_window *mru_w = NULL;
381         int accept_windows_inuse = 1;
382
383         for (p = packed_git; p; p = p->next) {
384                 if (p->pack_fd == -1)
385                         continue;
386                 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
387         }
388
389         if (lru_p)
390                 return close_pack_fd(lru_p);
391
392         return 0;
393 }
394
395 static unsigned int get_max_fd_limit(void)
396 {
397 #ifdef RLIMIT_NOFILE
398         {
399                 struct rlimit lim;
400
401                 if (!getrlimit(RLIMIT_NOFILE, &lim))
402                         return lim.rlim_cur;
403         }
404 #endif
405
406 #ifdef _SC_OPEN_MAX
407         {
408                 long open_max = sysconf(_SC_OPEN_MAX);
409                 if (0 < open_max)
410                         return open_max;
411                 /*
412                  * Otherwise, we got -1 for one of the two
413                  * reasons:
414                  *
415                  * (1) sysconf() did not understand _SC_OPEN_MAX
416                  *     and signaled an error with -1; or
417                  * (2) sysconf() said there is no limit.
418                  *
419                  * We _could_ clear errno before calling sysconf() to
420                  * tell these two cases apart and return a huge number
421                  * in the latter case to let the caller cap it to a
422                  * value that is not so selfish, but letting the
423                  * fallback OPEN_MAX codepath take care of these cases
424                  * is a lot simpler.
425                  */
426         }
427 #endif
428
429 #ifdef OPEN_MAX
430         return OPEN_MAX;
431 #else
432         return 1; /* see the caller ;-) */
433 #endif
434 }
435
436 /*
437  * Do not call this directly as this leaks p->pack_fd on error return;
438  * call open_packed_git() instead.
439  */
440 static int open_packed_git_1(struct packed_git *p)
441 {
442         struct stat st;
443         struct pack_header hdr;
444         unsigned char sha1[20];
445         unsigned char *idx_sha1;
446         long fd_flag;
447         ssize_t read_result;
448
449         if (!p->index_data && open_pack_index(p))
450                 return error("packfile %s index unavailable", p->pack_name);
451
452         if (!pack_max_fds) {
453                 unsigned int max_fds = get_max_fd_limit();
454
455                 /* Save 3 for stdin/stdout/stderr, 22 for work */
456                 if (25 < max_fds)
457                         pack_max_fds = max_fds - 25;
458                 else
459                         pack_max_fds = 1;
460         }
461
462         while (pack_max_fds <= pack_open_fds && close_one_pack())
463                 ; /* nothing */
464
465         p->pack_fd = git_open(p->pack_name);
466         if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
467                 return -1;
468         pack_open_fds++;
469
470         /* If we created the struct before we had the pack we lack size. */
471         if (!p->pack_size) {
472                 if (!S_ISREG(st.st_mode))
473                         return error("packfile %s not a regular file", p->pack_name);
474                 p->pack_size = st.st_size;
475         } else if (p->pack_size != st.st_size)
476                 return error("packfile %s size changed", p->pack_name);
477
478         /* We leave these file descriptors open with sliding mmap;
479          * there is no point keeping them open across exec(), though.
480          */
481         fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
482         if (fd_flag < 0)
483                 return error("cannot determine file descriptor flags");
484         fd_flag |= FD_CLOEXEC;
485         if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
486                 return error("cannot set FD_CLOEXEC");
487
488         /* Verify we recognize this pack file format. */
489         read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
490         if (read_result < 0)
491                 return error_errno("error reading from %s", p->pack_name);
492         if (read_result != sizeof(hdr))
493                 return error("file %s is far too short to be a packfile", p->pack_name);
494         if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
495                 return error("file %s is not a GIT packfile", p->pack_name);
496         if (!pack_version_ok(hdr.hdr_version))
497                 return error("packfile %s is version %"PRIu32" and not"
498                         " supported (try upgrading GIT to a newer version)",
499                         p->pack_name, ntohl(hdr.hdr_version));
500
501         /* Verify the pack matches its index. */
502         if (p->num_objects != ntohl(hdr.hdr_entries))
503                 return error("packfile %s claims to have %"PRIu32" objects"
504                              " while index indicates %"PRIu32" objects",
505                              p->pack_name, ntohl(hdr.hdr_entries),
506                              p->num_objects);
507         if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
508                 return error("end of packfile %s is unavailable", p->pack_name);
509         read_result = read_in_full(p->pack_fd, sha1, sizeof(sha1));
510         if (read_result < 0)
511                 return error_errno("error reading from %s", p->pack_name);
512         if (read_result != sizeof(sha1))
513                 return error("packfile %s signature is unavailable", p->pack_name);
514         idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
515         if (hashcmp(sha1, idx_sha1))
516                 return error("packfile %s does not match index", p->pack_name);
517         return 0;
518 }
519
520 static int open_packed_git(struct packed_git *p)
521 {
522         if (!open_packed_git_1(p))
523                 return 0;
524         close_pack_fd(p);
525         return -1;
526 }
527
528 static int in_window(struct pack_window *win, off_t offset)
529 {
530         /* We must promise at least 20 bytes (one hash) after the
531          * offset is available from this window, otherwise the offset
532          * is not actually in this window and a different window (which
533          * has that one hash excess) must be used.  This is to support
534          * the object header and delta base parsing routines below.
535          */
536         off_t win_off = win->offset;
537         return win_off <= offset
538                 && (offset + 20) <= (win_off + win->len);
539 }
540
541 unsigned char *use_pack(struct packed_git *p,
542                 struct pack_window **w_cursor,
543                 off_t offset,
544                 unsigned long *left)
545 {
546         struct pack_window *win = *w_cursor;
547
548         /* Since packfiles end in a hash of their content and it's
549          * pointless to ask for an offset into the middle of that
550          * hash, and the in_window function above wouldn't match
551          * don't allow an offset too close to the end of the file.
552          */
553         if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
554                 die("packfile %s cannot be accessed", p->pack_name);
555         if (offset > (p->pack_size - 20))
556                 die("offset beyond end of packfile (truncated pack?)");
557         if (offset < 0)
558                 die(_("offset before end of packfile (broken .idx?)"));
559
560         if (!win || !in_window(win, offset)) {
561                 if (win)
562                         win->inuse_cnt--;
563                 for (win = p->windows; win; win = win->next) {
564                         if (in_window(win, offset))
565                                 break;
566                 }
567                 if (!win) {
568                         size_t window_align = packed_git_window_size / 2;
569                         off_t len;
570
571                         if (p->pack_fd == -1 && open_packed_git(p))
572                                 die("packfile %s cannot be accessed", p->pack_name);
573
574                         win = xcalloc(1, sizeof(*win));
575                         win->offset = (offset / window_align) * window_align;
576                         len = p->pack_size - win->offset;
577                         if (len > packed_git_window_size)
578                                 len = packed_git_window_size;
579                         win->len = (size_t)len;
580                         pack_mapped += win->len;
581                         while (packed_git_limit < pack_mapped
582                                 && unuse_one_window(p))
583                                 ; /* nothing */
584                         win->base = xmmap(NULL, win->len,
585                                 PROT_READ, MAP_PRIVATE,
586                                 p->pack_fd, win->offset);
587                         if (win->base == MAP_FAILED)
588                                 die_errno("packfile %s cannot be mapped",
589                                           p->pack_name);
590                         if (!win->offset && win->len == p->pack_size
591                                 && !p->do_not_close)
592                                 close_pack_fd(p);
593                         pack_mmap_calls++;
594                         pack_open_windows++;
595                         if (pack_mapped > peak_pack_mapped)
596                                 peak_pack_mapped = pack_mapped;
597                         if (pack_open_windows > peak_pack_open_windows)
598                                 peak_pack_open_windows = pack_open_windows;
599                         win->next = p->windows;
600                         p->windows = win;
601                 }
602         }
603         if (win != *w_cursor) {
604                 win->last_used = pack_used_ctr++;
605                 win->inuse_cnt++;
606                 *w_cursor = win;
607         }
608         offset -= win->offset;
609         if (left)
610                 *left = win->len - xsize_t(offset);
611         return win->base + offset;
612 }
613
614 void unuse_pack(struct pack_window **w_cursor)
615 {
616         struct pack_window *w = *w_cursor;
617         if (w) {
618                 w->inuse_cnt--;
619                 *w_cursor = NULL;
620         }
621 }
622
623 static void try_to_free_pack_memory(size_t size)
624 {
625         release_pack_memory(size);
626 }
627
628 struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
629 {
630         static int have_set_try_to_free_routine;
631         struct stat st;
632         size_t alloc;
633         struct packed_git *p;
634
635         if (!have_set_try_to_free_routine) {
636                 have_set_try_to_free_routine = 1;
637                 set_try_to_free_routine(try_to_free_pack_memory);
638         }
639
640         /*
641          * Make sure a corresponding .pack file exists and that
642          * the index looks sane.
643          */
644         if (!strip_suffix_mem(path, &path_len, ".idx"))
645                 return NULL;
646
647         /*
648          * ".pack" is long enough to hold any suffix we're adding (and
649          * the use xsnprintf double-checks that)
650          */
651         alloc = st_add3(path_len, strlen(".pack"), 1);
652         p = alloc_packed_git(alloc);
653         memcpy(p->pack_name, path, path_len);
654
655         xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
656         if (!access(p->pack_name, F_OK))
657                 p->pack_keep = 1;
658
659         xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
660         if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
661                 free(p);
662                 return NULL;
663         }
664
665         /* ok, it looks sane as far as we can check without
666          * actually mapping the pack file.
667          */
668         p->pack_size = st.st_size;
669         p->pack_local = local;
670         p->mtime = st.st_mtime;
671         if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
672                 hashclr(p->sha1);
673         return p;
674 }
675
676 void install_packed_git(struct packed_git *pack)
677 {
678         if (pack->pack_fd != -1)
679                 pack_open_fds++;
680
681         pack->next = packed_git;
682         packed_git = pack;
683 }
684
685 void (*report_garbage)(unsigned seen_bits, const char *path);
686
687 static void report_helper(const struct string_list *list,
688                           int seen_bits, int first, int last)
689 {
690         if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
691                 return;
692
693         for (; first < last; first++)
694                 report_garbage(seen_bits, list->items[first].string);
695 }
696
697 static void report_pack_garbage(struct string_list *list)
698 {
699         int i, baselen = -1, first = 0, seen_bits = 0;
700
701         if (!report_garbage)
702                 return;
703
704         string_list_sort(list);
705
706         for (i = 0; i < list->nr; i++) {
707                 const char *path = list->items[i].string;
708                 if (baselen != -1 &&
709                     strncmp(path, list->items[first].string, baselen)) {
710                         report_helper(list, seen_bits, first, i);
711                         baselen = -1;
712                         seen_bits = 0;
713                 }
714                 if (baselen == -1) {
715                         const char *dot = strrchr(path, '.');
716                         if (!dot) {
717                                 report_garbage(PACKDIR_FILE_GARBAGE, path);
718                                 continue;
719                         }
720                         baselen = dot - path + 1;
721                         first = i;
722                 }
723                 if (!strcmp(path + baselen, "pack"))
724                         seen_bits |= 1;
725                 else if (!strcmp(path + baselen, "idx"))
726                         seen_bits |= 2;
727         }
728         report_helper(list, seen_bits, first, list->nr);
729 }
730
731 static void prepare_packed_git_one(char *objdir, int local)
732 {
733         struct strbuf path = STRBUF_INIT;
734         size_t dirnamelen;
735         DIR *dir;
736         struct dirent *de;
737         struct string_list garbage = STRING_LIST_INIT_DUP;
738
739         strbuf_addstr(&path, objdir);
740         strbuf_addstr(&path, "/pack");
741         dir = opendir(path.buf);
742         if (!dir) {
743                 if (errno != ENOENT)
744                         error_errno("unable to open object pack directory: %s",
745                                     path.buf);
746                 strbuf_release(&path);
747                 return;
748         }
749         strbuf_addch(&path, '/');
750         dirnamelen = path.len;
751         while ((de = readdir(dir)) != NULL) {
752                 struct packed_git *p;
753                 size_t base_len;
754
755                 if (is_dot_or_dotdot(de->d_name))
756                         continue;
757
758                 strbuf_setlen(&path, dirnamelen);
759                 strbuf_addstr(&path, de->d_name);
760
761                 base_len = path.len;
762                 if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
763                         /* Don't reopen a pack we already have. */
764                         for (p = packed_git; p; p = p->next) {
765                                 size_t len;
766                                 if (strip_suffix(p->pack_name, ".pack", &len) &&
767                                     len == base_len &&
768                                     !memcmp(p->pack_name, path.buf, len))
769                                         break;
770                         }
771                         if (p == NULL &&
772                             /*
773                              * See if it really is a valid .idx file with
774                              * corresponding .pack file that we can map.
775                              */
776                             (p = add_packed_git(path.buf, path.len, local)) != NULL)
777                                 install_packed_git(p);
778                 }
779
780                 if (!report_garbage)
781                         continue;
782
783                 if (ends_with(de->d_name, ".idx") ||
784                     ends_with(de->d_name, ".pack") ||
785                     ends_with(de->d_name, ".bitmap") ||
786                     ends_with(de->d_name, ".keep"))
787                         string_list_append(&garbage, path.buf);
788                 else
789                         report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
790         }
791         closedir(dir);
792         report_pack_garbage(&garbage);
793         string_list_clear(&garbage, 0);
794         strbuf_release(&path);
795 }
796
797 static int approximate_object_count_valid;
798
799 /*
800  * Give a fast, rough count of the number of objects in the repository. This
801  * ignores loose objects completely. If you have a lot of them, then either
802  * you should repack because your performance will be awful, or they are
803  * all unreachable objects about to be pruned, in which case they're not really
804  * interesting as a measure of repo size in the first place.
805  */
806 unsigned long approximate_object_count(void)
807 {
808         static unsigned long count;
809         if (!approximate_object_count_valid) {
810                 struct packed_git *p;
811
812                 prepare_packed_git();
813                 count = 0;
814                 for (p = packed_git; p; p = p->next) {
815                         if (open_pack_index(p))
816                                 continue;
817                         count += p->num_objects;
818                 }
819         }
820         return count;
821 }
822
823 static void *get_next_packed_git(const void *p)
824 {
825         return ((const struct packed_git *)p)->next;
826 }
827
828 static void set_next_packed_git(void *p, void *next)
829 {
830         ((struct packed_git *)p)->next = next;
831 }
832
833 static int sort_pack(const void *a_, const void *b_)
834 {
835         const struct packed_git *a = a_;
836         const struct packed_git *b = b_;
837         int st;
838
839         /*
840          * Local packs tend to contain objects specific to our
841          * variant of the project than remote ones.  In addition,
842          * remote ones could be on a network mounted filesystem.
843          * Favor local ones for these reasons.
844          */
845         st = a->pack_local - b->pack_local;
846         if (st)
847                 return -st;
848
849         /*
850          * Younger packs tend to contain more recent objects,
851          * and more recent objects tend to get accessed more
852          * often.
853          */
854         if (a->mtime < b->mtime)
855                 return 1;
856         else if (a->mtime == b->mtime)
857                 return 0;
858         return -1;
859 }
860
861 static void rearrange_packed_git(void)
862 {
863         packed_git = llist_mergesort(packed_git, get_next_packed_git,
864                                      set_next_packed_git, sort_pack);
865 }
866
867 static void prepare_packed_git_mru(void)
868 {
869         struct packed_git *p;
870
871         mru_clear(packed_git_mru);
872         for (p = packed_git; p; p = p->next)
873                 mru_append(packed_git_mru, p);
874 }
875
876 static int prepare_packed_git_run_once = 0;
877 void prepare_packed_git(void)
878 {
879         struct alternate_object_database *alt;
880
881         if (prepare_packed_git_run_once)
882                 return;
883         prepare_packed_git_one(get_object_directory(), 1);
884         prepare_alt_odb();
885         for (alt = alt_odb_list; alt; alt = alt->next)
886                 prepare_packed_git_one(alt->path, 0);
887         rearrange_packed_git();
888         prepare_packed_git_mru();
889         prepare_packed_git_run_once = 1;
890 }
891
892 void reprepare_packed_git(void)
893 {
894         approximate_object_count_valid = 0;
895         prepare_packed_git_run_once = 0;
896         prepare_packed_git();
897 }
898
899 unsigned long unpack_object_header_buffer(const unsigned char *buf,
900                 unsigned long len, enum object_type *type, unsigned long *sizep)
901 {
902         unsigned shift;
903         unsigned long size, c;
904         unsigned long used = 0;
905
906         c = buf[used++];
907         *type = (c >> 4) & 7;
908         size = c & 15;
909         shift = 4;
910         while (c & 0x80) {
911                 if (len <= used || bitsizeof(long) <= shift) {
912                         error("bad object header");
913                         size = used = 0;
914                         break;
915                 }
916                 c = buf[used++];
917                 size += (c & 0x7f) << shift;
918                 shift += 7;
919         }
920         *sizep = size;
921         return used;
922 }
923
924 unsigned long get_size_from_delta(struct packed_git *p,
925                                   struct pack_window **w_curs,
926                                   off_t curpos)
927 {
928         const unsigned char *data;
929         unsigned char delta_head[20], *in;
930         git_zstream stream;
931         int st;
932
933         memset(&stream, 0, sizeof(stream));
934         stream.next_out = delta_head;
935         stream.avail_out = sizeof(delta_head);
936
937         git_inflate_init(&stream);
938         do {
939                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
940                 stream.next_in = in;
941                 st = git_inflate(&stream, Z_FINISH);
942                 curpos += stream.next_in - in;
943         } while ((st == Z_OK || st == Z_BUF_ERROR) &&
944                  stream.total_out < sizeof(delta_head));
945         git_inflate_end(&stream);
946         if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
947                 error("delta data unpack-initial failed");
948                 return 0;
949         }
950
951         /* Examine the initial part of the delta to figure out
952          * the result size.
953          */
954         data = delta_head;
955
956         /* ignore base size */
957         get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
958
959         /* Read the result size */
960         return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
961 }
962
963 int unpack_object_header(struct packed_git *p,
964                          struct pack_window **w_curs,
965                          off_t *curpos,
966                          unsigned long *sizep)
967 {
968         unsigned char *base;
969         unsigned long left;
970         unsigned long used;
971         enum object_type type;
972
973         /* use_pack() assures us we have [base, base + 20) available
974          * as a range that we can look at.  (Its actually the hash
975          * size that is assured.)  With our object header encoding
976          * the maximum deflated object size is 2^137, which is just
977          * insane, so we know won't exceed what we have been given.
978          */
979         base = use_pack(p, w_curs, *curpos, &left);
980         used = unpack_object_header_buffer(base, left, &type, sizep);
981         if (!used) {
982                 type = OBJ_BAD;
983         } else
984                 *curpos += used;
985
986         return type;
987 }
988
989 void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
990 {
991         unsigned i;
992         for (i = 0; i < p->num_bad_objects; i++)
993                 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
994                         return;
995         p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
996                                       st_mult(GIT_MAX_RAWSZ,
997                                               st_add(p->num_bad_objects, 1)));
998         hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
999         p->num_bad_objects++;
1000 }
1001
1002 const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1003 {
1004         struct packed_git *p;
1005         unsigned i;
1006
1007         for (p = packed_git; p; p = p->next)
1008                 for (i = 0; i < p->num_bad_objects; i++)
1009                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1010                                 return p;
1011         return NULL;
1012 }
1013
1014 static off_t get_delta_base(struct packed_git *p,
1015                                     struct pack_window **w_curs,
1016                                     off_t *curpos,
1017                                     enum object_type type,
1018                                     off_t delta_obj_offset)
1019 {
1020         unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1021         off_t base_offset;
1022
1023         /* use_pack() assured us we have [base_info, base_info + 20)
1024          * as a range that we can look at without walking off the
1025          * end of the mapped window.  Its actually the hash size
1026          * that is assured.  An OFS_DELTA longer than the hash size
1027          * is stupid, as then a REF_DELTA would be smaller to store.
1028          */
1029         if (type == OBJ_OFS_DELTA) {
1030                 unsigned used = 0;
1031                 unsigned char c = base_info[used++];
1032                 base_offset = c & 127;
1033                 while (c & 128) {
1034                         base_offset += 1;
1035                         if (!base_offset || MSB(base_offset, 7))
1036                                 return 0;  /* overflow */
1037                         c = base_info[used++];
1038                         base_offset = (base_offset << 7) + (c & 127);
1039                 }
1040                 base_offset = delta_obj_offset - base_offset;
1041                 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1042                         return 0;  /* out of bound */
1043                 *curpos += used;
1044         } else if (type == OBJ_REF_DELTA) {
1045                 /* The base entry _must_ be in the same pack */
1046                 base_offset = find_pack_entry_one(base_info, p);
1047                 *curpos += 20;
1048         } else
1049                 die("I am totally screwed");
1050         return base_offset;
1051 }
1052
1053 /*
1054  * Like get_delta_base above, but we return the sha1 instead of the pack
1055  * offset. This means it is cheaper for REF deltas (we do not have to do
1056  * the final object lookup), but more expensive for OFS deltas (we
1057  * have to load the revidx to convert the offset back into a sha1).
1058  */
1059 static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1060                                                 struct pack_window **w_curs,
1061                                                 off_t curpos,
1062                                                 enum object_type type,
1063                                                 off_t delta_obj_offset)
1064 {
1065         if (type == OBJ_REF_DELTA) {
1066                 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1067                 return base;
1068         } else if (type == OBJ_OFS_DELTA) {
1069                 struct revindex_entry *revidx;
1070                 off_t base_offset = get_delta_base(p, w_curs, &curpos,
1071                                                    type, delta_obj_offset);
1072
1073                 if (!base_offset)
1074                         return NULL;
1075
1076                 revidx = find_pack_revindex(p, base_offset);
1077                 if (!revidx)
1078                         return NULL;
1079
1080                 return nth_packed_object_sha1(p, revidx->nr);
1081         } else
1082                 return NULL;
1083 }
1084
1085 static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1086 {
1087         int type;
1088         struct revindex_entry *revidx;
1089         const unsigned char *sha1;
1090         revidx = find_pack_revindex(p, obj_offset);
1091         if (!revidx)
1092                 return OBJ_BAD;
1093         sha1 = nth_packed_object_sha1(p, revidx->nr);
1094         mark_bad_packed_object(p, sha1);
1095         type = sha1_object_info(sha1, NULL);
1096         if (type <= OBJ_NONE)
1097                 return OBJ_BAD;
1098         return type;
1099 }
1100
1101 #define POI_STACK_PREALLOC 64
1102
1103 static enum object_type packed_to_object_type(struct packed_git *p,
1104                                               off_t obj_offset,
1105                                               enum object_type type,
1106                                               struct pack_window **w_curs,
1107                                               off_t curpos)
1108 {
1109         off_t small_poi_stack[POI_STACK_PREALLOC];
1110         off_t *poi_stack = small_poi_stack;
1111         int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1112
1113         while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1114                 off_t base_offset;
1115                 unsigned long size;
1116                 /* Push the object we're going to leave behind */
1117                 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1118                         poi_stack_alloc = alloc_nr(poi_stack_nr);
1119                         ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1120                         memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1121                 } else {
1122                         ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1123                 }
1124                 poi_stack[poi_stack_nr++] = obj_offset;
1125                 /* If parsing the base offset fails, just unwind */
1126                 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1127                 if (!base_offset)
1128                         goto unwind;
1129                 curpos = obj_offset = base_offset;
1130                 type = unpack_object_header(p, w_curs, &curpos, &size);
1131                 if (type <= OBJ_NONE) {
1132                         /* If getting the base itself fails, we first
1133                          * retry the base, otherwise unwind */
1134                         type = retry_bad_packed_offset(p, base_offset);
1135                         if (type > OBJ_NONE)
1136                                 goto out;
1137                         goto unwind;
1138                 }
1139         }
1140
1141         switch (type) {
1142         case OBJ_BAD:
1143         case OBJ_COMMIT:
1144         case OBJ_TREE:
1145         case OBJ_BLOB:
1146         case OBJ_TAG:
1147                 break;
1148         default:
1149                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1150                       type, (uintmax_t)obj_offset, p->pack_name);
1151                 type = OBJ_BAD;
1152         }
1153
1154 out:
1155         if (poi_stack != small_poi_stack)
1156                 free(poi_stack);
1157         return type;
1158
1159 unwind:
1160         while (poi_stack_nr) {
1161                 obj_offset = poi_stack[--poi_stack_nr];
1162                 type = retry_bad_packed_offset(p, obj_offset);
1163                 if (type > OBJ_NONE)
1164                         goto out;
1165         }
1166         type = OBJ_BAD;
1167         goto out;
1168 }
1169
1170 static struct hashmap delta_base_cache;
1171 static size_t delta_base_cached;
1172
1173 static LIST_HEAD(delta_base_cache_lru);
1174
1175 struct delta_base_cache_key {
1176         struct packed_git *p;
1177         off_t base_offset;
1178 };
1179
1180 struct delta_base_cache_entry {
1181         struct hashmap hash;
1182         struct delta_base_cache_key key;
1183         struct list_head lru;
1184         void *data;
1185         unsigned long size;
1186         enum object_type type;
1187 };
1188
1189 static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1190 {
1191         unsigned int hash;
1192
1193         hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1194         hash += (hash >> 8) + (hash >> 16);
1195         return hash;
1196 }
1197
1198 static struct delta_base_cache_entry *
1199 get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1200 {
1201         struct hashmap_entry entry;
1202         struct delta_base_cache_key key;
1203
1204         if (!delta_base_cache.cmpfn)
1205                 return NULL;
1206
1207         hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1208         key.p = p;
1209         key.base_offset = base_offset;
1210         return hashmap_get(&delta_base_cache, &entry, &key);
1211 }
1212
1213 static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1214                                    const struct delta_base_cache_key *b)
1215 {
1216         return a->p == b->p && a->base_offset == b->base_offset;
1217 }
1218
1219 static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1220                                      const void *va, const void *vb,
1221                                      const void *vkey)
1222 {
1223         const struct delta_base_cache_entry *a = va, *b = vb;
1224         const struct delta_base_cache_key *key = vkey;
1225         if (key)
1226                 return !delta_base_cache_key_eq(&a->key, key);
1227         else
1228                 return !delta_base_cache_key_eq(&a->key, &b->key);
1229 }
1230
1231 static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1232 {
1233         return !!get_delta_base_cache_entry(p, base_offset);
1234 }
1235
1236 /*
1237  * Remove the entry from the cache, but do _not_ free the associated
1238  * entry data. The caller takes ownership of the "data" buffer, and
1239  * should copy out any fields it wants before detaching.
1240  */
1241 static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1242 {
1243         hashmap_remove(&delta_base_cache, ent, &ent->key);
1244         list_del(&ent->lru);
1245         delta_base_cached -= ent->size;
1246         free(ent);
1247 }
1248
1249 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1250         unsigned long *base_size, enum object_type *type)
1251 {
1252         struct delta_base_cache_entry *ent;
1253
1254         ent = get_delta_base_cache_entry(p, base_offset);
1255         if (!ent)
1256                 return unpack_entry(p, base_offset, type, base_size);
1257
1258         if (type)
1259                 *type = ent->type;
1260         if (base_size)
1261                 *base_size = ent->size;
1262         return xmemdupz(ent->data, ent->size);
1263 }
1264
1265 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1266 {
1267         free(ent->data);
1268         detach_delta_base_cache_entry(ent);
1269 }
1270
1271 void clear_delta_base_cache(void)
1272 {
1273         struct list_head *lru, *tmp;
1274         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1275                 struct delta_base_cache_entry *entry =
1276                         list_entry(lru, struct delta_base_cache_entry, lru);
1277                 release_delta_base_cache(entry);
1278         }
1279 }
1280
1281 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1282         void *base, unsigned long base_size, enum object_type type)
1283 {
1284         struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1285         struct list_head *lru, *tmp;
1286
1287         delta_base_cached += base_size;
1288
1289         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1290                 struct delta_base_cache_entry *f =
1291                         list_entry(lru, struct delta_base_cache_entry, lru);
1292                 if (delta_base_cached <= delta_base_cache_limit)
1293                         break;
1294                 release_delta_base_cache(f);
1295         }
1296
1297         ent->key.p = p;
1298         ent->key.base_offset = base_offset;
1299         ent->type = type;
1300         ent->data = base;
1301         ent->size = base_size;
1302         list_add_tail(&ent->lru, &delta_base_cache_lru);
1303
1304         if (!delta_base_cache.cmpfn)
1305                 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1306         hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1307         hashmap_add(&delta_base_cache, ent);
1308 }
1309
1310 int packed_object_info(struct packed_git *p, off_t obj_offset,
1311                        struct object_info *oi)
1312 {
1313         struct pack_window *w_curs = NULL;
1314         unsigned long size;
1315         off_t curpos = obj_offset;
1316         enum object_type type;
1317
1318         /*
1319          * We always get the representation type, but only convert it to
1320          * a "real" type later if the caller is interested.
1321          */
1322         if (oi->contentp) {
1323                 *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1324                                                       &type);
1325                 if (!*oi->contentp)
1326                         type = OBJ_BAD;
1327         } else {
1328                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1329         }
1330
1331         if (!oi->contentp && oi->sizep) {
1332                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1333                         off_t tmp_pos = curpos;
1334                         off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1335                                                            type, obj_offset);
1336                         if (!base_offset) {
1337                                 type = OBJ_BAD;
1338                                 goto out;
1339                         }
1340                         *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1341                         if (*oi->sizep == 0) {
1342                                 type = OBJ_BAD;
1343                                 goto out;
1344                         }
1345                 } else {
1346                         *oi->sizep = size;
1347                 }
1348         }
1349
1350         if (oi->disk_sizep) {
1351                 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1352                 *oi->disk_sizep = revidx[1].offset - obj_offset;
1353         }
1354
1355         if (oi->typep || oi->typename) {
1356                 enum object_type ptot;
1357                 ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1358                                              curpos);
1359                 if (oi->typep)
1360                         *oi->typep = ptot;
1361                 if (oi->typename) {
1362                         const char *tn = typename(ptot);
1363                         if (tn)
1364                                 strbuf_addstr(oi->typename, tn);
1365                 }
1366                 if (ptot < 0) {
1367                         type = OBJ_BAD;
1368                         goto out;
1369                 }
1370         }
1371
1372         if (oi->delta_base_sha1) {
1373                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1374                         const unsigned char *base;
1375
1376                         base = get_delta_base_sha1(p, &w_curs, curpos,
1377                                                    type, obj_offset);
1378                         if (!base) {
1379                                 type = OBJ_BAD;
1380                                 goto out;
1381                         }
1382
1383                         hashcpy(oi->delta_base_sha1, base);
1384                 } else
1385                         hashclr(oi->delta_base_sha1);
1386         }
1387
1388         oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1389                                                           OI_PACKED;
1390
1391 out:
1392         unuse_pack(&w_curs);
1393         return type;
1394 }
1395
1396 static void *unpack_compressed_entry(struct packed_git *p,
1397                                     struct pack_window **w_curs,
1398                                     off_t curpos,
1399                                     unsigned long size)
1400 {
1401         int st;
1402         git_zstream stream;
1403         unsigned char *buffer, *in;
1404
1405         buffer = xmallocz_gently(size);
1406         if (!buffer)
1407                 return NULL;
1408         memset(&stream, 0, sizeof(stream));
1409         stream.next_out = buffer;
1410         stream.avail_out = size + 1;
1411
1412         git_inflate_init(&stream);
1413         do {
1414                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1415                 stream.next_in = in;
1416                 st = git_inflate(&stream, Z_FINISH);
1417                 if (!stream.avail_out)
1418                         break; /* the payload is larger than it should be */
1419                 curpos += stream.next_in - in;
1420         } while (st == Z_OK || st == Z_BUF_ERROR);
1421         git_inflate_end(&stream);
1422         if ((st != Z_STREAM_END) || stream.total_out != size) {
1423                 free(buffer);
1424                 return NULL;
1425         }
1426
1427         return buffer;
1428 }
1429
1430 static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1431 {
1432         static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1433         trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1434                          p->pack_name, (uintmax_t)obj_offset);
1435 }
1436
1437 int do_check_packed_object_crc;
1438
1439 #define UNPACK_ENTRY_STACK_PREALLOC 64
1440 struct unpack_entry_stack_ent {
1441         off_t obj_offset;
1442         off_t curpos;
1443         unsigned long size;
1444 };
1445
1446 static void *read_object(const unsigned char *sha1, enum object_type *type,
1447                          unsigned long *size)
1448 {
1449         struct object_info oi = OBJECT_INFO_INIT;
1450         void *content;
1451         oi.typep = type;
1452         oi.sizep = size;
1453         oi.contentp = &content;
1454
1455         if (sha1_object_info_extended(sha1, &oi, 0) < 0)
1456                 return NULL;
1457         return content;
1458 }
1459
1460 void *unpack_entry(struct packed_git *p, off_t obj_offset,
1461                    enum object_type *final_type, unsigned long *final_size)
1462 {
1463         struct pack_window *w_curs = NULL;
1464         off_t curpos = obj_offset;
1465         void *data = NULL;
1466         unsigned long size;
1467         enum object_type type;
1468         struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1469         struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1470         int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1471         int base_from_cache = 0;
1472
1473         write_pack_access_log(p, obj_offset);
1474
1475         /* PHASE 1: drill down to the innermost base object */
1476         for (;;) {
1477                 off_t base_offset;
1478                 int i;
1479                 struct delta_base_cache_entry *ent;
1480
1481                 ent = get_delta_base_cache_entry(p, curpos);
1482                 if (ent) {
1483                         type = ent->type;
1484                         data = ent->data;
1485                         size = ent->size;
1486                         detach_delta_base_cache_entry(ent);
1487                         base_from_cache = 1;
1488                         break;
1489                 }
1490
1491                 if (do_check_packed_object_crc && p->index_version > 1) {
1492                         struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1493                         off_t len = revidx[1].offset - obj_offset;
1494                         if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1495                                 const unsigned char *sha1 =
1496                                         nth_packed_object_sha1(p, revidx->nr);
1497                                 error("bad packed object CRC for %s",
1498                                       sha1_to_hex(sha1));
1499                                 mark_bad_packed_object(p, sha1);
1500                                 data = NULL;
1501                                 goto out;
1502                         }
1503                 }
1504
1505                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1506                 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1507                         break;
1508
1509                 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1510                 if (!base_offset) {
1511                         error("failed to validate delta base reference "
1512                               "at offset %"PRIuMAX" from %s",
1513                               (uintmax_t)curpos, p->pack_name);
1514                         /* bail to phase 2, in hopes of recovery */
1515                         data = NULL;
1516                         break;
1517                 }
1518
1519                 /* push object, proceed to base */
1520                 if (delta_stack_nr >= delta_stack_alloc
1521                     && delta_stack == small_delta_stack) {
1522                         delta_stack_alloc = alloc_nr(delta_stack_nr);
1523                         ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1524                         memcpy(delta_stack, small_delta_stack,
1525                                sizeof(*delta_stack)*delta_stack_nr);
1526                 } else {
1527                         ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1528                 }
1529                 i = delta_stack_nr++;
1530                 delta_stack[i].obj_offset = obj_offset;
1531                 delta_stack[i].curpos = curpos;
1532                 delta_stack[i].size = size;
1533
1534                 curpos = obj_offset = base_offset;
1535         }
1536
1537         /* PHASE 2: handle the base */
1538         switch (type) {
1539         case OBJ_OFS_DELTA:
1540         case OBJ_REF_DELTA:
1541                 if (data)
1542                         die("BUG: unpack_entry: left loop at a valid delta");
1543                 break;
1544         case OBJ_COMMIT:
1545         case OBJ_TREE:
1546         case OBJ_BLOB:
1547         case OBJ_TAG:
1548                 if (!base_from_cache)
1549                         data = unpack_compressed_entry(p, &w_curs, curpos, size);
1550                 break;
1551         default:
1552                 data = NULL;
1553                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1554                       type, (uintmax_t)obj_offset, p->pack_name);
1555         }
1556
1557         /* PHASE 3: apply deltas in order */
1558
1559         /* invariants:
1560          *   'data' holds the base data, or NULL if there was corruption
1561          */
1562         while (delta_stack_nr) {
1563                 void *delta_data;
1564                 void *base = data;
1565                 void *external_base = NULL;
1566                 unsigned long delta_size, base_size = size;
1567                 int i;
1568
1569                 data = NULL;
1570
1571                 if (base)
1572                         add_delta_base_cache(p, obj_offset, base, base_size, type);
1573
1574                 if (!base) {
1575                         /*
1576                          * We're probably in deep shit, but let's try to fetch
1577                          * the required base anyway from another pack or loose.
1578                          * This is costly but should happen only in the presence
1579                          * of a corrupted pack, and is better than failing outright.
1580                          */
1581                         struct revindex_entry *revidx;
1582                         const unsigned char *base_sha1;
1583                         revidx = find_pack_revindex(p, obj_offset);
1584                         if (revidx) {
1585                                 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1586                                 error("failed to read delta base object %s"
1587                                       " at offset %"PRIuMAX" from %s",
1588                                       sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1589                                       p->pack_name);
1590                                 mark_bad_packed_object(p, base_sha1);
1591                                 base = read_object(base_sha1, &type, &base_size);
1592                                 external_base = base;
1593                         }
1594                 }
1595
1596                 i = --delta_stack_nr;
1597                 obj_offset = delta_stack[i].obj_offset;
1598                 curpos = delta_stack[i].curpos;
1599                 delta_size = delta_stack[i].size;
1600
1601                 if (!base)
1602                         continue;
1603
1604                 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1605
1606                 if (!delta_data) {
1607                         error("failed to unpack compressed delta "
1608                               "at offset %"PRIuMAX" from %s",
1609                               (uintmax_t)curpos, p->pack_name);
1610                         data = NULL;
1611                         free(external_base);
1612                         continue;
1613                 }
1614
1615                 data = patch_delta(base, base_size,
1616                                    delta_data, delta_size,
1617                                    &size);
1618
1619                 /*
1620                  * We could not apply the delta; warn the user, but keep going.
1621                  * Our failure will be noticed either in the next iteration of
1622                  * the loop, or if this is the final delta, in the caller when
1623                  * we return NULL. Those code paths will take care of making
1624                  * a more explicit warning and retrying with another copy of
1625                  * the object.
1626                  */
1627                 if (!data)
1628                         error("failed to apply delta");
1629
1630                 free(delta_data);
1631                 free(external_base);
1632         }
1633
1634         if (final_type)
1635                 *final_type = type;
1636         if (final_size)
1637                 *final_size = size;
1638
1639 out:
1640         unuse_pack(&w_curs);
1641
1642         if (delta_stack != small_delta_stack)
1643                 free(delta_stack);
1644
1645         return data;
1646 }
1647
1648 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1649                                             uint32_t n)
1650 {
1651         const unsigned char *index = p->index_data;
1652         if (!index) {
1653                 if (open_pack_index(p))
1654                         return NULL;
1655                 index = p->index_data;
1656         }
1657         if (n >= p->num_objects)
1658                 return NULL;
1659         index += 4 * 256;
1660         if (p->index_version == 1) {
1661                 return index + 24 * n + 4;
1662         } else {
1663                 index += 8;
1664                 return index + 20 * n;
1665         }
1666 }
1667
1668 const struct object_id *nth_packed_object_oid(struct object_id *oid,
1669                                               struct packed_git *p,
1670                                               uint32_t n)
1671 {
1672         const unsigned char *hash = nth_packed_object_sha1(p, n);
1673         if (!hash)
1674                 return NULL;
1675         hashcpy(oid->hash, hash);
1676         return oid;
1677 }
1678
1679 void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1680 {
1681         const unsigned char *ptr = vptr;
1682         const unsigned char *start = p->index_data;
1683         const unsigned char *end = start + p->index_size;
1684         if (ptr < start)
1685                 die(_("offset before start of pack index for %s (corrupt index?)"),
1686                     p->pack_name);
1687         /* No need to check for underflow; .idx files must be at least 8 bytes */
1688         if (ptr >= end - 8)
1689                 die(_("offset beyond end of pack index for %s (truncated index?)"),
1690                     p->pack_name);
1691 }
1692
1693 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1694 {
1695         const unsigned char *index = p->index_data;
1696         index += 4 * 256;
1697         if (p->index_version == 1) {
1698                 return ntohl(*((uint32_t *)(index + 24 * n)));
1699         } else {
1700                 uint32_t off;
1701                 index += 8 + p->num_objects * (20 + 4);
1702                 off = ntohl(*((uint32_t *)(index + 4 * n)));
1703                 if (!(off & 0x80000000))
1704                         return off;
1705                 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1706                 check_pack_index_ptr(p, index);
1707                 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1708                                    ntohl(*((uint32_t *)(index + 4)));
1709         }
1710 }
1711
1712 off_t find_pack_entry_one(const unsigned char *sha1,
1713                                   struct packed_git *p)
1714 {
1715         const uint32_t *level1_ofs = p->index_data;
1716         const unsigned char *index = p->index_data;
1717         unsigned hi, lo, stride;
1718         static int debug_lookup = -1;
1719
1720         if (debug_lookup < 0)
1721                 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1722
1723         if (!index) {
1724                 if (open_pack_index(p))
1725                         return 0;
1726                 level1_ofs = p->index_data;
1727                 index = p->index_data;
1728         }
1729         if (p->index_version > 1) {
1730                 level1_ofs += 2;
1731                 index += 8;
1732         }
1733         index += 4 * 256;
1734         hi = ntohl(level1_ofs[*sha1]);
1735         lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1736         if (p->index_version > 1) {
1737                 stride = 20;
1738         } else {
1739                 stride = 24;
1740                 index += 4;
1741         }
1742
1743         if (debug_lookup)
1744                 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1745                        sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
1746
1747         while (lo < hi) {
1748                 unsigned mi = (lo + hi) / 2;
1749                 int cmp = hashcmp(index + mi * stride, sha1);
1750
1751                 if (debug_lookup)
1752                         printf("lo %u hi %u rg %u mi %u\n",
1753                                lo, hi, hi - lo, mi);
1754                 if (!cmp)
1755                         return nth_packed_object_offset(p, mi);
1756                 if (cmp > 0)
1757                         hi = mi;
1758                 else
1759                         lo = mi+1;
1760         }
1761         return 0;
1762 }
1763
1764 int is_pack_valid(struct packed_git *p)
1765 {
1766         /* An already open pack is known to be valid. */
1767         if (p->pack_fd != -1)
1768                 return 1;
1769
1770         /* If the pack has one window completely covering the
1771          * file size, the pack is known to be valid even if
1772          * the descriptor is not currently open.
1773          */
1774         if (p->windows) {
1775                 struct pack_window *w = p->windows;
1776
1777                 if (!w->offset && w->len == p->pack_size)
1778                         return 1;
1779         }
1780
1781         /* Force the pack to open to prove its valid. */
1782         return !open_packed_git(p);
1783 }
1784
1785 struct packed_git *find_sha1_pack(const unsigned char *sha1,
1786                                   struct packed_git *packs)
1787 {
1788         struct packed_git *p;
1789
1790         for (p = packs; p; p = p->next) {
1791                 if (find_pack_entry_one(sha1, p))
1792                         return p;
1793         }
1794         return NULL;
1795
1796 }
1797
1798 static int fill_pack_entry(const unsigned char *sha1,
1799                            struct pack_entry *e,
1800                            struct packed_git *p)
1801 {
1802         off_t offset;
1803
1804         if (p->num_bad_objects) {
1805                 unsigned i;
1806                 for (i = 0; i < p->num_bad_objects; i++)
1807                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1808                                 return 0;
1809         }
1810
1811         offset = find_pack_entry_one(sha1, p);
1812         if (!offset)
1813                 return 0;
1814
1815         /*
1816          * We are about to tell the caller where they can locate the
1817          * requested object.  We better make sure the packfile is
1818          * still here and can be accessed before supplying that
1819          * answer, as it may have been deleted since the index was
1820          * loaded!
1821          */
1822         if (!is_pack_valid(p))
1823                 return 0;
1824         e->offset = offset;
1825         e->p = p;
1826         hashcpy(e->sha1, sha1);
1827         return 1;
1828 }
1829
1830 /*
1831  * Iff a pack file contains the object named by sha1, return true and
1832  * store its location to e.
1833  */
1834 int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1835 {
1836         struct mru_entry *p;
1837
1838         prepare_packed_git();
1839         if (!packed_git)
1840                 return 0;
1841
1842         for (p = packed_git_mru->head; p; p = p->next) {
1843                 if (fill_pack_entry(sha1, e, p->item)) {
1844                         mru_mark(packed_git_mru, p);
1845                         return 1;
1846                 }
1847         }
1848         return 0;
1849 }
1850
1851 int has_sha1_pack(const unsigned char *sha1)
1852 {
1853         struct pack_entry e;
1854         return find_pack_entry(sha1, &e);
1855 }
1856
1857 int has_pack_index(const unsigned char *sha1)
1858 {
1859         struct stat st;
1860         if (stat(sha1_pack_index_name(sha1), &st))
1861                 return 0;
1862         return 1;
1863 }
1864
1865 static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1866 {
1867         uint32_t i;
1868         int r = 0;
1869
1870         for (i = 0; i < p->num_objects; i++) {
1871                 struct object_id oid;
1872
1873                 if (!nth_packed_object_oid(&oid, p, i))
1874                         return error("unable to get sha1 of object %u in %s",
1875                                      i, p->pack_name);
1876
1877                 r = cb(&oid, p, i, data);
1878                 if (r)
1879                         break;
1880         }
1881         return r;
1882 }
1883
1884 int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1885 {
1886         struct packed_git *p;
1887         int r = 0;
1888         int pack_errors = 0;
1889
1890         prepare_packed_git();
1891         for (p = packed_git; p; p = p->next) {
1892                 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1893                         continue;
1894                 if (open_pack_index(p)) {
1895                         pack_errors = 1;
1896                         continue;
1897                 }
1898                 r = for_each_object_in_pack(p, cb, data);
1899                 if (r)
1900                         break;
1901         }
1902         return r ? r : pack_errors;
1903 }