midx: write object id fanout chunk
[git] / packfile.c
1 #include "cache.h"
2 #include "list.h"
3 #include "pack.h"
4 #include "repository.h"
5 #include "dir.h"
6 #include "mergesort.h"
7 #include "packfile.h"
8 #include "delta.h"
9 #include "list.h"
10 #include "streaming.h"
11 #include "sha1-lookup.h"
12 #include "commit.h"
13 #include "object.h"
14 #include "tag.h"
15 #include "tree-walk.h"
16 #include "tree.h"
17 #include "object-store.h"
18
19 char *odb_pack_name(struct strbuf *buf,
20                     const unsigned char *sha1,
21                     const char *ext)
22 {
23         strbuf_reset(buf);
24         strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
25                     sha1_to_hex(sha1), ext);
26         return buf->buf;
27 }
28
29 char *sha1_pack_name(const unsigned char *sha1)
30 {
31         static struct strbuf buf = STRBUF_INIT;
32         return odb_pack_name(&buf, sha1, "pack");
33 }
34
35 char *sha1_pack_index_name(const unsigned char *sha1)
36 {
37         static struct strbuf buf = STRBUF_INIT;
38         return odb_pack_name(&buf, sha1, "idx");
39 }
40
41 static unsigned int pack_used_ctr;
42 static unsigned int pack_mmap_calls;
43 static unsigned int peak_pack_open_windows;
44 static unsigned int pack_open_windows;
45 static unsigned int pack_open_fds;
46 static unsigned int pack_max_fds;
47 static size_t peak_pack_mapped;
48 static size_t pack_mapped;
49
50 #define SZ_FMT PRIuMAX
51 static inline uintmax_t sz_fmt(size_t s) { return s; }
52
53 void pack_report(void)
54 {
55         fprintf(stderr,
56                 "pack_report: getpagesize()            = %10" SZ_FMT "\n"
57                 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
58                 "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
59                 sz_fmt(getpagesize()),
60                 sz_fmt(packed_git_window_size),
61                 sz_fmt(packed_git_limit));
62         fprintf(stderr,
63                 "pack_report: pack_used_ctr            = %10u\n"
64                 "pack_report: pack_mmap_calls          = %10u\n"
65                 "pack_report: pack_open_windows        = %10u / %10u\n"
66                 "pack_report: pack_mapped              = "
67                         "%10" SZ_FMT " / %10" SZ_FMT "\n",
68                 pack_used_ctr,
69                 pack_mmap_calls,
70                 pack_open_windows, peak_pack_open_windows,
71                 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
72 }
73
74 /*
75  * Open and mmap the index file at path, perform a couple of
76  * consistency checks, then record its information to p.  Return 0 on
77  * success.
78  */
79 static int check_packed_git_idx(const char *path, struct packed_git *p)
80 {
81         void *idx_map;
82         struct pack_idx_header *hdr;
83         size_t idx_size;
84         uint32_t version, nr, i, *index;
85         int fd = git_open(path);
86         struct stat st;
87         const unsigned int hashsz = the_hash_algo->rawsz;
88
89         if (fd < 0)
90                 return -1;
91         if (fstat(fd, &st)) {
92                 close(fd);
93                 return -1;
94         }
95         idx_size = xsize_t(st.st_size);
96         if (idx_size < 4 * 256 + hashsz + hashsz) {
97                 close(fd);
98                 return error("index file %s is too small", path);
99         }
100         idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
101         close(fd);
102
103         hdr = idx_map;
104         if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
105                 version = ntohl(hdr->idx_version);
106                 if (version < 2 || version > 2) {
107                         munmap(idx_map, idx_size);
108                         return error("index file %s is version %"PRIu32
109                                      " and is not supported by this binary"
110                                      " (try upgrading GIT to a newer version)",
111                                      path, version);
112                 }
113         } else
114                 version = 1;
115
116         nr = 0;
117         index = idx_map;
118         if (version > 1)
119                 index += 2;  /* skip index header */
120         for (i = 0; i < 256; i++) {
121                 uint32_t n = ntohl(index[i]);
122                 if (n < nr) {
123                         munmap(idx_map, idx_size);
124                         return error("non-monotonic index %s", path);
125                 }
126                 nr = n;
127         }
128
129         if (version == 1) {
130                 /*
131                  * Total size:
132                  *  - 256 index entries 4 bytes each
133                  *  - 24-byte entries * nr (object ID + 4-byte offset)
134                  *  - hash of the packfile
135                  *  - file checksum
136                  */
137                 if (idx_size != 4*256 + nr * (hashsz + 4) + hashsz + hashsz) {
138                         munmap(idx_map, idx_size);
139                         return error("wrong index v1 file size in %s", path);
140                 }
141         } else if (version == 2) {
142                 /*
143                  * Minimum size:
144                  *  - 8 bytes of header
145                  *  - 256 index entries 4 bytes each
146                  *  - object ID entry * nr
147                  *  - 4-byte crc entry * nr
148                  *  - 4-byte offset entry * nr
149                  *  - hash of the packfile
150                  *  - file checksum
151                  * And after the 4-byte offset table might be a
152                  * variable sized table containing 8-byte entries
153                  * for offsets larger than 2^31.
154                  */
155                 unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz;
156                 unsigned long max_size = min_size;
157                 if (nr)
158                         max_size += (nr - 1)*8;
159                 if (idx_size < min_size || idx_size > max_size) {
160                         munmap(idx_map, idx_size);
161                         return error("wrong index v2 file size in %s", path);
162                 }
163                 if (idx_size != min_size &&
164                     /*
165                      * make sure we can deal with large pack offsets.
166                      * 31-bit signed offset won't be enough, neither
167                      * 32-bit unsigned one will be.
168                      */
169                     (sizeof(off_t) <= 4)) {
170                         munmap(idx_map, idx_size);
171                         return error("pack too large for current definition of off_t in %s", path);
172                 }
173         }
174
175         p->index_version = version;
176         p->index_data = idx_map;
177         p->index_size = idx_size;
178         p->num_objects = nr;
179         return 0;
180 }
181
182 int open_pack_index(struct packed_git *p)
183 {
184         char *idx_name;
185         size_t len;
186         int ret;
187
188         if (p->index_data)
189                 return 0;
190
191         if (!strip_suffix(p->pack_name, ".pack", &len))
192                 BUG("pack_name does not end in .pack");
193         idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
194         ret = check_packed_git_idx(idx_name, p);
195         free(idx_name);
196         return ret;
197 }
198
199 uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
200 {
201         const uint32_t *level1_ofs = p->index_data;
202
203         if (!level1_ofs) {
204                 if (open_pack_index(p))
205                         return 0;
206                 level1_ofs = p->index_data;
207         }
208
209         if (p->index_version > 1) {
210                 level1_ofs += 2;
211         }
212
213         return ntohl(level1_ofs[value]);
214 }
215
216 static struct packed_git *alloc_packed_git(int extra)
217 {
218         struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
219         memset(p, 0, sizeof(*p));
220         p->pack_fd = -1;
221         return p;
222 }
223
224 struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
225 {
226         const char *path = sha1_pack_name(sha1);
227         size_t alloc = st_add(strlen(path), 1);
228         struct packed_git *p = alloc_packed_git(alloc);
229
230         memcpy(p->pack_name, path, alloc); /* includes NUL */
231         hashcpy(p->sha1, sha1);
232         if (check_packed_git_idx(idx_path, p)) {
233                 free(p);
234                 return NULL;
235         }
236
237         return p;
238 }
239
240 static void scan_windows(struct packed_git *p,
241         struct packed_git **lru_p,
242         struct pack_window **lru_w,
243         struct pack_window **lru_l)
244 {
245         struct pack_window *w, *w_l;
246
247         for (w_l = NULL, w = p->windows; w; w = w->next) {
248                 if (!w->inuse_cnt) {
249                         if (!*lru_w || w->last_used < (*lru_w)->last_used) {
250                                 *lru_p = p;
251                                 *lru_w = w;
252                                 *lru_l = w_l;
253                         }
254                 }
255                 w_l = w;
256         }
257 }
258
259 static int unuse_one_window(struct packed_git *current)
260 {
261         struct packed_git *p, *lru_p = NULL;
262         struct pack_window *lru_w = NULL, *lru_l = NULL;
263
264         if (current)
265                 scan_windows(current, &lru_p, &lru_w, &lru_l);
266         for (p = the_repository->objects->packed_git; p; p = p->next)
267                 scan_windows(p, &lru_p, &lru_w, &lru_l);
268         if (lru_p) {
269                 munmap(lru_w->base, lru_w->len);
270                 pack_mapped -= lru_w->len;
271                 if (lru_l)
272                         lru_l->next = lru_w->next;
273                 else
274                         lru_p->windows = lru_w->next;
275                 free(lru_w);
276                 pack_open_windows--;
277                 return 1;
278         }
279         return 0;
280 }
281
282 void release_pack_memory(size_t need)
283 {
284         size_t cur = pack_mapped;
285         while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
286                 ; /* nothing */
287 }
288
289 void close_pack_windows(struct packed_git *p)
290 {
291         while (p->windows) {
292                 struct pack_window *w = p->windows;
293
294                 if (w->inuse_cnt)
295                         die("pack '%s' still has open windows to it",
296                             p->pack_name);
297                 munmap(w->base, w->len);
298                 pack_mapped -= w->len;
299                 pack_open_windows--;
300                 p->windows = w->next;
301                 free(w);
302         }
303 }
304
305 static int close_pack_fd(struct packed_git *p)
306 {
307         if (p->pack_fd < 0)
308                 return 0;
309
310         close(p->pack_fd);
311         pack_open_fds--;
312         p->pack_fd = -1;
313
314         return 1;
315 }
316
317 void close_pack_index(struct packed_git *p)
318 {
319         if (p->index_data) {
320                 munmap((void *)p->index_data, p->index_size);
321                 p->index_data = NULL;
322         }
323 }
324
325 void close_pack(struct packed_git *p)
326 {
327         close_pack_windows(p);
328         close_pack_fd(p);
329         close_pack_index(p);
330 }
331
332 void close_all_packs(struct raw_object_store *o)
333 {
334         struct packed_git *p;
335
336         for (p = o->packed_git; p; p = p->next)
337                 if (p->do_not_close)
338                         BUG("want to close pack marked 'do-not-close'");
339                 else
340                         close_pack(p);
341 }
342
343 /*
344  * The LRU pack is the one with the oldest MRU window, preferring packs
345  * with no used windows, or the oldest mtime if it has no windows allocated.
346  */
347 static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
348 {
349         struct pack_window *w, *this_mru_w;
350         int has_windows_inuse = 0;
351
352         /*
353          * Reject this pack if it has windows and the previously selected
354          * one does not.  If this pack does not have windows, reject
355          * it if the pack file is newer than the previously selected one.
356          */
357         if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
358                 return;
359
360         for (w = this_mru_w = p->windows; w; w = w->next) {
361                 /*
362                  * Reject this pack if any of its windows are in use,
363                  * but the previously selected pack did not have any
364                  * inuse windows.  Otherwise, record that this pack
365                  * has windows in use.
366                  */
367                 if (w->inuse_cnt) {
368                         if (*accept_windows_inuse)
369                                 has_windows_inuse = 1;
370                         else
371                                 return;
372                 }
373
374                 if (w->last_used > this_mru_w->last_used)
375                         this_mru_w = w;
376
377                 /*
378                  * Reject this pack if it has windows that have been
379                  * used more recently than the previously selected pack.
380                  * If the previously selected pack had windows inuse and
381                  * we have not encountered a window in this pack that is
382                  * inuse, skip this check since we prefer a pack with no
383                  * inuse windows to one that has inuse windows.
384                  */
385                 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
386                     this_mru_w->last_used > (*mru_w)->last_used)
387                         return;
388         }
389
390         /*
391          * Select this pack.
392          */
393         *mru_w = this_mru_w;
394         *lru_p = p;
395         *accept_windows_inuse = has_windows_inuse;
396 }
397
398 static int close_one_pack(void)
399 {
400         struct packed_git *p, *lru_p = NULL;
401         struct pack_window *mru_w = NULL;
402         int accept_windows_inuse = 1;
403
404         for (p = the_repository->objects->packed_git; p; p = p->next) {
405                 if (p->pack_fd == -1)
406                         continue;
407                 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
408         }
409
410         if (lru_p)
411                 return close_pack_fd(lru_p);
412
413         return 0;
414 }
415
416 static unsigned int get_max_fd_limit(void)
417 {
418 #ifdef RLIMIT_NOFILE
419         {
420                 struct rlimit lim;
421
422                 if (!getrlimit(RLIMIT_NOFILE, &lim))
423                         return lim.rlim_cur;
424         }
425 #endif
426
427 #ifdef _SC_OPEN_MAX
428         {
429                 long open_max = sysconf(_SC_OPEN_MAX);
430                 if (0 < open_max)
431                         return open_max;
432                 /*
433                  * Otherwise, we got -1 for one of the two
434                  * reasons:
435                  *
436                  * (1) sysconf() did not understand _SC_OPEN_MAX
437                  *     and signaled an error with -1; or
438                  * (2) sysconf() said there is no limit.
439                  *
440                  * We _could_ clear errno before calling sysconf() to
441                  * tell these two cases apart and return a huge number
442                  * in the latter case to let the caller cap it to a
443                  * value that is not so selfish, but letting the
444                  * fallback OPEN_MAX codepath take care of these cases
445                  * is a lot simpler.
446                  */
447         }
448 #endif
449
450 #ifdef OPEN_MAX
451         return OPEN_MAX;
452 #else
453         return 1; /* see the caller ;-) */
454 #endif
455 }
456
457 /*
458  * Do not call this directly as this leaks p->pack_fd on error return;
459  * call open_packed_git() instead.
460  */
461 static int open_packed_git_1(struct packed_git *p)
462 {
463         struct stat st;
464         struct pack_header hdr;
465         unsigned char hash[GIT_MAX_RAWSZ];
466         unsigned char *idx_hash;
467         long fd_flag;
468         ssize_t read_result;
469         const unsigned hashsz = the_hash_algo->rawsz;
470
471         if (!p->index_data && open_pack_index(p))
472                 return error("packfile %s index unavailable", p->pack_name);
473
474         if (!pack_max_fds) {
475                 unsigned int max_fds = get_max_fd_limit();
476
477                 /* Save 3 for stdin/stdout/stderr, 22 for work */
478                 if (25 < max_fds)
479                         pack_max_fds = max_fds - 25;
480                 else
481                         pack_max_fds = 1;
482         }
483
484         while (pack_max_fds <= pack_open_fds && close_one_pack())
485                 ; /* nothing */
486
487         p->pack_fd = git_open(p->pack_name);
488         if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
489                 return -1;
490         pack_open_fds++;
491
492         /* If we created the struct before we had the pack we lack size. */
493         if (!p->pack_size) {
494                 if (!S_ISREG(st.st_mode))
495                         return error("packfile %s not a regular file", p->pack_name);
496                 p->pack_size = st.st_size;
497         } else if (p->pack_size != st.st_size)
498                 return error("packfile %s size changed", p->pack_name);
499
500         /* We leave these file descriptors open with sliding mmap;
501          * there is no point keeping them open across exec(), though.
502          */
503         fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
504         if (fd_flag < 0)
505                 return error("cannot determine file descriptor flags");
506         fd_flag |= FD_CLOEXEC;
507         if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
508                 return error("cannot set FD_CLOEXEC");
509
510         /* Verify we recognize this pack file format. */
511         read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
512         if (read_result < 0)
513                 return error_errno("error reading from %s", p->pack_name);
514         if (read_result != sizeof(hdr))
515                 return error("file %s is far too short to be a packfile", p->pack_name);
516         if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
517                 return error("file %s is not a GIT packfile", p->pack_name);
518         if (!pack_version_ok(hdr.hdr_version))
519                 return error("packfile %s is version %"PRIu32" and not"
520                         " supported (try upgrading GIT to a newer version)",
521                         p->pack_name, ntohl(hdr.hdr_version));
522
523         /* Verify the pack matches its index. */
524         if (p->num_objects != ntohl(hdr.hdr_entries))
525                 return error("packfile %s claims to have %"PRIu32" objects"
526                              " while index indicates %"PRIu32" objects",
527                              p->pack_name, ntohl(hdr.hdr_entries),
528                              p->num_objects);
529         if (lseek(p->pack_fd, p->pack_size - hashsz, SEEK_SET) == -1)
530                 return error("end of packfile %s is unavailable", p->pack_name);
531         read_result = read_in_full(p->pack_fd, hash, hashsz);
532         if (read_result < 0)
533                 return error_errno("error reading from %s", p->pack_name);
534         if (read_result != hashsz)
535                 return error("packfile %s signature is unavailable", p->pack_name);
536         idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
537         if (hashcmp(hash, idx_hash))
538                 return error("packfile %s does not match index", p->pack_name);
539         return 0;
540 }
541
542 static int open_packed_git(struct packed_git *p)
543 {
544         if (!open_packed_git_1(p))
545                 return 0;
546         close_pack_fd(p);
547         return -1;
548 }
549
550 static int in_window(struct pack_window *win, off_t offset)
551 {
552         /* We must promise at least one full hash after the
553          * offset is available from this window, otherwise the offset
554          * is not actually in this window and a different window (which
555          * has that one hash excess) must be used.  This is to support
556          * the object header and delta base parsing routines below.
557          */
558         off_t win_off = win->offset;
559         return win_off <= offset
560                 && (offset + the_hash_algo->rawsz) <= (win_off + win->len);
561 }
562
563 unsigned char *use_pack(struct packed_git *p,
564                 struct pack_window **w_cursor,
565                 off_t offset,
566                 unsigned long *left)
567 {
568         struct pack_window *win = *w_cursor;
569
570         /* Since packfiles end in a hash of their content and it's
571          * pointless to ask for an offset into the middle of that
572          * hash, and the in_window function above wouldn't match
573          * don't allow an offset too close to the end of the file.
574          */
575         if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
576                 die("packfile %s cannot be accessed", p->pack_name);
577         if (offset > (p->pack_size - the_hash_algo->rawsz))
578                 die("offset beyond end of packfile (truncated pack?)");
579         if (offset < 0)
580                 die(_("offset before end of packfile (broken .idx?)"));
581
582         if (!win || !in_window(win, offset)) {
583                 if (win)
584                         win->inuse_cnt--;
585                 for (win = p->windows; win; win = win->next) {
586                         if (in_window(win, offset))
587                                 break;
588                 }
589                 if (!win) {
590                         size_t window_align = packed_git_window_size / 2;
591                         off_t len;
592
593                         if (p->pack_fd == -1 && open_packed_git(p))
594                                 die("packfile %s cannot be accessed", p->pack_name);
595
596                         win = xcalloc(1, sizeof(*win));
597                         win->offset = (offset / window_align) * window_align;
598                         len = p->pack_size - win->offset;
599                         if (len > packed_git_window_size)
600                                 len = packed_git_window_size;
601                         win->len = (size_t)len;
602                         pack_mapped += win->len;
603                         while (packed_git_limit < pack_mapped
604                                 && unuse_one_window(p))
605                                 ; /* nothing */
606                         win->base = xmmap(NULL, win->len,
607                                 PROT_READ, MAP_PRIVATE,
608                                 p->pack_fd, win->offset);
609                         if (win->base == MAP_FAILED)
610                                 die_errno("packfile %s cannot be mapped",
611                                           p->pack_name);
612                         if (!win->offset && win->len == p->pack_size
613                                 && !p->do_not_close)
614                                 close_pack_fd(p);
615                         pack_mmap_calls++;
616                         pack_open_windows++;
617                         if (pack_mapped > peak_pack_mapped)
618                                 peak_pack_mapped = pack_mapped;
619                         if (pack_open_windows > peak_pack_open_windows)
620                                 peak_pack_open_windows = pack_open_windows;
621                         win->next = p->windows;
622                         p->windows = win;
623                 }
624         }
625         if (win != *w_cursor) {
626                 win->last_used = pack_used_ctr++;
627                 win->inuse_cnt++;
628                 *w_cursor = win;
629         }
630         offset -= win->offset;
631         if (left)
632                 *left = win->len - xsize_t(offset);
633         return win->base + offset;
634 }
635
636 void unuse_pack(struct pack_window **w_cursor)
637 {
638         struct pack_window *w = *w_cursor;
639         if (w) {
640                 w->inuse_cnt--;
641                 *w_cursor = NULL;
642         }
643 }
644
645 static void try_to_free_pack_memory(size_t size)
646 {
647         release_pack_memory(size);
648 }
649
650 struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
651 {
652         static int have_set_try_to_free_routine;
653         struct stat st;
654         size_t alloc;
655         struct packed_git *p;
656
657         if (!have_set_try_to_free_routine) {
658                 have_set_try_to_free_routine = 1;
659                 set_try_to_free_routine(try_to_free_pack_memory);
660         }
661
662         /*
663          * Make sure a corresponding .pack file exists and that
664          * the index looks sane.
665          */
666         if (!strip_suffix_mem(path, &path_len, ".idx"))
667                 return NULL;
668
669         /*
670          * ".promisor" is long enough to hold any suffix we're adding (and
671          * the use xsnprintf double-checks that)
672          */
673         alloc = st_add3(path_len, strlen(".promisor"), 1);
674         p = alloc_packed_git(alloc);
675         memcpy(p->pack_name, path, path_len);
676
677         xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
678         if (!access(p->pack_name, F_OK))
679                 p->pack_keep = 1;
680
681         xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
682         if (!access(p->pack_name, F_OK))
683                 p->pack_promisor = 1;
684
685         xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
686         if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
687                 free(p);
688                 return NULL;
689         }
690
691         /* ok, it looks sane as far as we can check without
692          * actually mapping the pack file.
693          */
694         p->pack_size = st.st_size;
695         p->pack_local = local;
696         p->mtime = st.st_mtime;
697         if (path_len < the_hash_algo->hexsz ||
698             get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->sha1))
699                 hashclr(p->sha1);
700         return p;
701 }
702
703 void install_packed_git(struct repository *r, struct packed_git *pack)
704 {
705         if (pack->pack_fd != -1)
706                 pack_open_fds++;
707
708         pack->next = r->objects->packed_git;
709         r->objects->packed_git = pack;
710 }
711
712 void (*report_garbage)(unsigned seen_bits, const char *path);
713
714 static void report_helper(const struct string_list *list,
715                           int seen_bits, int first, int last)
716 {
717         if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
718                 return;
719
720         for (; first < last; first++)
721                 report_garbage(seen_bits, list->items[first].string);
722 }
723
724 static void report_pack_garbage(struct string_list *list)
725 {
726         int i, baselen = -1, first = 0, seen_bits = 0;
727
728         if (!report_garbage)
729                 return;
730
731         string_list_sort(list);
732
733         for (i = 0; i < list->nr; i++) {
734                 const char *path = list->items[i].string;
735                 if (baselen != -1 &&
736                     strncmp(path, list->items[first].string, baselen)) {
737                         report_helper(list, seen_bits, first, i);
738                         baselen = -1;
739                         seen_bits = 0;
740                 }
741                 if (baselen == -1) {
742                         const char *dot = strrchr(path, '.');
743                         if (!dot) {
744                                 report_garbage(PACKDIR_FILE_GARBAGE, path);
745                                 continue;
746                         }
747                         baselen = dot - path + 1;
748                         first = i;
749                 }
750                 if (!strcmp(path + baselen, "pack"))
751                         seen_bits |= 1;
752                 else if (!strcmp(path + baselen, "idx"))
753                         seen_bits |= 2;
754         }
755         report_helper(list, seen_bits, first, list->nr);
756 }
757
758 void for_each_file_in_pack_dir(const char *objdir,
759                                each_file_in_pack_dir_fn fn,
760                                void *data)
761 {
762         struct strbuf path = STRBUF_INIT;
763         size_t dirnamelen;
764         DIR *dir;
765         struct dirent *de;
766
767         strbuf_addstr(&path, objdir);
768         strbuf_addstr(&path, "/pack");
769         dir = opendir(path.buf);
770         if (!dir) {
771                 if (errno != ENOENT)
772                         error_errno("unable to open object pack directory: %s",
773                                     path.buf);
774                 strbuf_release(&path);
775                 return;
776         }
777         strbuf_addch(&path, '/');
778         dirnamelen = path.len;
779         while ((de = readdir(dir)) != NULL) {
780                 if (is_dot_or_dotdot(de->d_name))
781                         continue;
782
783                 strbuf_setlen(&path, dirnamelen);
784                 strbuf_addstr(&path, de->d_name);
785
786                 fn(path.buf, path.len, de->d_name, data);
787         }
788
789         closedir(dir);
790         strbuf_release(&path);
791 }
792
793 struct prepare_pack_data {
794         struct repository *r;
795         struct string_list *garbage;
796         int local;
797 };
798
799 static void prepare_pack(const char *full_name, size_t full_name_len,
800                          const char *file_name, void *_data)
801 {
802         struct prepare_pack_data *data = (struct prepare_pack_data *)_data;
803         struct packed_git *p;
804         size_t base_len = full_name_len;
805
806         if (strip_suffix_mem(full_name, &base_len, ".idx")) {
807                 /* Don't reopen a pack we already have. */
808                 for (p = data->r->objects->packed_git; p; p = p->next) {
809                         size_t len;
810                         if (strip_suffix(p->pack_name, ".pack", &len) &&
811                             len == base_len &&
812                             !memcmp(p->pack_name, full_name, len))
813                                 break;
814                 }
815
816                 if (!p) {
817                         p = add_packed_git(full_name, full_name_len, data->local);
818                         if (p)
819                                 install_packed_git(data->r, p);
820                 }
821         }
822
823         if (!report_garbage)
824                 return;
825
826         if (ends_with(file_name, ".idx") ||
827             ends_with(file_name, ".pack") ||
828             ends_with(file_name, ".bitmap") ||
829             ends_with(file_name, ".keep") ||
830             ends_with(file_name, ".promisor"))
831                 string_list_append(data->garbage, full_name);
832         else
833                 report_garbage(PACKDIR_FILE_GARBAGE, full_name);
834 }
835
836 static void prepare_packed_git_one(struct repository *r, char *objdir, int local)
837 {
838         struct prepare_pack_data data;
839         struct string_list garbage = STRING_LIST_INIT_DUP;
840
841         data.r = r;
842         data.garbage = &garbage;
843         data.local = local;
844
845         for_each_file_in_pack_dir(objdir, prepare_pack, &data);
846
847         report_pack_garbage(data.garbage);
848         string_list_clear(data.garbage, 0);
849 }
850
851 static void prepare_packed_git(struct repository *r);
852 /*
853  * Give a fast, rough count of the number of objects in the repository. This
854  * ignores loose objects completely. If you have a lot of them, then either
855  * you should repack because your performance will be awful, or they are
856  * all unreachable objects about to be pruned, in which case they're not really
857  * interesting as a measure of repo size in the first place.
858  */
859 unsigned long approximate_object_count(void)
860 {
861         if (!the_repository->objects->approximate_object_count_valid) {
862                 unsigned long count;
863                 struct packed_git *p;
864
865                 prepare_packed_git(the_repository);
866                 count = 0;
867                 for (p = the_repository->objects->packed_git; p; p = p->next) {
868                         if (open_pack_index(p))
869                                 continue;
870                         count += p->num_objects;
871                 }
872                 the_repository->objects->approximate_object_count = count;
873         }
874         return the_repository->objects->approximate_object_count;
875 }
876
877 static void *get_next_packed_git(const void *p)
878 {
879         return ((const struct packed_git *)p)->next;
880 }
881
882 static void set_next_packed_git(void *p, void *next)
883 {
884         ((struct packed_git *)p)->next = next;
885 }
886
887 static int sort_pack(const void *a_, const void *b_)
888 {
889         const struct packed_git *a = a_;
890         const struct packed_git *b = b_;
891         int st;
892
893         /*
894          * Local packs tend to contain objects specific to our
895          * variant of the project than remote ones.  In addition,
896          * remote ones could be on a network mounted filesystem.
897          * Favor local ones for these reasons.
898          */
899         st = a->pack_local - b->pack_local;
900         if (st)
901                 return -st;
902
903         /*
904          * Younger packs tend to contain more recent objects,
905          * and more recent objects tend to get accessed more
906          * often.
907          */
908         if (a->mtime < b->mtime)
909                 return 1;
910         else if (a->mtime == b->mtime)
911                 return 0;
912         return -1;
913 }
914
915 static void rearrange_packed_git(struct repository *r)
916 {
917         r->objects->packed_git = llist_mergesort(
918                 r->objects->packed_git, get_next_packed_git,
919                 set_next_packed_git, sort_pack);
920 }
921
922 static void prepare_packed_git_mru(struct repository *r)
923 {
924         struct packed_git *p;
925
926         INIT_LIST_HEAD(&r->objects->packed_git_mru);
927
928         for (p = r->objects->packed_git; p; p = p->next)
929                 list_add_tail(&p->mru, &r->objects->packed_git_mru);
930 }
931
932 static void prepare_packed_git(struct repository *r)
933 {
934         struct alternate_object_database *alt;
935
936         if (r->objects->packed_git_initialized)
937                 return;
938         prepare_packed_git_one(r, r->objects->objectdir, 1);
939         prepare_alt_odb(r);
940         for (alt = r->objects->alt_odb_list; alt; alt = alt->next)
941                 prepare_packed_git_one(r, alt->path, 0);
942         rearrange_packed_git(r);
943         prepare_packed_git_mru(r);
944         r->objects->packed_git_initialized = 1;
945 }
946
947 void reprepare_packed_git(struct repository *r)
948 {
949         r->objects->approximate_object_count_valid = 0;
950         r->objects->packed_git_initialized = 0;
951         prepare_packed_git(r);
952 }
953
954 struct packed_git *get_packed_git(struct repository *r)
955 {
956         prepare_packed_git(r);
957         return r->objects->packed_git;
958 }
959
960 struct list_head *get_packed_git_mru(struct repository *r)
961 {
962         prepare_packed_git(r);
963         return &r->objects->packed_git_mru;
964 }
965
966 unsigned long unpack_object_header_buffer(const unsigned char *buf,
967                 unsigned long len, enum object_type *type, unsigned long *sizep)
968 {
969         unsigned shift;
970         unsigned long size, c;
971         unsigned long used = 0;
972
973         c = buf[used++];
974         *type = (c >> 4) & 7;
975         size = c & 15;
976         shift = 4;
977         while (c & 0x80) {
978                 if (len <= used || bitsizeof(long) <= shift) {
979                         error("bad object header");
980                         size = used = 0;
981                         break;
982                 }
983                 c = buf[used++];
984                 size += (c & 0x7f) << shift;
985                 shift += 7;
986         }
987         *sizep = size;
988         return used;
989 }
990
991 unsigned long get_size_from_delta(struct packed_git *p,
992                                   struct pack_window **w_curs,
993                                   off_t curpos)
994 {
995         const unsigned char *data;
996         unsigned char delta_head[20], *in;
997         git_zstream stream;
998         int st;
999
1000         memset(&stream, 0, sizeof(stream));
1001         stream.next_out = delta_head;
1002         stream.avail_out = sizeof(delta_head);
1003
1004         git_inflate_init(&stream);
1005         do {
1006                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1007                 stream.next_in = in;
1008                 st = git_inflate(&stream, Z_FINISH);
1009                 curpos += stream.next_in - in;
1010         } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1011                  stream.total_out < sizeof(delta_head));
1012         git_inflate_end(&stream);
1013         if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1014                 error("delta data unpack-initial failed");
1015                 return 0;
1016         }
1017
1018         /* Examine the initial part of the delta to figure out
1019          * the result size.
1020          */
1021         data = delta_head;
1022
1023         /* ignore base size */
1024         get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1025
1026         /* Read the result size */
1027         return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1028 }
1029
1030 int unpack_object_header(struct packed_git *p,
1031                          struct pack_window **w_curs,
1032                          off_t *curpos,
1033                          unsigned long *sizep)
1034 {
1035         unsigned char *base;
1036         unsigned long left;
1037         unsigned long used;
1038         enum object_type type;
1039
1040         /* use_pack() assures us we have [base, base + 20) available
1041          * as a range that we can look at.  (Its actually the hash
1042          * size that is assured.)  With our object header encoding
1043          * the maximum deflated object size is 2^137, which is just
1044          * insane, so we know won't exceed what we have been given.
1045          */
1046         base = use_pack(p, w_curs, *curpos, &left);
1047         used = unpack_object_header_buffer(base, left, &type, sizep);
1048         if (!used) {
1049                 type = OBJ_BAD;
1050         } else
1051                 *curpos += used;
1052
1053         return type;
1054 }
1055
1056 void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
1057 {
1058         unsigned i;
1059         for (i = 0; i < p->num_bad_objects; i++)
1060                 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
1061                         return;
1062         p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
1063                                       st_mult(GIT_MAX_RAWSZ,
1064                                               st_add(p->num_bad_objects, 1)));
1065         hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
1066         p->num_bad_objects++;
1067 }
1068
1069 const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1070 {
1071         struct packed_git *p;
1072         unsigned i;
1073
1074         for (p = the_repository->objects->packed_git; p; p = p->next)
1075                 for (i = 0; i < p->num_bad_objects; i++)
1076                         if (!hashcmp(sha1,
1077                                      p->bad_object_sha1 + the_hash_algo->rawsz * i))
1078                                 return p;
1079         return NULL;
1080 }
1081
1082 static off_t get_delta_base(struct packed_git *p,
1083                                     struct pack_window **w_curs,
1084                                     off_t *curpos,
1085                                     enum object_type type,
1086                                     off_t delta_obj_offset)
1087 {
1088         unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1089         off_t base_offset;
1090
1091         /* use_pack() assured us we have [base_info, base_info + 20)
1092          * as a range that we can look at without walking off the
1093          * end of the mapped window.  Its actually the hash size
1094          * that is assured.  An OFS_DELTA longer than the hash size
1095          * is stupid, as then a REF_DELTA would be smaller to store.
1096          */
1097         if (type == OBJ_OFS_DELTA) {
1098                 unsigned used = 0;
1099                 unsigned char c = base_info[used++];
1100                 base_offset = c & 127;
1101                 while (c & 128) {
1102                         base_offset += 1;
1103                         if (!base_offset || MSB(base_offset, 7))
1104                                 return 0;  /* overflow */
1105                         c = base_info[used++];
1106                         base_offset = (base_offset << 7) + (c & 127);
1107                 }
1108                 base_offset = delta_obj_offset - base_offset;
1109                 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1110                         return 0;  /* out of bound */
1111                 *curpos += used;
1112         } else if (type == OBJ_REF_DELTA) {
1113                 /* The base entry _must_ be in the same pack */
1114                 base_offset = find_pack_entry_one(base_info, p);
1115                 *curpos += the_hash_algo->rawsz;
1116         } else
1117                 die("I am totally screwed");
1118         return base_offset;
1119 }
1120
1121 /*
1122  * Like get_delta_base above, but we return the sha1 instead of the pack
1123  * offset. This means it is cheaper for REF deltas (we do not have to do
1124  * the final object lookup), but more expensive for OFS deltas (we
1125  * have to load the revidx to convert the offset back into a sha1).
1126  */
1127 static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1128                                                 struct pack_window **w_curs,
1129                                                 off_t curpos,
1130                                                 enum object_type type,
1131                                                 off_t delta_obj_offset)
1132 {
1133         if (type == OBJ_REF_DELTA) {
1134                 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1135                 return base;
1136         } else if (type == OBJ_OFS_DELTA) {
1137                 struct revindex_entry *revidx;
1138                 off_t base_offset = get_delta_base(p, w_curs, &curpos,
1139                                                    type, delta_obj_offset);
1140
1141                 if (!base_offset)
1142                         return NULL;
1143
1144                 revidx = find_pack_revindex(p, base_offset);
1145                 if (!revidx)
1146                         return NULL;
1147
1148                 return nth_packed_object_sha1(p, revidx->nr);
1149         } else
1150                 return NULL;
1151 }
1152
1153 static int retry_bad_packed_offset(struct repository *r,
1154                                    struct packed_git *p,
1155                                    off_t obj_offset)
1156 {
1157         int type;
1158         struct revindex_entry *revidx;
1159         struct object_id oid;
1160         revidx = find_pack_revindex(p, obj_offset);
1161         if (!revidx)
1162                 return OBJ_BAD;
1163         nth_packed_object_oid(&oid, p, revidx->nr);
1164         mark_bad_packed_object(p, oid.hash);
1165         type = oid_object_info(r, &oid, NULL);
1166         if (type <= OBJ_NONE)
1167                 return OBJ_BAD;
1168         return type;
1169 }
1170
1171 #define POI_STACK_PREALLOC 64
1172
1173 static enum object_type packed_to_object_type(struct repository *r,
1174                                               struct packed_git *p,
1175                                               off_t obj_offset,
1176                                               enum object_type type,
1177                                               struct pack_window **w_curs,
1178                                               off_t curpos)
1179 {
1180         off_t small_poi_stack[POI_STACK_PREALLOC];
1181         off_t *poi_stack = small_poi_stack;
1182         int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1183
1184         while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1185                 off_t base_offset;
1186                 unsigned long size;
1187                 /* Push the object we're going to leave behind */
1188                 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1189                         poi_stack_alloc = alloc_nr(poi_stack_nr);
1190                         ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1191                         memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1192                 } else {
1193                         ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1194                 }
1195                 poi_stack[poi_stack_nr++] = obj_offset;
1196                 /* If parsing the base offset fails, just unwind */
1197                 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1198                 if (!base_offset)
1199                         goto unwind;
1200                 curpos = obj_offset = base_offset;
1201                 type = unpack_object_header(p, w_curs, &curpos, &size);
1202                 if (type <= OBJ_NONE) {
1203                         /* If getting the base itself fails, we first
1204                          * retry the base, otherwise unwind */
1205                         type = retry_bad_packed_offset(r, p, base_offset);
1206                         if (type > OBJ_NONE)
1207                                 goto out;
1208                         goto unwind;
1209                 }
1210         }
1211
1212         switch (type) {
1213         case OBJ_BAD:
1214         case OBJ_COMMIT:
1215         case OBJ_TREE:
1216         case OBJ_BLOB:
1217         case OBJ_TAG:
1218                 break;
1219         default:
1220                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1221                       type, (uintmax_t)obj_offset, p->pack_name);
1222                 type = OBJ_BAD;
1223         }
1224
1225 out:
1226         if (poi_stack != small_poi_stack)
1227                 free(poi_stack);
1228         return type;
1229
1230 unwind:
1231         while (poi_stack_nr) {
1232                 obj_offset = poi_stack[--poi_stack_nr];
1233                 type = retry_bad_packed_offset(r, p, obj_offset);
1234                 if (type > OBJ_NONE)
1235                         goto out;
1236         }
1237         type = OBJ_BAD;
1238         goto out;
1239 }
1240
1241 static struct hashmap delta_base_cache;
1242 static size_t delta_base_cached;
1243
1244 static LIST_HEAD(delta_base_cache_lru);
1245
1246 struct delta_base_cache_key {
1247         struct packed_git *p;
1248         off_t base_offset;
1249 };
1250
1251 struct delta_base_cache_entry {
1252         struct hashmap hash;
1253         struct delta_base_cache_key key;
1254         struct list_head lru;
1255         void *data;
1256         unsigned long size;
1257         enum object_type type;
1258 };
1259
1260 static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1261 {
1262         unsigned int hash;
1263
1264         hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1265         hash += (hash >> 8) + (hash >> 16);
1266         return hash;
1267 }
1268
1269 static struct delta_base_cache_entry *
1270 get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1271 {
1272         struct hashmap_entry entry;
1273         struct delta_base_cache_key key;
1274
1275         if (!delta_base_cache.cmpfn)
1276                 return NULL;
1277
1278         hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1279         key.p = p;
1280         key.base_offset = base_offset;
1281         return hashmap_get(&delta_base_cache, &entry, &key);
1282 }
1283
1284 static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1285                                    const struct delta_base_cache_key *b)
1286 {
1287         return a->p == b->p && a->base_offset == b->base_offset;
1288 }
1289
1290 static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1291                                      const void *va, const void *vb,
1292                                      const void *vkey)
1293 {
1294         const struct delta_base_cache_entry *a = va, *b = vb;
1295         const struct delta_base_cache_key *key = vkey;
1296         if (key)
1297                 return !delta_base_cache_key_eq(&a->key, key);
1298         else
1299                 return !delta_base_cache_key_eq(&a->key, &b->key);
1300 }
1301
1302 static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1303 {
1304         return !!get_delta_base_cache_entry(p, base_offset);
1305 }
1306
1307 /*
1308  * Remove the entry from the cache, but do _not_ free the associated
1309  * entry data. The caller takes ownership of the "data" buffer, and
1310  * should copy out any fields it wants before detaching.
1311  */
1312 static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1313 {
1314         hashmap_remove(&delta_base_cache, ent, &ent->key);
1315         list_del(&ent->lru);
1316         delta_base_cached -= ent->size;
1317         free(ent);
1318 }
1319
1320 static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
1321                                    off_t base_offset, unsigned long *base_size,
1322                                    enum object_type *type)
1323 {
1324         struct delta_base_cache_entry *ent;
1325
1326         ent = get_delta_base_cache_entry(p, base_offset);
1327         if (!ent)
1328                 return unpack_entry(r, p, base_offset, type, base_size);
1329
1330         if (type)
1331                 *type = ent->type;
1332         if (base_size)
1333                 *base_size = ent->size;
1334         return xmemdupz(ent->data, ent->size);
1335 }
1336
1337 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1338 {
1339         free(ent->data);
1340         detach_delta_base_cache_entry(ent);
1341 }
1342
1343 void clear_delta_base_cache(void)
1344 {
1345         struct list_head *lru, *tmp;
1346         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1347                 struct delta_base_cache_entry *entry =
1348                         list_entry(lru, struct delta_base_cache_entry, lru);
1349                 release_delta_base_cache(entry);
1350         }
1351 }
1352
1353 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1354         void *base, unsigned long base_size, enum object_type type)
1355 {
1356         struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1357         struct list_head *lru, *tmp;
1358
1359         delta_base_cached += base_size;
1360
1361         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1362                 struct delta_base_cache_entry *f =
1363                         list_entry(lru, struct delta_base_cache_entry, lru);
1364                 if (delta_base_cached <= delta_base_cache_limit)
1365                         break;
1366                 release_delta_base_cache(f);
1367         }
1368
1369         ent->key.p = p;
1370         ent->key.base_offset = base_offset;
1371         ent->type = type;
1372         ent->data = base;
1373         ent->size = base_size;
1374         list_add_tail(&ent->lru, &delta_base_cache_lru);
1375
1376         if (!delta_base_cache.cmpfn)
1377                 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1378         hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1379         hashmap_add(&delta_base_cache, ent);
1380 }
1381
1382 int packed_object_info(struct repository *r, struct packed_git *p,
1383                        off_t obj_offset, struct object_info *oi)
1384 {
1385         struct pack_window *w_curs = NULL;
1386         unsigned long size;
1387         off_t curpos = obj_offset;
1388         enum object_type type;
1389
1390         /*
1391          * We always get the representation type, but only convert it to
1392          * a "real" type later if the caller is interested.
1393          */
1394         if (oi->contentp) {
1395                 *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
1396                                                       &type);
1397                 if (!*oi->contentp)
1398                         type = OBJ_BAD;
1399         } else {
1400                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1401         }
1402
1403         if (!oi->contentp && oi->sizep) {
1404                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1405                         off_t tmp_pos = curpos;
1406                         off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1407                                                            type, obj_offset);
1408                         if (!base_offset) {
1409                                 type = OBJ_BAD;
1410                                 goto out;
1411                         }
1412                         *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1413                         if (*oi->sizep == 0) {
1414                                 type = OBJ_BAD;
1415                                 goto out;
1416                         }
1417                 } else {
1418                         *oi->sizep = size;
1419                 }
1420         }
1421
1422         if (oi->disk_sizep) {
1423                 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1424                 *oi->disk_sizep = revidx[1].offset - obj_offset;
1425         }
1426
1427         if (oi->typep || oi->type_name) {
1428                 enum object_type ptot;
1429                 ptot = packed_to_object_type(r, p, obj_offset,
1430                                              type, &w_curs, curpos);
1431                 if (oi->typep)
1432                         *oi->typep = ptot;
1433                 if (oi->type_name) {
1434                         const char *tn = type_name(ptot);
1435                         if (tn)
1436                                 strbuf_addstr(oi->type_name, tn);
1437                 }
1438                 if (ptot < 0) {
1439                         type = OBJ_BAD;
1440                         goto out;
1441                 }
1442         }
1443
1444         if (oi->delta_base_sha1) {
1445                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1446                         const unsigned char *base;
1447
1448                         base = get_delta_base_sha1(p, &w_curs, curpos,
1449                                                    type, obj_offset);
1450                         if (!base) {
1451                                 type = OBJ_BAD;
1452                                 goto out;
1453                         }
1454
1455                         hashcpy(oi->delta_base_sha1, base);
1456                 } else
1457                         hashclr(oi->delta_base_sha1);
1458         }
1459
1460         oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1461                                                           OI_PACKED;
1462
1463 out:
1464         unuse_pack(&w_curs);
1465         return type;
1466 }
1467
1468 static void *unpack_compressed_entry(struct packed_git *p,
1469                                     struct pack_window **w_curs,
1470                                     off_t curpos,
1471                                     unsigned long size)
1472 {
1473         int st;
1474         git_zstream stream;
1475         unsigned char *buffer, *in;
1476
1477         buffer = xmallocz_gently(size);
1478         if (!buffer)
1479                 return NULL;
1480         memset(&stream, 0, sizeof(stream));
1481         stream.next_out = buffer;
1482         stream.avail_out = size + 1;
1483
1484         git_inflate_init(&stream);
1485         do {
1486                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1487                 stream.next_in = in;
1488                 st = git_inflate(&stream, Z_FINISH);
1489                 if (!stream.avail_out)
1490                         break; /* the payload is larger than it should be */
1491                 curpos += stream.next_in - in;
1492         } while (st == Z_OK || st == Z_BUF_ERROR);
1493         git_inflate_end(&stream);
1494         if ((st != Z_STREAM_END) || stream.total_out != size) {
1495                 free(buffer);
1496                 return NULL;
1497         }
1498
1499         /* versions of zlib can clobber unconsumed portion of outbuf */
1500         buffer[size] = '\0';
1501
1502         return buffer;
1503 }
1504
1505 static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1506 {
1507         static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1508         trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1509                          p->pack_name, (uintmax_t)obj_offset);
1510 }
1511
1512 int do_check_packed_object_crc;
1513
1514 #define UNPACK_ENTRY_STACK_PREALLOC 64
1515 struct unpack_entry_stack_ent {
1516         off_t obj_offset;
1517         off_t curpos;
1518         unsigned long size;
1519 };
1520
1521 static void *read_object(struct repository *r,
1522                          const struct object_id *oid,
1523                          enum object_type *type,
1524                          unsigned long *size)
1525 {
1526         struct object_info oi = OBJECT_INFO_INIT;
1527         void *content;
1528         oi.typep = type;
1529         oi.sizep = size;
1530         oi.contentp = &content;
1531
1532         if (oid_object_info_extended(r, oid, &oi, 0) < 0)
1533                 return NULL;
1534         return content;
1535 }
1536
1537 void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
1538                    enum object_type *final_type, unsigned long *final_size)
1539 {
1540         struct pack_window *w_curs = NULL;
1541         off_t curpos = obj_offset;
1542         void *data = NULL;
1543         unsigned long size;
1544         enum object_type type;
1545         struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1546         struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1547         int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1548         int base_from_cache = 0;
1549
1550         write_pack_access_log(p, obj_offset);
1551
1552         /* PHASE 1: drill down to the innermost base object */
1553         for (;;) {
1554                 off_t base_offset;
1555                 int i;
1556                 struct delta_base_cache_entry *ent;
1557
1558                 ent = get_delta_base_cache_entry(p, curpos);
1559                 if (ent) {
1560                         type = ent->type;
1561                         data = ent->data;
1562                         size = ent->size;
1563                         detach_delta_base_cache_entry(ent);
1564                         base_from_cache = 1;
1565                         break;
1566                 }
1567
1568                 if (do_check_packed_object_crc && p->index_version > 1) {
1569                         struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1570                         off_t len = revidx[1].offset - obj_offset;
1571                         if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1572                                 struct object_id oid;
1573                                 nth_packed_object_oid(&oid, p, revidx->nr);
1574                                 error("bad packed object CRC for %s",
1575                                       oid_to_hex(&oid));
1576                                 mark_bad_packed_object(p, oid.hash);
1577                                 data = NULL;
1578                                 goto out;
1579                         }
1580                 }
1581
1582                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1583                 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1584                         break;
1585
1586                 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1587                 if (!base_offset) {
1588                         error("failed to validate delta base reference "
1589                               "at offset %"PRIuMAX" from %s",
1590                               (uintmax_t)curpos, p->pack_name);
1591                         /* bail to phase 2, in hopes of recovery */
1592                         data = NULL;
1593                         break;
1594                 }
1595
1596                 /* push object, proceed to base */
1597                 if (delta_stack_nr >= delta_stack_alloc
1598                     && delta_stack == small_delta_stack) {
1599                         delta_stack_alloc = alloc_nr(delta_stack_nr);
1600                         ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1601                         memcpy(delta_stack, small_delta_stack,
1602                                sizeof(*delta_stack)*delta_stack_nr);
1603                 } else {
1604                         ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1605                 }
1606                 i = delta_stack_nr++;
1607                 delta_stack[i].obj_offset = obj_offset;
1608                 delta_stack[i].curpos = curpos;
1609                 delta_stack[i].size = size;
1610
1611                 curpos = obj_offset = base_offset;
1612         }
1613
1614         /* PHASE 2: handle the base */
1615         switch (type) {
1616         case OBJ_OFS_DELTA:
1617         case OBJ_REF_DELTA:
1618                 if (data)
1619                         BUG("unpack_entry: left loop at a valid delta");
1620                 break;
1621         case OBJ_COMMIT:
1622         case OBJ_TREE:
1623         case OBJ_BLOB:
1624         case OBJ_TAG:
1625                 if (!base_from_cache)
1626                         data = unpack_compressed_entry(p, &w_curs, curpos, size);
1627                 break;
1628         default:
1629                 data = NULL;
1630                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1631                       type, (uintmax_t)obj_offset, p->pack_name);
1632         }
1633
1634         /* PHASE 3: apply deltas in order */
1635
1636         /* invariants:
1637          *   'data' holds the base data, or NULL if there was corruption
1638          */
1639         while (delta_stack_nr) {
1640                 void *delta_data;
1641                 void *base = data;
1642                 void *external_base = NULL;
1643                 unsigned long delta_size, base_size = size;
1644                 int i;
1645
1646                 data = NULL;
1647
1648                 if (base)
1649                         add_delta_base_cache(p, obj_offset, base, base_size, type);
1650
1651                 if (!base) {
1652                         /*
1653                          * We're probably in deep shit, but let's try to fetch
1654                          * the required base anyway from another pack or loose.
1655                          * This is costly but should happen only in the presence
1656                          * of a corrupted pack, and is better than failing outright.
1657                          */
1658                         struct revindex_entry *revidx;
1659                         struct object_id base_oid;
1660                         revidx = find_pack_revindex(p, obj_offset);
1661                         if (revidx) {
1662                                 nth_packed_object_oid(&base_oid, p, revidx->nr);
1663                                 error("failed to read delta base object %s"
1664                                       " at offset %"PRIuMAX" from %s",
1665                                       oid_to_hex(&base_oid), (uintmax_t)obj_offset,
1666                                       p->pack_name);
1667                                 mark_bad_packed_object(p, base_oid.hash);
1668                                 base = read_object(r, &base_oid, &type, &base_size);
1669                                 external_base = base;
1670                         }
1671                 }
1672
1673                 i = --delta_stack_nr;
1674                 obj_offset = delta_stack[i].obj_offset;
1675                 curpos = delta_stack[i].curpos;
1676                 delta_size = delta_stack[i].size;
1677
1678                 if (!base)
1679                         continue;
1680
1681                 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1682
1683                 if (!delta_data) {
1684                         error("failed to unpack compressed delta "
1685                               "at offset %"PRIuMAX" from %s",
1686                               (uintmax_t)curpos, p->pack_name);
1687                         data = NULL;
1688                         free(external_base);
1689                         continue;
1690                 }
1691
1692                 data = patch_delta(base, base_size,
1693                                    delta_data, delta_size,
1694                                    &size);
1695
1696                 /*
1697                  * We could not apply the delta; warn the user, but keep going.
1698                  * Our failure will be noticed either in the next iteration of
1699                  * the loop, or if this is the final delta, in the caller when
1700                  * we return NULL. Those code paths will take care of making
1701                  * a more explicit warning and retrying with another copy of
1702                  * the object.
1703                  */
1704                 if (!data)
1705                         error("failed to apply delta");
1706
1707                 free(delta_data);
1708                 free(external_base);
1709         }
1710
1711         if (final_type)
1712                 *final_type = type;
1713         if (final_size)
1714                 *final_size = size;
1715
1716 out:
1717         unuse_pack(&w_curs);
1718
1719         if (delta_stack != small_delta_stack)
1720                 free(delta_stack);
1721
1722         return data;
1723 }
1724
1725 int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
1726 {
1727         const unsigned char *index_fanout = p->index_data;
1728         const unsigned char *index_lookup;
1729         const unsigned int hashsz = the_hash_algo->rawsz;
1730         int index_lookup_width;
1731
1732         if (!index_fanout)
1733                 BUG("bsearch_pack called without a valid pack-index");
1734
1735         index_lookup = index_fanout + 4 * 256;
1736         if (p->index_version == 1) {
1737                 index_lookup_width = hashsz + 4;
1738                 index_lookup += 4;
1739         } else {
1740                 index_lookup_width = hashsz;
1741                 index_fanout += 8;
1742                 index_lookup += 8;
1743         }
1744
1745         return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,
1746                             index_lookup, index_lookup_width, result);
1747 }
1748
1749 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1750                                             uint32_t n)
1751 {
1752         const unsigned char *index = p->index_data;
1753         const unsigned int hashsz = the_hash_algo->rawsz;
1754         if (!index) {
1755                 if (open_pack_index(p))
1756                         return NULL;
1757                 index = p->index_data;
1758         }
1759         if (n >= p->num_objects)
1760                 return NULL;
1761         index += 4 * 256;
1762         if (p->index_version == 1) {
1763                 return index + (hashsz + 4) * n + 4;
1764         } else {
1765                 index += 8;
1766                 return index + hashsz * n;
1767         }
1768 }
1769
1770 const struct object_id *nth_packed_object_oid(struct object_id *oid,
1771                                               struct packed_git *p,
1772                                               uint32_t n)
1773 {
1774         const unsigned char *hash = nth_packed_object_sha1(p, n);
1775         if (!hash)
1776                 return NULL;
1777         hashcpy(oid->hash, hash);
1778         return oid;
1779 }
1780
1781 void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1782 {
1783         const unsigned char *ptr = vptr;
1784         const unsigned char *start = p->index_data;
1785         const unsigned char *end = start + p->index_size;
1786         if (ptr < start)
1787                 die(_("offset before start of pack index for %s (corrupt index?)"),
1788                     p->pack_name);
1789         /* No need to check for underflow; .idx files must be at least 8 bytes */
1790         if (ptr >= end - 8)
1791                 die(_("offset beyond end of pack index for %s (truncated index?)"),
1792                     p->pack_name);
1793 }
1794
1795 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1796 {
1797         const unsigned char *index = p->index_data;
1798         const unsigned int hashsz = the_hash_algo->rawsz;
1799         index += 4 * 256;
1800         if (p->index_version == 1) {
1801                 return ntohl(*((uint32_t *)(index + (hashsz + 4) * n)));
1802         } else {
1803                 uint32_t off;
1804                 index += 8 + p->num_objects * (hashsz + 4);
1805                 off = ntohl(*((uint32_t *)(index + 4 * n)));
1806                 if (!(off & 0x80000000))
1807                         return off;
1808                 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1809                 check_pack_index_ptr(p, index);
1810                 return get_be64(index);
1811         }
1812 }
1813
1814 off_t find_pack_entry_one(const unsigned char *sha1,
1815                                   struct packed_git *p)
1816 {
1817         const unsigned char *index = p->index_data;
1818         struct object_id oid;
1819         uint32_t result;
1820
1821         if (!index) {
1822                 if (open_pack_index(p))
1823                         return 0;
1824         }
1825
1826         hashcpy(oid.hash, sha1);
1827         if (bsearch_pack(&oid, p, &result))
1828                 return nth_packed_object_offset(p, result);
1829         return 0;
1830 }
1831
1832 int is_pack_valid(struct packed_git *p)
1833 {
1834         /* An already open pack is known to be valid. */
1835         if (p->pack_fd != -1)
1836                 return 1;
1837
1838         /* If the pack has one window completely covering the
1839          * file size, the pack is known to be valid even if
1840          * the descriptor is not currently open.
1841          */
1842         if (p->windows) {
1843                 struct pack_window *w = p->windows;
1844
1845                 if (!w->offset && w->len == p->pack_size)
1846                         return 1;
1847         }
1848
1849         /* Force the pack to open to prove its valid. */
1850         return !open_packed_git(p);
1851 }
1852
1853 struct packed_git *find_sha1_pack(const unsigned char *sha1,
1854                                   struct packed_git *packs)
1855 {
1856         struct packed_git *p;
1857
1858         for (p = packs; p; p = p->next) {
1859                 if (find_pack_entry_one(sha1, p))
1860                         return p;
1861         }
1862         return NULL;
1863
1864 }
1865
1866 static int fill_pack_entry(const struct object_id *oid,
1867                            struct pack_entry *e,
1868                            struct packed_git *p)
1869 {
1870         off_t offset;
1871
1872         if (p->num_bad_objects) {
1873                 unsigned i;
1874                 for (i = 0; i < p->num_bad_objects; i++)
1875                         if (!hashcmp(oid->hash,
1876                                      p->bad_object_sha1 + the_hash_algo->rawsz * i))
1877                                 return 0;
1878         }
1879
1880         offset = find_pack_entry_one(oid->hash, p);
1881         if (!offset)
1882                 return 0;
1883
1884         /*
1885          * We are about to tell the caller where they can locate the
1886          * requested object.  We better make sure the packfile is
1887          * still here and can be accessed before supplying that
1888          * answer, as it may have been deleted since the index was
1889          * loaded!
1890          */
1891         if (!is_pack_valid(p))
1892                 return 0;
1893         e->offset = offset;
1894         e->p = p;
1895         return 1;
1896 }
1897
1898 int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
1899 {
1900         struct list_head *pos;
1901
1902         prepare_packed_git(r);
1903         if (!r->objects->packed_git)
1904                 return 0;
1905
1906         list_for_each(pos, &r->objects->packed_git_mru) {
1907                 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1908                 if (fill_pack_entry(oid, e, p)) {
1909                         list_move(&p->mru, &r->objects->packed_git_mru);
1910                         return 1;
1911                 }
1912         }
1913         return 0;
1914 }
1915
1916 int has_object_pack(const struct object_id *oid)
1917 {
1918         struct pack_entry e;
1919         return find_pack_entry(the_repository, oid, &e);
1920 }
1921
1922 int has_pack_index(const unsigned char *sha1)
1923 {
1924         struct stat st;
1925         if (stat(sha1_pack_index_name(sha1), &st))
1926                 return 0;
1927         return 1;
1928 }
1929
1930 int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1931 {
1932         uint32_t i;
1933         int r = 0;
1934
1935         for (i = 0; i < p->num_objects; i++) {
1936                 struct object_id oid;
1937
1938                 if (!nth_packed_object_oid(&oid, p, i))
1939                         return error("unable to get sha1 of object %u in %s",
1940                                      i, p->pack_name);
1941
1942                 r = cb(&oid, p, i, data);
1943                 if (r)
1944                         break;
1945         }
1946         return r;
1947 }
1948
1949 int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1950 {
1951         struct packed_git *p;
1952         int r = 0;
1953         int pack_errors = 0;
1954
1955         prepare_packed_git(the_repository);
1956         for (p = the_repository->objects->packed_git; p; p = p->next) {
1957                 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1958                         continue;
1959                 if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
1960                     !p->pack_promisor)
1961                         continue;
1962                 if (open_pack_index(p)) {
1963                         pack_errors = 1;
1964                         continue;
1965                 }
1966                 r = for_each_object_in_pack(p, cb, data);
1967                 if (r)
1968                         break;
1969         }
1970         return r ? r : pack_errors;
1971 }
1972
1973 static int add_promisor_object(const struct object_id *oid,
1974                                struct packed_git *pack,
1975                                uint32_t pos,
1976                                void *set_)
1977 {
1978         struct oidset *set = set_;
1979         struct object *obj = parse_object(oid);
1980         if (!obj)
1981                 return 1;
1982
1983         oidset_insert(set, oid);
1984
1985         /*
1986          * If this is a tree, commit, or tag, the objects it refers
1987          * to are also promisor objects. (Blobs refer to no objects->)
1988          */
1989         if (obj->type == OBJ_TREE) {
1990                 struct tree *tree = (struct tree *)obj;
1991                 struct tree_desc desc;
1992                 struct name_entry entry;
1993                 if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
1994                         /*
1995                          * Error messages are given when packs are
1996                          * verified, so do not print any here.
1997                          */
1998                         return 0;
1999                 while (tree_entry_gently(&desc, &entry))
2000                         oidset_insert(set, entry.oid);
2001         } else if (obj->type == OBJ_COMMIT) {
2002                 struct commit *commit = (struct commit *) obj;
2003                 struct commit_list *parents = commit->parents;
2004
2005                 oidset_insert(set, get_commit_tree_oid(commit));
2006                 for (; parents; parents = parents->next)
2007                         oidset_insert(set, &parents->item->object.oid);
2008         } else if (obj->type == OBJ_TAG) {
2009                 struct tag *tag = (struct tag *) obj;
2010                 oidset_insert(set, &tag->tagged->oid);
2011         }
2012         return 0;
2013 }
2014
2015 int is_promisor_object(const struct object_id *oid)
2016 {
2017         static struct oidset promisor_objects;
2018         static int promisor_objects_prepared;
2019
2020         if (!promisor_objects_prepared) {
2021                 if (repository_format_partial_clone) {
2022                         for_each_packed_object(add_promisor_object,
2023                                                &promisor_objects,
2024                                                FOR_EACH_OBJECT_PROMISOR_ONLY);
2025                 }
2026                 promisor_objects_prepared = 1;
2027         }
2028         return oidset_contains(&promisor_objects, oid);
2029 }