fsmonitor: update documentation to remove reference to invalid config settings
[git] / packfile.c
1 #include "cache.h"
2 #include "mru.h"
3 #include "pack.h"
4 #include "dir.h"
5 #include "mergesort.h"
6 #include "packfile.h"
7 #include "delta.h"
8 #include "list.h"
9 #include "streaming.h"
10 #include "sha1-lookup.h"
11
12 char *odb_pack_name(struct strbuf *buf,
13                     const unsigned char *sha1,
14                     const char *ext)
15 {
16         strbuf_reset(buf);
17         strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
18                     sha1_to_hex(sha1), ext);
19         return buf->buf;
20 }
21
22 char *sha1_pack_name(const unsigned char *sha1)
23 {
24         static struct strbuf buf = STRBUF_INIT;
25         return odb_pack_name(&buf, sha1, "pack");
26 }
27
28 char *sha1_pack_index_name(const unsigned char *sha1)
29 {
30         static struct strbuf buf = STRBUF_INIT;
31         return odb_pack_name(&buf, sha1, "idx");
32 }
33
34 static unsigned int pack_used_ctr;
35 static unsigned int pack_mmap_calls;
36 static unsigned int peak_pack_open_windows;
37 static unsigned int pack_open_windows;
38 static unsigned int pack_open_fds;
39 static unsigned int pack_max_fds;
40 static size_t peak_pack_mapped;
41 static size_t pack_mapped;
42 struct packed_git *packed_git;
43
44 static struct mru packed_git_mru_storage;
45 struct mru *packed_git_mru = &packed_git_mru_storage;
46
47 #define SZ_FMT PRIuMAX
48 static inline uintmax_t sz_fmt(size_t s) { return s; }
49
50 void pack_report(void)
51 {
52         fprintf(stderr,
53                 "pack_report: getpagesize()            = %10" SZ_FMT "\n"
54                 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
55                 "pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
56                 sz_fmt(getpagesize()),
57                 sz_fmt(packed_git_window_size),
58                 sz_fmt(packed_git_limit));
59         fprintf(stderr,
60                 "pack_report: pack_used_ctr            = %10u\n"
61                 "pack_report: pack_mmap_calls          = %10u\n"
62                 "pack_report: pack_open_windows        = %10u / %10u\n"
63                 "pack_report: pack_mapped              = "
64                         "%10" SZ_FMT " / %10" SZ_FMT "\n",
65                 pack_used_ctr,
66                 pack_mmap_calls,
67                 pack_open_windows, peak_pack_open_windows,
68                 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
69 }
70
71 /*
72  * Open and mmap the index file at path, perform a couple of
73  * consistency checks, then record its information to p.  Return 0 on
74  * success.
75  */
76 static int check_packed_git_idx(const char *path, struct packed_git *p)
77 {
78         void *idx_map;
79         struct pack_idx_header *hdr;
80         size_t idx_size;
81         uint32_t version, nr, i, *index;
82         int fd = git_open(path);
83         struct stat st;
84
85         if (fd < 0)
86                 return -1;
87         if (fstat(fd, &st)) {
88                 close(fd);
89                 return -1;
90         }
91         idx_size = xsize_t(st.st_size);
92         if (idx_size < 4 * 256 + 20 + 20) {
93                 close(fd);
94                 return error("index file %s is too small", path);
95         }
96         idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
97         close(fd);
98
99         hdr = idx_map;
100         if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
101                 version = ntohl(hdr->idx_version);
102                 if (version < 2 || version > 2) {
103                         munmap(idx_map, idx_size);
104                         return error("index file %s is version %"PRIu32
105                                      " and is not supported by this binary"
106                                      " (try upgrading GIT to a newer version)",
107                                      path, version);
108                 }
109         } else
110                 version = 1;
111
112         nr = 0;
113         index = idx_map;
114         if (version > 1)
115                 index += 2;  /* skip index header */
116         for (i = 0; i < 256; i++) {
117                 uint32_t n = ntohl(index[i]);
118                 if (n < nr) {
119                         munmap(idx_map, idx_size);
120                         return error("non-monotonic index %s", path);
121                 }
122                 nr = n;
123         }
124
125         if (version == 1) {
126                 /*
127                  * Total size:
128                  *  - 256 index entries 4 bytes each
129                  *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
130                  *  - 20-byte SHA1 of the packfile
131                  *  - 20-byte SHA1 file checksum
132                  */
133                 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
134                         munmap(idx_map, idx_size);
135                         return error("wrong index v1 file size in %s", path);
136                 }
137         } else if (version == 2) {
138                 /*
139                  * Minimum size:
140                  *  - 8 bytes of header
141                  *  - 256 index entries 4 bytes each
142                  *  - 20-byte sha1 entry * nr
143                  *  - 4-byte crc entry * nr
144                  *  - 4-byte offset entry * nr
145                  *  - 20-byte SHA1 of the packfile
146                  *  - 20-byte SHA1 file checksum
147                  * And after the 4-byte offset table might be a
148                  * variable sized table containing 8-byte entries
149                  * for offsets larger than 2^31.
150                  */
151                 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
152                 unsigned long max_size = min_size;
153                 if (nr)
154                         max_size += (nr - 1)*8;
155                 if (idx_size < min_size || idx_size > max_size) {
156                         munmap(idx_map, idx_size);
157                         return error("wrong index v2 file size in %s", path);
158                 }
159                 if (idx_size != min_size &&
160                     /*
161                      * make sure we can deal with large pack offsets.
162                      * 31-bit signed offset won't be enough, neither
163                      * 32-bit unsigned one will be.
164                      */
165                     (sizeof(off_t) <= 4)) {
166                         munmap(idx_map, idx_size);
167                         return error("pack too large for current definition of off_t in %s", path);
168                 }
169         }
170
171         p->index_version = version;
172         p->index_data = idx_map;
173         p->index_size = idx_size;
174         p->num_objects = nr;
175         return 0;
176 }
177
178 int open_pack_index(struct packed_git *p)
179 {
180         char *idx_name;
181         size_t len;
182         int ret;
183
184         if (p->index_data)
185                 return 0;
186
187         if (!strip_suffix(p->pack_name, ".pack", &len))
188                 die("BUG: pack_name does not end in .pack");
189         idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
190         ret = check_packed_git_idx(idx_name, p);
191         free(idx_name);
192         return ret;
193 }
194
195 static struct packed_git *alloc_packed_git(int extra)
196 {
197         struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
198         memset(p, 0, sizeof(*p));
199         p->pack_fd = -1;
200         return p;
201 }
202
203 struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
204 {
205         const char *path = sha1_pack_name(sha1);
206         size_t alloc = st_add(strlen(path), 1);
207         struct packed_git *p = alloc_packed_git(alloc);
208
209         memcpy(p->pack_name, path, alloc); /* includes NUL */
210         hashcpy(p->sha1, sha1);
211         if (check_packed_git_idx(idx_path, p)) {
212                 free(p);
213                 return NULL;
214         }
215
216         return p;
217 }
218
219 static void scan_windows(struct packed_git *p,
220         struct packed_git **lru_p,
221         struct pack_window **lru_w,
222         struct pack_window **lru_l)
223 {
224         struct pack_window *w, *w_l;
225
226         for (w_l = NULL, w = p->windows; w; w = w->next) {
227                 if (!w->inuse_cnt) {
228                         if (!*lru_w || w->last_used < (*lru_w)->last_used) {
229                                 *lru_p = p;
230                                 *lru_w = w;
231                                 *lru_l = w_l;
232                         }
233                 }
234                 w_l = w;
235         }
236 }
237
238 static int unuse_one_window(struct packed_git *current)
239 {
240         struct packed_git *p, *lru_p = NULL;
241         struct pack_window *lru_w = NULL, *lru_l = NULL;
242
243         if (current)
244                 scan_windows(current, &lru_p, &lru_w, &lru_l);
245         for (p = packed_git; p; p = p->next)
246                 scan_windows(p, &lru_p, &lru_w, &lru_l);
247         if (lru_p) {
248                 munmap(lru_w->base, lru_w->len);
249                 pack_mapped -= lru_w->len;
250                 if (lru_l)
251                         lru_l->next = lru_w->next;
252                 else
253                         lru_p->windows = lru_w->next;
254                 free(lru_w);
255                 pack_open_windows--;
256                 return 1;
257         }
258         return 0;
259 }
260
261 void release_pack_memory(size_t need)
262 {
263         size_t cur = pack_mapped;
264         while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
265                 ; /* nothing */
266 }
267
268 void close_pack_windows(struct packed_git *p)
269 {
270         while (p->windows) {
271                 struct pack_window *w = p->windows;
272
273                 if (w->inuse_cnt)
274                         die("pack '%s' still has open windows to it",
275                             p->pack_name);
276                 munmap(w->base, w->len);
277                 pack_mapped -= w->len;
278                 pack_open_windows--;
279                 p->windows = w->next;
280                 free(w);
281         }
282 }
283
284 static int close_pack_fd(struct packed_git *p)
285 {
286         if (p->pack_fd < 0)
287                 return 0;
288
289         close(p->pack_fd);
290         pack_open_fds--;
291         p->pack_fd = -1;
292
293         return 1;
294 }
295
296 void close_pack_index(struct packed_git *p)
297 {
298         if (p->index_data) {
299                 munmap((void *)p->index_data, p->index_size);
300                 p->index_data = NULL;
301         }
302 }
303
304 static void close_pack(struct packed_git *p)
305 {
306         close_pack_windows(p);
307         close_pack_fd(p);
308         close_pack_index(p);
309 }
310
311 void close_all_packs(void)
312 {
313         struct packed_git *p;
314
315         for (p = packed_git; p; p = p->next)
316                 if (p->do_not_close)
317                         die("BUG: want to close pack marked 'do-not-close'");
318                 else
319                         close_pack(p);
320 }
321
322 /*
323  * The LRU pack is the one with the oldest MRU window, preferring packs
324  * with no used windows, or the oldest mtime if it has no windows allocated.
325  */
326 static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
327 {
328         struct pack_window *w, *this_mru_w;
329         int has_windows_inuse = 0;
330
331         /*
332          * Reject this pack if it has windows and the previously selected
333          * one does not.  If this pack does not have windows, reject
334          * it if the pack file is newer than the previously selected one.
335          */
336         if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
337                 return;
338
339         for (w = this_mru_w = p->windows; w; w = w->next) {
340                 /*
341                  * Reject this pack if any of its windows are in use,
342                  * but the previously selected pack did not have any
343                  * inuse windows.  Otherwise, record that this pack
344                  * has windows in use.
345                  */
346                 if (w->inuse_cnt) {
347                         if (*accept_windows_inuse)
348                                 has_windows_inuse = 1;
349                         else
350                                 return;
351                 }
352
353                 if (w->last_used > this_mru_w->last_used)
354                         this_mru_w = w;
355
356                 /*
357                  * Reject this pack if it has windows that have been
358                  * used more recently than the previously selected pack.
359                  * If the previously selected pack had windows inuse and
360                  * we have not encountered a window in this pack that is
361                  * inuse, skip this check since we prefer a pack with no
362                  * inuse windows to one that has inuse windows.
363                  */
364                 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
365                     this_mru_w->last_used > (*mru_w)->last_used)
366                         return;
367         }
368
369         /*
370          * Select this pack.
371          */
372         *mru_w = this_mru_w;
373         *lru_p = p;
374         *accept_windows_inuse = has_windows_inuse;
375 }
376
377 static int close_one_pack(void)
378 {
379         struct packed_git *p, *lru_p = NULL;
380         struct pack_window *mru_w = NULL;
381         int accept_windows_inuse = 1;
382
383         for (p = packed_git; p; p = p->next) {
384                 if (p->pack_fd == -1)
385                         continue;
386                 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
387         }
388
389         if (lru_p)
390                 return close_pack_fd(lru_p);
391
392         return 0;
393 }
394
395 static unsigned int get_max_fd_limit(void)
396 {
397 #ifdef RLIMIT_NOFILE
398         {
399                 struct rlimit lim;
400
401                 if (!getrlimit(RLIMIT_NOFILE, &lim))
402                         return lim.rlim_cur;
403         }
404 #endif
405
406 #ifdef _SC_OPEN_MAX
407         {
408                 long open_max = sysconf(_SC_OPEN_MAX);
409                 if (0 < open_max)
410                         return open_max;
411                 /*
412                  * Otherwise, we got -1 for one of the two
413                  * reasons:
414                  *
415                  * (1) sysconf() did not understand _SC_OPEN_MAX
416                  *     and signaled an error with -1; or
417                  * (2) sysconf() said there is no limit.
418                  *
419                  * We _could_ clear errno before calling sysconf() to
420                  * tell these two cases apart and return a huge number
421                  * in the latter case to let the caller cap it to a
422                  * value that is not so selfish, but letting the
423                  * fallback OPEN_MAX codepath take care of these cases
424                  * is a lot simpler.
425                  */
426         }
427 #endif
428
429 #ifdef OPEN_MAX
430         return OPEN_MAX;
431 #else
432         return 1; /* see the caller ;-) */
433 #endif
434 }
435
436 /*
437  * Do not call this directly as this leaks p->pack_fd on error return;
438  * call open_packed_git() instead.
439  */
440 static int open_packed_git_1(struct packed_git *p)
441 {
442         struct stat st;
443         struct pack_header hdr;
444         unsigned char sha1[20];
445         unsigned char *idx_sha1;
446         long fd_flag;
447
448         if (!p->index_data && open_pack_index(p))
449                 return error("packfile %s index unavailable", p->pack_name);
450
451         if (!pack_max_fds) {
452                 unsigned int max_fds = get_max_fd_limit();
453
454                 /* Save 3 for stdin/stdout/stderr, 22 for work */
455                 if (25 < max_fds)
456                         pack_max_fds = max_fds - 25;
457                 else
458                         pack_max_fds = 1;
459         }
460
461         while (pack_max_fds <= pack_open_fds && close_one_pack())
462                 ; /* nothing */
463
464         p->pack_fd = git_open(p->pack_name);
465         if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
466                 return -1;
467         pack_open_fds++;
468
469         /* If we created the struct before we had the pack we lack size. */
470         if (!p->pack_size) {
471                 if (!S_ISREG(st.st_mode))
472                         return error("packfile %s not a regular file", p->pack_name);
473                 p->pack_size = st.st_size;
474         } else if (p->pack_size != st.st_size)
475                 return error("packfile %s size changed", p->pack_name);
476
477         /* We leave these file descriptors open with sliding mmap;
478          * there is no point keeping them open across exec(), though.
479          */
480         fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
481         if (fd_flag < 0)
482                 return error("cannot determine file descriptor flags");
483         fd_flag |= FD_CLOEXEC;
484         if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
485                 return error("cannot set FD_CLOEXEC");
486
487         /* Verify we recognize this pack file format. */
488         if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
489                 return error("file %s is far too short to be a packfile", p->pack_name);
490         if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
491                 return error("file %s is not a GIT packfile", p->pack_name);
492         if (!pack_version_ok(hdr.hdr_version))
493                 return error("packfile %s is version %"PRIu32" and not"
494                         " supported (try upgrading GIT to a newer version)",
495                         p->pack_name, ntohl(hdr.hdr_version));
496
497         /* Verify the pack matches its index. */
498         if (p->num_objects != ntohl(hdr.hdr_entries))
499                 return error("packfile %s claims to have %"PRIu32" objects"
500                              " while index indicates %"PRIu32" objects",
501                              p->pack_name, ntohl(hdr.hdr_entries),
502                              p->num_objects);
503         if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
504                 return error("end of packfile %s is unavailable", p->pack_name);
505         if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
506                 return error("packfile %s signature is unavailable", p->pack_name);
507         idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
508         if (hashcmp(sha1, idx_sha1))
509                 return error("packfile %s does not match index", p->pack_name);
510         return 0;
511 }
512
513 static int open_packed_git(struct packed_git *p)
514 {
515         if (!open_packed_git_1(p))
516                 return 0;
517         close_pack_fd(p);
518         return -1;
519 }
520
521 static int in_window(struct pack_window *win, off_t offset)
522 {
523         /* We must promise at least 20 bytes (one hash) after the
524          * offset is available from this window, otherwise the offset
525          * is not actually in this window and a different window (which
526          * has that one hash excess) must be used.  This is to support
527          * the object header and delta base parsing routines below.
528          */
529         off_t win_off = win->offset;
530         return win_off <= offset
531                 && (offset + 20) <= (win_off + win->len);
532 }
533
534 unsigned char *use_pack(struct packed_git *p,
535                 struct pack_window **w_cursor,
536                 off_t offset,
537                 unsigned long *left)
538 {
539         struct pack_window *win = *w_cursor;
540
541         /* Since packfiles end in a hash of their content and it's
542          * pointless to ask for an offset into the middle of that
543          * hash, and the in_window function above wouldn't match
544          * don't allow an offset too close to the end of the file.
545          */
546         if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
547                 die("packfile %s cannot be accessed", p->pack_name);
548         if (offset > (p->pack_size - 20))
549                 die("offset beyond end of packfile (truncated pack?)");
550         if (offset < 0)
551                 die(_("offset before end of packfile (broken .idx?)"));
552
553         if (!win || !in_window(win, offset)) {
554                 if (win)
555                         win->inuse_cnt--;
556                 for (win = p->windows; win; win = win->next) {
557                         if (in_window(win, offset))
558                                 break;
559                 }
560                 if (!win) {
561                         size_t window_align = packed_git_window_size / 2;
562                         off_t len;
563
564                         if (p->pack_fd == -1 && open_packed_git(p))
565                                 die("packfile %s cannot be accessed", p->pack_name);
566
567                         win = xcalloc(1, sizeof(*win));
568                         win->offset = (offset / window_align) * window_align;
569                         len = p->pack_size - win->offset;
570                         if (len > packed_git_window_size)
571                                 len = packed_git_window_size;
572                         win->len = (size_t)len;
573                         pack_mapped += win->len;
574                         while (packed_git_limit < pack_mapped
575                                 && unuse_one_window(p))
576                                 ; /* nothing */
577                         win->base = xmmap(NULL, win->len,
578                                 PROT_READ, MAP_PRIVATE,
579                                 p->pack_fd, win->offset);
580                         if (win->base == MAP_FAILED)
581                                 die_errno("packfile %s cannot be mapped",
582                                           p->pack_name);
583                         if (!win->offset && win->len == p->pack_size
584                                 && !p->do_not_close)
585                                 close_pack_fd(p);
586                         pack_mmap_calls++;
587                         pack_open_windows++;
588                         if (pack_mapped > peak_pack_mapped)
589                                 peak_pack_mapped = pack_mapped;
590                         if (pack_open_windows > peak_pack_open_windows)
591                                 peak_pack_open_windows = pack_open_windows;
592                         win->next = p->windows;
593                         p->windows = win;
594                 }
595         }
596         if (win != *w_cursor) {
597                 win->last_used = pack_used_ctr++;
598                 win->inuse_cnt++;
599                 *w_cursor = win;
600         }
601         offset -= win->offset;
602         if (left)
603                 *left = win->len - xsize_t(offset);
604         return win->base + offset;
605 }
606
607 void unuse_pack(struct pack_window **w_cursor)
608 {
609         struct pack_window *w = *w_cursor;
610         if (w) {
611                 w->inuse_cnt--;
612                 *w_cursor = NULL;
613         }
614 }
615
616 static void try_to_free_pack_memory(size_t size)
617 {
618         release_pack_memory(size);
619 }
620
621 struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
622 {
623         static int have_set_try_to_free_routine;
624         struct stat st;
625         size_t alloc;
626         struct packed_git *p;
627
628         if (!have_set_try_to_free_routine) {
629                 have_set_try_to_free_routine = 1;
630                 set_try_to_free_routine(try_to_free_pack_memory);
631         }
632
633         /*
634          * Make sure a corresponding .pack file exists and that
635          * the index looks sane.
636          */
637         if (!strip_suffix_mem(path, &path_len, ".idx"))
638                 return NULL;
639
640         /*
641          * ".pack" is long enough to hold any suffix we're adding (and
642          * the use xsnprintf double-checks that)
643          */
644         alloc = st_add3(path_len, strlen(".pack"), 1);
645         p = alloc_packed_git(alloc);
646         memcpy(p->pack_name, path, path_len);
647
648         xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
649         if (!access(p->pack_name, F_OK))
650                 p->pack_keep = 1;
651
652         xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
653         if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
654                 free(p);
655                 return NULL;
656         }
657
658         /* ok, it looks sane as far as we can check without
659          * actually mapping the pack file.
660          */
661         p->pack_size = st.st_size;
662         p->pack_local = local;
663         p->mtime = st.st_mtime;
664         if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
665                 hashclr(p->sha1);
666         return p;
667 }
668
669 void install_packed_git(struct packed_git *pack)
670 {
671         if (pack->pack_fd != -1)
672                 pack_open_fds++;
673
674         pack->next = packed_git;
675         packed_git = pack;
676 }
677
678 void (*report_garbage)(unsigned seen_bits, const char *path);
679
680 static void report_helper(const struct string_list *list,
681                           int seen_bits, int first, int last)
682 {
683         if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
684                 return;
685
686         for (; first < last; first++)
687                 report_garbage(seen_bits, list->items[first].string);
688 }
689
690 static void report_pack_garbage(struct string_list *list)
691 {
692         int i, baselen = -1, first = 0, seen_bits = 0;
693
694         if (!report_garbage)
695                 return;
696
697         string_list_sort(list);
698
699         for (i = 0; i < list->nr; i++) {
700                 const char *path = list->items[i].string;
701                 if (baselen != -1 &&
702                     strncmp(path, list->items[first].string, baselen)) {
703                         report_helper(list, seen_bits, first, i);
704                         baselen = -1;
705                         seen_bits = 0;
706                 }
707                 if (baselen == -1) {
708                         const char *dot = strrchr(path, '.');
709                         if (!dot) {
710                                 report_garbage(PACKDIR_FILE_GARBAGE, path);
711                                 continue;
712                         }
713                         baselen = dot - path + 1;
714                         first = i;
715                 }
716                 if (!strcmp(path + baselen, "pack"))
717                         seen_bits |= 1;
718                 else if (!strcmp(path + baselen, "idx"))
719                         seen_bits |= 2;
720         }
721         report_helper(list, seen_bits, first, list->nr);
722 }
723
724 static void prepare_packed_git_one(char *objdir, int local)
725 {
726         struct strbuf path = STRBUF_INIT;
727         size_t dirnamelen;
728         DIR *dir;
729         struct dirent *de;
730         struct string_list garbage = STRING_LIST_INIT_DUP;
731
732         strbuf_addstr(&path, objdir);
733         strbuf_addstr(&path, "/pack");
734         dir = opendir(path.buf);
735         if (!dir) {
736                 if (errno != ENOENT)
737                         error_errno("unable to open object pack directory: %s",
738                                     path.buf);
739                 strbuf_release(&path);
740                 return;
741         }
742         strbuf_addch(&path, '/');
743         dirnamelen = path.len;
744         while ((de = readdir(dir)) != NULL) {
745                 struct packed_git *p;
746                 size_t base_len;
747
748                 if (is_dot_or_dotdot(de->d_name))
749                         continue;
750
751                 strbuf_setlen(&path, dirnamelen);
752                 strbuf_addstr(&path, de->d_name);
753
754                 base_len = path.len;
755                 if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
756                         /* Don't reopen a pack we already have. */
757                         for (p = packed_git; p; p = p->next) {
758                                 size_t len;
759                                 if (strip_suffix(p->pack_name, ".pack", &len) &&
760                                     len == base_len &&
761                                     !memcmp(p->pack_name, path.buf, len))
762                                         break;
763                         }
764                         if (p == NULL &&
765                             /*
766                              * See if it really is a valid .idx file with
767                              * corresponding .pack file that we can map.
768                              */
769                             (p = add_packed_git(path.buf, path.len, local)) != NULL)
770                                 install_packed_git(p);
771                 }
772
773                 if (!report_garbage)
774                         continue;
775
776                 if (ends_with(de->d_name, ".idx") ||
777                     ends_with(de->d_name, ".pack") ||
778                     ends_with(de->d_name, ".bitmap") ||
779                     ends_with(de->d_name, ".keep"))
780                         string_list_append(&garbage, path.buf);
781                 else
782                         report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
783         }
784         closedir(dir);
785         report_pack_garbage(&garbage);
786         string_list_clear(&garbage, 0);
787         strbuf_release(&path);
788 }
789
790 static int approximate_object_count_valid;
791
792 /*
793  * Give a fast, rough count of the number of objects in the repository. This
794  * ignores loose objects completely. If you have a lot of them, then either
795  * you should repack because your performance will be awful, or they are
796  * all unreachable objects about to be pruned, in which case they're not really
797  * interesting as a measure of repo size in the first place.
798  */
799 unsigned long approximate_object_count(void)
800 {
801         static unsigned long count;
802         if (!approximate_object_count_valid) {
803                 struct packed_git *p;
804
805                 prepare_packed_git();
806                 count = 0;
807                 for (p = packed_git; p; p = p->next) {
808                         if (open_pack_index(p))
809                                 continue;
810                         count += p->num_objects;
811                 }
812         }
813         return count;
814 }
815
816 static void *get_next_packed_git(const void *p)
817 {
818         return ((const struct packed_git *)p)->next;
819 }
820
821 static void set_next_packed_git(void *p, void *next)
822 {
823         ((struct packed_git *)p)->next = next;
824 }
825
826 static int sort_pack(const void *a_, const void *b_)
827 {
828         const struct packed_git *a = a_;
829         const struct packed_git *b = b_;
830         int st;
831
832         /*
833          * Local packs tend to contain objects specific to our
834          * variant of the project than remote ones.  In addition,
835          * remote ones could be on a network mounted filesystem.
836          * Favor local ones for these reasons.
837          */
838         st = a->pack_local - b->pack_local;
839         if (st)
840                 return -st;
841
842         /*
843          * Younger packs tend to contain more recent objects,
844          * and more recent objects tend to get accessed more
845          * often.
846          */
847         if (a->mtime < b->mtime)
848                 return 1;
849         else if (a->mtime == b->mtime)
850                 return 0;
851         return -1;
852 }
853
854 static void rearrange_packed_git(void)
855 {
856         packed_git = llist_mergesort(packed_git, get_next_packed_git,
857                                      set_next_packed_git, sort_pack);
858 }
859
860 static void prepare_packed_git_mru(void)
861 {
862         struct packed_git *p;
863
864         mru_clear(packed_git_mru);
865         for (p = packed_git; p; p = p->next)
866                 mru_append(packed_git_mru, p);
867 }
868
869 static int prepare_packed_git_run_once = 0;
870 void prepare_packed_git(void)
871 {
872         struct alternate_object_database *alt;
873
874         if (prepare_packed_git_run_once)
875                 return;
876         prepare_packed_git_one(get_object_directory(), 1);
877         prepare_alt_odb();
878         for (alt = alt_odb_list; alt; alt = alt->next)
879                 prepare_packed_git_one(alt->path, 0);
880         rearrange_packed_git();
881         prepare_packed_git_mru();
882         prepare_packed_git_run_once = 1;
883 }
884
885 void reprepare_packed_git(void)
886 {
887         approximate_object_count_valid = 0;
888         prepare_packed_git_run_once = 0;
889         prepare_packed_git();
890 }
891
892 unsigned long unpack_object_header_buffer(const unsigned char *buf,
893                 unsigned long len, enum object_type *type, unsigned long *sizep)
894 {
895         unsigned shift;
896         unsigned long size, c;
897         unsigned long used = 0;
898
899         c = buf[used++];
900         *type = (c >> 4) & 7;
901         size = c & 15;
902         shift = 4;
903         while (c & 0x80) {
904                 if (len <= used || bitsizeof(long) <= shift) {
905                         error("bad object header");
906                         size = used = 0;
907                         break;
908                 }
909                 c = buf[used++];
910                 size += (c & 0x7f) << shift;
911                 shift += 7;
912         }
913         *sizep = size;
914         return used;
915 }
916
917 unsigned long get_size_from_delta(struct packed_git *p,
918                                   struct pack_window **w_curs,
919                                   off_t curpos)
920 {
921         const unsigned char *data;
922         unsigned char delta_head[20], *in;
923         git_zstream stream;
924         int st;
925
926         memset(&stream, 0, sizeof(stream));
927         stream.next_out = delta_head;
928         stream.avail_out = sizeof(delta_head);
929
930         git_inflate_init(&stream);
931         do {
932                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
933                 stream.next_in = in;
934                 st = git_inflate(&stream, Z_FINISH);
935                 curpos += stream.next_in - in;
936         } while ((st == Z_OK || st == Z_BUF_ERROR) &&
937                  stream.total_out < sizeof(delta_head));
938         git_inflate_end(&stream);
939         if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
940                 error("delta data unpack-initial failed");
941                 return 0;
942         }
943
944         /* Examine the initial part of the delta to figure out
945          * the result size.
946          */
947         data = delta_head;
948
949         /* ignore base size */
950         get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
951
952         /* Read the result size */
953         return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
954 }
955
956 int unpack_object_header(struct packed_git *p,
957                          struct pack_window **w_curs,
958                          off_t *curpos,
959                          unsigned long *sizep)
960 {
961         unsigned char *base;
962         unsigned long left;
963         unsigned long used;
964         enum object_type type;
965
966         /* use_pack() assures us we have [base, base + 20) available
967          * as a range that we can look at.  (Its actually the hash
968          * size that is assured.)  With our object header encoding
969          * the maximum deflated object size is 2^137, which is just
970          * insane, so we know won't exceed what we have been given.
971          */
972         base = use_pack(p, w_curs, *curpos, &left);
973         used = unpack_object_header_buffer(base, left, &type, sizep);
974         if (!used) {
975                 type = OBJ_BAD;
976         } else
977                 *curpos += used;
978
979         return type;
980 }
981
982 void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
983 {
984         unsigned i;
985         for (i = 0; i < p->num_bad_objects; i++)
986                 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
987                         return;
988         p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
989                                       st_mult(GIT_MAX_RAWSZ,
990                                               st_add(p->num_bad_objects, 1)));
991         hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
992         p->num_bad_objects++;
993 }
994
995 const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
996 {
997         struct packed_git *p;
998         unsigned i;
999
1000         for (p = packed_git; p; p = p->next)
1001                 for (i = 0; i < p->num_bad_objects; i++)
1002                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1003                                 return p;
1004         return NULL;
1005 }
1006
1007 static off_t get_delta_base(struct packed_git *p,
1008                                     struct pack_window **w_curs,
1009                                     off_t *curpos,
1010                                     enum object_type type,
1011                                     off_t delta_obj_offset)
1012 {
1013         unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1014         off_t base_offset;
1015
1016         /* use_pack() assured us we have [base_info, base_info + 20)
1017          * as a range that we can look at without walking off the
1018          * end of the mapped window.  Its actually the hash size
1019          * that is assured.  An OFS_DELTA longer than the hash size
1020          * is stupid, as then a REF_DELTA would be smaller to store.
1021          */
1022         if (type == OBJ_OFS_DELTA) {
1023                 unsigned used = 0;
1024                 unsigned char c = base_info[used++];
1025                 base_offset = c & 127;
1026                 while (c & 128) {
1027                         base_offset += 1;
1028                         if (!base_offset || MSB(base_offset, 7))
1029                                 return 0;  /* overflow */
1030                         c = base_info[used++];
1031                         base_offset = (base_offset << 7) + (c & 127);
1032                 }
1033                 base_offset = delta_obj_offset - base_offset;
1034                 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1035                         return 0;  /* out of bound */
1036                 *curpos += used;
1037         } else if (type == OBJ_REF_DELTA) {
1038                 /* The base entry _must_ be in the same pack */
1039                 base_offset = find_pack_entry_one(base_info, p);
1040                 *curpos += 20;
1041         } else
1042                 die("I am totally screwed");
1043         return base_offset;
1044 }
1045
1046 /*
1047  * Like get_delta_base above, but we return the sha1 instead of the pack
1048  * offset. This means it is cheaper for REF deltas (we do not have to do
1049  * the final object lookup), but more expensive for OFS deltas (we
1050  * have to load the revidx to convert the offset back into a sha1).
1051  */
1052 static const unsigned char *get_delta_base_sha1(struct packed_git *p,
1053                                                 struct pack_window **w_curs,
1054                                                 off_t curpos,
1055                                                 enum object_type type,
1056                                                 off_t delta_obj_offset)
1057 {
1058         if (type == OBJ_REF_DELTA) {
1059                 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
1060                 return base;
1061         } else if (type == OBJ_OFS_DELTA) {
1062                 struct revindex_entry *revidx;
1063                 off_t base_offset = get_delta_base(p, w_curs, &curpos,
1064                                                    type, delta_obj_offset);
1065
1066                 if (!base_offset)
1067                         return NULL;
1068
1069                 revidx = find_pack_revindex(p, base_offset);
1070                 if (!revidx)
1071                         return NULL;
1072
1073                 return nth_packed_object_sha1(p, revidx->nr);
1074         } else
1075                 return NULL;
1076 }
1077
1078 static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1079 {
1080         int type;
1081         struct revindex_entry *revidx;
1082         const unsigned char *sha1;
1083         revidx = find_pack_revindex(p, obj_offset);
1084         if (!revidx)
1085                 return OBJ_BAD;
1086         sha1 = nth_packed_object_sha1(p, revidx->nr);
1087         mark_bad_packed_object(p, sha1);
1088         type = sha1_object_info(sha1, NULL);
1089         if (type <= OBJ_NONE)
1090                 return OBJ_BAD;
1091         return type;
1092 }
1093
1094 #define POI_STACK_PREALLOC 64
1095
1096 static enum object_type packed_to_object_type(struct packed_git *p,
1097                                               off_t obj_offset,
1098                                               enum object_type type,
1099                                               struct pack_window **w_curs,
1100                                               off_t curpos)
1101 {
1102         off_t small_poi_stack[POI_STACK_PREALLOC];
1103         off_t *poi_stack = small_poi_stack;
1104         int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1105
1106         while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1107                 off_t base_offset;
1108                 unsigned long size;
1109                 /* Push the object we're going to leave behind */
1110                 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1111                         poi_stack_alloc = alloc_nr(poi_stack_nr);
1112                         ALLOC_ARRAY(poi_stack, poi_stack_alloc);
1113                         memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
1114                 } else {
1115                         ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
1116                 }
1117                 poi_stack[poi_stack_nr++] = obj_offset;
1118                 /* If parsing the base offset fails, just unwind */
1119                 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1120                 if (!base_offset)
1121                         goto unwind;
1122                 curpos = obj_offset = base_offset;
1123                 type = unpack_object_header(p, w_curs, &curpos, &size);
1124                 if (type <= OBJ_NONE) {
1125                         /* If getting the base itself fails, we first
1126                          * retry the base, otherwise unwind */
1127                         type = retry_bad_packed_offset(p, base_offset);
1128                         if (type > OBJ_NONE)
1129                                 goto out;
1130                         goto unwind;
1131                 }
1132         }
1133
1134         switch (type) {
1135         case OBJ_BAD:
1136         case OBJ_COMMIT:
1137         case OBJ_TREE:
1138         case OBJ_BLOB:
1139         case OBJ_TAG:
1140                 break;
1141         default:
1142                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1143                       type, (uintmax_t)obj_offset, p->pack_name);
1144                 type = OBJ_BAD;
1145         }
1146
1147 out:
1148         if (poi_stack != small_poi_stack)
1149                 free(poi_stack);
1150         return type;
1151
1152 unwind:
1153         while (poi_stack_nr) {
1154                 obj_offset = poi_stack[--poi_stack_nr];
1155                 type = retry_bad_packed_offset(p, obj_offset);
1156                 if (type > OBJ_NONE)
1157                         goto out;
1158         }
1159         type = OBJ_BAD;
1160         goto out;
1161 }
1162
1163 static struct hashmap delta_base_cache;
1164 static size_t delta_base_cached;
1165
1166 static LIST_HEAD(delta_base_cache_lru);
1167
1168 struct delta_base_cache_key {
1169         struct packed_git *p;
1170         off_t base_offset;
1171 };
1172
1173 struct delta_base_cache_entry {
1174         struct hashmap hash;
1175         struct delta_base_cache_key key;
1176         struct list_head lru;
1177         void *data;
1178         unsigned long size;
1179         enum object_type type;
1180 };
1181
1182 static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
1183 {
1184         unsigned int hash;
1185
1186         hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
1187         hash += (hash >> 8) + (hash >> 16);
1188         return hash;
1189 }
1190
1191 static struct delta_base_cache_entry *
1192 get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
1193 {
1194         struct hashmap_entry entry;
1195         struct delta_base_cache_key key;
1196
1197         if (!delta_base_cache.cmpfn)
1198                 return NULL;
1199
1200         hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
1201         key.p = p;
1202         key.base_offset = base_offset;
1203         return hashmap_get(&delta_base_cache, &entry, &key);
1204 }
1205
1206 static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
1207                                    const struct delta_base_cache_key *b)
1208 {
1209         return a->p == b->p && a->base_offset == b->base_offset;
1210 }
1211
1212 static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
1213                                      const void *va, const void *vb,
1214                                      const void *vkey)
1215 {
1216         const struct delta_base_cache_entry *a = va, *b = vb;
1217         const struct delta_base_cache_key *key = vkey;
1218         if (key)
1219                 return !delta_base_cache_key_eq(&a->key, key);
1220         else
1221                 return !delta_base_cache_key_eq(&a->key, &b->key);
1222 }
1223
1224 static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
1225 {
1226         return !!get_delta_base_cache_entry(p, base_offset);
1227 }
1228
1229 /*
1230  * Remove the entry from the cache, but do _not_ free the associated
1231  * entry data. The caller takes ownership of the "data" buffer, and
1232  * should copy out any fields it wants before detaching.
1233  */
1234 static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
1235 {
1236         hashmap_remove(&delta_base_cache, ent, &ent->key);
1237         list_del(&ent->lru);
1238         delta_base_cached -= ent->size;
1239         free(ent);
1240 }
1241
1242 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1243         unsigned long *base_size, enum object_type *type)
1244 {
1245         struct delta_base_cache_entry *ent;
1246
1247         ent = get_delta_base_cache_entry(p, base_offset);
1248         if (!ent)
1249                 return unpack_entry(p, base_offset, type, base_size);
1250
1251         if (type)
1252                 *type = ent->type;
1253         if (base_size)
1254                 *base_size = ent->size;
1255         return xmemdupz(ent->data, ent->size);
1256 }
1257
1258 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1259 {
1260         free(ent->data);
1261         detach_delta_base_cache_entry(ent);
1262 }
1263
1264 void clear_delta_base_cache(void)
1265 {
1266         struct list_head *lru, *tmp;
1267         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1268                 struct delta_base_cache_entry *entry =
1269                         list_entry(lru, struct delta_base_cache_entry, lru);
1270                 release_delta_base_cache(entry);
1271         }
1272 }
1273
1274 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1275         void *base, unsigned long base_size, enum object_type type)
1276 {
1277         struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
1278         struct list_head *lru, *tmp;
1279
1280         delta_base_cached += base_size;
1281
1282         list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
1283                 struct delta_base_cache_entry *f =
1284                         list_entry(lru, struct delta_base_cache_entry, lru);
1285                 if (delta_base_cached <= delta_base_cache_limit)
1286                         break;
1287                 release_delta_base_cache(f);
1288         }
1289
1290         ent->key.p = p;
1291         ent->key.base_offset = base_offset;
1292         ent->type = type;
1293         ent->data = base;
1294         ent->size = base_size;
1295         list_add_tail(&ent->lru, &delta_base_cache_lru);
1296
1297         if (!delta_base_cache.cmpfn)
1298                 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
1299         hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
1300         hashmap_add(&delta_base_cache, ent);
1301 }
1302
1303 int packed_object_info(struct packed_git *p, off_t obj_offset,
1304                        struct object_info *oi)
1305 {
1306         struct pack_window *w_curs = NULL;
1307         unsigned long size;
1308         off_t curpos = obj_offset;
1309         enum object_type type;
1310
1311         /*
1312          * We always get the representation type, but only convert it to
1313          * a "real" type later if the caller is interested.
1314          */
1315         if (oi->contentp) {
1316                 *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
1317                                                       &type);
1318                 if (!*oi->contentp)
1319                         type = OBJ_BAD;
1320         } else {
1321                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1322         }
1323
1324         if (!oi->contentp && oi->sizep) {
1325                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1326                         off_t tmp_pos = curpos;
1327                         off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1328                                                            type, obj_offset);
1329                         if (!base_offset) {
1330                                 type = OBJ_BAD;
1331                                 goto out;
1332                         }
1333                         *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1334                         if (*oi->sizep == 0) {
1335                                 type = OBJ_BAD;
1336                                 goto out;
1337                         }
1338                 } else {
1339                         *oi->sizep = size;
1340                 }
1341         }
1342
1343         if (oi->disk_sizep) {
1344                 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1345                 *oi->disk_sizep = revidx[1].offset - obj_offset;
1346         }
1347
1348         if (oi->typep || oi->typename) {
1349                 enum object_type ptot;
1350                 ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
1351                                              curpos);
1352                 if (oi->typep)
1353                         *oi->typep = ptot;
1354                 if (oi->typename) {
1355                         const char *tn = typename(ptot);
1356                         if (tn)
1357                                 strbuf_addstr(oi->typename, tn);
1358                 }
1359                 if (ptot < 0) {
1360                         type = OBJ_BAD;
1361                         goto out;
1362                 }
1363         }
1364
1365         if (oi->delta_base_sha1) {
1366                 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1367                         const unsigned char *base;
1368
1369                         base = get_delta_base_sha1(p, &w_curs, curpos,
1370                                                    type, obj_offset);
1371                         if (!base) {
1372                                 type = OBJ_BAD;
1373                                 goto out;
1374                         }
1375
1376                         hashcpy(oi->delta_base_sha1, base);
1377                 } else
1378                         hashclr(oi->delta_base_sha1);
1379         }
1380
1381         oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
1382                                                           OI_PACKED;
1383
1384 out:
1385         unuse_pack(&w_curs);
1386         return type;
1387 }
1388
1389 static void *unpack_compressed_entry(struct packed_git *p,
1390                                     struct pack_window **w_curs,
1391                                     off_t curpos,
1392                                     unsigned long size)
1393 {
1394         int st;
1395         git_zstream stream;
1396         unsigned char *buffer, *in;
1397
1398         buffer = xmallocz_gently(size);
1399         if (!buffer)
1400                 return NULL;
1401         memset(&stream, 0, sizeof(stream));
1402         stream.next_out = buffer;
1403         stream.avail_out = size + 1;
1404
1405         git_inflate_init(&stream);
1406         do {
1407                 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1408                 stream.next_in = in;
1409                 st = git_inflate(&stream, Z_FINISH);
1410                 if (!stream.avail_out)
1411                         break; /* the payload is larger than it should be */
1412                 curpos += stream.next_in - in;
1413         } while (st == Z_OK || st == Z_BUF_ERROR);
1414         git_inflate_end(&stream);
1415         if ((st != Z_STREAM_END) || stream.total_out != size) {
1416                 free(buffer);
1417                 return NULL;
1418         }
1419
1420         return buffer;
1421 }
1422
1423 static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
1424 {
1425         static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
1426         trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
1427                          p->pack_name, (uintmax_t)obj_offset);
1428 }
1429
1430 int do_check_packed_object_crc;
1431
1432 #define UNPACK_ENTRY_STACK_PREALLOC 64
1433 struct unpack_entry_stack_ent {
1434         off_t obj_offset;
1435         off_t curpos;
1436         unsigned long size;
1437 };
1438
1439 static void *read_object(const unsigned char *sha1, enum object_type *type,
1440                          unsigned long *size)
1441 {
1442         struct object_info oi = OBJECT_INFO_INIT;
1443         void *content;
1444         oi.typep = type;
1445         oi.sizep = size;
1446         oi.contentp = &content;
1447
1448         if (sha1_object_info_extended(sha1, &oi, 0) < 0)
1449                 return NULL;
1450         return content;
1451 }
1452
1453 void *unpack_entry(struct packed_git *p, off_t obj_offset,
1454                    enum object_type *final_type, unsigned long *final_size)
1455 {
1456         struct pack_window *w_curs = NULL;
1457         off_t curpos = obj_offset;
1458         void *data = NULL;
1459         unsigned long size;
1460         enum object_type type;
1461         struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
1462         struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
1463         int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
1464         int base_from_cache = 0;
1465
1466         write_pack_access_log(p, obj_offset);
1467
1468         /* PHASE 1: drill down to the innermost base object */
1469         for (;;) {
1470                 off_t base_offset;
1471                 int i;
1472                 struct delta_base_cache_entry *ent;
1473
1474                 ent = get_delta_base_cache_entry(p, curpos);
1475                 if (ent) {
1476                         type = ent->type;
1477                         data = ent->data;
1478                         size = ent->size;
1479                         detach_delta_base_cache_entry(ent);
1480                         base_from_cache = 1;
1481                         break;
1482                 }
1483
1484                 if (do_check_packed_object_crc && p->index_version > 1) {
1485                         struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1486                         off_t len = revidx[1].offset - obj_offset;
1487                         if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1488                                 const unsigned char *sha1 =
1489                                         nth_packed_object_sha1(p, revidx->nr);
1490                                 error("bad packed object CRC for %s",
1491                                       sha1_to_hex(sha1));
1492                                 mark_bad_packed_object(p, sha1);
1493                                 data = NULL;
1494                                 goto out;
1495                         }
1496                 }
1497
1498                 type = unpack_object_header(p, &w_curs, &curpos, &size);
1499                 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
1500                         break;
1501
1502                 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1503                 if (!base_offset) {
1504                         error("failed to validate delta base reference "
1505                               "at offset %"PRIuMAX" from %s",
1506                               (uintmax_t)curpos, p->pack_name);
1507                         /* bail to phase 2, in hopes of recovery */
1508                         data = NULL;
1509                         break;
1510                 }
1511
1512                 /* push object, proceed to base */
1513                 if (delta_stack_nr >= delta_stack_alloc
1514                     && delta_stack == small_delta_stack) {
1515                         delta_stack_alloc = alloc_nr(delta_stack_nr);
1516                         ALLOC_ARRAY(delta_stack, delta_stack_alloc);
1517                         memcpy(delta_stack, small_delta_stack,
1518                                sizeof(*delta_stack)*delta_stack_nr);
1519                 } else {
1520                         ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
1521                 }
1522                 i = delta_stack_nr++;
1523                 delta_stack[i].obj_offset = obj_offset;
1524                 delta_stack[i].curpos = curpos;
1525                 delta_stack[i].size = size;
1526
1527                 curpos = obj_offset = base_offset;
1528         }
1529
1530         /* PHASE 2: handle the base */
1531         switch (type) {
1532         case OBJ_OFS_DELTA:
1533         case OBJ_REF_DELTA:
1534                 if (data)
1535                         die("BUG: unpack_entry: left loop at a valid delta");
1536                 break;
1537         case OBJ_COMMIT:
1538         case OBJ_TREE:
1539         case OBJ_BLOB:
1540         case OBJ_TAG:
1541                 if (!base_from_cache)
1542                         data = unpack_compressed_entry(p, &w_curs, curpos, size);
1543                 break;
1544         default:
1545                 data = NULL;
1546                 error("unknown object type %i at offset %"PRIuMAX" in %s",
1547                       type, (uintmax_t)obj_offset, p->pack_name);
1548         }
1549
1550         /* PHASE 3: apply deltas in order */
1551
1552         /* invariants:
1553          *   'data' holds the base data, or NULL if there was corruption
1554          */
1555         while (delta_stack_nr) {
1556                 void *delta_data;
1557                 void *base = data;
1558                 void *external_base = NULL;
1559                 unsigned long delta_size, base_size = size;
1560                 int i;
1561
1562                 data = NULL;
1563
1564                 if (base)
1565                         add_delta_base_cache(p, obj_offset, base, base_size, type);
1566
1567                 if (!base) {
1568                         /*
1569                          * We're probably in deep shit, but let's try to fetch
1570                          * the required base anyway from another pack or loose.
1571                          * This is costly but should happen only in the presence
1572                          * of a corrupted pack, and is better than failing outright.
1573                          */
1574                         struct revindex_entry *revidx;
1575                         const unsigned char *base_sha1;
1576                         revidx = find_pack_revindex(p, obj_offset);
1577                         if (revidx) {
1578                                 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1579                                 error("failed to read delta base object %s"
1580                                       " at offset %"PRIuMAX" from %s",
1581                                       sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
1582                                       p->pack_name);
1583                                 mark_bad_packed_object(p, base_sha1);
1584                                 base = read_object(base_sha1, &type, &base_size);
1585                                 external_base = base;
1586                         }
1587                 }
1588
1589                 i = --delta_stack_nr;
1590                 obj_offset = delta_stack[i].obj_offset;
1591                 curpos = delta_stack[i].curpos;
1592                 delta_size = delta_stack[i].size;
1593
1594                 if (!base)
1595                         continue;
1596
1597                 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
1598
1599                 if (!delta_data) {
1600                         error("failed to unpack compressed delta "
1601                               "at offset %"PRIuMAX" from %s",
1602                               (uintmax_t)curpos, p->pack_name);
1603                         data = NULL;
1604                         free(external_base);
1605                         continue;
1606                 }
1607
1608                 data = patch_delta(base, base_size,
1609                                    delta_data, delta_size,
1610                                    &size);
1611
1612                 /*
1613                  * We could not apply the delta; warn the user, but keep going.
1614                  * Our failure will be noticed either in the next iteration of
1615                  * the loop, or if this is the final delta, in the caller when
1616                  * we return NULL. Those code paths will take care of making
1617                  * a more explicit warning and retrying with another copy of
1618                  * the object.
1619                  */
1620                 if (!data)
1621                         error("failed to apply delta");
1622
1623                 free(delta_data);
1624                 free(external_base);
1625         }
1626
1627         if (final_type)
1628                 *final_type = type;
1629         if (final_size)
1630                 *final_size = size;
1631
1632 out:
1633         unuse_pack(&w_curs);
1634
1635         if (delta_stack != small_delta_stack)
1636                 free(delta_stack);
1637
1638         return data;
1639 }
1640
1641 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1642                                             uint32_t n)
1643 {
1644         const unsigned char *index = p->index_data;
1645         if (!index) {
1646                 if (open_pack_index(p))
1647                         return NULL;
1648                 index = p->index_data;
1649         }
1650         if (n >= p->num_objects)
1651                 return NULL;
1652         index += 4 * 256;
1653         if (p->index_version == 1) {
1654                 return index + 24 * n + 4;
1655         } else {
1656                 index += 8;
1657                 return index + 20 * n;
1658         }
1659 }
1660
1661 const struct object_id *nth_packed_object_oid(struct object_id *oid,
1662                                               struct packed_git *p,
1663                                               uint32_t n)
1664 {
1665         const unsigned char *hash = nth_packed_object_sha1(p, n);
1666         if (!hash)
1667                 return NULL;
1668         hashcpy(oid->hash, hash);
1669         return oid;
1670 }
1671
1672 void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
1673 {
1674         const unsigned char *ptr = vptr;
1675         const unsigned char *start = p->index_data;
1676         const unsigned char *end = start + p->index_size;
1677         if (ptr < start)
1678                 die(_("offset before start of pack index for %s (corrupt index?)"),
1679                     p->pack_name);
1680         /* No need to check for underflow; .idx files must be at least 8 bytes */
1681         if (ptr >= end - 8)
1682                 die(_("offset beyond end of pack index for %s (truncated index?)"),
1683                     p->pack_name);
1684 }
1685
1686 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1687 {
1688         const unsigned char *index = p->index_data;
1689         index += 4 * 256;
1690         if (p->index_version == 1) {
1691                 return ntohl(*((uint32_t *)(index + 24 * n)));
1692         } else {
1693                 uint32_t off;
1694                 index += 8 + p->num_objects * (20 + 4);
1695                 off = ntohl(*((uint32_t *)(index + 4 * n)));
1696                 if (!(off & 0x80000000))
1697                         return off;
1698                 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1699                 check_pack_index_ptr(p, index);
1700                 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1701                                    ntohl(*((uint32_t *)(index + 4)));
1702         }
1703 }
1704
1705 off_t find_pack_entry_one(const unsigned char *sha1,
1706                                   struct packed_git *p)
1707 {
1708         const uint32_t *level1_ofs = p->index_data;
1709         const unsigned char *index = p->index_data;
1710         unsigned hi, lo, stride;
1711         static int debug_lookup = -1;
1712
1713         if (debug_lookup < 0)
1714                 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1715
1716         if (!index) {
1717                 if (open_pack_index(p))
1718                         return 0;
1719                 level1_ofs = p->index_data;
1720                 index = p->index_data;
1721         }
1722         if (p->index_version > 1) {
1723                 level1_ofs += 2;
1724                 index += 8;
1725         }
1726         index += 4 * 256;
1727         hi = ntohl(level1_ofs[*sha1]);
1728         lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1729         if (p->index_version > 1) {
1730                 stride = 20;
1731         } else {
1732                 stride = 24;
1733                 index += 4;
1734         }
1735
1736         if (debug_lookup)
1737                 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1738                        sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
1739
1740         while (lo < hi) {
1741                 unsigned mi = (lo + hi) / 2;
1742                 int cmp = hashcmp(index + mi * stride, sha1);
1743
1744                 if (debug_lookup)
1745                         printf("lo %u hi %u rg %u mi %u\n",
1746                                lo, hi, hi - lo, mi);
1747                 if (!cmp)
1748                         return nth_packed_object_offset(p, mi);
1749                 if (cmp > 0)
1750                         hi = mi;
1751                 else
1752                         lo = mi+1;
1753         }
1754         return 0;
1755 }
1756
1757 int is_pack_valid(struct packed_git *p)
1758 {
1759         /* An already open pack is known to be valid. */
1760         if (p->pack_fd != -1)
1761                 return 1;
1762
1763         /* If the pack has one window completely covering the
1764          * file size, the pack is known to be valid even if
1765          * the descriptor is not currently open.
1766          */
1767         if (p->windows) {
1768                 struct pack_window *w = p->windows;
1769
1770                 if (!w->offset && w->len == p->pack_size)
1771                         return 1;
1772         }
1773
1774         /* Force the pack to open to prove its valid. */
1775         return !open_packed_git(p);
1776 }
1777
1778 struct packed_git *find_sha1_pack(const unsigned char *sha1,
1779                                   struct packed_git *packs)
1780 {
1781         struct packed_git *p;
1782
1783         for (p = packs; p; p = p->next) {
1784                 if (find_pack_entry_one(sha1, p))
1785                         return p;
1786         }
1787         return NULL;
1788
1789 }
1790
1791 static int fill_pack_entry(const unsigned char *sha1,
1792                            struct pack_entry *e,
1793                            struct packed_git *p)
1794 {
1795         off_t offset;
1796
1797         if (p->num_bad_objects) {
1798                 unsigned i;
1799                 for (i = 0; i < p->num_bad_objects; i++)
1800                         if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1801                                 return 0;
1802         }
1803
1804         offset = find_pack_entry_one(sha1, p);
1805         if (!offset)
1806                 return 0;
1807
1808         /*
1809          * We are about to tell the caller where they can locate the
1810          * requested object.  We better make sure the packfile is
1811          * still here and can be accessed before supplying that
1812          * answer, as it may have been deleted since the index was
1813          * loaded!
1814          */
1815         if (!is_pack_valid(p))
1816                 return 0;
1817         e->offset = offset;
1818         e->p = p;
1819         hashcpy(e->sha1, sha1);
1820         return 1;
1821 }
1822
1823 /*
1824  * Iff a pack file contains the object named by sha1, return true and
1825  * store its location to e.
1826  */
1827 int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1828 {
1829         struct mru_entry *p;
1830
1831         prepare_packed_git();
1832         if (!packed_git)
1833                 return 0;
1834
1835         for (p = packed_git_mru->head; p; p = p->next) {
1836                 if (fill_pack_entry(sha1, e, p->item)) {
1837                         mru_mark(packed_git_mru, p);
1838                         return 1;
1839                 }
1840         }
1841         return 0;
1842 }
1843
1844 int has_sha1_pack(const unsigned char *sha1)
1845 {
1846         struct pack_entry e;
1847         return find_pack_entry(sha1, &e);
1848 }
1849
1850 int has_pack_index(const unsigned char *sha1)
1851 {
1852         struct stat st;
1853         if (stat(sha1_pack_index_name(sha1), &st))
1854                 return 0;
1855         return 1;
1856 }
1857
1858 static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
1859 {
1860         uint32_t i;
1861         int r = 0;
1862
1863         for (i = 0; i < p->num_objects; i++) {
1864                 struct object_id oid;
1865
1866                 if (!nth_packed_object_oid(&oid, p, i))
1867                         return error("unable to get sha1 of object %u in %s",
1868                                      i, p->pack_name);
1869
1870                 r = cb(&oid, p, i, data);
1871                 if (r)
1872                         break;
1873         }
1874         return r;
1875 }
1876
1877 int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
1878 {
1879         struct packed_git *p;
1880         int r = 0;
1881         int pack_errors = 0;
1882
1883         prepare_packed_git();
1884         for (p = packed_git; p; p = p->next) {
1885                 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
1886                         continue;
1887                 if (open_pack_index(p)) {
1888                         pack_errors = 1;
1889                         continue;
1890                 }
1891                 r = for_each_object_in_pack(p, cb, data);
1892                 if (r)
1893                         break;
1894         }
1895         return r ? r : pack_errors;
1896 }