builtin/pack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "repository.h"
4 #include "config.h"
5 #include "attr.h"
6 #include "object.h"
7 #include "blob.h"
8 #include "commit.h"
9 #include "tag.h"
10 #include "tree.h"
11 #include "delta.h"
12 #include "pack.h"
13 #include "pack-revindex.h"
14 #include "csum-file.h"
15 #include "tree-walk.h"
16 #include "diff.h"
17 #include "revision.h"
18 #include "list-objects.h"
19 #include "list-objects-filter.h"
20 #include "list-objects-filter-options.h"
21 #include "pack-objects.h"
22 #include "progress.h"
23 #include "refs.h"
24 #include "streaming.h"
25 #include "thread-utils.h"
26 #include "pack-bitmap.h"
27 #include "delta-islands.h"
28 #include "reachable.h"
29 #include "oid-array.h"
30 #include "strvec.h"
31 #include "list.h"
32 #include "packfile.h"
33 #include "object-store.h"
34 #include "dir.h"
35 #include "midx.h"
36 #include "trace2.h"
37 #include "shallow.h"
38 #include "promisor-remote.h"
39
40 /*
41  * Objects we are going to pack are collected in the `to_pack` structure.
42  * It contains an array (dynamically expanded) of the object data, and a map
43  * that can resolve SHA1s to their position in the array.
44  */
45 static struct packing_data to_pack;
46
47 static inline struct object_entry *oe_delta(
48                 const struct packing_data *pack,
49                 const struct object_entry *e)
50 {
51         if (!e->delta_idx)
52                 return NULL;
53         if (e->ext_base)
54                 return &pack->ext_bases[e->delta_idx - 1];
55         else
56                 return &pack->objects[e->delta_idx - 1];
57 }
58
59 static inline unsigned long oe_delta_size(struct packing_data *pack,
60                                           const struct object_entry *e)
61 {
62         if (e->delta_size_valid)
63                 return e->delta_size_;
64
65         /*
66          * pack->delta_size[] can't be NULL because oe_set_delta_size()
67          * must have been called when a new delta is saved with
68          * oe_set_delta().
69          * If oe_delta() returns NULL (i.e. default state, which means
70          * delta_size_valid is also false), then the caller must never
71          * call oe_delta_size().
72          */
73         return pack->delta_size[e - pack->objects];
74 }
75
76 unsigned long oe_get_size_slow(struct packing_data *pack,
77                                const struct object_entry *e);
78
79 static inline unsigned long oe_size(struct packing_data *pack,
80                                     const struct object_entry *e)
81 {
82         if (e->size_valid)
83                 return e->size_;
84
85         return oe_get_size_slow(pack, e);
86 }
87
88 static inline void oe_set_delta(struct packing_data *pack,
89                                 struct object_entry *e,
90                                 struct object_entry *delta)
91 {
92         if (delta)
93                 e->delta_idx = (delta - pack->objects) + 1;
94         else
95                 e->delta_idx = 0;
96 }
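/*
 * Note: delta_idx and the other *_idx fields used below are 1-based; 0 is
 * reserved to mean "none", so a zero-initialized object_entry is already
 * in the no-delta state. oe_set_delta() stores (delta - pack->objects) + 1
 * and readers such as oe_delta() undo the bias before indexing.
 */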
97
98 static inline struct object_entry *oe_delta_sibling(
99                 const struct packing_data *pack,
100                 const struct object_entry *e)
101 {
102         if (e->delta_sibling_idx)
103                 return &pack->objects[e->delta_sibling_idx - 1];
104         return NULL;
105 }
106
107 static inline struct object_entry *oe_delta_child(
108                 const struct packing_data *pack,
109                 const struct object_entry *e)
110 {
111         if (e->delta_child_idx)
112                 return &pack->objects[e->delta_child_idx - 1];
113         return NULL;
114 }
115
116 static inline void oe_set_delta_child(struct packing_data *pack,
117                                       struct object_entry *e,
118                                       struct object_entry *delta)
119 {
120         if (delta)
121                 e->delta_child_idx = (delta - pack->objects) + 1;
122         else
123                 e->delta_child_idx = 0;
124 }
125
126 static inline void oe_set_delta_sibling(struct packing_data *pack,
127                                         struct object_entry *e,
128                                         struct object_entry *delta)
129 {
130         if (delta)
131                 e->delta_sibling_idx = (delta - pack->objects) + 1;
132         else
133                 e->delta_sibling_idx = 0;
134 }
135
136 static inline void oe_set_size(struct packing_data *pack,
137                                struct object_entry *e,
138                                unsigned long size)
139 {
140         if (size < pack->oe_size_limit) {
141                 e->size_ = size;
142                 e->size_valid = 1;
143         } else {
144                 e->size_valid = 0;
145                 if (oe_get_size_slow(pack, e) != size)
146                         BUG("'size' is supposed to be the object size!");
147         }
148 }
149
150 static inline void oe_set_delta_size(struct packing_data *pack,
151                                      struct object_entry *e,
152                                      unsigned long size)
153 {
154         if (size < pack->oe_delta_size_limit) {
155                 e->delta_size_ = size;
156                 e->delta_size_valid = 1;
157         } else {
158                 packing_data_lock(pack);
159                 if (!pack->delta_size)
160                         ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
161                 packing_data_unlock(pack);
162
163                 pack->delta_size[e - pack->objects] = size;
164                 e->delta_size_valid = 0;
165         }
166 }
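/*
 * A note on the size-caching scheme shared by oe_set_size() and
 * oe_set_delta_size(): object sizes live in narrow bit fields of struct
 * object_entry to keep it small. A value below the corresponding limit
 * fits in the bit field and the *_valid flag is set; a larger object size
 * leaves size_valid at 0 and is recomputed on demand through
 * oe_get_size_slow(), while a larger delta size goes into the lazily
 * allocated pack->delta_size[] side array. That allocation happens under
 * the pack lock because delta search may run in multiple threads.
 */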
167
168 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
169 #define SIZE(obj) oe_size(&to_pack, obj)
170 #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
171 #define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
172 #define DELTA(obj) oe_delta(&to_pack, obj)
173 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
174 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
175 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
176 #define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
177 #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
178 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
179 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
180
181 static const char *pack_usage[] = {
182         N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
183         N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
184         NULL
185 };
186
187 static struct pack_idx_entry **written_list;
188 static uint32_t nr_result, nr_written, nr_seen;
189 static struct bitmap_index *bitmap_git;
190 static uint32_t write_layer;
191
192 static int non_empty;
193 static int reuse_delta = 1, reuse_object = 1;
194 static int keep_unreachable, unpack_unreachable, include_tag;
195 static timestamp_t unpack_unreachable_expiration;
196 static int pack_loose_unreachable;
197 static int local;
198 static int have_non_local_packs;
199 static int incremental;
200 static int ignore_packed_keep_on_disk;
201 static int ignore_packed_keep_in_core;
202 static int allow_ofs_delta;
203 static struct pack_idx_option pack_idx_opts;
204 static const char *base_name;
205 static int progress = 1;
206 static int window = 10;
207 static unsigned long pack_size_limit;
208 static int depth = 50;
209 static int delta_search_threads;
210 static int pack_to_stdout;
211 static int sparse;
212 static int thin;
213 static int num_preferred_base;
214 static struct progress *progress_state;
215
216 static struct packed_git *reuse_packfile;
217 static uint32_t reuse_packfile_objects;
218 static struct bitmap *reuse_packfile_bitmap;
219
220 static int use_bitmap_index_default = 1;
221 static int use_bitmap_index = -1;
222 static int allow_pack_reuse = 1;
223 static enum {
224         WRITE_BITMAP_FALSE = 0,
225         WRITE_BITMAP_QUIET,
226         WRITE_BITMAP_TRUE,
227 } write_bitmap_index;
228 static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;
229
230 static int exclude_promisor_objects;
231
232 static int use_delta_islands;
233
234 static unsigned long delta_cache_size = 0;
235 static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
236 static unsigned long cache_max_small_delta_size = 1000;
237
238 static unsigned long window_memory_limit = 0;
239
240 static struct list_objects_filter_options filter_options;
241
242 static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;
243
244 enum missing_action {
245         MA_ERROR = 0,      /* fail if any missing objects are encountered */
246         MA_ALLOW_ANY,      /* silently allow ALL missing objects */
247         MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
248 };
249 static enum missing_action arg_missing_action;
250 static show_object_fn fn_show_object;
251
252 struct configured_exclusion {
253         struct oidmap_entry e;
254         char *pack_hash_hex;
255         char *uri;
256 };
257 static struct oidmap configured_exclusions;
258
259 static struct oidset excluded_by_config;
260
261 /*
262  * stats
263  */
264 static uint32_t written, written_delta;
265 static uint32_t reused, reused_delta;
266
267 /*
268  * Indexed commits
269  */
270 static struct commit **indexed_commits;
271 static unsigned int indexed_commits_nr;
272 static unsigned int indexed_commits_alloc;
273
274 static void index_commit_for_bitmap(struct commit *commit)
275 {
276         if (indexed_commits_nr >= indexed_commits_alloc) {
277                 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
278                 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
279         }
280
281         indexed_commits[indexed_commits_nr++] = commit;
282 }
283
284 static void *get_delta(struct object_entry *entry)
285 {
286         unsigned long size, base_size, delta_size;
287         void *buf, *base_buf, *delta_buf;
288         enum object_type type;
289
290         buf = read_object_file(&entry->idx.oid, &type, &size);
291         if (!buf)
292                 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
293         base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
294                                     &base_size);
295         if (!base_buf)
296                 die("unable to read %s",
297                     oid_to_hex(&DELTA(entry)->idx.oid));
298         delta_buf = diff_delta(base_buf, base_size,
299                                buf, size, &delta_size, 0);
300         /*
301          * We successfully computed this delta once but dropped it for
302          * memory reasons. Something is very wrong if this time we
303          * recompute and create a different delta.
304          */
305         if (!delta_buf || delta_size != DELTA_SIZE(entry))
306                 BUG("delta size changed");
307         free(buf);
308         free(base_buf);
309         return delta_buf;
310 }
311
312 static unsigned long do_compress(void **pptr, unsigned long size)
313 {
314         git_zstream stream;
315         void *in, *out;
316         unsigned long maxsize;
317
318         git_deflate_init(&stream, pack_compression_level);
319         maxsize = git_deflate_bound(&stream, size);
320
321         in = *pptr;
322         out = xmalloc(maxsize);
323         *pptr = out;
324
325         stream.next_in = in;
326         stream.avail_in = size;
327         stream.next_out = out;
328         stream.avail_out = maxsize;
329         while (git_deflate(&stream, Z_FINISH) == Z_OK)
330                 ; /* nothing */
331         git_deflate_end(&stream);
332
333         free(in);
334         return stream.total_out;
335 }
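/*
 * Usage note: do_compress() takes ownership of the buffer passed in via
 * *pptr, frees it, and replaces it with a freshly allocated buffer of
 * deflated data sized pessimistically via git_deflate_bound(). The return
 * value is the number of compressed bytes actually produced, which may be
 * (and usually is) smaller than that allocation.
 */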
336
337 static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
338                                            const struct object_id *oid)
339 {
340         git_zstream stream;
341         unsigned char ibuf[1024 * 16];
342         unsigned char obuf[1024 * 16];
343         unsigned long olen = 0;
344
345         git_deflate_init(&stream, pack_compression_level);
346
347         for (;;) {
348                 ssize_t readlen;
349                 int zret = Z_OK;
350                 readlen = read_istream(st, ibuf, sizeof(ibuf));
351                 if (readlen == -1)
352                         die(_("unable to read %s"), oid_to_hex(oid));
353
354                 stream.next_in = ibuf;
355                 stream.avail_in = readlen;
356                 while ((stream.avail_in || readlen == 0) &&
357                        (zret == Z_OK || zret == Z_BUF_ERROR)) {
358                         stream.next_out = obuf;
359                         stream.avail_out = sizeof(obuf);
360                         zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
361                         hashwrite(f, obuf, stream.next_out - obuf);
362                         olen += stream.next_out - obuf;
363                 }
364                 if (stream.avail_in)
365                         die(_("deflate error (%d)"), zret);
366                 if (readlen == 0) {
367                         if (zret != Z_STREAM_END)
368                                 die(_("deflate error (%d)"), zret);
369                         break;
370                 }
371         }
372         git_deflate_end(&stream);
373         return olen;
374 }
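/*
 * This streaming path lets blobs above core.bigFileThreshold be packed
 * without ever being held in memory whole: input is pulled through the
 * git_istream in 16kB chunks and deflated directly into the pack, with
 * Z_FINISH passed only once the stream reports EOF (readlen == 0).
 */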
375
376 /*
377  * we are going to reuse the existing object data as is.  make
378  * sure it is not corrupt.
379  */
380 static int check_pack_inflate(struct packed_git *p,
381                 struct pack_window **w_curs,
382                 off_t offset,
383                 off_t len,
384                 unsigned long expect)
385 {
386         git_zstream stream;
387         unsigned char fakebuf[4096], *in;
388         int st;
389
390         memset(&stream, 0, sizeof(stream));
391         git_inflate_init(&stream);
392         do {
393                 in = use_pack(p, w_curs, offset, &stream.avail_in);
394                 stream.next_in = in;
395                 stream.next_out = fakebuf;
396                 stream.avail_out = sizeof(fakebuf);
397                 st = git_inflate(&stream, Z_FINISH);
398                 offset += stream.next_in - in;
399         } while (st == Z_OK || st == Z_BUF_ERROR);
400         git_inflate_end(&stream);
401         return (st == Z_STREAM_END &&
402                 stream.total_out == expect &&
403                 stream.total_in == len) ? 0 : -1;
404 }
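/*
 * fakebuf above is pure scratch space: the inflated bytes are discarded,
 * and only the final zlib status plus the total_in/total_out counters are
 * checked against the expected compressed and uncompressed lengths.
 */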
405
406 static void copy_pack_data(struct hashfile *f,
407                 struct packed_git *p,
408                 struct pack_window **w_curs,
409                 off_t offset,
410                 off_t len)
411 {
412         unsigned char *in;
413         unsigned long avail;
414
415         while (len) {
416                 in = use_pack(p, w_curs, offset, &avail);
417                 if (avail > len)
418                         avail = (unsigned long)len;
419                 hashwrite(f, in, avail);
420                 offset += avail;
421                 len -= avail;
422         }
423 }
424
425 static inline int oe_size_greater_than(struct packing_data *pack,
426                                        const struct object_entry *lhs,
427                                        unsigned long rhs)
428 {
429         if (lhs->size_valid)
430                 return lhs->size_ > rhs;
431         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
432                 return 1;
433         return oe_get_size_slow(pack, lhs) > rhs;
434 }
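/*
 * The early return above works because a size that did not fit in the
 * bit field is by definition >= pack->oe_size_limit; so whenever rhs is
 * below that limit, lhs must be greater without consulting the slow
 * path. For an illustrative limit of 4096: any entry with size_valid == 0
 * is known to beat an rhs of 4095 or less.
 */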
435
436 /* Return 0 if we will bust the pack-size limit */
437 static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
438                                            unsigned long limit, int usable_delta)
439 {
440         unsigned long size, datalen;
441         unsigned char header[MAX_PACK_OBJECT_HEADER],
442                       dheader[MAX_PACK_OBJECT_HEADER];
443         unsigned hdrlen;
444         enum object_type type;
445         void *buf;
446         struct git_istream *st = NULL;
447         const unsigned hashsz = the_hash_algo->rawsz;
448
449         if (!usable_delta) {
450                 if (oe_type(entry) == OBJ_BLOB &&
451                     oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
452                     (st = open_istream(the_repository, &entry->idx.oid, &type,
453                                        &size, NULL)) != NULL)
454                         buf = NULL;
455                 else {
456                         buf = read_object_file(&entry->idx.oid, &type, &size);
457                         if (!buf)
458                                 die(_("unable to read %s"),
459                                     oid_to_hex(&entry->idx.oid));
460                 }
461                 /*
462                  * make sure no cached delta data remains from a
463                  * previous attempt before a pack split occurred.
464                  */
465                 FREE_AND_NULL(entry->delta_data);
466                 entry->z_delta_size = 0;
467         } else if (entry->delta_data) {
468                 size = DELTA_SIZE(entry);
469                 buf = entry->delta_data;
470                 entry->delta_data = NULL;
471                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
472                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
473         } else {
474                 buf = get_delta(entry);
475                 size = DELTA_SIZE(entry);
476                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
477                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
478         }
479
480         if (st) /* large blob case, just assume we don't compress well */
481                 datalen = size;
482         else if (entry->z_delta_size)
483                 datalen = entry->z_delta_size;
484         else
485                 datalen = do_compress(&buf, size);
486
487         /*
488          * The object header is a byte of 'type' followed by zero or
489          * more bytes of length.
490          */
491         hdrlen = encode_in_pack_object_header(header, sizeof(header),
492                                               type, size);
493
494         if (type == OBJ_OFS_DELTA) {
495                 /*
496                  * Deltas with relative base contain an additional
497                  * encoding of the relative offset for the delta
498                  * base from this object's position in the pack.
499                  */
500                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
501                 unsigned pos = sizeof(dheader) - 1;
502                 dheader[pos] = ofs & 127;
503                 while (ofs >>= 7)
504                         dheader[--pos] = 128 | (--ofs & 127);
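                /*
                 * A worked example of the encoding above: ofs = 1000
                 * first emits 1000 & 127 = 104 as the last byte; the
                 * shift leaves 7, which is pre-decremented to 6 and
                 * emitted as 128 | 6 = 0x86. The bytes 0x86 0x68 decode
                 * back as ((6 + 1) << 7) + 104 = 1000; the +1 bias per
                 * continuation byte is what the "--ofs" accounts for.
                 */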
505                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
506                         if (st)
507                                 close_istream(st);
508                         free(buf);
509                         return 0;
510                 }
511                 hashwrite(f, header, hdrlen);
512                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
513                 hdrlen += sizeof(dheader) - pos;
514         } else if (type == OBJ_REF_DELTA) {
515                 /*
516                  * Deltas with a base reference contain
517                  * additional bytes for the base object ID.
518                  */
519                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
520                         if (st)
521                                 close_istream(st);
522                         free(buf);
523                         return 0;
524                 }
525                 hashwrite(f, header, hdrlen);
526                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
527                 hdrlen += hashsz;
528         } else {
529                 if (limit && hdrlen + datalen + hashsz >= limit) {
530                         if (st)
531                                 close_istream(st);
532                         free(buf);
533                         return 0;
534                 }
535                 hashwrite(f, header, hdrlen);
536         }
537         if (st) {
538                 datalen = write_large_blob_data(st, f, &entry->idx.oid);
539                 close_istream(st);
540         } else {
541                 hashwrite(f, buf, datalen);
542                 free(buf);
543         }
544
545         return hdrlen + datalen;
546 }
547
548 /* Return 0 if we will bust the pack-size limit */
549 static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
550                                 unsigned long limit, int usable_delta)
551 {
552         struct packed_git *p = IN_PACK(entry);
553         struct pack_window *w_curs = NULL;
554         uint32_t pos;
555         off_t offset;
556         enum object_type type = oe_type(entry);
557         off_t datalen;
558         unsigned char header[MAX_PACK_OBJECT_HEADER],
559                       dheader[MAX_PACK_OBJECT_HEADER];
560         unsigned hdrlen;
561         const unsigned hashsz = the_hash_algo->rawsz;
562         unsigned long entry_size = SIZE(entry);
563
564         if (DELTA(entry))
565                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
566                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
567         hdrlen = encode_in_pack_object_header(header, sizeof(header),
568                                               type, entry_size);
569
570         offset = entry->in_pack_offset;
571         if (offset_to_pack_pos(p, offset, &pos) < 0)
572                 die(_("write_reuse_object: could not locate %s, expected at "
573                       "offset %"PRIuMAX" in pack %s"),
574                     oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
575                     p->pack_name);
576         datalen = pack_pos_to_offset(p, pos + 1) - offset;
577         if (!pack_to_stdout && p->index_version > 1 &&
578             check_pack_crc(p, &w_curs, offset, datalen,
579                            pack_pos_to_index(p, pos))) {
580                 error(_("bad packed object CRC for %s"),
581                       oid_to_hex(&entry->idx.oid));
582                 unuse_pack(&w_curs);
583                 return write_no_reuse_object(f, entry, limit, usable_delta);
584         }
585
586         offset += entry->in_pack_header_size;
587         datalen -= entry->in_pack_header_size;
588
589         if (!pack_to_stdout && p->index_version == 1 &&
590             check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
591                 error(_("corrupt packed object for %s"),
592                       oid_to_hex(&entry->idx.oid));
593                 unuse_pack(&w_curs);
594                 return write_no_reuse_object(f, entry, limit, usable_delta);
595         }
596
597         if (type == OBJ_OFS_DELTA) {
598                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
599                 unsigned pos = sizeof(dheader) - 1;
600                 dheader[pos] = ofs & 127;
601                 while (ofs >>= 7)
602                         dheader[--pos] = 128 | (--ofs & 127);
603                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
604                         unuse_pack(&w_curs);
605                         return 0;
606                 }
607                 hashwrite(f, header, hdrlen);
608                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
609                 hdrlen += sizeof(dheader) - pos;
610                 reused_delta++;
611         } else if (type == OBJ_REF_DELTA) {
612                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
613                         unuse_pack(&w_curs);
614                         return 0;
615                 }
616                 hashwrite(f, header, hdrlen);
617                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
618                 hdrlen += hashsz;
619                 reused_delta++;
620         } else {
621                 if (limit && hdrlen + datalen + hashsz >= limit) {
622                         unuse_pack(&w_curs);
623                         return 0;
624                 }
625                 hashwrite(f, header, hdrlen);
626         }
627         copy_pack_data(f, p, &w_curs, offset, datalen);
628         unuse_pack(&w_curs);
629         reused++;
630         return hdrlen + datalen;
631 }
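/*
 * To recap the reuse path: the already-compressed bytes are copied
 * verbatim from the source pack after an integrity check that depends on
 * the index version (a CRC32 comparison via check_pack_crc() for v2
 * indexes; a full trial inflate via check_pack_inflate() for v1 indexes,
 * which store no CRCs). Any sign of corruption falls back to
 * write_no_reuse_object() instead of propagating bad data.
 */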
632
633 /* Return 0 if we will bust the pack-size limit */
634 static off_t write_object(struct hashfile *f,
635                           struct object_entry *entry,
636                           off_t write_offset)
637 {
638         unsigned long limit;
639         off_t len;
640         int usable_delta, to_reuse;
641
642         if (!pack_to_stdout)
643                 crc32_begin(f);
644
645         /* apply size limit if limited packsize and not first object */
646         if (!pack_size_limit || !nr_written)
647                 limit = 0;
648         else if (pack_size_limit <= write_offset)
649                 /*
650                  * the earlier object did not fit the limit; avoid
651                  * the earlier objects did not fit the limit; avoid
652                  * mistaking this for unlimited (i.e. limit = 0).
653                 limit = 1;
654         else
655                 limit = pack_size_limit - write_offset;
656
657         if (!DELTA(entry))
658                 usable_delta = 0;       /* no delta */
659         else if (!pack_size_limit)
660                usable_delta = 1;        /* unlimited packfile */
661         else if (DELTA(entry)->idx.offset == (off_t)-1)
662                 usable_delta = 0;       /* base was written to another pack */
663         else if (DELTA(entry)->idx.offset)
664                 usable_delta = 1;       /* base already exists in this pack */
665         else
666                 usable_delta = 0;       /* base could end up in another pack */
667
668         if (!reuse_object)
669                 to_reuse = 0;   /* explicit */
670         else if (!IN_PACK(entry))
671                 to_reuse = 0;   /* can't reuse what we don't have */
672         else if (oe_type(entry) == OBJ_REF_DELTA ||
673                  oe_type(entry) == OBJ_OFS_DELTA)
674                                 /* check_object() decided it for us ... */
675                 to_reuse = usable_delta;
676                                 /* ... but pack split may override that */
677         else if (oe_type(entry) != entry->in_pack_type)
678                 to_reuse = 0;   /* pack has delta which is unusable */
679         else if (DELTA(entry))
680                 to_reuse = 0;   /* we want to pack afresh */
681         else
682                 to_reuse = 1;   /* we have it in-pack undeltified,
683                                  * and we do not need to deltify it.
684                                  */
685
686         if (!to_reuse)
687                 len = write_no_reuse_object(f, entry, limit, usable_delta);
688         else
689                 len = write_reuse_object(f, entry, limit, usable_delta);
690         if (!len)
691                 return 0;
692
693         if (usable_delta)
694                 written_delta++;
695         written++;
696         if (!pack_to_stdout)
697                 entry->idx.crc32 = crc32_end(f);
698         return len;
699 }
700
701 enum write_one_status {
702         WRITE_ONE_SKIP = -1, /* already written */
703         WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
704         WRITE_ONE_WRITTEN = 1, /* normal */
705         WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
706 };
707
708 static enum write_one_status write_one(struct hashfile *f,
709                                        struct object_entry *e,
710                                        off_t *offset)
711 {
712         off_t size;
713         int recursing;
714
715         /*
716          * we set offset to 1 (which is an impossible value) to mark
717          * the fact that this object is involved in "write its base
718          * first before writing a deltified object" recursion.
719          */
720         recursing = (e->idx.offset == 1);
721         if (recursing) {
722                 warning(_("recursive delta detected for object %s"),
723                         oid_to_hex(&e->idx.oid));
724                 return WRITE_ONE_RECURSIVE;
725         } else if (e->idx.offset || e->preferred_base) {
726                 /* offset is non zero if object is written already. */
727                 return WRITE_ONE_SKIP;
728         }
729
730         /* if we are deltified, write out base object first. */
731         if (DELTA(e)) {
732                 e->idx.offset = 1; /* now recurse */
733                 switch (write_one(f, DELTA(e), offset)) {
734                 case WRITE_ONE_RECURSIVE:
735                         /* we cannot depend on this one */
736                         SET_DELTA(e, NULL);
737                         break;
738                 default:
739                         break;
740                 case WRITE_ONE_BREAK:
741                         e->idx.offset = recursing;
742                         return WRITE_ONE_BREAK;
743                 }
744         }
745
746         e->idx.offset = *offset;
747         size = write_object(f, e, *offset);
748         if (!size) {
749                 e->idx.offset = recursing;
750                 return WRITE_ONE_BREAK;
751         }
752         written_list[nr_written++] = &e->idx;
753
754         /* make sure off_t is sufficiently large not to wrap */
755         if (signed_add_overflows(*offset, size))
756                 die(_("pack too large for current definition of off_t"));
757         *offset += size;
758         return WRITE_ONE_WRITTEN;
759 }
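/*
 * A small scenario for the recursion guard: if A is a delta against B,
 * write_one(A) sets A->idx.offset = 1 and recurses to write B first.
 * Should the delta chain somehow loop back to A, the re-entered call sees
 * the sentinel offset 1 (impossible for a real object, since the pack
 * header occupies the first bytes), returns WRITE_ONE_RECURSIVE, and the
 * caller breaks the cycle with SET_DELTA(e, NULL), writing its object in
 * full instead.
 */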
760
761 static int mark_tagged(const char *path, const struct object_id *oid, int flag,
762                        void *cb_data)
763 {
764         struct object_id peeled;
765         struct object_entry *entry = packlist_find(&to_pack, oid);
766
767         if (entry)
768                 entry->tagged = 1;
769         if (!peel_iterated_oid(oid, &peeled)) {
770                 entry = packlist_find(&to_pack, &peeled);
771                 if (entry)
772                         entry->tagged = 1;
773         }
774         return 0;
775 }
776
777 static inline unsigned char oe_layer(struct packing_data *pack,
778                                      struct object_entry *e)
779 {
780         if (!pack->layer)
781                 return 0;
782         return pack->layer[e - pack->objects];
783 }
784
785 static inline void add_to_write_order(struct object_entry **wo,
786                                unsigned int *endp,
787                                struct object_entry *e)
788 {
789         if (e->filled || oe_layer(&to_pack, e) != write_layer)
790                 return;
791         wo[(*endp)++] = e;
792         e->filled = 1;
793 }
794
795 static void add_descendants_to_write_order(struct object_entry **wo,
796                                            unsigned int *endp,
797                                            struct object_entry *e)
798 {
799         int add_to_order = 1;
800         while (e) {
801                 if (add_to_order) {
802                         struct object_entry *s;
803                         /* add this node... */
804                         add_to_write_order(wo, endp, e);
805                         /* all its siblings... */
806                         for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
807                                 add_to_write_order(wo, endp, s);
808                         }
809                 }
810                 /* drop down a level to add left subtree nodes if possible */
811                 if (DELTA_CHILD(e)) {
812                         add_to_order = 1;
813                         e = DELTA_CHILD(e);
814                 } else {
815                         add_to_order = 0;
816                         /* our sibling might have some children, it is next */
817                         if (DELTA_SIBLING(e)) {
818                                 e = DELTA_SIBLING(e);
819                                 continue;
820                         }
821                         /* go back to our parent node */
822                         e = DELTA(e);
823                         while (e && !DELTA_SIBLING(e)) {
824                                 /* we're on the right side of a subtree, keep
825                                  * going up until we can go right again */
826                                 e = DELTA(e);
827                         }
828                         if (!e) {
829                                 /* done- we hit our original root node */
830                                 return;
831                         }
832                         /* pass it off to sibling at this level */
833                         e = DELTA_SIBLING(e);
834                 }
835         }
836 }
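/*
 * The loop above is an iterative pre-order traversal of a delta family
 * tree that needs no explicit stack: it descends via DELTA_CHILD, moves
 * across via DELTA_SIBLING, and climbs back through DELTA() until it
 * finds an ancestor with an unvisited sibling, stopping once it
 * re-reaches the root.
 */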
837
838 static void add_family_to_write_order(struct object_entry **wo,
839                                       unsigned int *endp,
840                                       struct object_entry *e)
841 {
842         struct object_entry *root;
843
844         for (root = e; DELTA(root); root = DELTA(root))
845                 ; /* nothing */
846         add_descendants_to_write_order(wo, endp, root);
847 }
848
849 static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
850 {
851         unsigned int i, last_untagged;
852         struct object_entry *objects = to_pack.objects;
853
854         for (i = 0; i < to_pack.nr_objects; i++) {
855                 if (objects[i].tagged)
856                         break;
857                 add_to_write_order(wo, wo_end, &objects[i]);
858         }
859         last_untagged = i;
860
861         /*
862          * Then fill all the tagged tips.
863          */
864         for (; i < to_pack.nr_objects; i++) {
865                 if (objects[i].tagged)
866                         add_to_write_order(wo, wo_end, &objects[i]);
867         }
868
869         /*
870          * And then all remaining commits and tags.
871          */
872         for (i = last_untagged; i < to_pack.nr_objects; i++) {
873                 if (oe_type(&objects[i]) != OBJ_COMMIT &&
874                     oe_type(&objects[i]) != OBJ_TAG)
875                         continue;
876                 add_to_write_order(wo, wo_end, &objects[i]);
877         }
878
879         /*
880          * And then all the trees.
881          */
882         for (i = last_untagged; i < to_pack.nr_objects; i++) {
883                 if (oe_type(&objects[i]) != OBJ_TREE)
884                         continue;
885                 add_to_write_order(wo, wo_end, &objects[i]);
886         }
887
888         /*
889          * Finally all the rest in really tight order
890          */
891         for (i = last_untagged; i < to_pack.nr_objects; i++) {
892                 if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
893                         add_family_to_write_order(wo, wo_end, &objects[i]);
894         }
895 }
896
897 static struct object_entry **compute_write_order(void)
898 {
899         uint32_t max_layers = 1;
900         unsigned int i, wo_end;
901
902         struct object_entry **wo;
903         struct object_entry *objects = to_pack.objects;
904
905         for (i = 0; i < to_pack.nr_objects; i++) {
906                 objects[i].tagged = 0;
907                 objects[i].filled = 0;
908                 SET_DELTA_CHILD(&objects[i], NULL);
909                 SET_DELTA_SIBLING(&objects[i], NULL);
910         }
911
912         /*
913          * Fully connect delta_child/delta_sibling network.
914          * Make sure delta_sibling is sorted in the original
915          * recency order.
916          */
917         for (i = to_pack.nr_objects; i > 0;) {
918                 struct object_entry *e = &objects[--i];
919                 if (!DELTA(e))
920                         continue;
921                 /* Mark me as the first child */
922                 e->delta_sibling_idx = DELTA(e)->delta_child_idx;
923                 SET_DELTA_CHILD(DELTA(e), e);
924         }
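        /*
         * Because the loop runs from the last object back to the first
         * and each entry prepends itself to its base's child list, the
         * finished sibling chains end up in forward recency order: given
         * objects [B, D1, D2] with both deltas based on B, D2 is linked
         * first and D1 prepended in front, so B's children read D1 -> D2.
         */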
925
926         /*
927          * Mark objects that are at the tip of tags.
928          */
929         for_each_tag_ref(mark_tagged, NULL);
930
931         if (use_delta_islands)
932                 max_layers = compute_pack_layers(&to_pack);
933
934         ALLOC_ARRAY(wo, to_pack.nr_objects);
935         wo_end = 0;
936
937         for (; write_layer < max_layers; ++write_layer)
938                 compute_layer_order(wo, &wo_end);
939
940         if (wo_end != to_pack.nr_objects)
941                 die(_("ordered %u objects, expected %"PRIu32),
942                     wo_end, to_pack.nr_objects);
943
944         return wo;
945 }
946
947
948 /*
949  * A reused set of objects. All objects in a chunk have the same
950  * relative position in the original packfile and the generated
951  * packfile.
952  */
953
954 static struct reused_chunk {
955         /* The offset of the first object of this chunk in the original
956          * packfile. */
957         off_t original;
958         /* The difference for "original" minus the offset of the first object of
959          * this chunk in the generated packfile. */
960         off_t difference;
961 } *reused_chunks;
962 static int reused_chunks_nr;
963 static int reused_chunks_alloc;
964
965 static void record_reused_object(off_t where, off_t offset)
966 {
967         if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
968                 return;
969
970         ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
971                    reused_chunks_alloc);
972         reused_chunks[reused_chunks_nr].original = where;
973         reused_chunks[reused_chunks_nr].difference = offset;
974         reused_chunks_nr++;
975 }
976
977 /*
978  * Binary search to find the chunk that "where" is in. Note
979  * that we're not looking for an exact match, just the first
980  * chunk that contains it (which implicitly ends at the start
981  * of the next chunk).
982  */
983 static off_t find_reused_offset(off_t where)
984 {
985         int lo = 0, hi = reused_chunks_nr;
986         while (lo < hi) {
987                 int mi = lo + ((hi - lo) / 2);
988                 if (where == reused_chunks[mi].original)
989                         return reused_chunks[mi].difference;
990                 if (where < reused_chunks[mi].original)
991                         hi = mi;
992                 else
993                         lo = mi + 1;
994         }
995
996         /*
997          * The first chunk starts at zero, so we can't have gone below
998          * there.
999          */
1000         assert(lo);
1001         return reused_chunks[lo-1].difference;
1002 }
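/*
 * Example: say chunk 0 was recorded as { original = 12, difference = 0 }
 * (a verbatim prefix starting right after the 12-byte pack header) and
 * chunk 1 as { original = 5000, difference = 40 } (everything from there
 * on moved 40 bytes earlier because objects in between were skipped).
 * find_reused_offset(5100) then returns 40, meaning the object that lived
 * at offset 5100 now lives at 5100 - 40 in the generated pack.
 */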
1003
1004 static void write_reused_pack_one(size_t pos, struct hashfile *out,
1005                                   struct pack_window **w_curs)
1006 {
1007         off_t offset, next, cur;
1008         enum object_type type;
1009         unsigned long size;
1010
1011         offset = pack_pos_to_offset(reuse_packfile, pos);
1012         next = pack_pos_to_offset(reuse_packfile, pos + 1);
1013
1014         record_reused_object(offset, offset - hashfile_total(out));
1015
1016         cur = offset;
1017         type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
1018         assert(type >= 0);
1019
1020         if (type == OBJ_OFS_DELTA) {
1021                 off_t base_offset;
1022                 off_t fixup;
1023
1024                 unsigned char header[MAX_PACK_OBJECT_HEADER];
1025                 unsigned len;
1026
1027                 base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
1028                 assert(base_offset != 0);
1029
1030                 /* Convert to REF_DELTA if we must... */
1031                 if (!allow_ofs_delta) {
1032                         uint32_t base_pos;
1033                         struct object_id base_oid;
1034
1035                         if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
1036                                 die(_("expected object at offset %"PRIuMAX" "
1037                                       "in pack %s"),
1038                                     (uintmax_t)base_offset,
1039                                     reuse_packfile->pack_name);
1040
1041                         nth_packed_object_id(&base_oid, reuse_packfile,
1042                                              pack_pos_to_index(reuse_packfile, base_pos));
1043
1044                         len = encode_in_pack_object_header(header, sizeof(header),
1045                                                            OBJ_REF_DELTA, size);
1046                         hashwrite(out, header, len);
1047                         hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
1048                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1049                         return;
1050                 }
1051
1052                 /* Otherwise see if we need to rewrite the offset... */
1053                 fixup = find_reused_offset(offset) -
1054                         find_reused_offset(base_offset);
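                /*
                 * Why this works: writing d(x) for find_reused_offset(x),
                 * the new positions are offset - d(offset) and
                 * base_offset - d(base_offset), so the new relative
                 * distance is (offset - base_offset) minus
                 * (d(offset) - d(base_offset)), i.e. the
                 * offset - base_offset - fixup computed below.
                 */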
1055                 if (fixup) {
1056                         unsigned char ofs_header[10];
1057                         unsigned i, ofs_len;
1058                         off_t ofs = offset - base_offset - fixup;
1059
1060                         len = encode_in_pack_object_header(header, sizeof(header),
1061                                                            OBJ_OFS_DELTA, size);
1062
1063                         i = sizeof(ofs_header) - 1;
1064                         ofs_header[i] = ofs & 127;
1065                         while (ofs >>= 7)
1066                                 ofs_header[--i] = 128 | (--ofs & 127);
1067
1068                         ofs_len = sizeof(ofs_header) - i;
1069
1070                         hashwrite(out, header, len);
1071                         hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
1072                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1073                         return;
1074                 }
1075
1076                 /* ...otherwise we have no fixup, and can write it verbatim */
1077         }
1078
1079         copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
1080 }
1081
1082 static size_t write_reused_pack_verbatim(struct hashfile *out,
1083                                          struct pack_window **w_curs)
1084 {
1085         size_t pos = 0;
1086
1087         while (pos < reuse_packfile_bitmap->word_alloc &&
1088                         reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1089                 pos++;
1090
1091         if (pos) {
1092                 off_t to_write;
1093
1094                 written = (pos * BITS_IN_EWORD);
1095                 to_write = pack_pos_to_offset(reuse_packfile, written)
1096                         - sizeof(struct pack_header);
1097
1098                 /* We're recording one chunk, not one object. */
1099                 record_reused_object(sizeof(struct pack_header), 0);
1100                 hashflush(out);
1101                 copy_pack_data(out, reuse_packfile, w_curs,
1102                         sizeof(struct pack_header), to_write);
1103
1104                 display_progress(progress_state, written);
1105         }
1106         return pos;
1107 }
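/*
 * Each all-ones word of the reuse bitmap covers BITS_IN_EWORD (64)
 * consecutive objects, so the loop above finds the longest fully reused
 * prefix of the original pack and copies it with a single
 * copy_pack_data() call, skipping per-object processing entirely.
 */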
1108
1109 static void write_reused_pack(struct hashfile *f)
1110 {
1111         size_t i = 0;
1112         uint32_t offset;
1113         struct pack_window *w_curs = NULL;
1114
1115         if (allow_ofs_delta)
1116                 i = write_reused_pack_verbatim(f, &w_curs);
1117
1118         for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
1119                 eword_t word = reuse_packfile_bitmap->words[i];
1120                 size_t pos = (i * BITS_IN_EWORD);
1121
1122                 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1123                         if ((word >> offset) == 0)
1124                                 break;
1125
1126                         offset += ewah_bit_ctz64(word >> offset);
1127                         write_reused_pack_one(pos + offset, f, &w_curs);
1128                         display_progress(progress_state, ++written);
1129                 }
1130         }
1131
1132         unuse_pack(&w_curs);
1133 }
1134
1135 static void write_excluded_by_configs(void)
1136 {
1137         struct oidset_iter iter;
1138         const struct object_id *oid;
1139
1140         oidset_iter_init(&excluded_by_config, &iter);
1141         while ((oid = oidset_iter_next(&iter))) {
1142                 struct configured_exclusion *ex =
1143                         oidmap_get(&configured_exclusions, oid);
1144
1145                 if (!ex)
1146                         BUG("configured exclusion wasn't configured");
1147                 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
1148                 write_in_full(1, " ", 1);
1149                 write_in_full(1, ex->uri, strlen(ex->uri));
1150                 write_in_full(1, "\n", 1);
1151         }
1152 }
1153
1154 static const char no_split_warning[] = N_(
1155 "disabling bitmap writing, packs are split due to pack.packSizeLimit"
1156 );
1157
1158 static void write_pack_file(void)
1159 {
1160         uint32_t i = 0, j;
1161         struct hashfile *f;
1162         off_t offset;
1163         uint32_t nr_remaining = nr_result;
1164         time_t last_mtime = 0;
1165         struct object_entry **write_order;
1166
1167         if (progress > pack_to_stdout)
1168                 progress_state = start_progress(_("Writing objects"), nr_result);
1169         ALLOC_ARRAY(written_list, to_pack.nr_objects);
1170         write_order = compute_write_order();
1171
1172         do {
1173                 unsigned char hash[GIT_MAX_RAWSZ];
1174                 char *pack_tmp_name = NULL;
1175
1176                 if (pack_to_stdout)
1177                         f = hashfd_throughput(1, "<stdout>", progress_state);
1178                 else
1179                         f = create_tmp_packfile(&pack_tmp_name);
1180
1181                 offset = write_pack_header(f, nr_remaining);
1182
1183                 if (reuse_packfile) {
1184                         assert(pack_to_stdout);
1185                         write_reused_pack(f);
1186                         offset = hashfile_total(f);
1187                 }
1188
1189                 nr_written = 0;
1190                 for (; i < to_pack.nr_objects; i++) {
1191                         struct object_entry *e = write_order[i];
1192                         if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
1193                                 break;
1194                         display_progress(progress_state, written);
1195                 }
1196
1197                 /*
1198                  * Did we write the wrong # entries in the header?
1199                  * If so, rewrite it like in fast-import
1200                  */
1201                 if (pack_to_stdout) {
1202                         finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
1203                 } else if (nr_written == nr_remaining) {
1204                         finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
1205                 } else {
1206                         int fd = finalize_hashfile(f, hash, 0);
1207                         fixup_pack_header_footer(fd, hash, pack_tmp_name,
1208                                                  nr_written, hash, offset);
1209                         close(fd);
1210                         if (write_bitmap_index) {
1211                                 if (write_bitmap_index != WRITE_BITMAP_QUIET)
1212                                         warning(_(no_split_warning));
1213                                 write_bitmap_index = 0;
1214                         }
1215                 }
1216
1217                 if (!pack_to_stdout) {
1218                         struct stat st;
1219                         struct strbuf tmpname = STRBUF_INIT;
1220
1221                         /*
1222                  * Packs are accessed at runtime in their mtime
1223                  * order, since newer packs are more likely to contain
1224                          * younger objects.  So if we are creating multiple
1225                          * packs then we should modify the mtime of later ones
1226                          * to preserve this property.
1227                          */
1228                         if (stat(pack_tmp_name, &st) < 0) {
1229                                 warning_errno(_("failed to stat %s"), pack_tmp_name);
1230                         } else if (!last_mtime) {
1231                                 last_mtime = st.st_mtime;
1232                         } else {
1233                                 struct utimbuf utb;
1234                                 utb.actime = st.st_atime;
1235                                 utb.modtime = --last_mtime;
1236                                 if (utime(pack_tmp_name, &utb) < 0)
1237                                         warning_errno(_("failed utime() on %s"), pack_tmp_name);
1238                         }
1239
1240                         strbuf_addf(&tmpname, "%s-", base_name);
1241
1242                         if (write_bitmap_index) {
1243                                 bitmap_writer_set_checksum(hash);
1244                                 bitmap_writer_build_type_index(
1245                                         &to_pack, written_list, nr_written);
1246                         }
1247
1248                         finish_tmp_packfile(&tmpname, pack_tmp_name,
1249                                             written_list, nr_written,
1250                                             &pack_idx_opts, hash);
1251
1252                         if (write_bitmap_index) {
1253                                 strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash));
1254
1255                                 stop_progress(&progress_state);
1256
1257                                 bitmap_writer_show_progress(progress);
1258                                 bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
1259                                 bitmap_writer_build(&to_pack);
1260                                 bitmap_writer_finish(written_list, nr_written,
1261                                                      tmpname.buf, write_bitmap_options);
1262                                 write_bitmap_index = 0;
1263                         }
1264
1265                         strbuf_release(&tmpname);
1266                         free(pack_tmp_name);
1267                         puts(hash_to_hex(hash));
1268                 }
1269
1270                 /* mark written objects as written to previous pack */
1271                 for (j = 0; j < nr_written; j++) {
1272                         written_list[j]->offset = (off_t)-1;
1273                 }
1274                 nr_remaining -= nr_written;
1275         } while (nr_remaining && i < to_pack.nr_objects);
1276
1277         free(written_list);
1278         free(write_order);
1279         stop_progress(&progress_state);
1280         if (written != nr_result)
1281                 die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
1282                     written, nr_result);
1283         trace2_data_intmax("pack-objects", the_repository,
1284                            "write_pack_file/wrote", nr_result);
1285 }
1286
1287 static int no_try_delta(const char *path)
1288 {
1289         static struct attr_check *check;
1290
1291         if (!check)
1292                 check = attr_check_initl("delta", NULL);
1293         git_check_attr(the_repository->index, path, check);
1294         if (ATTR_FALSE(check->items[0].value))
1295                 return 1;
1296         return 0;
1297 }
1298
1299 /*
1300  * When adding an object, check whether we have already added it
1301  * to our packing list. If so, we can skip. However, if we are
1302  * being asked to exclude it, but the previous mention was to include
1303  * it, make sure to adjust its flags and tweak our numbers accordingly.
1308  */
1309 static int have_duplicate_entry(const struct object_id *oid,
1310                                 int exclude)
1311 {
1312         struct object_entry *entry;
1313
1314         if (reuse_packfile_bitmap &&
1315             bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1316                 return 1;
1317
1318         entry = packlist_find(&to_pack, oid);
1319         if (!entry)
1320                 return 0;
1321
1322         if (exclude) {
1323                 if (!entry->preferred_base)
1324                         nr_result--;
1325                 entry->preferred_base = 1;
1326         }
1327
1328         return 1;
1329 }
1330
1331 static int want_found_object(const struct object_id *oid, int exclude,
1332                              struct packed_git *p)
1333 {
1334         if (exclude)
1335                 return 1;
1336         if (incremental)
1337                 return 0;
1338
1339         /*
1340          * When asked to do --local (do not include an object that appears in a
1341          * pack we borrow from elsewhere) or --honor-pack-keep (do not include
1342          * an object that appears in a pack marked with .keep), finding a pack
1343          * that matches the criteria is sufficient for us to decide to omit it.
1344          * However, even if this pack does not satisfy the criteria, we need to
1345  * make sure no copy of this object appears in _any_ pack that would
1346  * make us omit the object, so we need to check all the packs.
1347          *
1348          * We can however first check whether these options can possibly matter;
1349          * if they do not matter we know we want the object in generated pack.
1350          * Otherwise, we signal "-1" at the end to tell the caller that we do
1351          * not know either way, and it needs to check more packs.
1352          */
1353
1354         /*
1355  * Objects in packs borrowed from elsewhere are discarded regardless
1356  * of whether they appear in other packs that weren't borrowed.
1357          */
1358         if (local && !p->pack_local)
1359                 return 0;
1360
1361         /*
1362          * Then handle .keep first, as we have a fast(er) path there.
1363          */
1364         if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
1365                 /*
1366                  * Set the flags for the kept-pack cache to be the ones we want
1367                  * to ignore.
1368                  *
1369                  * That is, if we are ignoring objects in on-disk keep packs,
1370                  * then we want to search through the on-disk keep and ignore
1371                  * the in-core ones.
1372                  */
1373                 unsigned flags = 0;
1374                 if (ignore_packed_keep_on_disk)
1375                         flags |= ON_DISK_KEEP_PACKS;
1376                 if (ignore_packed_keep_in_core)
1377                         flags |= IN_CORE_KEEP_PACKS;
1378
1379                 if (ignore_packed_keep_on_disk && p->pack_keep)
1380                         return 0;
1381                 if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1382                         return 0;
1383                 if (has_object_kept_pack(oid, flags))
1384                         return 0;
1385         }
1386
1387         /*
1388          * At this point we know definitively that either we don't care about
1389          * keep-packs, or the object is not in one. Keep checking other
1390          * conditions...
1391          */
1392         if (!local || !have_non_local_packs)
1393                 return 1;
1394
1395         /* we don't know yet; keep looking for more packs */
1396         return -1;
1397 }
1398
1399 static int want_object_in_pack_one(struct packed_git *p,
1400                                    const struct object_id *oid,
1401                                    int exclude,
1402                                    struct packed_git **found_pack,
1403                                    off_t *found_offset)
1404 {
1405         off_t offset;
1406
1407         if (p == *found_pack)
1408                 offset = *found_offset;
1409         else
1410                 offset = find_pack_entry_one(oid->hash, p);
1411
1412         if (offset) {
1413                 if (!*found_pack) {
1414                         if (!is_pack_valid(p))
1415                                 return -1;
1416                         *found_offset = offset;
1417                         *found_pack = p;
1418                 }
1419                 return want_found_object(oid, exclude, p);
1420         }
1421         return -1;
1422 }
1423
1424 /*
1425  * Check whether we want the object in the pack (e.g., we do not want
1426  * objects found in non-local stores if the "--local" option was used).
1427  *
1428  * If the caller already knows an existing pack it wants to take the object
1429  * from, that is passed in *found_pack and *found_offset; otherwise this
1430  * function finds if there is any pack that has the object and returns the pack
1431  * and its offset in these variables.
1432  */
1433 static int want_object_in_pack(const struct object_id *oid,
1434                                int exclude,
1435                                struct packed_git **found_pack,
1436                                off_t *found_offset)
1437 {
1438         int want;
1439         struct list_head *pos;
1440         struct multi_pack_index *m;
1441
1442         if (!exclude && local && has_loose_object_nonlocal(oid))
1443                 return 0;
1444
1445         /*
1446          * If we already know the pack the object lives in, start checks from
1447          * that pack - in the usual case, when neither --local was given nor
1448          * .keep files are present, we will determine the answer right now.
1449          */
1450         if (*found_pack) {
1451                 want = want_found_object(oid, exclude, *found_pack);
1452                 if (want != -1)
1453                         return want;
1454         }
1455
1456         for (m = get_multi_pack_index(the_repository); m; m = m->next) {
1457                 struct pack_entry e;
1458                 if (fill_midx_entry(the_repository, oid, &e, m)) {
1459                         want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1460                         if (want != -1)
1461                                 return want;
1462                 }
1463         }
1464
1465         list_for_each(pos, get_packed_git_mru(the_repository)) {
1466                 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1467                 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1468                 if (!exclude && want > 0)
1469                         list_move(&p->mru,
1470                                   get_packed_git_mru(the_repository));
1471                 if (want != -1)
1472                         return want;
1473         }
1474
1475         if (uri_protocols.nr) {
1476                 struct configured_exclusion *ex =
1477                         oidmap_get(&configured_exclusions, oid);
1478                 int i;
1479                 const char *p;
1480
1481                 if (ex) {
1482                         for (i = 0; i < uri_protocols.nr; i++) {
1483                                 if (skip_prefix(ex->uri,
1484                                                 uri_protocols.items[i].string,
1485                                                 &p) &&
1486                                     *p == ':') {
1487                                         oidset_insert(&excluded_by_config, oid);
1488                                         return 0;
1489                                 }
1490                         }
1491                 }
1492         }
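	/*
	 * Example with illustrative values (not from the original source):
	 * with uri_protocols = { "https" } and a configured exclusion whose
	 * uri is "https://example.com/objects.pack", skip_prefix() leaves p
	 * pointing at "://example.com/objects.pack", the *p == ':' test
	 * succeeds, and the object is recorded and omitted from the pack.
	 */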
1493
1494         return 1;
1495 }
1496
1497 static void create_object_entry(const struct object_id *oid,
1498                                 enum object_type type,
1499                                 uint32_t hash,
1500                                 int exclude,
1501                                 int no_try_delta,
1502                                 struct packed_git *found_pack,
1503                                 off_t found_offset)
1504 {
1505         struct object_entry *entry;
1506
1507         entry = packlist_alloc(&to_pack, oid);
1508         entry->hash = hash;
1509         oe_set_type(entry, type);
1510         if (exclude)
1511                 entry->preferred_base = 1;
1512         else
1513                 nr_result++;
1514         if (found_pack) {
1515                 oe_set_in_pack(&to_pack, entry, found_pack);
1516                 entry->in_pack_offset = found_offset;
1517         }
1518
1519         entry->no_try_delta = no_try_delta;
1520 }
1521
1522 static const char no_closure_warning[] = N_(
1523 "disabling bitmap writing, as some objects are not being packed"
1524 );
1525
1526 static int add_object_entry(const struct object_id *oid, enum object_type type,
1527                             const char *name, int exclude)
1528 {
1529         struct packed_git *found_pack = NULL;
1530         off_t found_offset = 0;
1531
1532         display_progress(progress_state, ++nr_seen);
1533
1534         if (have_duplicate_entry(oid, exclude))
1535                 return 0;
1536
1537         if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1538                 /* The pack is missing an object, so it will not have closure */
1539                 if (write_bitmap_index) {
1540                         if (write_bitmap_index != WRITE_BITMAP_QUIET)
1541                                 warning(_(no_closure_warning));
1542                         write_bitmap_index = 0;
1543                 }
1544                 return 0;
1545         }
1546
1547         create_object_entry(oid, type, pack_name_hash(name),
1548                             exclude, name && no_try_delta(name),
1549                             found_pack, found_offset);
1550         return 1;
1551 }
1552
1553 static int add_object_entry_from_bitmap(const struct object_id *oid,
1554                                         enum object_type type,
1555                                         int flags, uint32_t name_hash,
1556                                         struct packed_git *pack, off_t offset)
1557 {
1558         display_progress(progress_state, ++nr_seen);
1559
1560         if (have_duplicate_entry(oid, 0))
1561                 return 0;
1562
1563         if (!want_object_in_pack(oid, 0, &pack, &offset))
1564                 return 0;
1565
1566         create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1567         return 1;
1568 }
1569
1570 struct pbase_tree_cache {
1571         struct object_id oid;
1572         int ref;
1573         int temporary;
1574         void *tree_data;
1575         unsigned long tree_size;
1576 };
1577
1578 static struct pbase_tree_cache *(pbase_tree_cache[256]);
1579 static int pbase_tree_cache_ix(const struct object_id *oid)
1580 {
1581         return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1582 }
1583 static int pbase_tree_cache_ix_incr(int ix)
1584 {
1585         return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1586 }
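/*
 * A sketch of the probe sequence (editorial illustration, not part of the
 * original code): an oid whose first hash byte is 0xfe starts at slot
 * 0xfe % 256 == 254 and, on a collision, probes 255, 0, 1, ... because
 * pbase_tree_cache_ix_incr() wraps around ARRAY_SIZE(pbase_tree_cache);
 * pbase_tree_get() below gives up after probing 8 slots.
 */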
1587
1588 static struct pbase_tree {
1589         struct pbase_tree *next;
1590         /* This is a phony "cache" entry; we are not
1591          * going to evict it or find it through the _get()
1592          * mechanism -- this is for the toplevel node that
1593          * would almost always change with any commit.
1594          */
1595         struct pbase_tree_cache pcache;
1596 } *pbase_tree;
1597
1598 static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1599 {
1600         struct pbase_tree_cache *ent, *nent;
1601         void *data;
1602         unsigned long size;
1603         enum object_type type;
1604         int neigh;
1605         int my_ix = pbase_tree_cache_ix(oid);
1606         int available_ix = -1;
1607
1608         /* pbase_tree_cache acts as a limited hashtable:
1609          * if it is cached, the object will be found at its index
1610          * or within a few slots after that slot.
1611          */
1612         for (neigh = 0; neigh < 8; neigh++) {
1613                 ent = pbase_tree_cache[my_ix];
1614                 if (ent && oideq(&ent->oid, oid)) {
1615                         ent->ref++;
1616                         return ent;
1617                 }
1618                 else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1619                          ((0 <= available_ix) &&
1620                           (!ent && pbase_tree_cache[available_ix])))
1621                         available_ix = my_ix;
1622                 if (!ent)
1623                         break;
1624                 my_ix = pbase_tree_cache_ix_incr(my_ix);
1625         }
1626
1627         /* Did not find one.  Either we got a bogus request or
1628          * we need to read and perhaps cache.
1629          */
1630         data = read_object_file(oid, &type, &size);
1631         if (!data)
1632                 return NULL;
1633         if (type != OBJ_TREE) {
1634                 free(data);
1635                 return NULL;
1636         }
1637
1638         /* We need to either cache or return a throwaway copy */
1639
1640         if (available_ix < 0)
1641                 ent = NULL;
1642         else {
1643                 ent = pbase_tree_cache[available_ix];
1644                 my_ix = available_ix;
1645         }
1646
1647         if (!ent) {
1648                 nent = xmalloc(sizeof(*nent));
1649                 nent->temporary = (available_ix < 0);
1650         }
1651         else {
1652                 /* evict and reuse */
1653                 free(ent->tree_data);
1654                 nent = ent;
1655         }
1656         oidcpy(&nent->oid, oid);
1657         nent->tree_data = data;
1658         nent->tree_size = size;
1659         nent->ref = 1;
1660         if (!nent->temporary)
1661                 pbase_tree_cache[my_ix] = nent;
1662         return nent;
1663 }
1664
1665 static void pbase_tree_put(struct pbase_tree_cache *cache)
1666 {
1667         if (!cache->temporary) {
1668                 cache->ref--;
1669                 return;
1670         }
1671         free(cache->tree_data);
1672         free(cache);
1673 }
1674
1675 static int name_cmp_len(const char *name)
1676 {
1677         int i;
1678         for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
1679                 ;
1680         return i;
1681 }
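/*
 * For example (illustrative, not from the original source):
 *
 *	name_cmp_len("Documentation/RelNotes") == 13
 *
 * i.e. the length of the leading path component, stopping at '/', '\n'
 * or the end of the string.
 */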
1682
1683 static void add_pbase_object(struct tree_desc *tree,
1684                              const char *name,
1685                              int cmplen,
1686                              const char *fullname)
1687 {
1688         struct name_entry entry;
1689         int cmp;
1690
1691         while (tree_entry(tree, &entry)) {
1692                 if (S_ISGITLINK(entry.mode))
1693                         continue;
1694                 cmp = tree_entry_len(&entry) != cmplen ? 1 :
1695                       memcmp(name, entry.path, cmplen);
1696                 if (cmp > 0)
1697                         continue;
1698                 if (cmp < 0)
1699                         return;
1700                 if (name[cmplen] != '/') {
1701                         add_object_entry(&entry.oid,
1702                                          object_type(entry.mode),
1703                                          fullname, 1);
1704                         return;
1705                 }
1706                 if (S_ISDIR(entry.mode)) {
1707                         struct tree_desc sub;
1708                         struct pbase_tree_cache *tree;
1709                         const char *down = name+cmplen+1;
1710                         int downlen = name_cmp_len(down);
1711
1712                         tree = pbase_tree_get(&entry.oid);
1713                         if (!tree)
1714                                 return;
1715                         init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1716
1717                         add_pbase_object(&sub, down, downlen, fullname);
1718                         pbase_tree_put(tree);
1719                 }
1720         }
1721 }
1722
1723 static unsigned *done_pbase_paths;
1724 static int done_pbase_paths_num;
1725 static int done_pbase_paths_alloc;
1726 static int done_pbase_path_pos(unsigned hash)
1727 {
1728         int lo = 0;
1729         int hi = done_pbase_paths_num;
1730         while (lo < hi) {
1731                 int mi = lo + (hi - lo) / 2;
1732                 if (done_pbase_paths[mi] == hash)
1733                         return mi;
1734                 if (done_pbase_paths[mi] < hash)
1735                         hi = mi;
1736                 else
1737                         lo = mi + 1;
1738         }
1739         return -lo-1;
1740 }
1741
1742 static int check_pbase_path(unsigned hash)
1743 {
1744         int pos = done_pbase_path_pos(hash);
1745         if (0 <= pos)
1746                 return 1;
1747         pos = -pos - 1;
1748         ALLOC_GROW(done_pbase_paths,
1749                    done_pbase_paths_num + 1,
1750                    done_pbase_paths_alloc);
1751         done_pbase_paths_num++;
1752         if (pos < done_pbase_paths_num)
1753                 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1754                            done_pbase_paths_num - pos - 1);
1755         done_pbase_paths[pos] = hash;
1756         return 0;
1757 }
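/*
 * Editorial note: done_pbase_paths is kept sorted in *descending* hash
 * order -- done_pbase_path_pos() narrows to the left when the probed
 * element is smaller than the hash. E.g. (illustrative) inserting the
 * hashes 5, 9 and 1 in that order leaves the array as { 9, 5, 1 }, and
 * a second check_pbase_path(9) then returns 1 without inserting.
 */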
1758
1759 static void add_preferred_base_object(const char *name)
1760 {
1761         struct pbase_tree *it;
1762         int cmplen;
1763         unsigned hash = pack_name_hash(name);
1764
1765         if (!num_preferred_base || check_pbase_path(hash))
1766                 return;
1767
1768         cmplen = name_cmp_len(name);
1769         for (it = pbase_tree; it; it = it->next) {
1770                 if (cmplen == 0) {
1771                         add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1772                 }
1773                 else {
1774                         struct tree_desc tree;
1775                         init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1776                         add_pbase_object(&tree, name, cmplen, name);
1777                 }
1778         }
1779 }
1780
1781 static void add_preferred_base(struct object_id *oid)
1782 {
1783         struct pbase_tree *it;
1784         void *data;
1785         unsigned long size;
1786         struct object_id tree_oid;
1787
1788         if (window <= num_preferred_base++)
1789                 return;
1790
1791         data = read_object_with_reference(the_repository, oid,
1792                                           tree_type, &size, &tree_oid);
1793         if (!data)
1794                 return;
1795
1796         for (it = pbase_tree; it; it = it->next) {
1797                 if (oideq(&it->pcache.oid, &tree_oid)) {
1798                         free(data);
1799                         return;
1800                 }
1801         }
1802
1803         CALLOC_ARRAY(it, 1);
1804         it->next = pbase_tree;
1805         pbase_tree = it;
1806
1807         oidcpy(&it->pcache.oid, &tree_oid);
1808         it->pcache.tree_data = data;
1809         it->pcache.tree_size = size;
1810 }
1811
1812 static void cleanup_preferred_base(void)
1813 {
1814         struct pbase_tree *it;
1815         unsigned i;
1816
1817         it = pbase_tree;
1818         pbase_tree = NULL;
1819         while (it) {
1820                 struct pbase_tree *tmp = it;
1821                 it = tmp->next;
1822                 free(tmp->pcache.tree_data);
1823                 free(tmp);
1824         }
1825
1826         for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1827                 if (!pbase_tree_cache[i])
1828                         continue;
1829                 free(pbase_tree_cache[i]->tree_data);
1830                 FREE_AND_NULL(pbase_tree_cache[i]);
1831         }
1832
1833         FREE_AND_NULL(done_pbase_paths);
1834         done_pbase_paths_num = done_pbase_paths_alloc = 0;
1835 }
1836
1837 /*
1838  * Return 1 iff the object specified by "delta" can be sent
1839  * literally as a delta against the base in "base_oid". If
1840  * so, then *base_out will point to the entry in our packing
1841  * list, or NULL if we must use the external-base list.
1842  *
1843  * Depth value does not matter - find_deltas() will
1844  * never consider a reused delta as the base object to
1845  * deltify other objects against, in order to avoid
1846  * circular deltas.
1847  */
1848 static int can_reuse_delta(const struct object_id *base_oid,
1849                            struct object_entry *delta,
1850                            struct object_entry **base_out)
1851 {
1852         struct object_entry *base;
1853
1854         /*
1855          * First see if we're already sending the base (or it's explicitly in
1856          * our "excluded" list).
1857          */
1858         base = packlist_find(&to_pack, base_oid);
1859         if (base) {
1860                 if (!in_same_island(&delta->idx.oid, &base->idx.oid))
1861                         return 0;
1862                 *base_out = base;
1863                 return 1;
1864         }
1865
1866         /*
1867          * Otherwise, reachability bitmaps may tell us if the receiver has it,
1868          * even if it was buried too deep in history to make it into the
1869          * packing list.
1870          */
1871         if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
1872                 if (use_delta_islands) {
1873                         if (!in_same_island(&delta->idx.oid, base_oid))
1874                                 return 0;
1875                 }
1876                 *base_out = NULL;
1877                 return 1;
1878         }
1879
1880         return 0;
1881 }
1882
1883 static void prefetch_to_pack(uint32_t object_index_start) {
1884         struct oid_array to_fetch = OID_ARRAY_INIT;
1885         uint32_t i;
1886
1887         for (i = object_index_start; i < to_pack.nr_objects; i++) {
1888                 struct object_entry *entry = to_pack.objects + i;
1889
1890                 if (!oid_object_info_extended(the_repository,
1891                                               &entry->idx.oid,
1892                                               NULL,
1893                                               OBJECT_INFO_FOR_PREFETCH))
1894                         continue;
1895                 oid_array_append(&to_fetch, &entry->idx.oid);
1896         }
1897         promisor_remote_get_direct(the_repository,
1898                                    to_fetch.oid, to_fetch.nr);
1899         oid_array_clear(&to_fetch);
1900 }
1901
1902 static void check_object(struct object_entry *entry, uint32_t object_index)
1903 {
1904         unsigned long canonical_size;
1905         enum object_type type;
1906         struct object_info oi = {.typep = &type, .sizep = &canonical_size};
1907
1908         if (IN_PACK(entry)) {
1909                 struct packed_git *p = IN_PACK(entry);
1910                 struct pack_window *w_curs = NULL;
1911                 int have_base = 0;
1912                 struct object_id base_ref;
1913                 struct object_entry *base_entry;
1914                 unsigned long used, used_0;
1915                 unsigned long avail;
1916                 off_t ofs;
1917                 unsigned char *buf, c;
1918                 enum object_type type;
1919                 unsigned long in_pack_size;
1920
1921                 buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1922
1923                 /*
1924                  * We want in_pack_type even if we do not reuse delta
1925                  * since non-delta representations could still be reused.
1926                  */
1927                 used = unpack_object_header_buffer(buf, avail,
1928                                                    &type,
1929                                                    &in_pack_size);
1930                 if (used == 0)
1931                         goto give_up;
1932
1933                 if (type < 0)
1934                         BUG("invalid type %d", type);
1935                 entry->in_pack_type = type;
1936
1937                 /*
1938                  * Determine if this is a delta and if so whether we can
1939                  * reuse it or not.  Otherwise let's find out as cheaply as
1940                  * possible what the actual type and size for this object is.
1941                  */
1942                 switch (entry->in_pack_type) {
1943                 default:
1944                         /* Not a delta hence we've already got all we need. */
1945                         oe_set_type(entry, entry->in_pack_type);
1946                         SET_SIZE(entry, in_pack_size);
1947                         entry->in_pack_header_size = used;
1948                         if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
1949                                 goto give_up;
1950                         unuse_pack(&w_curs);
1951                         return;
1952                 case OBJ_REF_DELTA:
1953                         if (reuse_delta && !entry->preferred_base) {
1954                                 oidread(&base_ref,
1955                                         use_pack(p, &w_curs,
1956                                                  entry->in_pack_offset + used,
1957                                                  NULL));
1958                                 have_base = 1;
1959                         }
1960                         entry->in_pack_header_size = used + the_hash_algo->rawsz;
1961                         break;
1962                 case OBJ_OFS_DELTA:
1963                         buf = use_pack(p, &w_curs,
1964                                        entry->in_pack_offset + used, NULL);
1965                         used_0 = 0;
1966                         c = buf[used_0++];
1967                         ofs = c & 127;
1968                         while (c & 128) {
1969                                 ofs += 1;
1970                                 if (!ofs || MSB(ofs, 7)) {
1971                                         error(_("delta base offset overflow in pack for %s"),
1972                                               oid_to_hex(&entry->idx.oid));
1973                                         goto give_up;
1974                                 }
1975                                 c = buf[used_0++];
1976                                 ofs = (ofs << 7) + (c & 127);
1977                         }
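			/*
			 * A worked example of this offset encoding (editorial
			 * illustration): the two bytes 0x91 0x2e decode as
			 * ofs = 0x11, then ofs = ((0x11 + 1) << 7) + 0x2e
			 * == 2350, i.e. the delta base starts 2350 bytes
			 * before this entry in the pack.
			 */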
1978                         ofs = entry->in_pack_offset - ofs;
1979                         if (ofs <= 0 || ofs >= entry->in_pack_offset) {
1980                                 error(_("delta base offset out of bound for %s"),
1981                                       oid_to_hex(&entry->idx.oid));
1982                                 goto give_up;
1983                         }
1984                         if (reuse_delta && !entry->preferred_base) {
1985                                 uint32_t pos;
1986                                 if (offset_to_pack_pos(p, ofs, &pos) < 0)
1987                                         goto give_up;
1988                                 if (!nth_packed_object_id(&base_ref, p,
1989                                                           pack_pos_to_index(p, pos)))
1990                                         have_base = 1;
1991                         }
1992                         entry->in_pack_header_size = used + used_0;
1993                         break;
1994                 }
1995
1996                 if (have_base &&
1997                     can_reuse_delta(&base_ref, entry, &base_entry)) {
1998                         oe_set_type(entry, entry->in_pack_type);
1999                         SET_SIZE(entry, in_pack_size); /* delta size */
2000                         SET_DELTA_SIZE(entry, in_pack_size);
2001
2002                         if (base_entry) {
2003                                 SET_DELTA(entry, base_entry);
2004                                 entry->delta_sibling_idx = base_entry->delta_child_idx;
2005                                 SET_DELTA_CHILD(base_entry, entry);
2006                         } else {
2007                                 SET_DELTA_EXT(entry, &base_ref);
2008                         }
2009
2010                         unuse_pack(&w_curs);
2011                         return;
2012                 }
2013
2014                 if (oe_type(entry)) {
2015                         off_t delta_pos;
2016
2017                         /*
2018                          * This must be a delta and we already know what the
2019                          * final object type is.  Let's extract the actual
2020                          * object size from the delta header.
2021                          */
2022                         delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
2023                         canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
2024                         if (canonical_size == 0)
2025                                 goto give_up;
2026                         SET_SIZE(entry, canonical_size);
2027                         unuse_pack(&w_curs);
2028                         return;
2029                 }
2030
2031                 /*
2032                  * No choice but to fall back to the recursive delta walk
2033                  * with oid_object_info() to find out the object type
2034                  * at this point...
2035                  */
2036                 give_up:
2037                 unuse_pack(&w_curs);
2038         }
2039
2040         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2041                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
2042                 if (has_promisor_remote()) {
2043                         prefetch_to_pack(object_index);
2044                         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2045                                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
2046                                 type = -1;
2047                 } else {
2048                         type = -1;
2049                 }
2050         }
2051         oe_set_type(entry, type);
2052         if (entry->type_valid) {
2053                 SET_SIZE(entry, canonical_size);
2054         } else {
2055                 /*
2056                  * A bad object type is checked in prepare_pack().  Deferring
2057                  * the check here permits a missing object to be silently
2058                  * dropped as a preferred base.  Doing so can result in a larger
2059                  * pack file, but the transfer will still take place.
2060                  */
2061         }
2062 }
2063
2064 static int pack_offset_sort(const void *_a, const void *_b)
2065 {
2066         const struct object_entry *a = *(struct object_entry **)_a;
2067         const struct object_entry *b = *(struct object_entry **)_b;
2068         const struct packed_git *a_in_pack = IN_PACK(a);
2069         const struct packed_git *b_in_pack = IN_PACK(b);
2070
2071         /* avoid filesystem thrashing with loose objects */
2072         if (!a_in_pack && !b_in_pack)
2073                 return oidcmp(&a->idx.oid, &b->idx.oid);
2074
2075         if (a_in_pack < b_in_pack)
2076                 return -1;
2077         if (a_in_pack > b_in_pack)
2078                 return 1;
2079         return a->in_pack_offset < b->in_pack_offset ? -1 :
2080                         (a->in_pack_offset > b->in_pack_offset);
2081 }
2082
2083 /*
2084  * Drop an on-disk delta we were planning to reuse. Naively, this would
2085  * just involve blanking out the "delta" field, but we have to deal
2086  * with some extra book-keeping:
2087  *
2088  *   1. Removing ourselves from the delta_sibling linked list.
2089  *
2090  *   2. Updating our size/type to the non-delta representation. These were
2091  *      either not recorded initially (size) or overwritten with the delta type
2092  *      (type) when check_object() decided to reuse the delta.
2093  *
2094  *   3. Resetting our delta depth, as we are now a base object.
2095  */
2096 static void drop_reused_delta(struct object_entry *entry)
2097 {
2098         unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
2099         struct object_info oi = OBJECT_INFO_INIT;
2100         enum object_type type;
2101         unsigned long size;
2102
2103         while (*idx) {
2104                 struct object_entry *oe = &to_pack.objects[*idx - 1];
2105
2106                 if (oe == entry)
2107                         *idx = oe->delta_sibling_idx;
2108                 else
2109                         idx = &oe->delta_sibling_idx;
2110         }
2111         SET_DELTA(entry, NULL);
2112         entry->depth = 0;
2113
2114         oi.sizep = &size;
2115         oi.typep = &type;
2116         if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
2117                 /*
2118                  * We failed to get the info from this pack for some reason;
2119                  * fall back to oid_object_info, which may find another copy.
2120                  * And if that fails, the error will be recorded in oe_type(entry)
2121                  * and dealt with in prepare_pack().
2122                  */
2123                 oe_set_type(entry,
2124                             oid_object_info(the_repository, &entry->idx.oid, &size));
2125         } else {
2126                 oe_set_type(entry, type);
2127         }
2128         SET_SIZE(entry, size);
2129 }
2130
2131 /*
2132  * Follow the chain of deltas from this entry onward, throwing away any links
2133  * that cause us to hit a cycle (as determined by the DFS state flags in
2134  * the entries).
2135  *
2136  * We also detect too-long reused chains that would violate our --depth
2137  * limit.
2138  */
2139 static void break_delta_chains(struct object_entry *entry)
2140 {
2141         /*
2142          * The actual depth of each object we will write is stored as an int,
2143          * as it cannot exceed our int "depth" limit. But before we break
2144          * anything based on that limit, we may potentially go as deep as the
2145          * number of objects, which is elsewhere bounded to a uint32_t.
2146          */
2147         uint32_t total_depth;
2148         struct object_entry *cur, *next;
2149
2150         for (cur = entry, total_depth = 0;
2151              cur;
2152              cur = DELTA(cur), total_depth++) {
2153                 if (cur->dfs_state == DFS_DONE) {
2154                         /*
2155                          * We've already seen this object and know it isn't
2156                          * part of a cycle. We do need to add its depth
2157                          * to our count.
2158                          */
2159                         total_depth += cur->depth;
2160                         break;
2161                 }
2162
2163                 /*
2164                  * We break cycles before looping, so an ACTIVE state (or any
2165                  * other cruft which made its way into the state variable)
2166                  * is a bug.
2167                  */
2168                 if (cur->dfs_state != DFS_NONE)
2169                         BUG("confusing delta dfs state in first pass: %d",
2170                             cur->dfs_state);
2171
2172                 /*
2173                  * Now we know this is the first time we've seen the object. If
2174                  * it's not a delta, we're done traversing, but we'll mark it
2175                  * done to save time on future traversals.
2176                  */
2177                 if (!DELTA(cur)) {
2178                         cur->dfs_state = DFS_DONE;
2179                         break;
2180                 }
2181
2182                 /*
2183                  * Mark ourselves as active and see if the next step causes
2184                  * us to cycle to another active object. It's important to do
2185                  * this _before_ we loop, because it impacts where we make the
2186                  * cut, and thus how our total_depth counter works.
2187                  * E.g., we may see a partial loop like:
2188                  *
2189                  *   A -> B -> C -> D -> B
2190                  *
2191                  * Cutting B->C breaks the cycle. But now the depth of A is
2192                  * only 1, and our total_depth counter is at 3. The size of the
2193                  * error is always one less than the size of the cycle we
2194                  * broke. Commits C and D were "lost" from A's chain.
2195                  *
2196                  * If we instead cut D->B, then the depth of A is correct at 3.
2197                  * We keep all commits in the chain that we examined.
2198                  */
2199                 cur->dfs_state = DFS_ACTIVE;
2200                 if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
2201                         drop_reused_delta(cur);
2202                         cur->dfs_state = DFS_DONE;
2203                         break;
2204                 }
2205         }
2206
2207         /*
2208          * And now that we've gone all the way to the bottom of the chain, we
2209          * need to clear the active flags and set the depth fields as
2210          * appropriate. Unlike the loop above, which can quit when it drops a
2211          * delta, we need to keep going to look for more depth cuts. So we need
2212          * an extra "next" pointer to keep going after we reset cur->delta.
2213          */
2214         for (cur = entry; cur; cur = next) {
2215                 next = DELTA(cur);
2216
2217                 /*
2218                  * We should have a chain of zero or more ACTIVE states down to
2219                  * a final DONE. We can quit after the DONE, because either it
2220                  * has no bases, or we've already handled them in a previous
2221                  * call.
2222                  */
2223                 if (cur->dfs_state == DFS_DONE)
2224                         break;
2225                 else if (cur->dfs_state != DFS_ACTIVE)
2226                         BUG("confusing delta dfs state in second pass: %d",
2227                             cur->dfs_state);
2228
2229                 /*
2230                  * If the total_depth is more than depth, then we need to snip
2231                  * the chain into two or more smaller chains that don't exceed
2232                  * the maximum depth. Most of the resulting chains will contain
2233                  * (depth + 1) entries (i.e., depth deltas plus one base), and
2234                  * the last chain (i.e., the one containing entry) will contain
2235                  * whatever entries are left over, namely
2236                  * (total_depth % (depth + 1)) of them.
2237                  *
2238                  * Since we are iterating towards decreasing depth, we need to
2239                  * decrement total_depth as we go, and we need to write to the
2240                  * entry what its final depth will be after all of the
2241                  * snipping. Since we're snipping into chains of length (depth
2242                  * + 1) entries, the final depth of an entry will be its
2243                  * original depth modulo (depth + 1). Any time we encounter an
2244                  * entry whose final depth is supposed to be zero, we snip it
2245                  * from its delta base, thereby making it so.
2246                  */
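		/*
		 * A concrete illustration (editorial, with made-up numbers):
		 * with depth == 3 and a reused chain A->B->...->H giving
		 * total_depth == 7 at A, this loop assigns depths 3, 2, 1, 0
		 * to A..D, snips D from its base E, then assigns 3, 2, 1 to
		 * E..G and stops at the DONE base H -- leaving two chains
		 * that both respect the --depth limit.
		 */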
2247                 cur->depth = (total_depth--) % (depth + 1);
2248                 if (!cur->depth)
2249                         drop_reused_delta(cur);
2250
2251                 cur->dfs_state = DFS_DONE;
2252         }
2253 }
2254
2255 static void get_object_details(void)
2256 {
2257         uint32_t i;
2258         struct object_entry **sorted_by_offset;
2259
2260         if (progress)
2261                 progress_state = start_progress(_("Counting objects"),
2262                                                 to_pack.nr_objects);
2263
2264         CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2265         for (i = 0; i < to_pack.nr_objects; i++)
2266                 sorted_by_offset[i] = to_pack.objects + i;
2267         QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2268
2269         for (i = 0; i < to_pack.nr_objects; i++) {
2270                 struct object_entry *entry = sorted_by_offset[i];
2271                 check_object(entry, i);
2272                 if (entry->type_valid &&
2273                     oe_size_greater_than(&to_pack, entry, big_file_threshold))
2274                         entry->no_try_delta = 1;
2275                 display_progress(progress_state, i + 1);
2276         }
2277         stop_progress(&progress_state);
2278
2279         /*
2280          * This must happen in a second pass, since we rely on the delta
2281          * information for the whole list being completed.
2282          */
2283         for (i = 0; i < to_pack.nr_objects; i++)
2284                 break_delta_chains(&to_pack.objects[i]);
2285
2286         free(sorted_by_offset);
2287 }
2288
2289 /*
2290  * We search for deltas in a list sorted by type, by filename hash, and then
2291  * by size, so that we see progressively smaller and smaller files.
2292  * That's because we prefer deltas to be from the bigger file
2293  * to the smaller -- deletes are potentially cheaper, but perhaps
2294  * more importantly, the bigger file is likely the more recent
2295  * one.  The deepest deltas are therefore the oldest objects, which are
2296  * the least likely to be accessed often.
2297  */
2298 static int type_size_sort(const void *_a, const void *_b)
2299 {
2300         const struct object_entry *a = *(struct object_entry **)_a;
2301         const struct object_entry *b = *(struct object_entry **)_b;
2302         const enum object_type a_type = oe_type(a);
2303         const enum object_type b_type = oe_type(b);
2304         const unsigned long a_size = SIZE(a);
2305         const unsigned long b_size = SIZE(b);
2306
2307         if (a_type > b_type)
2308                 return -1;
2309         if (a_type < b_type)
2310                 return 1;
2311         if (a->hash > b->hash)
2312                 return -1;
2313         if (a->hash < b->hash)
2314                 return 1;
2315         if (a->preferred_base > b->preferred_base)
2316                 return -1;
2317         if (a->preferred_base < b->preferred_base)
2318                 return 1;
2319         if (use_delta_islands) {
2320                 const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2321                 if (island_cmp)
2322                         return island_cmp;
2323         }
2324         if (a_size > b_size)
2325                 return -1;
2326         if (a_size < b_size)
2327                 return 1;
2328         return a < b ? -1 : (a > b);  /* newest first */
2329 }
2330
2331 struct unpacked {
2332         struct object_entry *entry;
2333         void *data;
2334         struct delta_index *index;
2335         unsigned depth;
2336 };
2337
2338 static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2339                            unsigned long delta_size)
2340 {
2341         if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2342                 return 0;
2343
2344         if (delta_size < cache_max_small_delta_size)
2345                 return 1;
2346
2347         /* cache delta, if objects are large enough compared to delta size */
2348         if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2349                 return 1;
2350
2351         return 0;
2352 }
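/*
 * Rough numbers for the heuristic above (editorial illustration): for a
 * 64 MiB source and a 32 MiB target, (src_size >> 20) + (trg_size >> 21)
 * == 80, so beyond the small-delta fast path a delta is cached only while
 * its size stays under roughly 80 KiB (80 << 10), subject to the overall
 * max_delta_cache_size budget checked first.
 */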
2353
2354 /* Protect delta_cache_size */
2355 static pthread_mutex_t cache_mutex;
2356 #define cache_lock()            pthread_mutex_lock(&cache_mutex)
2357 #define cache_unlock()          pthread_mutex_unlock(&cache_mutex)
2358
2359 /*
2360  * Protect object list partitioning (e.g. struct thread_param) and
2361  * progress_state
2362  */
2363 static pthread_mutex_t progress_mutex;
2364 #define progress_lock()         pthread_mutex_lock(&progress_mutex)
2365 #define progress_unlock()       pthread_mutex_unlock(&progress_mutex)
2366
2367 /*
2368  * Access to struct object_entry is unprotected since each thread owns
2369  * a portion of the main object list. Just don't access object entries
2370  * ahead in the list because they can be stolen and would need
2371  * progress_mutex for protection.
2372  */
2373
2374 static inline int oe_size_less_than(struct packing_data *pack,
2375                                     const struct object_entry *lhs,
2376                                     unsigned long rhs)
2377 {
2378         if (lhs->size_valid)
2379                 return lhs->size_ < rhs;
2380         if (rhs < pack->oe_size_limit) /* !size_valid implies lhs >= 2^x > rhs */
2381                 return 0;
2382         return oe_get_size_slow(pack, lhs) < rhs;
2383 }
2384
2385 static inline void oe_set_tree_depth(struct packing_data *pack,
2386                                      struct object_entry *e,
2387                                      unsigned int tree_depth)
2388 {
2389         if (!pack->tree_depth)
2390                 CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
2391         pack->tree_depth[e - pack->objects] = tree_depth;
2392 }
2393
2394 /*
2395  * Return the size of the object without doing any delta
2396  * reconstruction (so non-deltas are true object sizes, but deltas
2397  * return the size of the delta data).
2398  */
2399 unsigned long oe_get_size_slow(struct packing_data *pack,
2400                                const struct object_entry *e)
2401 {
2402         struct packed_git *p;
2403         struct pack_window *w_curs;
2404         unsigned char *buf;
2405         enum object_type type;
2406         unsigned long used, avail, size;
2407
2408         if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
2409                 packing_data_lock(&to_pack);
2410                 if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2411                         die(_("unable to get size of %s"),
2412                             oid_to_hex(&e->idx.oid));
2413                 packing_data_unlock(&to_pack);
2414                 return size;
2415         }
2416
2417         p = oe_in_pack(pack, e);
2418         if (!p)
2419                 BUG("when e->type is a delta, it must belong to a pack");
2420
2421         packing_data_lock(&to_pack);
2422         w_curs = NULL;
2423         buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2424         used = unpack_object_header_buffer(buf, avail, &type, &size);
2425         if (used == 0)
2426                 die(_("unable to parse object header of %s"),
2427                     oid_to_hex(&e->idx.oid));
2428
2429         unuse_pack(&w_curs);
2430         packing_data_unlock(&to_pack);
2431         return size;
2432 }
2433
2434 static int try_delta(struct unpacked *trg, struct unpacked *src,
2435                      unsigned max_depth, unsigned long *mem_usage)
2436 {
2437         struct object_entry *trg_entry = trg->entry;
2438         struct object_entry *src_entry = src->entry;
2439         unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2440         unsigned ref_depth;
2441         enum object_type type;
2442         void *delta_buf;
2443
2444         /* Don't bother doing diffs between different types */
2445         if (oe_type(trg_entry) != oe_type(src_entry))
2446                 return -1;
2447
2448         /*
2449          * We do not bother to try a delta that we discarded on an
2450          * earlier try, but only when reusing delta data.  Note that
2451          * src_entry that is marked as the preferred_base should always
2452          * be considered, as even if we produce a suboptimal delta against
2453          * it, we will still save the transfer cost, as we already know
2454          * the other side has it and we won't send src_entry at all.
2455          */
2456         if (reuse_delta && IN_PACK(trg_entry) &&
2457             IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2458             !src_entry->preferred_base &&
2459             trg_entry->in_pack_type != OBJ_REF_DELTA &&
2460             trg_entry->in_pack_type != OBJ_OFS_DELTA)
2461                 return 0;
2462
2463         /* Let's not bust the allowed depth. */
2464         if (src->depth >= max_depth)
2465                 return 0;
2466
2467         /* Now some size filtering heuristics. */
2468         trg_size = SIZE(trg_entry);
2469         if (!DELTA(trg_entry)) {
2470                 max_size = trg_size/2 - the_hash_algo->rawsz;
2471                 ref_depth = 1;
2472         } else {
2473                 max_size = DELTA_SIZE(trg_entry);
2474                 ref_depth = trg->depth;
2475         }
2476         max_size = (uint64_t)max_size * (max_depth - src->depth) /
2477                                                 (max_depth - ref_depth + 1);
2478         if (max_size == 0)
2479                 return 0;
2480         src_size = SIZE(src_entry);
2481         sizediff = src_size < trg_size ? trg_size - src_size : 0;
2482         if (sizediff >= max_size)
2483                 return 0;
2484         if (trg_size < src_size / 32)
2485                 return 0;
2486
2487         if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2488                 return 0;
2489
2490         /* Load data if not already done */
2491         if (!trg->data) {
2492                 packing_data_lock(&to_pack);
2493                 trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
2494                 packing_data_unlock(&to_pack);
2495                 if (!trg->data)
2496                         die(_("object %s cannot be read"),
2497                             oid_to_hex(&trg_entry->idx.oid));
2498                 if (sz != trg_size)
2499                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2500                             oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2501                             (uintmax_t)trg_size);
2502                 *mem_usage += sz;
2503         }
2504         if (!src->data) {
2505                 packing_data_lock(&to_pack);
2506                 src->data = read_object_file(&src_entry->idx.oid, &type, &sz);
2507                 packing_data_unlock(&to_pack);
2508                 if (!src->data) {
2509                         if (src_entry->preferred_base) {
2510                                 static int warned = 0;
2511                                 if (!warned++)
2512                                         warning(_("object %s cannot be read"),
2513                                                 oid_to_hex(&src_entry->idx.oid));
2514                                 /*
2515                                  * Those objects are not included in the
2516                                  * resulting pack.  Be resilient and ignore
2517                                  * them if they can't be read, in case the
2518                                  * pack could be created nevertheless.
2519                                  */
2520                                 return 0;
2521                         }
2522                         die(_("object %s cannot be read"),
2523                             oid_to_hex(&src_entry->idx.oid));
2524                 }
2525                 if (sz != src_size)
2526                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2527                             oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2528                             (uintmax_t)src_size);
2529                 *mem_usage += sz;
2530         }
2531         if (!src->index) {
2532                 src->index = create_delta_index(src->data, src_size);
2533                 if (!src->index) {
2534                         static int warned = 0;
2535                         if (!warned++)
2536                                 warning(_("suboptimal pack - out of memory"));
2537                         return 0;
2538                 }
2539                 *mem_usage += sizeof_delta_index(src->index);
2540         }
2541
2542         delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2543         if (!delta_buf)
2544                 return 0;
2545
2546         if (DELTA(trg_entry)) {
2547                 /* Prefer only shallower same-sized deltas. */
2548                 if (delta_size == DELTA_SIZE(trg_entry) &&
2549                     src->depth + 1 >= trg->depth) {
2550                         free(delta_buf);
2551                         return 0;
2552                 }
2553         }
2554
2555         /*
2556          * Handle memory allocation outside of the cache
2557          * accounting lock.  The compiler will optimize the strangeness
2558          * away when NO_PTHREADS is defined.
2559          */
2560         free(trg_entry->delta_data);
2561         cache_lock();
2562         if (trg_entry->delta_data) {
2563                 delta_cache_size -= DELTA_SIZE(trg_entry);
2564                 trg_entry->delta_data = NULL;
2565         }
2566         if (delta_cacheable(src_size, trg_size, delta_size)) {
2567                 delta_cache_size += delta_size;
2568                 cache_unlock();
2569                 trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2570         } else {
2571                 cache_unlock();
2572                 free(delta_buf);
2573         }
2574
2575         SET_DELTA(trg_entry, src_entry);
2576         SET_DELTA_SIZE(trg_entry, delta_size);
2577         trg->depth = src->depth + 1;
2578
2579         return 1;
2580 }
2581
2582 static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
2583 {
2584         struct object_entry *child = DELTA_CHILD(me);
2585         unsigned int m = n;
2586         while (child) {
2587                 const unsigned int c = check_delta_limit(child, n + 1);
2588                 if (m < c)
2589                         m = c;
2590                 child = DELTA_SIBLING(child);
2591         }
2592         return m;
2593 }
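/*
 * For instance (editorial illustration): if "me" has one delta child that
 * itself has a child, check_delta_limit(me, 0) returns 2 -- the length of
 * the longest delta chain hanging off "me". find_deltas() subtracts this
 * from the allowed depth so that reused children never end up too deep.
 */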
2594
2595 static unsigned long free_unpacked(struct unpacked *n)
2596 {
2597         unsigned long freed_mem = sizeof_delta_index(n->index);
2598         free_delta_index(n->index);
2599         n->index = NULL;
2600         if (n->data) {
2601                 freed_mem += SIZE(n->entry);
2602                 FREE_AND_NULL(n->data);
2603         }
2604         n->entry = NULL;
2605         n->depth = 0;
2606         return freed_mem;
2607 }
2608
2609 static void find_deltas(struct object_entry **list, unsigned *list_size,
2610                         int window, int depth, unsigned *processed)
2611 {
2612         uint32_t i, idx = 0, count = 0;
2613         struct unpacked *array;
2614         unsigned long mem_usage = 0;
2615
2616         CALLOC_ARRAY(array, window);
2617
2618         for (;;) {
2619                 struct object_entry *entry;
2620                 struct unpacked *n = array + idx;
2621                 int j, max_depth, best_base = -1;
2622
2623                 progress_lock();
2624                 if (!*list_size) {
2625                         progress_unlock();
2626                         break;
2627                 }
2628                 entry = *list++;
2629                 (*list_size)--;
2630                 if (!entry->preferred_base) {
2631                         (*processed)++;
2632                         display_progress(progress_state, *processed);
2633                 }
2634                 progress_unlock();
2635
2636                 mem_usage -= free_unpacked(n);
2637                 n->entry = entry;
2638
2639                 while (window_memory_limit &&
2640                        mem_usage > window_memory_limit &&
2641                        count > 1) {
2642                         const uint32_t tail = (idx + window - count) % window;
2643                         mem_usage -= free_unpacked(array + tail);
2644                         count--;
2645                 }
2646
2647                 /* We do not compute a delta to *create* objects we are not
2648                  * going to pack.
2649                  */
2650                 if (entry->preferred_base)
2651                         goto next;
2652
2653                 /*
2654                  * If the current object is at the pack edge, take the depth of
2655                  * the objects that depend on the current object into account;
2656                  * otherwise they would become too deep.
2657                  */
2658                 max_depth = depth;
2659                 if (DELTA_CHILD(entry)) {
2660                         max_depth -= check_delta_limit(entry, 0);
2661                         if (max_depth <= 0)
2662                                 goto next;
2663                 }
2664
2665                 j = window;
2666                 while (--j > 0) {
2667                         int ret;
2668                         uint32_t other_idx = idx + j;
2669                         struct unpacked *m;
2670                         if (other_idx >= window)
2671                                 other_idx -= window;
2672                         m = array + other_idx;
2673                         if (!m->entry)
2674                                 break;
2675                         ret = try_delta(n, m, max_depth, &mem_usage);
2676                         if (ret < 0)
2677                                 break;
2678                         else if (ret > 0)
2679                                 best_base = other_idx;
2680                 }
2681
2682                 /*
2683                  * If we decided to cache the delta data, then it is best
2684                  * to compress it right away.  First because we have to do
2685                  * it anyway, and doing it here while we're threaded will
2686                  * save a lot of time in the non threaded write phase,
2687                  * as well as allow for caching more deltas within
2688                  * the same cache size limit.
2689                  * ...
2690                  * But only if not writing to stdout, since in that case
2691                  * the network is most likely throttling writes anyway,
2692                  * and therefore it is best to go to the write phase ASAP
2693                  * instead, as we can afford spending more time compressing
2694                  * between writes at that moment.
2695                  */
2696                 if (entry->delta_data && !pack_to_stdout) {
2697                         unsigned long size;
2698
2699                         size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2700                         if (size < (1U << OE_Z_DELTA_BITS)) {
2701                                 entry->z_delta_size = size;
2702                                 cache_lock();
2703                                 delta_cache_size -= DELTA_SIZE(entry);
2704                                 delta_cache_size += entry->z_delta_size;
2705                                 cache_unlock();
2706                         } else {
2707                                 FREE_AND_NULL(entry->delta_data);
2708                                 entry->z_delta_size = 0;
2709                         }
2710                 }
2711
2712                 /* If we made n a delta, and if n is already at max
2713                  * depth, leaving it in the window is pointless.  We
2714                  * should evict it first.
2715                  */
2716                 if (DELTA(entry) && max_depth <= n->depth)
2717                         continue;
2718
2719                 /*
2720                  * Move the best delta base up in the window, after the
2721                  * currently deltified object, to keep it longer.  It will
2722                  * be the first base object to be attempted next.
2723                  */
2724                 if (DELTA(entry)) {
2725                         struct unpacked swap = array[best_base];
2726                         int dist = (window + idx - best_base) % window;
2727                         int dst = best_base;
2728                         while (dist--) {
2729                                 int src = (dst + 1) % window;
2730                                 array[dst] = array[src];
2731                                 dst = src;
2732                         }
2733                         array[dst] = swap;
2734                 }
2735
2736                 next:
2737                 idx++;
2738                 if (count + 1 < window)
2739                         count++;
2740                 if (idx >= window)
2741                         idx = 0;
2742         }
2743
2744         for (i = 0; i < window; ++i) {
2745                 free_delta_index(array[i].index);
2746                 free(array[i].data);
2747         }
2748         free(array);
2749 }
2750
2751 /*
2752  * The main object list is split into smaller lists, each of which is
2753  * handed to one worker.
2754  *
2755  * The main thread waits on the condition that (at least) one of the workers
2756  * has stopped working (which is indicated in the .working member of
2757  * struct thread_params).
2758  *
2759  * When a worker thread has completed its work, it sets .working to 0 and
2760  * signals the main thread, then waits on the condition that .data_ready
2761  * becomes 1.
2762  *
2763  * The main thread then steals half of the work from the worker that has
2764  * the most work left, and hands it to the idle worker.
2765  */
2766
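/*
 * Rough sketch of one hand-off (illustration only; see the code below
 * for the actual locking):
 *
 *   worker                            main thread
 *   ------                            -----------
 *   find_deltas() returns
 *   .working = 0
 *   signal progress_cond  -------->   wakes up, picks this worker as
 *                                     the idle target, steals work
 *                                     from the busiest victim,
 *                                     .working = 1
 *   wait until .data_ready == 1 <---  .data_ready = 1, signal .cond
 *   .data_ready = 0; loop (or exit
 *   if no work was handed over)
 */
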
2767 struct thread_params {
2768         pthread_t thread;
2769         struct object_entry **list;
2770         unsigned list_size;
2771         unsigned remaining;
2772         int window;
2773         int depth;
2774         int working;
2775         int data_ready;
2776         pthread_mutex_t mutex;
2777         pthread_cond_t cond;
2778         unsigned *processed;
2779 };
2780
2781 static pthread_cond_t progress_cond;
2782
2783 /*
2784  * Mutexes and condition variables can't be statically initialized on Windows.
2785  */
2786 static void init_threaded_search(void)
2787 {
2788         pthread_mutex_init(&cache_mutex, NULL);
2789         pthread_mutex_init(&progress_mutex, NULL);
2790         pthread_cond_init(&progress_cond, NULL);
2791 }
2792
2793 static void cleanup_threaded_search(void)
2794 {
2795         pthread_cond_destroy(&progress_cond);
2796         pthread_mutex_destroy(&cache_mutex);
2797         pthread_mutex_destroy(&progress_mutex);
2798 }
2799
2800 static void *threaded_find_deltas(void *arg)
2801 {
2802         struct thread_params *me = arg;
2803
2804         progress_lock();
2805         while (me->remaining) {
2806                 progress_unlock();
2807
2808                 find_deltas(me->list, &me->remaining,
2809                             me->window, me->depth, me->processed);
2810
2811                 progress_lock();
2812                 me->working = 0;
2813                 pthread_cond_signal(&progress_cond);
2814                 progress_unlock();
2815
2816                 /*
2817                  * We must not reset ->data_ready to 0 before we wait on
2818                  * the condition, because the main thread may have set it
2819                  * to 1 before we get here. To be sure that new work is
2820                  * available whenever we see 1 in ->data_ready, it was
2821                  * initialized to 0 before this thread was spawned, and we
2822                  * reset it to 0 right after consuming the signal.
2823                  */
2824                 pthread_mutex_lock(&me->mutex);
2825                 while (!me->data_ready)
2826                         pthread_cond_wait(&me->cond, &me->mutex);
2827                 me->data_ready = 0;
2828                 pthread_mutex_unlock(&me->mutex);
2829
2830                 progress_lock();
2831         }
2832         progress_unlock();
2833         /* leave ->working at 1 so that this thread gets no more work assigned */
2834         return NULL;
2835 }
2836
2837 static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2838                            int window, int depth, unsigned *processed)
2839 {
2840         struct thread_params *p;
2841         int i, ret, active_threads = 0;
2842
2843         init_threaded_search();
2844
2845         if (delta_search_threads <= 1) {
2846                 find_deltas(list, &list_size, window, depth, processed);
2847                 cleanup_threaded_search();
2848                 return;
2849         }
2850         if (progress > pack_to_stdout)
2851                 fprintf_ln(stderr, _("Delta compression using up to %d threads"),
2852                            delta_search_threads);
2853         CALLOC_ARRAY(p, delta_search_threads);
2854
2855         /* Partition the work amongst the worker threads. */
2856         for (i = 0; i < delta_search_threads; i++) {
2857                 unsigned sub_size = list_size / (delta_search_threads - i);
2858
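                /*
                 * E.g. list_size=10 and 4 threads yields segments of 2, 2, 3
                 * and 3 objects (before the adjustments below): the divisor
                 * shrinks as i grows, so any remainder spreads over the
                 * later segments.
                 */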
2859                 /* don't use segments that are too small, or no deltas will be found */
2860                 if (sub_size < 2*window && i+1 < delta_search_threads)
2861                         sub_size = 0;
2862
2863                 p[i].window = window;
2864                 p[i].depth = depth;
2865                 p[i].processed = processed;
2866                 p[i].working = 1;
2867                 p[i].data_ready = 0;
2868
2869                 /* try to split chunks on "path" boundaries */
2870                 while (sub_size && sub_size < list_size &&
2871                        list[sub_size]->hash &&
2872                        list[sub_size]->hash == list[sub_size-1]->hash)
2873                         sub_size++;
2874
2875                 p[i].list = list;
2876                 p[i].list_size = sub_size;
2877                 p[i].remaining = sub_size;
2878
2879                 list += sub_size;
2880                 list_size -= sub_size;
2881         }
2882
2883         /* Start work threads. */
2884         for (i = 0; i < delta_search_threads; i++) {
2885                 if (!p[i].list_size)
2886                         continue;
2887                 pthread_mutex_init(&p[i].mutex, NULL);
2888                 pthread_cond_init(&p[i].cond, NULL);
2889                 ret = pthread_create(&p[i].thread, NULL,
2890                                      threaded_find_deltas, &p[i]);
2891                 if (ret)
2892                         die(_("unable to create thread: %s"), strerror(ret));
2893                 active_threads++;
2894         }
2895
2896         /*
2897          * Now let's wait for work completion.  Each time a thread is done
2898          * with its work, we steal half of the remaining work from the
2899          * thread with the largest number of unprocessed objects and give
2900          * it to that newly idle thread.  This ensures good load balancing
2901          * until the remaining object list segments are simply too short
2902          * to be worth splitting anymore.
2903          */
2904         while (active_threads) {
2905                 struct thread_params *target = NULL;
2906                 struct thread_params *victim = NULL;
2907                 unsigned sub_size = 0;
2908
2909                 progress_lock();
2910                 for (;;) {
2911                         for (i = 0; !target && i < delta_search_threads; i++)
2912                                 if (!p[i].working)
2913                                         target = &p[i];
2914                         if (target)
2915                                 break;
2916                         pthread_cond_wait(&progress_cond, &progress_mutex);
2917                 }
2918
2919                 for (i = 0; i < delta_search_threads; i++)
2920                         if (p[i].remaining > 2*window &&
2921                             (!victim || victim->remaining < p[i].remaining))
2922                                 victim = &p[i];
2923                 if (victim) {
2924                         sub_size = victim->remaining / 2;
2925                         list = victim->list + victim->list_size - sub_size;
2926                         while (sub_size && list[0]->hash &&
2927                                list[0]->hash == list[-1]->hash) {
2928                                 list++;
2929                                 sub_size--;
2930                         }
2931                         if (!sub_size) {
2932                                 /*
2933                                  * It is possible for some "paths" to have
2934                                  * so many objects that no hash boundary
2935                                  * might be found.  Let's just steal the
2936                                  * exact half in that case.
2937                                  */
2938                                 sub_size = victim->remaining / 2;
2939                                 list -= sub_size;
2940                         }
2941                         target->list = list;
2942                         victim->list_size -= sub_size;
2943                         victim->remaining -= sub_size;
2944                 }
2945                 target->list_size = sub_size;
2946                 target->remaining = sub_size;
2947                 target->working = 1;
2948                 progress_unlock();
2949
2950                 pthread_mutex_lock(&target->mutex);
2951                 target->data_ready = 1;
2952                 pthread_cond_signal(&target->cond);
2953                 pthread_mutex_unlock(&target->mutex);
2954
2955                 if (!sub_size) {
2956                         pthread_join(target->thread, NULL);
2957                         pthread_cond_destroy(&target->cond);
2958                         pthread_mutex_destroy(&target->mutex);
2959                         active_threads--;
2960                 }
2961         }
2962         cleanup_threaded_search();
2963         free(p);
2964 }
2965
2966 static int obj_is_packed(const struct object_id *oid)
2967 {
2968         return packlist_find(&to_pack, oid) ||
2969                 (reuse_packfile_bitmap &&
2970                  bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
2971 }
2972
2973 static void add_tag_chain(const struct object_id *oid)
2974 {
2975         struct tag *tag;
2976
2977         /*
2978          * We catch duplicates already in add_object_entry(), but we'd
2979          * prefer to do this extra check to avoid having to parse the
2980          * tag at all if we already know that it's being packed (e.g., if
2981          * it was included via bitmaps, we would not have parsed it
2982          * previously).
2983          */
2984         if (obj_is_packed(oid))
2985                 return;
2986
2987         tag = lookup_tag(the_repository, oid);
2988         while (1) {
2989                 if (!tag || parse_tag(tag) || !tag->tagged)
2990                         die(_("unable to pack objects reachable from tag %s"),
2991                             oid_to_hex(oid));
2992
2993                 add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
2994
2995                 if (tag->tagged->type != OBJ_TAG)
2996                         return;
2997
2998                 tag = (struct tag *)tag->tagged;
2999         }
3000 }
3001
3002 static int add_ref_tag(const char *tag, const struct object_id *oid, int flag, void *cb_data)
3003 {
3004         struct object_id peeled;
3005
3006         if (!peel_iterated_oid(oid, &peeled) && obj_is_packed(&peeled))
3007                 add_tag_chain(oid);
3008         return 0;
3009 }
3010
3011 static void prepare_pack(int window, int depth)
3012 {
3013         struct object_entry **delta_list;
3014         uint32_t i, nr_deltas;
3015         unsigned n;
3016
3017         if (use_delta_islands)
3018                 resolve_tree_islands(the_repository, progress, &to_pack);
3019
3020         get_object_details();
3021
3022         /*
3023          * If we're locally repacking then we need to be doubly careful
3024          * from now on in order to make sure no stealth corruption gets
3025          * propagated to the new pack.  Clients receiving streamed packs
3026          * should validate everything they get anyway so no need to incur
3027          * the additional cost here in that case.
3028          */
3029         if (!pack_to_stdout)
3030                 do_check_packed_object_crc = 1;
3031
3032         if (!to_pack.nr_objects || !window || !depth)
3033                 return;
3034
3035         ALLOC_ARRAY(delta_list, to_pack.nr_objects);
3036         nr_deltas = n = 0;
3037
3038         for (i = 0; i < to_pack.nr_objects; i++) {
3039                 struct object_entry *entry = to_pack.objects + i;
3040
3041                 if (DELTA(entry))
3042                         /* This happens if we decided to reuse an existing
3043                          * delta from a pack.  "reuse_delta &&" is implied.
3044                          */
3045                         continue;
3046
3047                 if (!entry->type_valid ||
3048                     oe_size_less_than(&to_pack, entry, 50))
3049                         continue;
3050
3051                 if (entry->no_try_delta)
3052                         continue;
3053
3054                 if (!entry->preferred_base) {
3055                         nr_deltas++;
3056                         if (oe_type(entry) < 0)
3057                                 die(_("unable to get type of object %s"),
3058                                     oid_to_hex(&entry->idx.oid));
3059                 } else {
3060                         if (oe_type(entry) < 0) {
3061                                 /*
3062                                  * This object was not found, but we
3063                                  * don't have to include it anyway.
3064                                  */
3065                                 continue;
3066                         }
3067                 }
3068
3069                 delta_list[n++] = entry;
3070         }
3071
3072         if (nr_deltas && n > 1) {
3073                 unsigned nr_done = 0;
3074
3075                 if (progress)
3076                         progress_state = start_progress(_("Compressing objects"),
3077                                                         nr_deltas);
3078                 QSORT(delta_list, n, type_size_sort);
3079                 ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
3080                 stop_progress(&progress_state);
3081                 if (nr_done != nr_deltas)
3082                         die(_("inconsistency with delta count"));
3083         }
3084         free(delta_list);
3085 }
3086
3087 static int git_pack_config(const char *k, const char *v, void *cb)
3088 {
3089         if (!strcmp(k, "pack.window")) {
3090                 window = git_config_int(k, v);
3091                 return 0;
3092         }
3093         if (!strcmp(k, "pack.windowmemory")) {
3094                 window_memory_limit = git_config_ulong(k, v);
3095                 return 0;
3096         }
3097         if (!strcmp(k, "pack.depth")) {
3098                 depth = git_config_int(k, v);
3099                 return 0;
3100         }
3101         if (!strcmp(k, "pack.deltacachesize")) {
3102                 max_delta_cache_size = git_config_int(k, v);
3103                 return 0;
3104         }
3105         if (!strcmp(k, "pack.deltacachelimit")) {
3106                 cache_max_small_delta_size = git_config_int(k, v);
3107                 return 0;
3108         }
3109         if (!strcmp(k, "pack.writebitmaphashcache")) {
3110                 if (git_config_bool(k, v))
3111                         write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3112                 else
3113                         write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3114                 return 0;
3115         }
3115         if (!strcmp(k, "pack.usebitmaps")) {
3116                 use_bitmap_index_default = git_config_bool(k, v);
3117                 return 0;
3118         }
3119         if (!strcmp(k, "pack.allowpackreuse")) {
3120                 allow_pack_reuse = git_config_bool(k, v);
3121                 return 0;
3122         }
3123         if (!strcmp(k, "pack.threads")) {
3124                 delta_search_threads = git_config_int(k, v);
3125                 if (delta_search_threads < 0)
3126                         die(_("invalid number of threads specified (%d)"),
3127                             delta_search_threads);
3128                 if (!HAVE_THREADS && delta_search_threads != 1) {
3129                         warning(_("no threads support, ignoring %s"), k);
3130                         delta_search_threads = 0;
3131                 }
3132                 return 0;
3133         }
3134         if (!strcmp(k, "pack.indexversion")) {
3135                 pack_idx_opts.version = git_config_int(k, v);
3136                 if (pack_idx_opts.version > 2)
3137                         die(_("bad pack.indexversion=%"PRIu32),
3138                             pack_idx_opts.version);
3139                 return 0;
3140         }
3141         if (!strcmp(k, "pack.writereverseindex")) {
3142                 if (git_config_bool(k, v))
3143                         pack_idx_opts.flags |= WRITE_REV;
3144                 else
3145                         pack_idx_opts.flags &= ~WRITE_REV;
3146                 return 0;
3147         }
3148         if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3149                 struct configured_exclusion *ex = xmalloc(sizeof(*ex));
3150                 const char *oid_end, *pack_end;
3151                 /*
3152                  * Stores the pack hash. This is not a true object ID, but is
3153                  * of the same form.
3154                  */
3155                 struct object_id pack_hash;
3156
3157                 if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3158                     *oid_end != ' ' ||
3159                     parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3160                     *pack_end != ' ')
3161                         die(_("value of uploadpack.blobpackfileuri must be "
3162                               "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3163                 if (oidmap_get(&configured_exclusions, &ex->e.oid))
3164                         die(_("object already configured in another "
3165                               "uploadpack.blobpackfileuri (got '%s')"), v);
3166                 ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3167                 memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3168                 ex->uri = xstrdup(pack_end + 1);
3169                 oidmap_put(&configured_exclusions, ex);
3170         }
3171         return git_default_config(k, v, cb);
3172 }
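
/*
 * Illustrative configuration exercised by git_pack_config() above
 * (example values, not necessarily the built-in defaults):
 *
 *   [pack]
 *           window = 10
 *           depth = 50
 *           threads = 0             ; 0 means autodetect, see below
 *           deltaCacheSize = 256m
 *           writeReverseIndex = true
 */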
3173
3174 /* Counters for trace2 output when in --stdin-packs mode. */
3175 static int stdin_packs_found_nr;
3176 static int stdin_packs_hints_nr;
3177
3178 static int add_object_entry_from_pack(const struct object_id *oid,
3179                                       struct packed_git *p,
3180                                       uint32_t pos,
3181                                       void *_data)
3182 {
3183         struct rev_info *revs = _data;
3184         struct object_info oi = OBJECT_INFO_INIT;
3185         off_t ofs;
3186         enum object_type type;
3187
3188         display_progress(progress_state, ++nr_seen);
3189
3190         if (have_duplicate_entry(oid, 0))
3191                 return 0;
3192
3193         ofs = nth_packed_object_offset(p, pos);
3194         if (!want_object_in_pack(oid, 0, &p, &ofs))
3195                 return 0;
3196
3197         oi.typep = &type;
3198         if (packed_object_info(the_repository, p, ofs, &oi) < 0)
3199                 die(_("could not get type of object %s in pack %s"),
3200                     oid_to_hex(oid), p->pack_name);
3201         else if (type == OBJ_COMMIT) {
3202                 /*
3203                  * commits in included packs are used as starting points for the
3204                  * subsequent revision walk
3205                  */
3206                 add_pending_oid(revs, NULL, oid, 0);
3207         }
3208
3209         stdin_packs_found_nr++;
3210
3211         create_object_entry(oid, type, 0, 0, 0, p, ofs);
3212
3213         return 0;
3214 }
3215
3216 static void show_commit_pack_hint(struct commit *commit, void *_data)
3217 {
3218         /* nothing to do; commits don't have a namehash */
3219 }
3220
3221 static void show_object_pack_hint(struct object *object, const char *name,
3222                                   void *_data)
3223 {
3224         struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3225         if (!oe)
3226                 return;
3227
3228         /*
3229          * Our 'to_pack' list was constructed by iterating all objects packed in
3230          * included packs, and so doesn't have a non-zero hash field that you
3231          * would typically pick up during a reachability traversal.
3232          *
3233          * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3234          * here now using the name, in order to perhaps improve the delta
3235          * selection process.
3236          */
3237         oe->hash = pack_name_hash(name);
3238         oe->no_try_delta = name && no_try_delta(name);
3239
3240         stdin_packs_hints_nr++;
3241 }
3242
3243 static int pack_mtime_cmp(const void *_a, const void *_b)
3244 {
3245         struct packed_git *a = ((const struct string_list_item*)_a)->util;
3246         struct packed_git *b = ((const struct string_list_item*)_b)->util;
3247
3248         /*
3249          * order packs by descending mtime so that objects are laid out
3250          * roughly as newest-to-oldest
3251          */
3252         if (a->mtime < b->mtime)
3253                 return 1;
3254         else if (b->mtime < a->mtime)
3255                 return -1;
3256         else
3257                 return 0;
3258 }
3259
3260 static void read_packs_list_from_stdin(void)
3261 {
3262         struct strbuf buf = STRBUF_INIT;
3263         struct string_list include_packs = STRING_LIST_INIT_DUP;
3264         struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3265         struct string_list_item *item = NULL;
3266
3267         struct packed_git *p;
3268         struct rev_info revs;
3269
3270         repo_init_revisions(the_repository, &revs, NULL);
3271         /*
3272          * Use a revision walk to fill in the namehash of objects in the include
3273          * packs. To save time, we'll avoid traversing through objects that are
3274          * in excluded packs.
3275          *
3276          * That may cause us to avoid populating all of the namehash fields of
3277          * all included objects, but our goal is best-effort, since this is only
3278          * an optimization during delta selection.
3279          */
3280         revs.no_kept_objects = 1;
3281         revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3282         revs.blob_objects = 1;
3283         revs.tree_objects = 1;
3284         revs.tag_objects = 1;
3285         revs.ignore_missing_links = 1;
3286
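        /*
         * Illustrative input, one pack name per line; a leading '^' marks
         * a pack whose objects must be excluded:
         *
         *   pack-<hash-a>.pack
         *   ^pack-<hash-b>.pack
         */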
3287         while (strbuf_getline(&buf, stdin) != EOF) {
3288                 if (!buf.len)
3289                         continue;
3290
3291                 if (*buf.buf == '^')
3292                         string_list_append(&exclude_packs, buf.buf + 1);
3293                 else
3294                         string_list_append(&include_packs, buf.buf);
3295
3296                 strbuf_reset(&buf);
3297         }
3298
3299         string_list_sort(&include_packs);
3300         string_list_sort(&exclude_packs);
3301
3302         for (p = get_all_packs(the_repository); p; p = p->next) {
3303                 const char *pack_name = pack_basename(p);
3304
3305                 item = string_list_lookup(&include_packs, pack_name);
3306                 if (!item)
3307                         item = string_list_lookup(&exclude_packs, pack_name);
3308
3309                 if (item)
3310                         item->util = p;
3311         }
3312
3313         /*
3314          * First handle all of the excluded packs, marking them as kept in-core
3315          * so that later calls to add_object_entry() discard any objects that
3316          * are also found in excluded packs.
3317          */
3318         for_each_string_list_item(item, &exclude_packs) {
3319                 struct packed_git *p = item->util;
3320                 if (!p)
3321                         die(_("could not find pack '%s'"), item->string);
3322                 p->pack_keep_in_core = 1;
3323         }
3324
3325         /*
3326          * Order packs by descending mtime; use QSORT directly to access the
3327          * string_list_item's ->util pointer, which string_list_sort() does not
3328          * provide.
3329          */
3330         QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3331
3332         for_each_string_list_item(item, &include_packs) {
3333                 struct packed_git *p = item->util;
3334                 if (!p)
3335                         die(_("could not find pack '%s'"), item->string);
3336                 for_each_object_in_pack(p,
3337                                         add_object_entry_from_pack,
3338                                         &revs,
3339                                         FOR_EACH_OBJECT_PACK_ORDER);
3340         }
3341
3342         if (prepare_revision_walk(&revs))
3343                 die(_("revision walk setup failed"));
3344         traverse_commit_list(&revs,
3345                              show_commit_pack_hint,
3346                              show_object_pack_hint,
3347                              NULL);
3348
3349         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
3350                            stdin_packs_found_nr);
3351         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
3352                            stdin_packs_hints_nr);
3353
3354         strbuf_release(&buf);
3355         string_list_clear(&include_packs, 0);
3356         string_list_clear(&exclude_packs, 0);
3357 }
3358
3359 static void read_object_list_from_stdin(void)
3360 {
3361         char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
3362         struct object_id oid;
3363         const char *p;
3364
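        /*
         * Each input line is either "<oid> <path>", adding that object with
         * <path> as its name hint, or "-<oid>", marking an edge object that
         * may serve as a preferred delta base but will not itself be
         * included in the pack.
         */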
3365         for (;;) {
3366                 if (!fgets(line, sizeof(line), stdin)) {
3367                         if (feof(stdin))
3368                                 break;
3369                         if (!ferror(stdin))
3370                                 die("BUG: fgets returned NULL, not EOF, not error!");
3371                         if (errno != EINTR)
3372                                 die_errno("fgets");
3373                         clearerr(stdin);
3374                         continue;
3375                 }
3376                 if (line[0] == '-') {
3377                         if (get_oid_hex(line+1, &oid))
3378                                 die(_("expected edge object ID, got garbage:\n %s"),
3379                                     line);
3380                         add_preferred_base(&oid);
3381                         continue;
3382                 }
3383                 if (parse_oid_hex(line, &oid, &p))
3384                         die(_("expected object ID, got garbage:\n %s"), line);
3385
3386                 add_preferred_base_object(p + 1);
3387                 add_object_entry(&oid, OBJ_NONE, p + 1, 0);
3388         }
3389 }
3390
3391 /* Remember to update object flag allocation in object.h */
3392 #define OBJECT_ADDED (1u<<20)
3393
3394 static void show_commit(struct commit *commit, void *data)
3395 {
3396         add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
3397         commit->object.flags |= OBJECT_ADDED;
3398
3399         if (write_bitmap_index)
3400                 index_commit_for_bitmap(commit);
3401
3402         if (use_delta_islands)
3403                 propagate_island_marks(commit);
3404 }
3405
3406 static void show_object(struct object *obj, const char *name, void *data)
3407 {
3408         add_preferred_base_object(name);
3409         add_object_entry(&obj->oid, obj->type, name, 0);
3410         obj->flags |= OBJECT_ADDED;
3411
3412         if (use_delta_islands) {
3413                 const char *p;
3414                 unsigned depth;
3415                 struct object_entry *ent;
3416
3417                 /* the empty string is a root tree, which is depth 0 */
3418                 depth = *name ? 1 : 0;
3419                 for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
3420                         depth++;
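                /* e.g. "" -> 0, "Makefile" -> 1, "t/helper/foo.c" -> 3 */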
3421
3422                 ent = packlist_find(&to_pack, &obj->oid);
3423                 if (ent && depth > oe_tree_depth(&to_pack, ent))
3424                         oe_set_tree_depth(&to_pack, ent, depth);
3425         }
3426 }
3427
3428 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
3429 {
3430         assert(arg_missing_action == MA_ALLOW_ANY);
3431
3432         /*
3433          * Quietly ignore ALL missing objects.  This avoids problems with
3434          * staging them now and getting an odd error later.
3435          */
3436         if (!has_object(the_repository, &obj->oid, 0))
3437                 return;
3438
3439         show_object(obj, name, data);
3440 }
3441
3442 static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
3443 {
3444         assert(arg_missing_action == MA_ALLOW_PROMISOR);
3445
3446         /*
3447          * Quietly ignore EXPECTED missing objects.  This avoids problems with
3448          * staging them now and getting an odd error later.
3449          */
3450         if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
3451                 return;
3452
3453         show_object(obj, name, data);
3454 }
3455
3456 static int option_parse_missing_action(const struct option *opt,
3457                                        const char *arg, int unset)
3458 {
3459         assert(arg);
3460         assert(!unset);
3461
3462         if (!strcmp(arg, "error")) {
3463                 arg_missing_action = MA_ERROR;
3464                 fn_show_object = show_object;
3465                 return 0;
3466         }
3467
3468         if (!strcmp(arg, "allow-any")) {
3469                 arg_missing_action = MA_ALLOW_ANY;
3470                 fetch_if_missing = 0;
3471                 fn_show_object = show_object__ma_allow_any;
3472                 return 0;
3473         }
3474
3475         if (!strcmp(arg, "allow-promisor")) {
3476                 arg_missing_action = MA_ALLOW_PROMISOR;
3477                 fetch_if_missing = 0;
3478                 fn_show_object = show_object__ma_allow_promisor;
3479                 return 0;
3480         }
3481
3482         die(_("invalid value for --missing"));
3483         return 0;
3484 }
3485
3486 static void show_edge(struct commit *commit)
3487 {
3488         add_preferred_base(&commit->object.oid);
3489 }
3490
3491 struct in_pack_object {
3492         off_t offset;
3493         struct object *object;
3494 };
3495
3496 struct in_pack {
3497         unsigned int alloc;
3498         unsigned int nr;
3499         struct in_pack_object *array;
3500 };
3501
3502 static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
3503 {
3504         in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
3505         in_pack->array[in_pack->nr].object = object;
3506         in_pack->nr++;
3507 }
3508
3509 /*
3510  * Compare the objects in offset order, to emulate the
3511  * "git rev-list --objects" output that produced the pack originally.
3512  */
3513 static int ofscmp(const void *a_, const void *b_)
3514 {
3515         struct in_pack_object *a = (struct in_pack_object *)a_;
3516         struct in_pack_object *b = (struct in_pack_object *)b_;
3517
3518         if (a->offset < b->offset)
3519                 return -1;
3520         else if (a->offset > b->offset)
3521                 return 1;
3522         else
3523                 return oidcmp(&a->object->oid, &b->object->oid);
3524 }
3525
3526 static void add_objects_in_unpacked_packs(void)
3527 {
3528         struct packed_git *p;
3529         struct in_pack in_pack;
3530         uint32_t i;
3531
3532         memset(&in_pack, 0, sizeof(in_pack));
3533
3534         for (p = get_all_packs(the_repository); p; p = p->next) {
3535                 struct object_id oid;
3536                 struct object *o;
3537
3538                 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
3539                         continue;
3540                 if (open_pack_index(p))
3541                         die(_("cannot open pack index"));
3542
3543                 ALLOC_GROW(in_pack.array,
3544                            in_pack.nr + p->num_objects,
3545                            in_pack.alloc);
3546
3547                 for (i = 0; i < p->num_objects; i++) {
3548                         nth_packed_object_id(&oid, p, i);
3549                         o = lookup_unknown_object(the_repository, &oid);
3550                         if (!(o->flags & OBJECT_ADDED))
3551                                 mark_in_pack_object(o, p, &in_pack);
3552                         o->flags |= OBJECT_ADDED;
3553                 }
3554         }
3555
3556         if (in_pack.nr) {
3557                 QSORT(in_pack.array, in_pack.nr, ofscmp);
3558                 for (i = 0; i < in_pack.nr; i++) {
3559                         struct object *o = in_pack.array[i].object;
3560                         add_object_entry(&o->oid, o->type, "", 0);
3561                 }
3562         }
3563         free(in_pack.array);
3564 }
3565
3566 static int add_loose_object(const struct object_id *oid, const char *path,
3567                             void *data)
3568 {
3569         enum object_type type = oid_object_info(the_repository, oid, NULL);
3570
3571         if (type < 0) {
3572                 warning(_("loose object at %s could not be examined"), path);
3573                 return 0;
3574         }
3575
3576         add_object_entry(oid, type, "", 0);
3577         return 0;
3578 }
3579
3580 /*
3581  * We actually don't even have to worry about reachability here.
3582  * add_object_entry will weed out duplicates, so we just add every
3583  * loose object we find.
3584  */
3585 static void add_unreachable_loose_objects(void)
3586 {
3587         for_each_loose_file_in_objdir(get_object_directory(),
3588                                       add_loose_object,
3589                                       NULL, NULL, NULL);
3590 }
3591
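/*
 * Does the object live in a pack we must leave alone, i.e. a kept or
 * non-local one? "last_found" caches the pack that matched most
 * recently: we probe it first and then walk the full pack list,
 * skipping the cached pack when we reach it again. "(void *)1" is
 * merely a sentinel meaning "nothing cached yet".
 */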
3592 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
3593 {
3594         static struct packed_git *last_found = (void *)1;
3595         struct packed_git *p;
3596
3597         p = (last_found != (void *)1) ? last_found :
3598                                         get_all_packs(the_repository);
3599
3600         while (p) {
3601                 if ((!p->pack_local || p->pack_keep ||
3602                                 p->pack_keep_in_core) &&
3603                         find_pack_entry_one(oid->hash, p)) {
3604                         last_found = p;
3605                         return 1;
3606                 }
3607                 if (p == last_found)
3608                         p = get_all_packs(the_repository);
3609                 else
3610                         p = p->next;
3611                 if (p == last_found)
3612                         p = p->next;
3613         }
3614         return 0;
3615 }
3616
3617 /*
3618  * Store a list of sha1s that should not be discarded
3619  * because they were either written too recently, or are
3620  * reachable from another object that was.
3621  *
3622  * This is filled by get_object_list.
3623  */
3624 static struct oid_array recent_objects;
3625
3626 static int loosened_object_can_be_discarded(const struct object_id *oid,
3627                                             timestamp_t mtime)
3628 {
3629         if (!unpack_unreachable_expiration)
3630                 return 0;
3631         if (mtime > unpack_unreachable_expiration)
3632                 return 0;
3633         if (oid_array_lookup(&recent_objects, oid) >= 0)
3634                 return 0;
3635         return 1;
3636 }
3637
3638 static void loosen_unused_packed_objects(void)
3639 {
3640         struct packed_git *p;
3641         uint32_t i;
3642         uint32_t loosened_objects_nr = 0;
3643         struct object_id oid;
3644
3645         for (p = get_all_packs(the_repository); p; p = p->next) {
3646                 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
3647                         continue;
3648
3649                 if (open_pack_index(p))
3650                         die(_("cannot open pack index"));
3651
3652                 for (i = 0; i < p->num_objects; i++) {
3653                         nth_packed_object_id(&oid, p, i);
3654                         if (!packlist_find(&to_pack, &oid) &&
3655                             !has_sha1_pack_kept_or_nonlocal(&oid) &&
3656                             !loosened_object_can_be_discarded(&oid, p->mtime)) {
3657                                 if (force_object_loose(&oid, p->mtime))
3658                                         die(_("unable to force loose object"));
3659                                 loosened_objects_nr++;
3660                         }
3661                 }
3662         }
3663
3664         trace2_data_intmax("pack-objects", the_repository,
3665                            "loosen_unused_packed_objects/loosened", loosened_objects_nr);
3666 }
3667
3668 /*
3669  * This tracks any options which pack-reuse code expects to be on, or which a
3670  * reader of the pack might not understand, and which would therefore prevent
3671  * blind reuse of what we have on disk.
3672  */
3673 static int pack_options_allow_reuse(void)
3674 {
3675         return allow_pack_reuse &&
3676                pack_to_stdout &&
3677                !ignore_packed_keep_on_disk &&
3678                !ignore_packed_keep_in_core &&
3679                (!local || !have_non_local_packs) &&
3680                !incremental;
3681 }
3682
3683 static int get_object_list_from_bitmap(struct rev_info *revs)
3684 {
3685         if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options, 0)))
3686                 return -1;
3687
3688         if (pack_options_allow_reuse() &&
3689             !reuse_partial_packfile_from_bitmap(
3690                         bitmap_git,
3691                         &reuse_packfile,
3692                         &reuse_packfile_objects,
3693                         &reuse_packfile_bitmap)) {
3694                 assert(reuse_packfile_objects);
3695                 nr_result += reuse_packfile_objects;
3696                 nr_seen += reuse_packfile_objects;
3697                 display_progress(progress_state, nr_seen);
3698         }
3699
3700         traverse_bitmap_commit_list(bitmap_git, revs,
3701                                     &add_object_entry_from_bitmap);
3702         return 0;
3703 }
3704
3705 static void record_recent_object(struct object *obj,
3706                                  const char *name,
3707                                  void *data)
3708 {
3709         oid_array_append(&recent_objects, &obj->oid);
3710 }
3711
3712 static void record_recent_commit(struct commit *commit, void *data)
3713 {
3714         oid_array_append(&recent_objects, &commit->object.oid);
3715 }
3716
3717 static int mark_bitmap_preferred_tip(const char *refname,
3718                                      const struct object_id *oid, int flags,
3719                                      void *_data)
3720 {
3721         struct object_id peeled;
3722         struct object *object;
3723
3724         if (!peel_iterated_oid(oid, &peeled))
3725                 oid = &peeled;
3726
3727         object = parse_object_or_die(oid, refname);
3728         if (object->type == OBJ_COMMIT)
3729                 object->flags |= NEEDS_BITMAP;
3730
3731         return 0;
3732 }
3733
3734 static void mark_bitmap_preferred_tips(void)
3735 {
3736         struct string_list_item *item;
3737         const struct string_list *preferred_tips;
3738
3739         preferred_tips = bitmap_preferred_tips(the_repository);
3740         if (!preferred_tips)
3741                 return;
3742
3743         for_each_string_list_item(item, preferred_tips) {
3744                 for_each_ref_in(item->string, mark_bitmap_preferred_tip, NULL);
3745         }
3746 }
3747
3748 static void get_object_list(int ac, const char **av)
3749 {
3750         struct rev_info revs;
3751         struct setup_revision_opt s_r_opt = {
3752                 .allow_exclude_promisor_objects = 1,
3753         };
3754         char line[1000];
3755         int flags = 0;
3756         int save_warning;
3757
3758         repo_init_revisions(the_repository, &revs, NULL);
3759         save_commit_buffer = 0;
3760         setup_revisions(ac, av, &revs, &s_r_opt);
3761
3762         /* make sure shallows are read */
3763         is_repository_shallow(the_repository);
3764
3765         save_warning = warn_on_object_refname_ambiguity;
3766         warn_on_object_refname_ambiguity = 0;
3767
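        /*
         * With --revs, stdin carries one revision argument per line; the
         * pseudo-arguments "--not" (which flips the UNINTERESTING flag) and
         * "--shallow <oid>" are also accepted, and an empty line terminates
         * the list.
         */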
3768         while (fgets(line, sizeof(line), stdin) != NULL) {
3769                 int len = strlen(line);
3770                 if (len && line[len - 1] == '\n')
3771                         line[--len] = 0;
3772                 if (!len)
3773                         break;
3774                 if (*line == '-') {
3775                         if (!strcmp(line, "--not")) {
3776                                 flags ^= UNINTERESTING;
3777                                 write_bitmap_index = 0;
3778                                 continue;
3779                         }
3780                         if (starts_with(line, "--shallow ")) {
3781                                 struct object_id oid;
3782                                 if (get_oid_hex(line + 10, &oid))
3783                                         die("not an object name '%s'", line + 10);
3784                                 register_shallow(the_repository, &oid);
3785                                 use_bitmap_index = 0;
3786                                 continue;
3787                         }
3788                         die(_("not a rev '%s'"), line);
3789                 }
3790                 if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
3791                         die(_("bad revision '%s'"), line);
3792         }
3793
3794         warn_on_object_refname_ambiguity = save_warning;
3795
3796         if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
3797                 return;
3798
3799         if (use_delta_islands)
3800                 load_delta_islands(the_repository, progress);
3801
3802         if (write_bitmap_index)
3803                 mark_bitmap_preferred_tips();
3804
3805         if (prepare_revision_walk(&revs))
3806                 die(_("revision walk setup failed"));
3807         mark_edges_uninteresting(&revs, show_edge, sparse);
3808
3809         if (!fn_show_object)
3810                 fn_show_object = show_object;
3811         traverse_commit_list_filtered(&filter_options, &revs,
3812                                       show_commit, fn_show_object, NULL,
3813                                       NULL);
3814
3815         if (unpack_unreachable_expiration) {
3816                 revs.ignore_missing_links = 1;
3817                 if (add_unseen_recent_objects_to_traversal(&revs,
3818                                 unpack_unreachable_expiration))
3819                         die(_("unable to add recent objects"));
3820                 if (prepare_revision_walk(&revs))
3821                         die(_("revision walk setup failed"));
3822                 traverse_commit_list(&revs, record_recent_commit,
3823                                      record_recent_object, NULL);
3824         }
3825
3826         if (keep_unreachable)
3827                 add_objects_in_unpacked_packs();
3828         if (pack_loose_unreachable)
3829                 add_unreachable_loose_objects();
3830         if (unpack_unreachable)
3831                 loosen_unused_packed_objects();
3832
3833         oid_array_clear(&recent_objects);
3834 }
3835
3836 static void add_extra_kept_packs(const struct string_list *names)
3837 {
3838         struct packed_git *p;
3839
3840         if (!names->nr)
3841                 return;
3842
3843         for (p = get_all_packs(the_repository); p; p = p->next) {
3844                 const char *name = basename(p->pack_name);
3845                 int i;
3846
3847                 if (!p->pack_local)
3848                         continue;
3849
3850                 for (i = 0; i < names->nr; i++)
3851                         if (!fspathcmp(name, names->items[i].string))
3852                                 break;
3853
3854                 if (i < names->nr) {
3855                         p->pack_keep_in_core = 1;
3856                         ignore_packed_keep_in_core = 1;
3857                         continue;
3858                 }
3859         }
3860 }
3861
3862 static int option_parse_index_version(const struct option *opt,
3863                                       const char *arg, int unset)
3864 {
3865         char *c;
3866         const char *val = arg;
3867
3868         BUG_ON_OPT_NEG(unset);
3869
3870         pack_idx_opts.version = strtoul(val, &c, 10);
3871         if (pack_idx_opts.version > 2)
3872                 die(_("unsupported index version %s"), val);
3873         if (*c == ',' && c[1])
3874                 pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
3875         if (*c || pack_idx_opts.off32_limit & 0x80000000)
3876                 die(_("bad index version '%s'"), val);
3877         return 0;
3878 }
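
/*
 * For example, "--index-version=2,0x40000000" (an illustrative limit)
 * selects the v2 .idx format and routes offsets at or beyond 1 GiB to
 * the 64-bit offset table; the limit must not have its high bit set.
 */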
3879
3880 static int option_parse_unpack_unreachable(const struct option *opt,
3881                                            const char *arg, int unset)
3882 {
3883         if (unset) {
3884                 unpack_unreachable = 0;
3885                 unpack_unreachable_expiration = 0;
3886         }
3887         else {
3888                 unpack_unreachable = 1;
3889                 if (arg)
3890                         unpack_unreachable_expiration = approxidate(arg);
3891         }
3892         return 0;
3893 }
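
/*
 * E.g. "--unpack-unreachable=2.weeks.ago": unreachable objects whose
 * pack mtime is newer than that date are loosened, while older ones
 * may be dropped; a bare --unpack-unreachable loosens all of them.
 */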
3894
3895 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
3896 {
3897         int use_internal_rev_list = 0;
3898         int shallow = 0;
3899         int all_progress_implied = 0;
3900         struct strvec rp = STRVEC_INIT;
3901         int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
3902         int rev_list_index = 0;
3903         int stdin_packs = 0;
3904         struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
3905         struct option pack_objects_options[] = {
3906                 OPT_SET_INT('q', "quiet", &progress,
3907                             N_("do not show progress meter"), 0),
3908                 OPT_SET_INT(0, "progress", &progress,
3909                             N_("show progress meter"), 1),
3910                 OPT_SET_INT(0, "all-progress", &progress,
3911                             N_("show progress meter during object writing phase"), 2),
3912                 OPT_BOOL(0, "all-progress-implied",
3913                          &all_progress_implied,
3914                          N_("similar to --all-progress when progress meter is shown")),
3915                 OPT_CALLBACK_F(0, "index-version", NULL, N_("<version>[,<offset>]"),
3916                   N_("write the pack index file in the specified idx format version"),
3917                   PARSE_OPT_NONEG, option_parse_index_version),
3918                 OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
3919                               N_("maximum size of each output pack file")),
3920                 OPT_BOOL(0, "local", &local,
3921                          N_("ignore borrowed objects from alternate object stores")),
3922                 OPT_BOOL(0, "incremental", &incremental,
3923                          N_("ignore packed objects")),
3924                 OPT_INTEGER(0, "window", &window,
3925                             N_("limit pack window by objects")),
3926                 OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
3927                               N_("limit pack window by memory in addition to object limit")),
3928                 OPT_INTEGER(0, "depth", &depth,
3929                             N_("maximum length of delta chain allowed in the resulting pack")),
3930                 OPT_BOOL(0, "reuse-delta", &reuse_delta,
3931                          N_("reuse existing deltas")),
3932                 OPT_BOOL(0, "reuse-object", &reuse_object,
3933                          N_("reuse existing objects")),
3934                 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
3935                          N_("use OFS_DELTA objects")),
3936                 OPT_INTEGER(0, "threads", &delta_search_threads,
3937                             N_("use threads when searching for best delta matches")),
3938                 OPT_BOOL(0, "non-empty", &non_empty,
3939                          N_("do not create an empty pack output")),
3940                 OPT_BOOL(0, "revs", &use_internal_rev_list,
3941                          N_("read revision arguments from standard input")),
3942                 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
3943                               N_("limit the objects to those that are not yet packed"),
3944                               1, PARSE_OPT_NONEG),
3945                 OPT_SET_INT_F(0, "all", &rev_list_all,
3946                               N_("include objects reachable from any reference"),
3947                               1, PARSE_OPT_NONEG),
3948                 OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
3949                               N_("include objects referred to by reflog entries"),
3950                               1, PARSE_OPT_NONEG),
3951                 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
3952                               N_("include objects referred to by the index"),
3953                               1, PARSE_OPT_NONEG),
3954                 OPT_BOOL(0, "stdin-packs", &stdin_packs,
3955                          N_("read packs from stdin")),
3956                 OPT_BOOL(0, "stdout", &pack_to_stdout,
3957                          N_("output pack to stdout")),
3958                 OPT_BOOL(0, "include-tag", &include_tag,
3959                          N_("include tag objects that refer to objects to be packed")),
3960                 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
3961                          N_("keep unreachable objects")),
3962                 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
3963                          N_("pack loose unreachable objects")),
3964                 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
3965                   N_("unpack unreachable objects newer than <time>"),
3966                   PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
3967                 OPT_BOOL(0, "sparse", &sparse,
3968                          N_("use the sparse reachability algorithm")),
3969                 OPT_BOOL(0, "thin", &thin,
3970                          N_("create thin packs")),
3971                 OPT_BOOL(0, "shallow", &shallow,
3972                          N_("create packs suitable for shallow fetches")),
3973                 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
3974                          N_("ignore packs that have a companion .keep file")),
3975                 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
3976                                 N_("ignore this pack")),
3977                 OPT_INTEGER(0, "compression", &pack_compression_level,
3978                             N_("pack compression level")),
3979                 OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
3980                             N_("do not hide commits by grafts"), 0),
3981                 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
3982                          N_("use a bitmap index if available to speed up counting objects")),
3983                 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
3984                             N_("write a bitmap index together with the pack index"),
3985                             WRITE_BITMAP_TRUE),
3986                 OPT_SET_INT_F(0, "write-bitmap-index-quiet",
3987                               &write_bitmap_index,
3988                               N_("write a bitmap index if possible"),
3989                               WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
3990                 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
3991                 OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
3992                   N_("handling for missing objects"), PARSE_OPT_NONEG,
3993                   option_parse_missing_action),
3994                 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
3995                          N_("do not pack objects in promisor packfiles")),
3996                 OPT_BOOL(0, "delta-islands", &use_delta_islands,
3997                          N_("respect islands during delta compression")),
3998                 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
3999                                 N_("protocol"),
4000                                 N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4001                 OPT_END(),
4002         };
4003
4004         if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4005                 BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4006
4007         read_replace_refs = 0;
4008
4009         sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
4010         prepare_repo_settings(the_repository);
4011         if (sparse < 0)
4012                 sparse = the_repository->settings.pack_use_sparse;
4013
4014         reset_pack_idx_option(&pack_idx_opts);
4015         git_config(git_pack_config, NULL);
4016         if (git_env_bool(GIT_TEST_WRITE_REV_INDEX, 0))
4017                 pack_idx_opts.flags |= WRITE_REV;
4018
4019         progress = isatty(2);
4020         argc = parse_options(argc, argv, prefix, pack_objects_options,
4021                              pack_usage, 0);
4022
4023         if (argc) {
4024                 base_name = argv[0];
4025                 argc--;
4026         }
4027         if (pack_to_stdout != !base_name || argc)
4028                 usage_with_options(pack_usage, pack_objects_options);
4029
4030         if (depth < 0)
4031                 depth = 0;
4032         if (depth >= (1 << OE_DEPTH_BITS)) {
4033                 warning(_("delta chain depth %d is too deep, forcing %d"),
4034                         depth, (1 << OE_DEPTH_BITS) - 1);
4035                 depth = (1 << OE_DEPTH_BITS) - 1;
4036         }
4037         if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
4038                 warning(_("pack.deltaCacheLimit is too high, forcing %d"),
4039                         (1U << OE_Z_DELTA_BITS) - 1);
4040                 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
4041         }
4042         if (window < 0)
4043                 window = 0;
4044
4045         strvec_push(&rp, "pack-objects");
4046         if (thin) {
4047                 use_internal_rev_list = 1;
4048                 strvec_push(&rp, shallow
4049                                 ? "--objects-edge-aggressive"
4050                                 : "--objects-edge");
4051         } else
4052                 strvec_push(&rp, "--objects");
4053
4054         if (rev_list_all) {
4055                 use_internal_rev_list = 1;
4056                 strvec_push(&rp, "--all");
4057         }
4058         if (rev_list_reflog) {
4059                 use_internal_rev_list = 1;
4060                 strvec_push(&rp, "--reflog");
4061         }
4062         if (rev_list_index) {
4063                 use_internal_rev_list = 1;
4064                 strvec_push(&rp, "--indexed-objects");
4065         }
4066         if (rev_list_unpacked && !stdin_packs) {
4067                 use_internal_rev_list = 1;
4068                 strvec_push(&rp, "--unpacked");
4069         }
4070
4071         if (exclude_promisor_objects) {
4072                 use_internal_rev_list = 1;
4073                 fetch_if_missing = 0;
4074                 strvec_push(&rp, "--exclude-promisor-objects");
4075         }
4076         if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
4077                 use_internal_rev_list = 1;
4078
        if (!reuse_object)
                reuse_delta = 0;
        if (pack_compression_level == -1)
                pack_compression_level = Z_DEFAULT_COMPRESSION;
        else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
                die(_("bad pack compression level %d"), pack_compression_level);

        if (!delta_search_threads)      /* --threads=0 means autodetect */
                delta_search_threads = online_cpus();

        if (!HAVE_THREADS && delta_search_threads != 1)
                warning(_("no threads support, ignoring --threads"));
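        /*
         * A pack streamed to stdout cannot be split at a size
         * boundary, so --max-pack-size applies only to on-disk packs;
         * there it may also come from pack.packSizeLimit.
         */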
        if (!pack_to_stdout && !pack_size_limit)
                pack_size_limit = pack_size_limit_cfg;
        if (pack_to_stdout && pack_size_limit)
                die(_("--max-pack-size cannot be used to build a pack for transfer"));
        if (pack_size_limit && pack_size_limit < 1024*1024) {
                warning(_("minimum pack size limit is 1 MiB"));
                pack_size_limit = 1024*1024;
        }

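        /*
         * A thin pack deltifies against objects it does not contain,
         * so it cannot be indexed as-is; the receiving side completes
         * it (e.g. via "git index-pack --fix-thin") before indexing.
         */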
        if (!pack_to_stdout && thin)
                die(_("--thin cannot be used to build an indexable pack"));

        if (keep_unreachable && unpack_unreachable)
                die(_("--keep-unreachable and --unpack-unreachable are incompatible"));
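        /*
         * An expiration cutoff for unreachable objects is only
         * meaningful when reachability was computed from all refs,
         * reflogs and the index; otherwise ignore it.
         */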
        if (!rev_list_all || !rev_list_reflog || !rev_list_index)
                unpack_unreachable_expiration = 0;

        if (filter_options.choice) {
                if (!pack_to_stdout)
                        die(_("cannot use --filter without --stdout"));
                if (stdin_packs)
                        die(_("cannot use --filter with --stdin-packs"));
        }

        if (stdin_packs && use_internal_rev_list)
                die(_("cannot use internal rev list with --stdin-packs"));

        /*
         * "Soft" reasons not to use bitmaps: for an on-disk repack we
         * default to not using them because we want
         *
         * - to produce a good pack (with a bitmap index, not-yet-packed
         *   objects end up packed in a suboptimal order), and
         *
         * - to use the more robust pack-generation codepath (avoiding
         *   possible bugs in the bitmap code and possible bitmap index
         *   corruption).
         */
        if (!pack_to_stdout)
                use_bitmap_index_default = 0;

        if (use_bitmap_index < 0)
                use_bitmap_index = use_bitmap_index_default;

        /* "hard" reasons not to use bitmaps; these just won't work at all */
        if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
                use_bitmap_index = 0;

        if (pack_to_stdout || !rev_list_all)
                write_bitmap_index = 0;

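        /*
         * Delta-island marking depends on walking commits in
         * topological order, so force --topo-order for the internal
         * revision walk.
         */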
        if (use_delta_islands)
                strvec_push(&rp, "--topo-order");

        if (progress && all_progress_implied)
                progress = 2;

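        /*
         * --honor-pack-keep is a no-op when no local pack actually has
         * a .keep file; clearing the flag lets later checks skip the
         * lookup entirely.
         */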
        add_extra_kept_packs(&keep_pack_list);
        if (ignore_packed_keep_on_disk) {
                struct packed_git *p;
                for (p = get_all_packs(the_repository); p; p = p->next)
                        if (p->pack_local && p->pack_keep)
                                break;
                if (!p) /* no keep-able packs found */
                        ignore_packed_keep_on_disk = 0;
        }
        if (local) {
                /*
                 * Unlike ignore_packed_keep_on_disk above, we do not
                 * want to unset "local" based on looking at packs, as
                 * it also covers non-local objects.
                 */
                struct packed_git *p;
                for (p = get_all_packs(the_repository); p; p = p->next) {
                        if (!p->pack_local) {
                                have_non_local_packs = 1;
                                break;
                        }
                }
        }

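        /*
         * Phase one: enumerate the objects to pack. They come from a
         * pack list on stdin (--stdin-packs), an object list on stdin,
         * or an internal revision walk driven by the arguments
         * collected in "rp" above.
         */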
        trace2_region_enter("pack-objects", "enumerate-objects",
                            the_repository);
        prepare_packing_data(the_repository, &to_pack);

        if (progress)
                progress_state = start_progress(_("Enumerating objects"), 0);
        if (stdin_packs) {
                /* avoids adding objects in excluded packs */
                ignore_packed_keep_in_core = 1;
                read_packs_list_from_stdin();
                if (rev_list_unpacked)
                        add_unreachable_loose_objects();
        } else if (!use_internal_rev_list)
                read_object_list_from_stdin();
        else {
                get_object_list(rp.nr, rp.v);
                strvec_clear(&rp);
        }
        cleanup_preferred_base();
        if (include_tag && nr_result)
                for_each_tag_ref(add_ref_tag, NULL);
        stop_progress(&progress_state);
        trace2_region_leave("pack-objects", "enumerate-objects",
                            the_repository);

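        /*
         * Phase two: order the objects and search for deltas within
         * the configured window and depth.
         */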
        if (non_empty && !nr_result)
                return 0;
        if (nr_result) {
                trace2_region_enter("pack-objects", "prepare-pack",
                                    the_repository);
                prepare_pack(window, depth);
                trace2_region_leave("pack-objects", "prepare-pack",
                                    the_repository);
        }

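        /*
         * Phase three: write out the pack (and, for on-disk packs, the
         * accompanying index), then report the reuse statistics.
         */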
        trace2_region_enter("pack-objects", "write-pack-file", the_repository);
        write_excluded_by_configs();
        write_pack_file();
        trace2_region_leave("pack-objects", "write-pack-file", the_repository);

        if (progress)
                fprintf_ln(stderr,
                           _("Total %"PRIu32" (delta %"PRIu32"),"
                             " reused %"PRIu32" (delta %"PRIu32"),"
                             " pack-reused %"PRIu32),
                           written, written_delta, reused, reused_delta,
                           reuse_packfile_objects);
        return 0;
}