Merge branch 'jk/filter-branch-sha256'
[git] / builtin / repack.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "config.h"
4 #include "dir.h"
5 #include "parse-options.h"
6 #include "run-command.h"
7 #include "sigchain.h"
8 #include "strbuf.h"
9 #include "string-list.h"
10 #include "strvec.h"
11 #include "midx.h"
12 #include "packfile.h"
13 #include "prune-packed.h"
14 #include "object-store.h"
15 #include "promisor-remote.h"
16 #include "shallow.h"
17 #include "pack.h"
18
19 static int delta_base_offset = 1;
20 static int pack_kept_objects = -1;
21 static int write_bitmaps = -1;
22 static int use_delta_islands;
23 static char *packdir, *packtmp;
24
25 static const char *const git_repack_usage[] = {
26         N_("git repack [<options>]"),
27         NULL
28 };
29
30 static const char incremental_bitmap_conflict_error[] = N_(
31 "Incremental repacks are incompatible with bitmap indexes.  Use\n"
32 "--no-write-bitmap-index or disable the pack.writebitmaps configuration."
33 );
34
35
36 static int repack_config(const char *var, const char *value, void *cb)
37 {
38         if (!strcmp(var, "repack.usedeltabaseoffset")) {
39                 delta_base_offset = git_config_bool(var, value);
40                 return 0;
41         }
42         if (!strcmp(var, "repack.packkeptobjects")) {
43                 pack_kept_objects = git_config_bool(var, value);
44                 return 0;
45         }
46         if (!strcmp(var, "repack.writebitmaps") ||
47             !strcmp(var, "pack.writebitmaps")) {
48                 write_bitmaps = git_config_bool(var, value);
49                 return 0;
50         }
51         if (!strcmp(var, "repack.usedeltaislands")) {
52                 use_delta_islands = git_config_bool(var, value);
53                 return 0;
54         }
55         return git_default_config(var, value, cb);
56 }
57
58 /*
59  * Remove temporary $GIT_OBJECT_DIRECTORY/pack/.tmp-$$-pack-* files.
60  */
61 static void remove_temporary_files(void)
62 {
63         struct strbuf buf = STRBUF_INIT;
64         size_t dirlen, prefixlen;
65         DIR *dir;
66         struct dirent *e;
67
68         dir = opendir(packdir);
69         if (!dir)
70                 return;
71
72         /* Point at the slash at the end of ".../objects/pack/" */
73         dirlen = strlen(packdir) + 1;
74         strbuf_addstr(&buf, packtmp);
75         /* Hold the length of  ".tmp-%d-pack-" */
76         prefixlen = buf.len - dirlen;
77
78         while ((e = readdir(dir))) {
79                 if (strncmp(e->d_name, buf.buf + dirlen, prefixlen))
80                         continue;
81                 strbuf_setlen(&buf, dirlen);
82                 strbuf_addstr(&buf, e->d_name);
83                 unlink(buf.buf);
84         }
85         closedir(dir);
86         strbuf_release(&buf);
87 }
88
/*
 * Signal handler installed by cmd_repack(): delete our temporary pack
 * files, pop this handler off the sigchain and raise the same signal
 * again so that whatever handler is next gets to act on it.
 */
static void remove_pack_on_signal(int signo)
{
	remove_temporary_files();

	sigchain_pop(signo);
	raise(signo);
}
95
96 /*
97  * Adds all packs hex strings to the fname list, which do not
98  * have a corresponding .keep file. These packs are not to
99  * be kept if we are going to pack everything into one file.
100  */
101 static void get_non_kept_pack_filenames(struct string_list *fname_list,
102                                         const struct string_list *extra_keep)
103 {
104         DIR *dir;
105         struct dirent *e;
106         char *fname;
107
108         if (!(dir = opendir(packdir)))
109                 return;
110
111         while ((e = readdir(dir)) != NULL) {
112                 size_t len;
113                 int i;
114
115                 for (i = 0; i < extra_keep->nr; i++)
116                         if (!fspathcmp(e->d_name, extra_keep->items[i].string))
117                                 break;
118                 if (extra_keep->nr > 0 && i < extra_keep->nr)
119                         continue;
120
121                 if (!strip_suffix(e->d_name, ".pack", &len))
122                         continue;
123
124                 fname = xmemdupz(e->d_name, len);
125
126                 if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
127                         string_list_append_nodup(fname_list, fname);
128                 else
129                         free(fname);
130         }
131         closedir(dir);
132 }
133
134 static void remove_redundant_pack(const char *dir_name, const char *base_name)
135 {
136         struct strbuf buf = STRBUF_INIT;
137         struct multi_pack_index *m = get_local_multi_pack_index(the_repository);
138         strbuf_addf(&buf, "%s.pack", base_name);
139         if (m && midx_contains_pack(m, buf.buf))
140                 clear_midx_file(the_repository);
141         strbuf_insertf(&buf, 0, "%s/", dir_name);
142         unlink_pack_path(buf.buf, 1);
143         strbuf_release(&buf);
144 }
145
/*
 * Options forwarded to "git pack-objects" by prepare_pack_objects().
 * A NULL string or a zero flag means "do not pass this option".
 */
struct pack_objects_args {
	/* Passed verbatim as the value of the option named in the comment. */
	const char *window;		/* --window=<value> */
	const char *window_memory;	/* --window-memory=<value> */
	const char *depth;		/* --depth=<value> */
	const char *threads;		/* --threads=<value> */
	const char *max_pack_size;	/* --max-pack-size=<value> */

	/* Boolean switches. */
	int no_reuse_delta;		/* --no-reuse-delta */
	int no_reuse_object;		/* --no-reuse-object */
	int quiet;			/* --quiet */
	int local;			/* --local */
};
157
158 static void prepare_pack_objects(struct child_process *cmd,
159                                  const struct pack_objects_args *args)
160 {
161         strvec_push(&cmd->args, "pack-objects");
162         if (args->window)
163                 strvec_pushf(&cmd->args, "--window=%s", args->window);
164         if (args->window_memory)
165                 strvec_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
166         if (args->depth)
167                 strvec_pushf(&cmd->args, "--depth=%s", args->depth);
168         if (args->threads)
169                 strvec_pushf(&cmd->args, "--threads=%s", args->threads);
170         if (args->max_pack_size)
171                 strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
172         if (args->no_reuse_delta)
173                 strvec_pushf(&cmd->args, "--no-reuse-delta");
174         if (args->no_reuse_object)
175                 strvec_pushf(&cmd->args, "--no-reuse-object");
176         if (args->local)
177                 strvec_push(&cmd->args,  "--local");
178         if (args->quiet)
179                 strvec_push(&cmd->args,  "--quiet");
180         if (delta_base_offset)
181                 strvec_push(&cmd->args,  "--delta-base-offset");
182         strvec_push(&cmd->args, packtmp);
183         cmd->git_cmd = 1;
184         cmd->out = -1;
185 }
186
187 /*
188  * Write oid to the given struct child_process's stdin, starting it first if
189  * necessary.
190  */
191 static int write_oid(const struct object_id *oid, struct packed_git *pack,
192                      uint32_t pos, void *data)
193 {
194         struct child_process *cmd = data;
195
196         if (cmd->in == -1) {
197                 if (start_command(cmd))
198                         die(_("could not start pack-objects to repack promisor objects"));
199         }
200
201         xwrite(cmd->in, oid_to_hex(oid), the_hash_algo->hexsz);
202         xwrite(cmd->in, "\n", 1);
203         return 0;
204 }
205
/*
 * The files that can belong to one pack, by extension.  An entry's index
 * in this table is the bit position used for it in the masks computed by
 * populate_pack_exts().  Entries without "optional" must exist for every
 * new pack.
 */
static struct {
	const char *name;
	unsigned optional:1;
} exts[] = {
	{ .name = ".pack" },
	{ .name = ".idx" },
	{ .name = ".rev", .optional = 1 },
	{ .name = ".bitmap", .optional = 1 },
	{ .name = ".promisor", .optional = 1 },
};
216
217 static unsigned populate_pack_exts(char *name)
218 {
219         struct stat statbuf;
220         struct strbuf path = STRBUF_INIT;
221         unsigned ret = 0;
222         int i;
223
224         for (i = 0; i < ARRAY_SIZE(exts); i++) {
225                 strbuf_reset(&path);
226                 strbuf_addf(&path, "%s-%s%s", packtmp, name, exts[i].name);
227
228                 if (stat(path.buf, &statbuf))
229                         continue;
230
231                 ret |= (1 << i);
232         }
233
234         strbuf_release(&path);
235         return ret;
236 }
237
238 static void repack_promisor_objects(const struct pack_objects_args *args,
239                                     struct string_list *names)
240 {
241         struct child_process cmd = CHILD_PROCESS_INIT;
242         FILE *out;
243         struct strbuf line = STRBUF_INIT;
244
245         prepare_pack_objects(&cmd, args);
246         cmd.in = -1;
247
248         /*
249          * NEEDSWORK: Giving pack-objects only the OIDs without any ordering
250          * hints may result in suboptimal deltas in the resulting pack. See if
251          * the OIDs can be sent with fake paths such that pack-objects can use a
252          * {type -> existing pack order} ordering when computing deltas instead
253          * of a {type -> size} ordering, which may produce better deltas.
254          */
255         for_each_packed_object(write_oid, &cmd,
256                                FOR_EACH_OBJECT_PROMISOR_ONLY);
257
258         if (cmd.in == -1)
259                 /* No packed objects; cmd was never started */
260                 return;
261
262         close(cmd.in);
263
264         out = xfdopen(cmd.out, "r");
265         while (strbuf_getline_lf(&line, out) != EOF) {
266                 struct string_list_item *item;
267                 char *promisor_name;
268
269                 if (line.len != the_hash_algo->hexsz)
270                         die(_("repack: Expecting full hex object ID lines only from pack-objects."));
271                 item = string_list_append(names, line.buf);
272
273                 /*
274                  * pack-objects creates the .pack and .idx files, but not the
275                  * .promisor file. Create the .promisor file, which is empty.
276                  *
277                  * NEEDSWORK: fetch-pack sometimes generates non-empty
278                  * .promisor files containing the ref names and associated
279                  * hashes at the point of generation of the corresponding
280                  * packfile, but this would not preserve their contents. Maybe
281                  * concatenate the contents of all .promisor files instead of
282                  * just creating a new empty file.
283                  */
284                 promisor_name = mkpathdup("%s-%s.promisor", packtmp,
285                                           line.buf);
286                 write_promisor_file(promisor_name, NULL, 0);
287
288                 item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
289
290                 free(promisor_name);
291         }
292         fclose(out);
293         if (finish_command(&cmd))
294                 die(_("could not finish pack-objects to repack promisor objects"));
295 }
296
/* Bits of cmd_repack()'s pack_everything, set by the -a and -A options. */
#define ALL_INTO_ONE		(1 << 0)
#define LOOSEN_UNREACHABLE	(1 << 1)
299
300 int cmd_repack(int argc, const char **argv, const char *prefix)
301 {
302         struct child_process cmd = CHILD_PROCESS_INIT;
303         struct string_list_item *item;
304         struct string_list names = STRING_LIST_INIT_DUP;
305         struct string_list rollback = STRING_LIST_INIT_NODUP;
306         struct string_list existing_packs = STRING_LIST_INIT_DUP;
307         struct strbuf line = STRBUF_INIT;
308         int i, ext, ret;
309         FILE *out;
310
311         /* variables to be filled by option parsing */
312         int pack_everything = 0;
313         int delete_redundant = 0;
314         const char *unpack_unreachable = NULL;
315         int keep_unreachable = 0;
316         struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
317         int no_update_server_info = 0;
318         struct pack_objects_args po_args = {NULL};
319
320         struct option builtin_repack_options[] = {
321                 OPT_BIT('a', NULL, &pack_everything,
322                                 N_("pack everything in a single pack"), ALL_INTO_ONE),
323                 OPT_BIT('A', NULL, &pack_everything,
324                                 N_("same as -a, and turn unreachable objects loose"),
325                                    LOOSEN_UNREACHABLE | ALL_INTO_ONE),
326                 OPT_BOOL('d', NULL, &delete_redundant,
327                                 N_("remove redundant packs, and run git-prune-packed")),
328                 OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
329                                 N_("pass --no-reuse-delta to git-pack-objects")),
330                 OPT_BOOL('F', NULL, &po_args.no_reuse_object,
331                                 N_("pass --no-reuse-object to git-pack-objects")),
332                 OPT_BOOL('n', NULL, &no_update_server_info,
333                                 N_("do not run git-update-server-info")),
334                 OPT__QUIET(&po_args.quiet, N_("be quiet")),
335                 OPT_BOOL('l', "local", &po_args.local,
336                                 N_("pass --local to git-pack-objects")),
337                 OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
338                                 N_("write bitmap index")),
339                 OPT_BOOL('i', "delta-islands", &use_delta_islands,
340                                 N_("pass --delta-islands to git-pack-objects")),
341                 OPT_STRING(0, "unpack-unreachable", &unpack_unreachable, N_("approxidate"),
342                                 N_("with -A, do not loosen objects older than this")),
343                 OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
344                                 N_("with -a, repack unreachable objects")),
345                 OPT_STRING(0, "window", &po_args.window, N_("n"),
346                                 N_("size of the window used for delta compression")),
347                 OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
348                                 N_("same as the above, but limit memory size instead of entries count")),
349                 OPT_STRING(0, "depth", &po_args.depth, N_("n"),
350                                 N_("limits the maximum delta depth")),
351                 OPT_STRING(0, "threads", &po_args.threads, N_("n"),
352                                 N_("limits the maximum number of threads")),
353                 OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
354                                 N_("maximum size of each packfile")),
355                 OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
356                                 N_("repack objects in packs marked with .keep")),
357                 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
358                                 N_("do not repack this pack")),
359                 OPT_END()
360         };
361
362         git_config(repack_config, NULL);
363
364         argc = parse_options(argc, argv, prefix, builtin_repack_options,
365                                 git_repack_usage, 0);
366
367         if (delete_redundant && repository_format_precious_objects)
368                 die(_("cannot delete packs in a precious-objects repo"));
369
370         if (keep_unreachable &&
371             (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
372                 die(_("--keep-unreachable and -A are incompatible"));
373
374         if (write_bitmaps < 0) {
375                 if (!(pack_everything & ALL_INTO_ONE) ||
376                     !is_bare_repository())
377                         write_bitmaps = 0;
378         }
379         if (pack_kept_objects < 0)
380                 pack_kept_objects = write_bitmaps > 0;
381
382         if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
383                 die(_(incremental_bitmap_conflict_error));
384
385         packdir = mkpathdup("%s/pack", get_object_directory());
386         packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
387
388         sigchain_push_common(remove_pack_on_signal);
389
390         prepare_pack_objects(&cmd, &po_args);
391
392         strvec_push(&cmd.args, "--keep-true-parents");
393         if (!pack_kept_objects)
394                 strvec_push(&cmd.args, "--honor-pack-keep");
395         for (i = 0; i < keep_pack_list.nr; i++)
396                 strvec_pushf(&cmd.args, "--keep-pack=%s",
397                              keep_pack_list.items[i].string);
398         strvec_push(&cmd.args, "--non-empty");
399         strvec_push(&cmd.args, "--all");
400         strvec_push(&cmd.args, "--reflog");
401         strvec_push(&cmd.args, "--indexed-objects");
402         if (has_promisor_remote())
403                 strvec_push(&cmd.args, "--exclude-promisor-objects");
404         if (write_bitmaps > 0)
405                 strvec_push(&cmd.args, "--write-bitmap-index");
406         else if (write_bitmaps < 0)
407                 strvec_push(&cmd.args, "--write-bitmap-index-quiet");
408         if (use_delta_islands)
409                 strvec_push(&cmd.args, "--delta-islands");
410
411         if (pack_everything & ALL_INTO_ONE) {
412                 get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
413
414                 repack_promisor_objects(&po_args, &names);
415
416                 if (existing_packs.nr && delete_redundant) {
417                         if (unpack_unreachable) {
418                                 strvec_pushf(&cmd.args,
419                                              "--unpack-unreachable=%s",
420                                              unpack_unreachable);
421                                 strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
422                         } else if (pack_everything & LOOSEN_UNREACHABLE) {
423                                 strvec_push(&cmd.args,
424                                             "--unpack-unreachable");
425                         } else if (keep_unreachable) {
426                                 strvec_push(&cmd.args, "--keep-unreachable");
427                                 strvec_push(&cmd.args, "--pack-loose-unreachable");
428                         } else {
429                                 strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
430                         }
431                 }
432         } else {
433                 strvec_push(&cmd.args, "--unpacked");
434                 strvec_push(&cmd.args, "--incremental");
435         }
436
437         cmd.no_stdin = 1;
438
439         ret = start_command(&cmd);
440         if (ret)
441                 return ret;
442
443         out = xfdopen(cmd.out, "r");
444         while (strbuf_getline_lf(&line, out) != EOF) {
445                 if (line.len != the_hash_algo->hexsz)
446                         die(_("repack: Expecting full hex object ID lines only from pack-objects."));
447                 string_list_append(&names, line.buf);
448         }
449         fclose(out);
450         ret = finish_command(&cmd);
451         if (ret)
452                 return ret;
453
454         if (!names.nr && !po_args.quiet)
455                 printf_ln(_("Nothing new to pack."));
456
457         for_each_string_list_item(item, &names) {
458                 item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
459         }
460
461         close_object_store(the_repository->objects);
462
463         /*
464          * Ok we have prepared all new packfiles.
465          */
466         for_each_string_list_item(item, &names) {
467                 for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
468                         char *fname, *fname_old;
469
470                         fname = mkpathdup("%s/pack-%s%s",
471                                         packdir, item->string, exts[ext].name);
472                         fname_old = mkpathdup("%s-%s%s",
473                                         packtmp, item->string, exts[ext].name);
474
475                         if (((uintptr_t)item->util) & (1 << ext)) {
476                                 struct stat statbuffer;
477                                 if (!stat(fname_old, &statbuffer)) {
478                                         statbuffer.st_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
479                                         chmod(fname_old, statbuffer.st_mode);
480                                 }
481
482                                 if (rename(fname_old, fname))
483                                         die_errno(_("renaming '%s' failed"), fname_old);
484                         } else if (!exts[ext].optional)
485                                 die(_("missing required file: %s"), fname_old);
486                         else if (unlink(fname) < 0 && errno != ENOENT)
487                                 die_errno(_("could not unlink: %s"), fname);
488
489                         free(fname);
490                         free(fname_old);
491                 }
492         }
493         /* End of pack replacement. */
494
495         reprepare_packed_git(the_repository);
496
497         if (delete_redundant) {
498                 const int hexsz = the_hash_algo->hexsz;
499                 int opts = 0;
500                 string_list_sort(&names);
501                 for_each_string_list_item(item, &existing_packs) {
502                         char *sha1;
503                         size_t len = strlen(item->string);
504                         if (len < hexsz)
505                                 continue;
506                         sha1 = item->string + len - hexsz;
507                         if (!string_list_has_string(&names, sha1))
508                                 remove_redundant_pack(packdir, item->string);
509                 }
510                 if (!po_args.quiet && isatty(2))
511                         opts |= PRUNE_PACKED_VERBOSE;
512                 prune_packed_objects(opts);
513
514                 if (!keep_unreachable &&
515                     (!(pack_everything & LOOSEN_UNREACHABLE) ||
516                      unpack_unreachable) &&
517                     is_repository_shallow(the_repository))
518                         prune_shallow(PRUNE_QUICK);
519         }
520
521         if (!no_update_server_info)
522                 update_server_info(0);
523         remove_temporary_files();
524
525         if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
526                 write_midx_file(get_object_directory(), 0);
527
528         string_list_clear(&names, 0);
529         string_list_clear(&rollback, 0);
530         string_list_clear(&existing_packs, 0);
531         strbuf_release(&line);
532
533         return 0;
534 }