Merge branch 'ds/commit-graph-fsck'
[git] / builtin / fast-export.c
1 /*
2  * "git fast-export" builtin command
3  *
4  * Copyright (C) 2007 Johannes E. Schindelin
5  */
6 #include "builtin.h"
7 #include "cache.h"
8 #include "config.h"
9 #include "refs.h"
10 #include "refspec.h"
11 #include "object-store.h"
12 #include "commit.h"
13 #include "object.h"
14 #include "tag.h"
15 #include "diff.h"
16 #include "diffcore.h"
17 #include "log-tree.h"
18 #include "revision.h"
19 #include "decorate.h"
20 #include "string-list.h"
21 #include "utf8.h"
22 #include "parse-options.h"
23 #include "quote.h"
24 #include "remote.h"
25 #include "blob.h"
26 #include "commit-slab.h"
27
/* Usage strings for this builtin; the list is terminated by a NULL entry. */
static const char *fast_export_usage[] = {
        N_("git fast-export [rev-list-opts]"),
        NULL
};
32
/* When non-zero, show_progress() prints a "progress" line every N objects. */
static int progress;
/* What handle_tag() does when a tag message contains a PGP signature. */
static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT;
/* What handle_tag() does when the tagged object has no export mark. */
static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
/* When set, handle_tag() substitutes a placeholder tagger line for tags lacking one. */
static int fake_missing_tagger;
/* NOTE(review): not referenced in this part of the file; presumably toggles the "done" feature -- confirm. */
static int use_done_feature;
/* When set, export_blob() emits nothing and filemodify lines carry object names instead of marks. */
static int no_data;
/* When set, handle_commit() diffs against the empty tree and prints "deleteall" for every commit. */
static int full_tree;
/* Ref names collected by get_commit() for tags met while peeling; util holds the tag. */
static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
/* Refspecs applied to full ref names in get_tags_and_duplicates(). */
static struct refspec refspecs = REFSPEC_INIT_FETCH;
/* When set, paths, ref names, idents, messages and blob contents are replaced by generated stand-ins. */
static int anonymize;
/* Looked up with revision_sources_at() in handle_commit() to find the ref name a commit is exported under. */
static struct revision_sources revision_sources;
44
45 static int parse_opt_signed_tag_mode(const struct option *opt,
46                                      const char *arg, int unset)
47 {
48         if (unset || !strcmp(arg, "abort"))
49                 signed_tag_mode = ABORT;
50         else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
51                 signed_tag_mode = VERBATIM;
52         else if (!strcmp(arg, "warn"))
53                 signed_tag_mode = WARN;
54         else if (!strcmp(arg, "warn-strip"))
55                 signed_tag_mode = WARN_STRIP;
56         else if (!strcmp(arg, "strip"))
57                 signed_tag_mode = STRIP;
58         else
59                 return error("Unknown signed-tags mode: %s", arg);
60         return 0;
61 }
62
63 static int parse_opt_tag_of_filtered_mode(const struct option *opt,
64                                           const char *arg, int unset)
65 {
66         if (unset || !strcmp(arg, "abort"))
67                 tag_of_filtered_mode = ERROR;
68         else if (!strcmp(arg, "drop"))
69                 tag_of_filtered_mode = DROP;
70         else if (!strcmp(arg, "rewrite"))
71                 tag_of_filtered_mode = REWRITE;
72         else
73                 return error("Unknown tag-of-filtered mode: %s", arg);
74         return 0;
75 }
76
/* Maps objects to their mark numbers; marks are stored as pointer-sized values. */
static struct decoration idnums;
/* Most recently assigned mark; mark_next_object() increments it before use. */
static uint32_t last_idnum;
79
80 static int has_unshown_parent(struct commit *commit)
81 {
82         struct commit_list *parent;
83
84         for (parent = commit->parents; parent; parent = parent->next)
85                 if (!(parent->item->object.flags & SHOWN) &&
86                     !(parent->item->object.flags & UNINTERESTING))
87                         return 1;
88         return 0;
89 }
90
/* One cached mapping from original bytes to their anonymized replacement. */
struct anonymized_entry {
        struct hashmap_entry hash;      /* hashmap linkage; hash is taken over orig */
        const char *orig;               /* original bytes, used as the lookup key */
        size_t orig_len;                /* number of bytes in orig */
        const char *anon;               /* replacement produced by the generate callback */
        size_t anon_len;                /* number of bytes in anon */
};
98
99 static int anonymized_entry_cmp(const void *unused_cmp_data,
100                                 const void *va, const void *vb,
101                                 const void *unused_keydata)
102 {
103         const struct anonymized_entry *a = va, *b = vb;
104         return a->orig_len != b->orig_len ||
105                 memcmp(a->orig, b->orig, a->orig_len);
106 }
107
108 /*
109  * Basically keep a cache of X->Y so that we can repeatedly replace
110  * the same anonymized string with another. The actual generation
111  * is farmed out to the generate function.
112  */
113 static const void *anonymize_mem(struct hashmap *map,
114                                  void *(*generate)(const void *, size_t *),
115                                  const void *orig, size_t *len)
116 {
117         struct anonymized_entry key, *ret;
118
119         if (!map->cmpfn)
120                 hashmap_init(map, anonymized_entry_cmp, NULL, 0);
121
122         hashmap_entry_init(&key, memhash(orig, *len));
123         key.orig = orig;
124         key.orig_len = *len;
125         ret = hashmap_get(map, &key, NULL);
126
127         if (!ret) {
128                 ret = xmalloc(sizeof(*ret));
129                 hashmap_entry_init(&ret->hash, key.hash.hash);
130                 ret->orig = xstrdup(orig);
131                 ret->orig_len = *len;
132                 ret->anon = generate(orig, len);
133                 ret->anon_len = *len;
134                 hashmap_put(map, ret);
135         }
136
137         *len = ret->anon_len;
138         return ret->anon;
139 }
140
/*
 * Anonymize "path" one '/'-separated component at a time and append the
 * result to "out", so that paths a/b and a/c still share a common root.
 * Each component goes through anonymize_mem(), so repeated lookups of
 * the same component yield the same replacement.  The separators
 * themselves are copied through unchanged.
 */
static void anonymize_path(struct strbuf *out, const char *path,
                           struct hashmap *map,
                           void *(*generate)(const void *, size_t *))
{
        const char *cursor = path;

        while (*cursor) {
                const char *slash = strchrnul(cursor, '/');
                size_t len = slash - cursor;
                const char *replacement = anonymize_mem(map, generate,
                                                        cursor, &len);

                strbuf_add(out, replacement, len);
                if (!*slash)
                        break;  /* last component, nothing follows */
                strbuf_addch(out, '/');
                cursor = slash + 1;
        }
}
161
/* Encode a mark number as a pointer-sized value for use as a decoration. */
static inline void *mark_to_ptr(uint32_t mark)
{
        uintptr_t widened = mark;

        return (void *)widened;
}
166
/* Decode a mark number previously encoded as a pointer-sized value. */
static inline uint32_t ptr_to_mark(void * mark)
{
        uintptr_t bits = (uintptr_t)mark;

        return (uint32_t)bits;
}
171
172 static inline void mark_object(struct object *object, uint32_t mark)
173 {
174         add_decoration(&idnums, object, mark_to_ptr(mark));
175 }
176
177 static inline void mark_next_object(struct object *object)
178 {
179         mark_object(object, ++last_idnum);
180 }
181
182 static int get_object_mark(struct object *object)
183 {
184         void *decoration = lookup_decoration(&idnums, object);
185         if (!decoration)
186                 return 0;
187         return ptr_to_mark(decoration);
188 }
189
190 static void show_progress(void)
191 {
192         static int counter = 0;
193         if (!progress)
194                 return;
195         if ((++counter % progress) == 0)
196                 printf("progress %d objects\n", counter);
197 }
198
199 /*
200  * Ideally we would want some transformation of the blob data here
201  * that is unreversible, but would still be the same size and have
202  * the same data relationship to other blobs (so that we get the same
203  * delta and packing behavior as the original). But the first and last
204  * requirements there are probably mutually exclusive, so let's take
205  * the easy way out for now, and just generate arbitrary content.
206  *
207  * There's no need to cache this result with anonymize_mem, since
208  * we already handle blob content caching with marks.
209  */
210 static char *anonymize_blob(unsigned long *size)
211 {
212         static int counter;
213         struct strbuf out = STRBUF_INIT;
214         strbuf_addf(&out, "anonymous blob %d", counter++);
215         *size = out.len;
216         return strbuf_detach(&out, NULL);
217 }
218
219 static void export_blob(const struct object_id *oid)
220 {
221         unsigned long size;
222         enum object_type type;
223         char *buf;
224         struct object *object;
225         int eaten;
226
227         if (no_data)
228                 return;
229
230         if (is_null_oid(oid))
231                 return;
232
233         object = lookup_object(the_repository, oid->hash);
234         if (object && object->flags & SHOWN)
235                 return;
236
237         if (anonymize) {
238                 buf = anonymize_blob(&size);
239                 object = (struct object *)lookup_blob(the_repository, oid);
240                 eaten = 0;
241         } else {
242                 buf = read_object_file(oid, &type, &size);
243                 if (!buf)
244                         die("could not read blob %s", oid_to_hex(oid));
245                 if (check_object_signature(oid, buf, size, type_name(type)) < 0)
246                         die("sha1 mismatch in blob %s", oid_to_hex(oid));
247                 object = parse_object_buffer(the_repository, oid, type,
248                                              size, buf, &eaten);
249         }
250
251         if (!object)
252                 die("Could not read blob %s", oid_to_hex(oid));
253
254         mark_next_object(object);
255
256         printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
257         if (size && fwrite(buf, size, 1, stdout) != 1)
258                 die_errno("could not write blob '%s'", oid_to_hex(oid));
259         printf("\n");
260
261         show_progress();
262
263         object->flags |= SHOWN;
264         if (!eaten)
265                 free(buf);
266 }
267
268 static int depth_first(const void *a_, const void *b_)
269 {
270         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
271         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
272         const char *name_a, *name_b;
273         int len_a, len_b, len;
274         int cmp;
275
276         name_a = a->one ? a->one->path : a->two->path;
277         name_b = b->one ? b->one->path : b->two->path;
278
279         len_a = strlen(name_a);
280         len_b = strlen(name_b);
281         len = (len_a < len_b) ? len_a : len_b;
282
283         /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
284         cmp = memcmp(name_a, name_b, len);
285         if (cmp)
286                 return cmp;
287         cmp = len_b - len_a;
288         if (cmp)
289                 return cmp;
290         /*
291          * Move 'R'ename entries last so that all references of the file
292          * appear in the output before it is renamed (e.g., when a file
293          * was copied and renamed in the same commit).
294          */
295         return (a->status == 'R') - (b->status == 'R');
296 }
297
/*
 * Print "path" to stdout.  Paths that quote_c_style() reports as
 * needing quoting are written in C-style quoted form; paths that merely
 * contain a space are wrapped in double quotes; anything else is
 * printed as is.
 */
static void print_path_1(const char *path)
{
        if (quote_c_style(path, NULL, NULL, 0)) {
                quote_c_style(path, NULL, stdout, 0);
                return;
        }

        if (strchr(path, ' '))
                printf("\"%s\"", path);
        else
                printf("%s", path);
}
308
309 static void *anonymize_path_component(const void *path, size_t *len)
310 {
311         static int counter;
312         struct strbuf out = STRBUF_INIT;
313         strbuf_addf(&out, "path%d", counter++);
314         return strbuf_detach(&out, len);
315 }
316
317 static void print_path(const char *path)
318 {
319         if (!anonymize)
320                 print_path_1(path);
321         else {
322                 static struct hashmap paths;
323                 static struct strbuf anon = STRBUF_INIT;
324
325                 anonymize_path(&anon, path, &paths, anonymize_path_component);
326                 print_path_1(anon.buf);
327                 strbuf_reset(&anon);
328         }
329 }
330
331 static void *generate_fake_oid(const void *old, size_t *len)
332 {
333         static uint32_t counter = 1; /* avoid null sha1 */
334         unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1);
335         put_be32(out + GIT_SHA1_RAWSZ - 4, counter++);
336         return out;
337 }
338
339 static const unsigned char *anonymize_sha1(const struct object_id *oid)
340 {
341         static struct hashmap sha1s;
342         size_t len = GIT_SHA1_RAWSZ;
343         return anonymize_mem(&sha1s, generate_fake_oid, oid, &len);
344 }
345
346 static void show_filemodify(struct diff_queue_struct *q,
347                             struct diff_options *options, void *data)
348 {
349         int i;
350         struct string_list *changed = data;
351
352         /*
353          * Handle files below a directory first, in case they are all deleted
354          * and the directory changes to a file or symlink.
355          */
356         QSORT(q->queue, q->nr, depth_first);
357
358         for (i = 0; i < q->nr; i++) {
359                 struct diff_filespec *ospec = q->queue[i]->one;
360                 struct diff_filespec *spec = q->queue[i]->two;
361
362                 switch (q->queue[i]->status) {
363                 case DIFF_STATUS_DELETED:
364                         printf("D ");
365                         print_path(spec->path);
366                         string_list_insert(changed, spec->path);
367                         putchar('\n');
368                         break;
369
370                 case DIFF_STATUS_COPIED:
371                 case DIFF_STATUS_RENAMED:
372                         /*
373                          * If a change in the file corresponding to ospec->path
374                          * has been observed, we cannot trust its contents
375                          * because the diff is calculated based on the prior
376                          * contents, not the current contents.  So, declare a
377                          * copy or rename only if there was no change observed.
378                          */
379                         if (!string_list_has_string(changed, ospec->path)) {
380                                 printf("%c ", q->queue[i]->status);
381                                 print_path(ospec->path);
382                                 putchar(' ');
383                                 print_path(spec->path);
384                                 string_list_insert(changed, spec->path);
385                                 putchar('\n');
386
387                                 if (!oidcmp(&ospec->oid, &spec->oid) &&
388                                     ospec->mode == spec->mode)
389                                         break;
390                         }
391                         /* fallthrough */
392
393                 case DIFF_STATUS_TYPE_CHANGED:
394                 case DIFF_STATUS_MODIFIED:
395                 case DIFF_STATUS_ADDED:
396                         /*
397                          * Links refer to objects in another repositories;
398                          * output the SHA-1 verbatim.
399                          */
400                         if (no_data || S_ISGITLINK(spec->mode))
401                                 printf("M %06o %s ", spec->mode,
402                                        sha1_to_hex(anonymize ?
403                                                    anonymize_sha1(&spec->oid) :
404                                                    spec->oid.hash));
405                         else {
406                                 struct object *object = lookup_object(the_repository,
407                                                                       spec->oid.hash);
408                                 printf("M %06o :%d ", spec->mode,
409                                        get_object_mark(object));
410                         }
411                         print_path(spec->path);
412                         string_list_insert(changed, spec->path);
413                         putchar('\n');
414                         break;
415
416                 default:
417                         die("Unexpected comparison status '%c' for %s, %s",
418                                 q->queue[i]->status,
419                                 ospec->path ? ospec->path : "none",
420                                 spec->path ? spec->path : "none");
421                 }
422         }
423 }
424
425 static const char *find_encoding(const char *begin, const char *end)
426 {
427         const char *needle = "\nencoding ";
428         char *bol, *eol;
429
430         bol = memmem(begin, end ? end - begin : strlen(begin),
431                      needle, strlen(needle));
432         if (!bol)
433                 return git_commit_encoding;
434         bol += strlen(needle);
435         eol = strchrnul(bol, '\n');
436         *eol = '\0';
437         return bol;
438 }
439
440 static void *anonymize_ref_component(const void *old, size_t *len)
441 {
442         static int counter;
443         struct strbuf out = STRBUF_INIT;
444         strbuf_addf(&out, "ref%d", counter++);
445         return strbuf_detach(&out, len);
446 }
447
448 static const char *anonymize_refname(const char *refname)
449 {
450         /*
451          * If any of these prefixes is found, we will leave it intact
452          * so that tags remain tags and so forth.
453          */
454         static const char *prefixes[] = {
455                 "refs/heads/",
456                 "refs/tags/",
457                 "refs/remotes/",
458                 "refs/"
459         };
460         static struct hashmap refs;
461         static struct strbuf anon = STRBUF_INIT;
462         int i;
463
464         /*
465          * We also leave "master" as a special case, since it does not reveal
466          * anything interesting.
467          */
468         if (!strcmp(refname, "refs/heads/master"))
469                 return refname;
470
471         strbuf_reset(&anon);
472         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
473                 if (skip_prefix(refname, prefixes[i], &refname)) {
474                         strbuf_addstr(&anon, prefixes[i]);
475                         break;
476                 }
477         }
478
479         anonymize_path(&anon, refname, &refs, anonymize_ref_component);
480         return anon.buf;
481 }
482
/*
 * Return a newly allocated placeholder commit message numbered with a
 * running counter; "old" is ignored.
 *
 * Commit messages are not cached: they are unlikely to be repeated
 * verbatim, and it is not that interesting when they are.
 */
static char *anonymize_commit_message(const char *old)
{
        static int next_id;
        int id = next_id++;

        return xstrfmt("subject %d\n\nbody\n", id);
}
492
493 static struct hashmap idents;
494 static void *anonymize_ident(const void *old, size_t *len)
495 {
496         static int counter;
497         struct strbuf out = STRBUF_INIT;
498         strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
499         counter++;
500         return strbuf_detach(&out, len);
501 }
502
503 /*
504  * Our strategy here is to anonymize the names and email addresses,
505  * but keep timestamps intact, as they influence things like traversal
506  * order (and by themselves should not be too revealing).
507  */
508 static void anonymize_ident_line(const char **beg, const char **end)
509 {
510         static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
511         static unsigned which_buffer;
512
513         struct strbuf *out;
514         struct ident_split split;
515         const char *end_of_header;
516
517         out = &buffers[which_buffer++];
518         which_buffer %= ARRAY_SIZE(buffers);
519         strbuf_reset(out);
520
521         /* skip "committer", "author", "tagger", etc */
522         end_of_header = strchr(*beg, ' ');
523         if (!end_of_header)
524                 BUG("malformed line fed to anonymize_ident_line: %.*s",
525                     (int)(*end - *beg), *beg);
526         end_of_header++;
527         strbuf_add(out, *beg, end_of_header - *beg);
528
529         if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
530             split.date_begin) {
531                 const char *ident;
532                 size_t len;
533
534                 len = split.mail_end - split.name_begin;
535                 ident = anonymize_mem(&idents, anonymize_ident,
536                                       split.name_begin, &len);
537                 strbuf_add(out, ident, len);
538                 strbuf_addch(out, ' ');
539                 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
540         } else {
541                 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
542         }
543
544         *beg = out->buf;
545         *end = out->buf + out->len;
546 }
547
548 static void handle_commit(struct commit *commit, struct rev_info *rev,
549                           struct string_list *paths_of_changed_objects)
550 {
551         int saved_output_format = rev->diffopt.output_format;
552         const char *commit_buffer;
553         const char *author, *author_end, *committer, *committer_end;
554         const char *encoding, *message;
555         char *reencoded = NULL;
556         struct commit_list *p;
557         const char *refname;
558         int i;
559
560         rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
561
562         parse_commit_or_die(commit);
563         commit_buffer = get_commit_buffer(commit, NULL);
564         author = strstr(commit_buffer, "\nauthor ");
565         if (!author)
566                 die("could not find author in commit %s",
567                     oid_to_hex(&commit->object.oid));
568         author++;
569         author_end = strchrnul(author, '\n');
570         committer = strstr(author_end, "\ncommitter ");
571         if (!committer)
572                 die("could not find committer in commit %s",
573                     oid_to_hex(&commit->object.oid));
574         committer++;
575         committer_end = strchrnul(committer, '\n');
576         message = strstr(committer_end, "\n\n");
577         encoding = find_encoding(committer_end, message);
578         if (message)
579                 message += 2;
580
581         if (commit->parents &&
582             get_object_mark(&commit->parents->item->object) != 0 &&
583             !full_tree) {
584                 parse_commit_or_die(commit->parents->item);
585                 diff_tree_oid(get_commit_tree_oid(commit->parents->item),
586                               get_commit_tree_oid(commit), "", &rev->diffopt);
587         }
588         else
589                 diff_root_tree_oid(get_commit_tree_oid(commit),
590                                    "", &rev->diffopt);
591
592         /* Export the referenced blobs, and remember the marks. */
593         for (i = 0; i < diff_queued_diff.nr; i++)
594                 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
595                         export_blob(&diff_queued_diff.queue[i]->two->oid);
596
597         refname = *revision_sources_at(&revision_sources, commit);
598         if (anonymize) {
599                 refname = anonymize_refname(refname);
600                 anonymize_ident_line(&committer, &committer_end);
601                 anonymize_ident_line(&author, &author_end);
602         }
603
604         mark_next_object(&commit->object);
605         if (anonymize)
606                 reencoded = anonymize_commit_message(message);
607         else if (!is_encoding_utf8(encoding))
608                 reencoded = reencode_string(message, "UTF-8", encoding);
609         if (!commit->parents)
610                 printf("reset %s\n", refname);
611         printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
612                refname, last_idnum,
613                (int)(author_end - author), author,
614                (int)(committer_end - committer), committer,
615                (unsigned)(reencoded
616                           ? strlen(reencoded) : message
617                           ? strlen(message) : 0),
618                reencoded ? reencoded : message ? message : "");
619         free(reencoded);
620         unuse_commit_buffer(commit, commit_buffer);
621
622         for (i = 0, p = commit->parents; p; p = p->next) {
623                 int mark = get_object_mark(&p->item->object);
624                 if (!mark)
625                         continue;
626                 if (i == 0)
627                         printf("from :%d\n", mark);
628                 else
629                         printf("merge :%d\n", mark);
630                 i++;
631         }
632
633         if (full_tree)
634                 printf("deleteall\n");
635         log_tree_diff_flush(rev);
636         string_list_clear(paths_of_changed_objects, 0);
637         rev->diffopt.output_format = saved_output_format;
638
639         printf("\n");
640
641         show_progress();
642 }
643
644 static void *anonymize_tag(const void *old, size_t *len)
645 {
646         static int counter;
647         struct strbuf out = STRBUF_INIT;
648         strbuf_addf(&out, "tag message %d", counter++);
649         return strbuf_detach(&out, len);
650 }
651
652 static void handle_tail(struct object_array *commits, struct rev_info *revs,
653                         struct string_list *paths_of_changed_objects)
654 {
655         struct commit *commit;
656         while (commits->nr) {
657                 commit = (struct commit *)object_array_pop(commits);
658                 if (has_unshown_parent(commit)) {
659                         /* Queue again, to be handled later */
660                         add_object_array(&commit->object, NULL, commits);
661                         return;
662                 }
663                 handle_commit(commit, revs, paths_of_changed_objects);
664         }
665 }
666
667 static void handle_tag(const char *name, struct tag *tag)
668 {
669         unsigned long size;
670         enum object_type type;
671         char *buf;
672         const char *tagger, *tagger_end, *message;
673         size_t message_size = 0;
674         struct object *tagged;
675         int tagged_mark;
676         struct commit *p;
677
678         /* Trees have no identifier in fast-export output, thus we have no way
679          * to output tags of trees, tags of tags of trees, etc.  Simply omit
680          * such tags.
681          */
682         tagged = tag->tagged;
683         while (tagged->type == OBJ_TAG) {
684                 tagged = ((struct tag *)tagged)->tagged;
685         }
686         if (tagged->type == OBJ_TREE) {
687                 warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
688                         oid_to_hex(&tag->object.oid));
689                 return;
690         }
691
692         buf = read_object_file(&tag->object.oid, &type, &size);
693         if (!buf)
694                 die("could not read tag %s", oid_to_hex(&tag->object.oid));
695         message = memmem(buf, size, "\n\n", 2);
696         if (message) {
697                 message += 2;
698                 message_size = strlen(message);
699         }
700         tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
701         if (!tagger) {
702                 if (fake_missing_tagger)
703                         tagger = "tagger Unspecified Tagger "
704                                 "<unspecified-tagger> 0 +0000";
705                 else
706                         tagger = "";
707                 tagger_end = tagger + strlen(tagger);
708         } else {
709                 tagger++;
710                 tagger_end = strchrnul(tagger, '\n');
711                 if (anonymize)
712                         anonymize_ident_line(&tagger, &tagger_end);
713         }
714
715         if (anonymize) {
716                 name = anonymize_refname(name);
717                 if (message) {
718                         static struct hashmap tags;
719                         message = anonymize_mem(&tags, anonymize_tag,
720                                                 message, &message_size);
721                 }
722         }
723
724         /* handle signed tags */
725         if (message) {
726                 const char *signature = strstr(message,
727                                                "\n-----BEGIN PGP SIGNATURE-----\n");
728                 if (signature)
729                         switch(signed_tag_mode) {
730                         case ABORT:
731                                 die("encountered signed tag %s; use "
732                                     "--signed-tags=<mode> to handle it",
733                                     oid_to_hex(&tag->object.oid));
734                         case WARN:
735                                 warning("exporting signed tag %s",
736                                         oid_to_hex(&tag->object.oid));
737                                 /* fallthru */
738                         case VERBATIM:
739                                 break;
740                         case WARN_STRIP:
741                                 warning("stripping signature from tag %s",
742                                         oid_to_hex(&tag->object.oid));
743                                 /* fallthru */
744                         case STRIP:
745                                 message_size = signature + 1 - message;
746                                 break;
747                         }
748         }
749
750         /* handle tag->tagged having been filtered out due to paths specified */
751         tagged = tag->tagged;
752         tagged_mark = get_object_mark(tagged);
753         if (!tagged_mark) {
754                 switch(tag_of_filtered_mode) {
755                 case ABORT:
756                         die("tag %s tags unexported object; use "
757                             "--tag-of-filtered-object=<mode> to handle it",
758                             oid_to_hex(&tag->object.oid));
759                 case DROP:
760                         /* Ignore this tag altogether */
761                         free(buf);
762                         return;
763                 case REWRITE:
764                         if (tagged->type != OBJ_COMMIT) {
765                                 die("tag %s tags unexported %s!",
766                                     oid_to_hex(&tag->object.oid),
767                                     type_name(tagged->type));
768                         }
769                         p = (struct commit *)tagged;
770                         for (;;) {
771                                 if (p->parents && p->parents->next)
772                                         break;
773                                 if (p->object.flags & UNINTERESTING)
774                                         break;
775                                 if (!(p->object.flags & TREESAME))
776                                         break;
777                                 if (!p->parents)
778                                         die("can't find replacement commit for tag %s",
779                                              oid_to_hex(&tag->object.oid));
780                                 p = p->parents->item;
781                         }
782                         tagged_mark = get_object_mark(&p->object);
783                 }
784         }
785
786         if (starts_with(name, "refs/tags/"))
787                 name += 10;
788         printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
789                name, tagged_mark,
790                (int)(tagger_end - tagger), tagger,
791                tagger == tagger_end ? "" : "\n",
792                (int)message_size, (int)message_size, message ? message : "");
793         free(buf);
794 }
795
796 static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
797 {
798         switch (e->item->type) {
799         case OBJ_COMMIT:
800                 return (struct commit *)e->item;
801         case OBJ_TAG: {
802                 struct tag *tag = (struct tag *)e->item;
803
804                 /* handle nested tags */
805                 while (tag && tag->object.type == OBJ_TAG) {
806                         parse_object(the_repository, &tag->object.oid);
807                         string_list_append(&extra_refs, full_name)->util = tag;
808                         tag = (struct tag *)tag->tagged;
809                 }
810                 if (!tag)
811                         die("Tag %s points nowhere?", e->name);
812                 return (struct commit *)tag;
813                 break;
814         }
815         default:
816                 return NULL;
817         }
818 }
819
820 static void get_tags_and_duplicates(struct rev_cmdline_info *info)
821 {
822         int i;
823
824         for (i = 0; i < info->nr; i++) {
825                 struct rev_cmdline_entry *e = info->rev + i;
826                 struct object_id oid;
827                 struct commit *commit;
828                 char *full_name;
829
830                 if (e->flags & UNINTERESTING)
831                         continue;
832
833                 if (dwim_ref(e->name, strlen(e->name), &oid, &full_name) != 1)
834                         continue;
835
836                 if (refspecs.nr) {
837                         char *private;
838                         private = apply_refspecs(&refspecs, full_name);
839                         if (private) {
840                                 free(full_name);
841                                 full_name = private;
842                         }
843                 }
844
845                 commit = get_commit(e, full_name);
846                 if (!commit) {
847                         warning("%s: Unexpected object of type %s, skipping.",
848                                 e->name,
849                                 type_name(e->item->type));
850                         continue;
851                 }
852
853                 switch(commit->object.type) {
854                 case OBJ_COMMIT:
855                         break;
856                 case OBJ_BLOB:
857                         export_blob(&commit->object.oid);
858                         continue;
859                 default: /* OBJ_TAG (nested tags) is already handled */
860                         warning("Tag points to object of unexpected type %s, skipping.",
861                                 type_name(commit->object.type));
862                         continue;
863                 }
864
865                 /*
866                  * This ref will not be updated through a commit, lets make
867                  * sure it gets properly updated eventually.
868                  */
869                 if (*revision_sources_at(&revision_sources, commit) ||
870                     commit->object.flags & SHOWN)
871                         string_list_append(&extra_refs, full_name)->util = commit;
872                 if (!*revision_sources_at(&revision_sources, commit))
873                         *revision_sources_at(&revision_sources, commit) = full_name;
874         }
875 }
876
877 static void handle_tags_and_duplicates(void)
878 {
879         struct commit *commit;
880         int i;
881
882         for (i = extra_refs.nr - 1; i >= 0; i--) {
883                 const char *name = extra_refs.items[i].string;
884                 struct object *object = extra_refs.items[i].util;
885                 switch (object->type) {
886                 case OBJ_TAG:
887                         handle_tag(name, (struct tag *)object);
888                         break;
889                 case OBJ_COMMIT:
890                         if (anonymize)
891                                 name = anonymize_refname(name);
892                         /* create refs pointing to already seen commits */
893                         commit = (struct commit *)object;
894                         printf("reset %s\nfrom :%d\n\n", name,
895                                get_object_mark(&commit->object));
896                         show_progress();
897                         break;
898                 }
899         }
900 }
901
902 static void export_marks(char *file)
903 {
904         unsigned int i;
905         uint32_t mark;
906         struct decoration_entry *deco = idnums.entries;
907         FILE *f;
908         int e = 0;
909
910         f = fopen_for_writing(file);
911         if (!f)
912                 die_errno("Unable to open marks file %s for writing.", file);
913
914         for (i = 0; i < idnums.size; i++) {
915                 if (deco->base && deco->base->type == 1) {
916                         mark = ptr_to_mark(deco->decoration);
917                         if (fprintf(f, ":%"PRIu32" %s\n", mark,
918                                 oid_to_hex(&deco->base->oid)) < 0) {
919                             e = 1;
920                             break;
921                         }
922                 }
923                 deco++;
924         }
925
926         e |= ferror(f);
927         e |= fclose(f);
928         if (e)
929                 error("Unable to write marks file %s.", file);
930 }
931
932 static void import_marks(char *input_file)
933 {
934         char line[512];
935         FILE *f = xfopen(input_file, "r");
936
937         while (fgets(line, sizeof(line), f)) {
938                 uint32_t mark;
939                 char *line_end, *mark_end;
940                 struct object_id oid;
941                 struct object *object;
942                 struct commit *commit;
943                 enum object_type type;
944
945                 line_end = strchr(line, '\n');
946                 if (line[0] != ':' || !line_end)
947                         die("corrupt mark line: %s", line);
948                 *line_end = '\0';
949
950                 mark = strtoumax(line + 1, &mark_end, 10);
951                 if (!mark || mark_end == line + 1
952                         || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
953                         die("corrupt mark line: %s", line);
954
955                 if (last_idnum < mark)
956                         last_idnum = mark;
957
958                 type = oid_object_info(the_repository, &oid, NULL);
959                 if (type < 0)
960                         die("object not found: %s", oid_to_hex(&oid));
961
962                 if (type != OBJ_COMMIT)
963                         /* only commits */
964                         continue;
965
966                 commit = lookup_commit(the_repository, &oid);
967                 if (!commit)
968                         die("not a commit? can't happen: %s", oid_to_hex(&oid));
969
970                 object = &commit->object;
971
972                 if (object->flags & SHOWN)
973                         error("Object %s already has a mark", oid_to_hex(&oid));
974
975                 mark_object(object, mark);
976
977                 object->flags |= SHOWN;
978         }
979         fclose(f);
980 }
981
982 static void handle_deletes(void)
983 {
984         int i;
985         for (i = 0; i < refspecs.nr; i++) {
986                 struct refspec_item *refspec = &refspecs.items[i];
987                 if (*refspec->src)
988                         continue;
989
990                 printf("reset %s\nfrom %s\n\n",
991                                 refspec->dst, sha1_to_hex(null_sha1));
992         }
993 }
994
995 int cmd_fast_export(int argc, const char **argv, const char *prefix)
996 {
997         struct rev_info revs;
998         struct object_array commits = OBJECT_ARRAY_INIT;
999         struct commit *commit;
1000         char *export_filename = NULL, *import_filename = NULL;
1001         uint32_t lastimportid;
1002         struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
1003         struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
1004         struct option options[] = {
1005                 OPT_INTEGER(0, "progress", &progress,
1006                             N_("show progress after <n> objects")),
1007                 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
1008                              N_("select handling of signed tags"),
1009                              parse_opt_signed_tag_mode),
1010                 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1011                              N_("select handling of tags that tag filtered objects"),
1012                              parse_opt_tag_of_filtered_mode),
1013                 OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1014                              N_("Dump marks to this file")),
1015                 OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1016                              N_("Import marks from this file")),
1017                 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1018                          N_("Fake a tagger when tags lack one")),
1019                 OPT_BOOL(0, "full-tree", &full_tree,
1020                          N_("Output full tree for each commit")),
1021                 OPT_BOOL(0, "use-done-feature", &use_done_feature,
1022                              N_("Use the done feature to terminate the stream")),
1023                 OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
1024                 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1025                              N_("Apply refspec to exported refs")),
1026                 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1027                 OPT_END()
1028         };
1029
1030         if (argc == 1)
1031                 usage_with_options (fast_export_usage, options);
1032
1033         /* we handle encodings */
1034         git_config(git_default_config, NULL);
1035
1036         init_revisions(&revs, prefix);
1037         init_revision_sources(&revision_sources);
1038         revs.topo_order = 1;
1039         revs.sources = &revision_sources;
1040         revs.rewrite_parents = 1;
1041         argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1042                         PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
1043         argc = setup_revisions(argc, argv, &revs, NULL);
1044         if (argc > 1)
1045                 usage_with_options (fast_export_usage, options);
1046
1047         if (refspecs_list.nr) {
1048                 int i;
1049
1050                 for (i = 0; i < refspecs_list.nr; i++)
1051                         refspec_append(&refspecs, refspecs_list.items[i].string);
1052
1053                 string_list_clear(&refspecs_list, 1);
1054         }
1055
1056         if (use_done_feature)
1057                 printf("feature done\n");
1058
1059         if (import_filename)
1060                 import_marks(import_filename);
1061         lastimportid = last_idnum;
1062
1063         if (import_filename && revs.prune_data.nr)
1064                 full_tree = 1;
1065
1066         get_tags_and_duplicates(&revs.cmdline);
1067
1068         if (prepare_revision_walk(&revs))
1069                 die("revision walk setup failed");
1070         revs.diffopt.format_callback = show_filemodify;
1071         revs.diffopt.format_callback_data = &paths_of_changed_objects;
1072         revs.diffopt.flags.recursive = 1;
1073         while ((commit = get_revision(&revs))) {
1074                 if (has_unshown_parent(commit)) {
1075                         add_object_array(&commit->object, NULL, &commits);
1076                 }
1077                 else {
1078                         handle_commit(commit, &revs, &paths_of_changed_objects);
1079                         handle_tail(&commits, &revs, &paths_of_changed_objects);
1080                 }
1081         }
1082
1083         handle_tags_and_duplicates();
1084         handle_deletes();
1085
1086         if (export_filename && lastimportid != last_idnum)
1087                 export_marks(export_filename);
1088
1089         if (use_done_feature)
1090                 printf("done\n");
1091
1092         refspec_clear(&refspecs);
1093
1094         return 0;
1095 }