Merge branch 'ds/commit-graph'
[git] / builtin / fast-export.c
1 /*
2  * "git fast-export" builtin command
3  *
4  * Copyright (C) 2007 Johannes E. Schindelin
5  */
6 #include "builtin.h"
7 #include "cache.h"
8 #include "config.h"
9 #include "refs.h"
10 #include "refspec.h"
11 #include "object-store.h"
12 #include "commit.h"
13 #include "object.h"
14 #include "tag.h"
15 #include "diff.h"
16 #include "diffcore.h"
17 #include "log-tree.h"
18 #include "revision.h"
19 #include "decorate.h"
20 #include "string-list.h"
21 #include "utf8.h"
22 #include "parse-options.h"
23 #include "quote.h"
24 #include "remote.h"
25 #include "blob.h"
26 #include "commit-slab.h"
27
28 static const char *fast_export_usage[] = {
29         N_("git fast-export [rev-list-opts]"),
30         NULL
31 };
32
33 static int progress;
34 static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT;
35 static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
36 static int fake_missing_tagger;
37 static int use_done_feature;
38 static int no_data;
39 static int full_tree;
40 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
41 static struct refspec refspecs = REFSPEC_INIT_FETCH;
42 static int anonymize;
43 static struct revision_sources revision_sources;
44
45 static int parse_opt_signed_tag_mode(const struct option *opt,
46                                      const char *arg, int unset)
47 {
48         if (unset || !strcmp(arg, "abort"))
49                 signed_tag_mode = ABORT;
50         else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
51                 signed_tag_mode = VERBATIM;
52         else if (!strcmp(arg, "warn"))
53                 signed_tag_mode = WARN;
54         else if (!strcmp(arg, "warn-strip"))
55                 signed_tag_mode = WARN_STRIP;
56         else if (!strcmp(arg, "strip"))
57                 signed_tag_mode = STRIP;
58         else
59                 return error("Unknown signed-tags mode: %s", arg);
60         return 0;
61 }
62
63 static int parse_opt_tag_of_filtered_mode(const struct option *opt,
64                                           const char *arg, int unset)
65 {
66         if (unset || !strcmp(arg, "abort"))
67                 tag_of_filtered_mode = ERROR;
68         else if (!strcmp(arg, "drop"))
69                 tag_of_filtered_mode = DROP;
70         else if (!strcmp(arg, "rewrite"))
71                 tag_of_filtered_mode = REWRITE;
72         else
73                 return error("Unknown tag-of-filtered mode: %s", arg);
74         return 0;
75 }
76
77 static struct decoration idnums;
78 static uint32_t last_idnum;
79
80 static int has_unshown_parent(struct commit *commit)
81 {
82         struct commit_list *parent;
83
84         for (parent = commit->parents; parent; parent = parent->next)
85                 if (!(parent->item->object.flags & SHOWN) &&
86                     !(parent->item->object.flags & UNINTERESTING))
87                         return 1;
88         return 0;
89 }
90
91 struct anonymized_entry {
92         struct hashmap_entry hash;
93         const char *orig;
94         size_t orig_len;
95         const char *anon;
96         size_t anon_len;
97 };
98
99 static int anonymized_entry_cmp(const void *unused_cmp_data,
100                                 const void *va, const void *vb,
101                                 const void *unused_keydata)
102 {
103         const struct anonymized_entry *a = va, *b = vb;
104         return a->orig_len != b->orig_len ||
105                 memcmp(a->orig, b->orig, a->orig_len);
106 }
107
108 /*
109  * Basically keep a cache of X->Y so that we can repeatedly replace
110  * the same anonymized string with another. The actual generation
111  * is farmed out to the generate function.
112  */
113 static const void *anonymize_mem(struct hashmap *map,
114                                  void *(*generate)(const void *, size_t *),
115                                  const void *orig, size_t *len)
116 {
117         struct anonymized_entry key, *ret;
118
119         if (!map->cmpfn)
120                 hashmap_init(map, anonymized_entry_cmp, NULL, 0);
121
122         hashmap_entry_init(&key, memhash(orig, *len));
123         key.orig = orig;
124         key.orig_len = *len;
125         ret = hashmap_get(map, &key, NULL);
126
127         if (!ret) {
128                 ret = xmalloc(sizeof(*ret));
129                 hashmap_entry_init(&ret->hash, key.hash.hash);
130                 ret->orig = xstrdup(orig);
131                 ret->orig_len = *len;
132                 ret->anon = generate(orig, len);
133                 ret->anon_len = *len;
134                 hashmap_put(map, ret);
135         }
136
137         *len = ret->anon_len;
138         return ret->anon;
139 }
140
/*
 * Anonymize "path" one '/'-separated component at a time and append the
 * result to "out", so that paths a/b and a/c keep sharing a common root.
 * Each component goes through anonymize_mem, so looking up "a" again
 * yields the same replacement.
 */
static void anonymize_path(struct strbuf *out, const char *path,
			   struct hashmap *map,
			   void *(*generate)(const void *, size_t *))
{
	while (*path) {
		const char *slash = strchrnul(path, '/');
		size_t n = slash - path;
		const char *replacement = anonymize_mem(map, generate, path, &n);

		strbuf_add(out, replacement, n);
		if (!*slash)
			break;
		/* Keep the separator and continue after it. */
		strbuf_addch(out, '/');
		path = slash + 1;
	}
}
161
/* Encode a mark number as a pointer value so it can be stored as a decoration. */
static inline void *mark_to_ptr(uint32_t mark)
{
	uintptr_t widened = mark;

	return (void *)widened;
}
166
/* Decode a pointer value produced by mark_to_ptr() back into a mark number. */
static inline uint32_t ptr_to_mark(void * mark)
{
	uintptr_t raw = (uintptr_t)mark;

	return (uint32_t)raw;
}
171
172 static inline void mark_object(struct object *object, uint32_t mark)
173 {
174         add_decoration(&idnums, object, mark_to_ptr(mark));
175 }
176
177 static inline void mark_next_object(struct object *object)
178 {
179         mark_object(object, ++last_idnum);
180 }
181
182 static int get_object_mark(struct object *object)
183 {
184         void *decoration = lookup_decoration(&idnums, object);
185         if (!decoration)
186                 return 0;
187         return ptr_to_mark(decoration);
188 }
189
190 static void show_progress(void)
191 {
192         static int counter = 0;
193         if (!progress)
194                 return;
195         if ((++counter % progress) == 0)
196                 printf("progress %d objects\n", counter);
197 }
198
199 /*
200  * Ideally we would want some transformation of the blob data here
201  * that is unreversible, but would still be the same size and have
202  * the same data relationship to other blobs (so that we get the same
203  * delta and packing behavior as the original). But the first and last
204  * requirements there are probably mutually exclusive, so let's take
205  * the easy way out for now, and just generate arbitrary content.
206  *
207  * There's no need to cache this result with anonymize_mem, since
208  * we already handle blob content caching with marks.
209  */
210 static char *anonymize_blob(unsigned long *size)
211 {
212         static int counter;
213         struct strbuf out = STRBUF_INIT;
214         strbuf_addf(&out, "anonymous blob %d", counter++);
215         *size = out.len;
216         return strbuf_detach(&out, NULL);
217 }
218
219 static void export_blob(const struct object_id *oid)
220 {
221         unsigned long size;
222         enum object_type type;
223         char *buf;
224         struct object *object;
225         int eaten;
226
227         if (no_data)
228                 return;
229
230         if (is_null_oid(oid))
231                 return;
232
233         object = lookup_object(oid->hash);
234         if (object && object->flags & SHOWN)
235                 return;
236
237         if (anonymize) {
238                 buf = anonymize_blob(&size);
239                 object = (struct object *)lookup_blob(oid);
240                 eaten = 0;
241         } else {
242                 buf = read_object_file(oid, &type, &size);
243                 if (!buf)
244                         die ("Could not read blob %s", oid_to_hex(oid));
245                 if (check_object_signature(oid, buf, size, type_name(type)) < 0)
246                         die("sha1 mismatch in blob %s", oid_to_hex(oid));
247                 object = parse_object_buffer(oid, type, size, buf, &eaten);
248         }
249
250         if (!object)
251                 die("Could not read blob %s", oid_to_hex(oid));
252
253         mark_next_object(object);
254
255         printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
256         if (size && fwrite(buf, size, 1, stdout) != 1)
257                 die_errno ("Could not write blob '%s'", oid_to_hex(oid));
258         printf("\n");
259
260         show_progress();
261
262         object->flags |= SHOWN;
263         if (!eaten)
264                 free(buf);
265 }
266
267 static int depth_first(const void *a_, const void *b_)
268 {
269         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
270         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
271         const char *name_a, *name_b;
272         int len_a, len_b, len;
273         int cmp;
274
275         name_a = a->one ? a->one->path : a->two->path;
276         name_b = b->one ? b->one->path : b->two->path;
277
278         len_a = strlen(name_a);
279         len_b = strlen(name_b);
280         len = (len_a < len_b) ? len_a : len_b;
281
282         /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
283         cmp = memcmp(name_a, name_b, len);
284         if (cmp)
285                 return cmp;
286         cmp = len_b - len_a;
287         if (cmp)
288                 return cmp;
289         /*
290          * Move 'R'ename entries last so that all references of the file
291          * appear in the output before it is renamed (e.g., when a file
292          * was copied and renamed in the same commit).
293          */
294         return (a->status == 'R') - (b->status == 'R');
295 }
296
/*
 * Write "path" to stdout: C-style quoted if quote_c_style() says quoting
 * is needed, wrapped in plain double quotes if it merely contains a
 * space, and verbatim otherwise.
 */
static void print_path_1(const char *path)
{
	/* First call only asks whether quoting is needed (no output sink). */
	if (quote_c_style(path, NULL, NULL, 0)) {
		quote_c_style(path, NULL, stdout, 0);
		return;
	}

	if (strchr(path, ' '))
		printf("\"%s\"", path);
	else
		printf("%s", path);
}
307
308 static void *anonymize_path_component(const void *path, size_t *len)
309 {
310         static int counter;
311         struct strbuf out = STRBUF_INIT;
312         strbuf_addf(&out, "path%d", counter++);
313         return strbuf_detach(&out, len);
314 }
315
316 static void print_path(const char *path)
317 {
318         if (!anonymize)
319                 print_path_1(path);
320         else {
321                 static struct hashmap paths;
322                 static struct strbuf anon = STRBUF_INIT;
323
324                 anonymize_path(&anon, path, &paths, anonymize_path_component);
325                 print_path_1(anon.buf);
326                 strbuf_reset(&anon);
327         }
328 }
329
330 static void *generate_fake_oid(const void *old, size_t *len)
331 {
332         static uint32_t counter = 1; /* avoid null sha1 */
333         unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1);
334         put_be32(out + GIT_SHA1_RAWSZ - 4, counter++);
335         return out;
336 }
337
338 static const unsigned char *anonymize_sha1(const struct object_id *oid)
339 {
340         static struct hashmap sha1s;
341         size_t len = GIT_SHA1_RAWSZ;
342         return anonymize_mem(&sha1s, generate_fake_oid, oid, &len);
343 }
344
345 static void show_filemodify(struct diff_queue_struct *q,
346                             struct diff_options *options, void *data)
347 {
348         int i;
349         struct string_list *changed = data;
350
351         /*
352          * Handle files below a directory first, in case they are all deleted
353          * and the directory changes to a file or symlink.
354          */
355         QSORT(q->queue, q->nr, depth_first);
356
357         for (i = 0; i < q->nr; i++) {
358                 struct diff_filespec *ospec = q->queue[i]->one;
359                 struct diff_filespec *spec = q->queue[i]->two;
360
361                 switch (q->queue[i]->status) {
362                 case DIFF_STATUS_DELETED:
363                         printf("D ");
364                         print_path(spec->path);
365                         string_list_insert(changed, spec->path);
366                         putchar('\n');
367                         break;
368
369                 case DIFF_STATUS_COPIED:
370                 case DIFF_STATUS_RENAMED:
371                         /*
372                          * If a change in the file corresponding to ospec->path
373                          * has been observed, we cannot trust its contents
374                          * because the diff is calculated based on the prior
375                          * contents, not the current contents.  So, declare a
376                          * copy or rename only if there was no change observed.
377                          */
378                         if (!string_list_has_string(changed, ospec->path)) {
379                                 printf("%c ", q->queue[i]->status);
380                                 print_path(ospec->path);
381                                 putchar(' ');
382                                 print_path(spec->path);
383                                 string_list_insert(changed, spec->path);
384                                 putchar('\n');
385
386                                 if (!oidcmp(&ospec->oid, &spec->oid) &&
387                                     ospec->mode == spec->mode)
388                                         break;
389                         }
390                         /* fallthrough */
391
392                 case DIFF_STATUS_TYPE_CHANGED:
393                 case DIFF_STATUS_MODIFIED:
394                 case DIFF_STATUS_ADDED:
395                         /*
396                          * Links refer to objects in another repositories;
397                          * output the SHA-1 verbatim.
398                          */
399                         if (no_data || S_ISGITLINK(spec->mode))
400                                 printf("M %06o %s ", spec->mode,
401                                        sha1_to_hex(anonymize ?
402                                                    anonymize_sha1(&spec->oid) :
403                                                    spec->oid.hash));
404                         else {
405                                 struct object *object = lookup_object(spec->oid.hash);
406                                 printf("M %06o :%d ", spec->mode,
407                                        get_object_mark(object));
408                         }
409                         print_path(spec->path);
410                         string_list_insert(changed, spec->path);
411                         putchar('\n');
412                         break;
413
414                 default:
415                         die("Unexpected comparison status '%c' for %s, %s",
416                                 q->queue[i]->status,
417                                 ospec->path ? ospec->path : "none",
418                                 spec->path ? spec->path : "none");
419                 }
420         }
421 }
422
423 static const char *find_encoding(const char *begin, const char *end)
424 {
425         const char *needle = "\nencoding ";
426         char *bol, *eol;
427
428         bol = memmem(begin, end ? end - begin : strlen(begin),
429                      needle, strlen(needle));
430         if (!bol)
431                 return git_commit_encoding;
432         bol += strlen(needle);
433         eol = strchrnul(bol, '\n');
434         *eol = '\0';
435         return bol;
436 }
437
438 static void *anonymize_ref_component(const void *old, size_t *len)
439 {
440         static int counter;
441         struct strbuf out = STRBUF_INIT;
442         strbuf_addf(&out, "ref%d", counter++);
443         return strbuf_detach(&out, len);
444 }
445
446 static const char *anonymize_refname(const char *refname)
447 {
448         /*
449          * If any of these prefixes is found, we will leave it intact
450          * so that tags remain tags and so forth.
451          */
452         static const char *prefixes[] = {
453                 "refs/heads/",
454                 "refs/tags/",
455                 "refs/remotes/",
456                 "refs/"
457         };
458         static struct hashmap refs;
459         static struct strbuf anon = STRBUF_INIT;
460         int i;
461
462         /*
463          * We also leave "master" as a special case, since it does not reveal
464          * anything interesting.
465          */
466         if (!strcmp(refname, "refs/heads/master"))
467                 return refname;
468
469         strbuf_reset(&anon);
470         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
471                 if (skip_prefix(refname, prefixes[i], &refname)) {
472                         strbuf_addstr(&anon, prefixes[i]);
473                         break;
474                 }
475         }
476
477         anonymize_path(&anon, refname, &refs, anonymize_ref_component);
478         return anon.buf;
479 }
480
/*
 * We do not even bother to cache commit messages, as they are unlikely
 * to be repeated verbatim, and it is not that interesting when they are.
 *
 * Returns a newly allocated placeholder message; "old" is not examined.
 */
static char *anonymize_commit_message(const char *old)
{
	static int counter;
	int id = counter++;

	return xstrfmt("subject %d\n\nbody\n", id);
}
490
491 static struct hashmap idents;
492 static void *anonymize_ident(const void *old, size_t *len)
493 {
494         static int counter;
495         struct strbuf out = STRBUF_INIT;
496         strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
497         counter++;
498         return strbuf_detach(&out, len);
499 }
500
501 /*
502  * Our strategy here is to anonymize the names and email addresses,
503  * but keep timestamps intact, as they influence things like traversal
504  * order (and by themselves should not be too revealing).
505  */
506 static void anonymize_ident_line(const char **beg, const char **end)
507 {
508         static struct strbuf buffers[] = { STRBUF_INIT, STRBUF_INIT };
509         static unsigned which_buffer;
510
511         struct strbuf *out;
512         struct ident_split split;
513         const char *end_of_header;
514
515         out = &buffers[which_buffer++];
516         which_buffer %= ARRAY_SIZE(buffers);
517         strbuf_reset(out);
518
519         /* skip "committer", "author", "tagger", etc */
520         end_of_header = strchr(*beg, ' ');
521         if (!end_of_header)
522                 BUG("malformed line fed to anonymize_ident_line: %.*s",
523                     (int)(*end - *beg), *beg);
524         end_of_header++;
525         strbuf_add(out, *beg, end_of_header - *beg);
526
527         if (!split_ident_line(&split, end_of_header, *end - end_of_header) &&
528             split.date_begin) {
529                 const char *ident;
530                 size_t len;
531
532                 len = split.mail_end - split.name_begin;
533                 ident = anonymize_mem(&idents, anonymize_ident,
534                                       split.name_begin, &len);
535                 strbuf_add(out, ident, len);
536                 strbuf_addch(out, ' ');
537                 strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
538         } else {
539                 strbuf_addstr(out, "Malformed Ident <malformed@example.com> 0 -0000");
540         }
541
542         *beg = out->buf;
543         *end = out->buf + out->len;
544 }
545
546 static void handle_commit(struct commit *commit, struct rev_info *rev,
547                           struct string_list *paths_of_changed_objects)
548 {
549         int saved_output_format = rev->diffopt.output_format;
550         const char *commit_buffer;
551         const char *author, *author_end, *committer, *committer_end;
552         const char *encoding, *message;
553         char *reencoded = NULL;
554         struct commit_list *p;
555         const char *refname;
556         int i;
557
558         rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
559
560         parse_commit_or_die(commit);
561         commit_buffer = get_commit_buffer(commit, NULL);
562         author = strstr(commit_buffer, "\nauthor ");
563         if (!author)
564                 die ("Could not find author in commit %s",
565                      oid_to_hex(&commit->object.oid));
566         author++;
567         author_end = strchrnul(author, '\n');
568         committer = strstr(author_end, "\ncommitter ");
569         if (!committer)
570                 die ("Could not find committer in commit %s",
571                      oid_to_hex(&commit->object.oid));
572         committer++;
573         committer_end = strchrnul(committer, '\n');
574         message = strstr(committer_end, "\n\n");
575         encoding = find_encoding(committer_end, message);
576         if (message)
577                 message += 2;
578
579         if (commit->parents &&
580             get_object_mark(&commit->parents->item->object) != 0 &&
581             !full_tree) {
582                 parse_commit_or_die(commit->parents->item);
583                 diff_tree_oid(get_commit_tree_oid(commit->parents->item),
584                               get_commit_tree_oid(commit), "", &rev->diffopt);
585         }
586         else
587                 diff_root_tree_oid(get_commit_tree_oid(commit),
588                                    "", &rev->diffopt);
589
590         /* Export the referenced blobs, and remember the marks. */
591         for (i = 0; i < diff_queued_diff.nr; i++)
592                 if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
593                         export_blob(&diff_queued_diff.queue[i]->two->oid);
594
595         refname = *revision_sources_at(&revision_sources, commit);
596         if (anonymize) {
597                 refname = anonymize_refname(refname);
598                 anonymize_ident_line(&committer, &committer_end);
599                 anonymize_ident_line(&author, &author_end);
600         }
601
602         mark_next_object(&commit->object);
603         if (anonymize)
604                 reencoded = anonymize_commit_message(message);
605         else if (!is_encoding_utf8(encoding))
606                 reencoded = reencode_string(message, "UTF-8", encoding);
607         if (!commit->parents)
608                 printf("reset %s\n", refname);
609         printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
610                refname, last_idnum,
611                (int)(author_end - author), author,
612                (int)(committer_end - committer), committer,
613                (unsigned)(reencoded
614                           ? strlen(reencoded) : message
615                           ? strlen(message) : 0),
616                reencoded ? reencoded : message ? message : "");
617         free(reencoded);
618         unuse_commit_buffer(commit, commit_buffer);
619
620         for (i = 0, p = commit->parents; p; p = p->next) {
621                 int mark = get_object_mark(&p->item->object);
622                 if (!mark)
623                         continue;
624                 if (i == 0)
625                         printf("from :%d\n", mark);
626                 else
627                         printf("merge :%d\n", mark);
628                 i++;
629         }
630
631         if (full_tree)
632                 printf("deleteall\n");
633         log_tree_diff_flush(rev);
634         string_list_clear(paths_of_changed_objects, 0);
635         rev->diffopt.output_format = saved_output_format;
636
637         printf("\n");
638
639         show_progress();
640 }
641
642 static void *anonymize_tag(const void *old, size_t *len)
643 {
644         static int counter;
645         struct strbuf out = STRBUF_INIT;
646         strbuf_addf(&out, "tag message %d", counter++);
647         return strbuf_detach(&out, len);
648 }
649
650 static void handle_tail(struct object_array *commits, struct rev_info *revs,
651                         struct string_list *paths_of_changed_objects)
652 {
653         struct commit *commit;
654         while (commits->nr) {
655                 commit = (struct commit *)object_array_pop(commits);
656                 if (has_unshown_parent(commit)) {
657                         /* Queue again, to be handled later */
658                         add_object_array(&commit->object, NULL, commits);
659                         return;
660                 }
661                 handle_commit(commit, revs, paths_of_changed_objects);
662         }
663 }
664
665 static void handle_tag(const char *name, struct tag *tag)
666 {
667         unsigned long size;
668         enum object_type type;
669         char *buf;
670         const char *tagger, *tagger_end, *message;
671         size_t message_size = 0;
672         struct object *tagged;
673         int tagged_mark;
674         struct commit *p;
675
676         /* Trees have no identifier in fast-export output, thus we have no way
677          * to output tags of trees, tags of tags of trees, etc.  Simply omit
678          * such tags.
679          */
680         tagged = tag->tagged;
681         while (tagged->type == OBJ_TAG) {
682                 tagged = ((struct tag *)tagged)->tagged;
683         }
684         if (tagged->type == OBJ_TREE) {
685                 warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
686                         oid_to_hex(&tag->object.oid));
687                 return;
688         }
689
690         buf = read_object_file(&tag->object.oid, &type, &size);
691         if (!buf)
692                 die ("Could not read tag %s", oid_to_hex(&tag->object.oid));
693         message = memmem(buf, size, "\n\n", 2);
694         if (message) {
695                 message += 2;
696                 message_size = strlen(message);
697         }
698         tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
699         if (!tagger) {
700                 if (fake_missing_tagger)
701                         tagger = "tagger Unspecified Tagger "
702                                 "<unspecified-tagger> 0 +0000";
703                 else
704                         tagger = "";
705                 tagger_end = tagger + strlen(tagger);
706         } else {
707                 tagger++;
708                 tagger_end = strchrnul(tagger, '\n');
709                 if (anonymize)
710                         anonymize_ident_line(&tagger, &tagger_end);
711         }
712
713         if (anonymize) {
714                 name = anonymize_refname(name);
715                 if (message) {
716                         static struct hashmap tags;
717                         message = anonymize_mem(&tags, anonymize_tag,
718                                                 message, &message_size);
719                 }
720         }
721
722         /* handle signed tags */
723         if (message) {
724                 const char *signature = strstr(message,
725                                                "\n-----BEGIN PGP SIGNATURE-----\n");
726                 if (signature)
727                         switch(signed_tag_mode) {
728                         case ABORT:
729                                 die ("Encountered signed tag %s; use "
730                                      "--signed-tags=<mode> to handle it.",
731                                      oid_to_hex(&tag->object.oid));
732                         case WARN:
733                                 warning ("Exporting signed tag %s",
734                                          oid_to_hex(&tag->object.oid));
735                                 /* fallthru */
736                         case VERBATIM:
737                                 break;
738                         case WARN_STRIP:
739                                 warning ("Stripping signature from tag %s",
740                                          oid_to_hex(&tag->object.oid));
741                                 /* fallthru */
742                         case STRIP:
743                                 message_size = signature + 1 - message;
744                                 break;
745                         }
746         }
747
748         /* handle tag->tagged having been filtered out due to paths specified */
749         tagged = tag->tagged;
750         tagged_mark = get_object_mark(tagged);
751         if (!tagged_mark) {
752                 switch(tag_of_filtered_mode) {
753                 case ABORT:
754                         die ("Tag %s tags unexported object; use "
755                              "--tag-of-filtered-object=<mode> to handle it.",
756                              oid_to_hex(&tag->object.oid));
757                 case DROP:
758                         /* Ignore this tag altogether */
759                         free(buf);
760                         return;
761                 case REWRITE:
762                         if (tagged->type != OBJ_COMMIT) {
763                                 die ("Tag %s tags unexported %s!",
764                                      oid_to_hex(&tag->object.oid),
765                                      type_name(tagged->type));
766                         }
767                         p = (struct commit *)tagged;
768                         for (;;) {
769                                 if (p->parents && p->parents->next)
770                                         break;
771                                 if (p->object.flags & UNINTERESTING)
772                                         break;
773                                 if (!(p->object.flags & TREESAME))
774                                         break;
775                                 if (!p->parents)
776                                         die ("Can't find replacement commit for tag %s\n",
777                                              oid_to_hex(&tag->object.oid));
778                                 p = p->parents->item;
779                         }
780                         tagged_mark = get_object_mark(&p->object);
781                 }
782         }
783
784         if (starts_with(name, "refs/tags/"))
785                 name += 10;
786         printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
787                name, tagged_mark,
788                (int)(tagger_end - tagger), tagger,
789                tagger == tagger_end ? "" : "\n",
790                (int)message_size, (int)message_size, message ? message : "");
791         free(buf);
792 }
793
794 static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
795 {
796         switch (e->item->type) {
797         case OBJ_COMMIT:
798                 return (struct commit *)e->item;
799         case OBJ_TAG: {
800                 struct tag *tag = (struct tag *)e->item;
801
802                 /* handle nested tags */
803                 while (tag && tag->object.type == OBJ_TAG) {
804                         parse_object(&tag->object.oid);
805                         string_list_append(&extra_refs, full_name)->util = tag;
806                         tag = (struct tag *)tag->tagged;
807                 }
808                 if (!tag)
809                         die("Tag %s points nowhere?", e->name);
810                 return (struct commit *)tag;
811                 break;
812         }
813         default:
814                 return NULL;
815         }
816 }
817
818 static void get_tags_and_duplicates(struct rev_cmdline_info *info)
819 {
820         int i;
821
822         for (i = 0; i < info->nr; i++) {
823                 struct rev_cmdline_entry *e = info->rev + i;
824                 struct object_id oid;
825                 struct commit *commit;
826                 char *full_name;
827
828                 if (e->flags & UNINTERESTING)
829                         continue;
830
831                 if (dwim_ref(e->name, strlen(e->name), &oid, &full_name) != 1)
832                         continue;
833
834                 if (refspecs.nr) {
835                         char *private;
836                         private = apply_refspecs(&refspecs, full_name);
837                         if (private) {
838                                 free(full_name);
839                                 full_name = private;
840                         }
841                 }
842
843                 commit = get_commit(e, full_name);
844                 if (!commit) {
845                         warning("%s: Unexpected object of type %s, skipping.",
846                                 e->name,
847                                 type_name(e->item->type));
848                         continue;
849                 }
850
851                 switch(commit->object.type) {
852                 case OBJ_COMMIT:
853                         break;
854                 case OBJ_BLOB:
855                         export_blob(&commit->object.oid);
856                         continue;
857                 default: /* OBJ_TAG (nested tags) is already handled */
858                         warning("Tag points to object of unexpected type %s, skipping.",
859                                 type_name(commit->object.type));
860                         continue;
861                 }
862
863                 /*
864                  * This ref will not be updated through a commit, lets make
865                  * sure it gets properly updated eventually.
866                  */
867                 if (*revision_sources_at(&revision_sources, commit) ||
868                     commit->object.flags & SHOWN)
869                         string_list_append(&extra_refs, full_name)->util = commit;
870                 if (!*revision_sources_at(&revision_sources, commit))
871                         *revision_sources_at(&revision_sources, commit) = full_name;
872         }
873 }
874
875 static void handle_tags_and_duplicates(void)
876 {
877         struct commit *commit;
878         int i;
879
880         for (i = extra_refs.nr - 1; i >= 0; i--) {
881                 const char *name = extra_refs.items[i].string;
882                 struct object *object = extra_refs.items[i].util;
883                 switch (object->type) {
884                 case OBJ_TAG:
885                         handle_tag(name, (struct tag *)object);
886                         break;
887                 case OBJ_COMMIT:
888                         if (anonymize)
889                                 name = anonymize_refname(name);
890                         /* create refs pointing to already seen commits */
891                         commit = (struct commit *)object;
892                         printf("reset %s\nfrom :%d\n\n", name,
893                                get_object_mark(&commit->object));
894                         show_progress();
895                         break;
896                 }
897         }
898 }
899
900 static void export_marks(char *file)
901 {
902         unsigned int i;
903         uint32_t mark;
904         struct decoration_entry *deco = idnums.entries;
905         FILE *f;
906         int e = 0;
907
908         f = fopen_for_writing(file);
909         if (!f)
910                 die_errno("Unable to open marks file %s for writing.", file);
911
912         for (i = 0; i < idnums.size; i++) {
913                 if (deco->base && deco->base->type == 1) {
914                         mark = ptr_to_mark(deco->decoration);
915                         if (fprintf(f, ":%"PRIu32" %s\n", mark,
916                                 oid_to_hex(&deco->base->oid)) < 0) {
917                             e = 1;
918                             break;
919                         }
920                 }
921                 deco++;
922         }
923
924         e |= ferror(f);
925         e |= fclose(f);
926         if (e)
927                 error("Unable to write marks file %s.", file);
928 }
929
930 static void import_marks(char *input_file)
931 {
932         char line[512];
933         FILE *f = xfopen(input_file, "r");
934
935         while (fgets(line, sizeof(line), f)) {
936                 uint32_t mark;
937                 char *line_end, *mark_end;
938                 struct object_id oid;
939                 struct object *object;
940                 struct commit *commit;
941                 enum object_type type;
942
943                 line_end = strchr(line, '\n');
944                 if (line[0] != ':' || !line_end)
945                         die("corrupt mark line: %s", line);
946                 *line_end = '\0';
947
948                 mark = strtoumax(line + 1, &mark_end, 10);
949                 if (!mark || mark_end == line + 1
950                         || *mark_end != ' ' || get_oid_hex(mark_end + 1, &oid))
951                         die("corrupt mark line: %s", line);
952
953                 if (last_idnum < mark)
954                         last_idnum = mark;
955
956                 type = oid_object_info(the_repository, &oid, NULL);
957                 if (type < 0)
958                         die("object not found: %s", oid_to_hex(&oid));
959
960                 if (type != OBJ_COMMIT)
961                         /* only commits */
962                         continue;
963
964                 commit = lookup_commit(&oid);
965                 if (!commit)
966                         die("not a commit? can't happen: %s", oid_to_hex(&oid));
967
968                 object = &commit->object;
969
970                 if (object->flags & SHOWN)
971                         error("Object %s already has a mark", oid_to_hex(&oid));
972
973                 mark_object(object, mark);
974
975                 object->flags |= SHOWN;
976         }
977         fclose(f);
978 }
979
980 static void handle_deletes(void)
981 {
982         int i;
983         for (i = 0; i < refspecs.nr; i++) {
984                 struct refspec_item *refspec = &refspecs.items[i];
985                 if (*refspec->src)
986                         continue;
987
988                 printf("reset %s\nfrom %s\n\n",
989                                 refspec->dst, sha1_to_hex(null_sha1));
990         }
991 }
992
993 int cmd_fast_export(int argc, const char **argv, const char *prefix)
994 {
995         struct rev_info revs;
996         struct object_array commits = OBJECT_ARRAY_INIT;
997         struct commit *commit;
998         char *export_filename = NULL, *import_filename = NULL;
999         uint32_t lastimportid;
1000         struct string_list refspecs_list = STRING_LIST_INIT_NODUP;
1001         struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP;
1002         struct option options[] = {
1003                 OPT_INTEGER(0, "progress", &progress,
1004                             N_("show progress after <n> objects")),
1005                 OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, N_("mode"),
1006                              N_("select handling of signed tags"),
1007                              parse_opt_signed_tag_mode),
1008                 OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
1009                              N_("select handling of tags that tag filtered objects"),
1010                              parse_opt_tag_of_filtered_mode),
1011                 OPT_STRING(0, "export-marks", &export_filename, N_("file"),
1012                              N_("Dump marks to this file")),
1013                 OPT_STRING(0, "import-marks", &import_filename, N_("file"),
1014                              N_("Import marks from this file")),
1015                 OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger,
1016                          N_("Fake a tagger when tags lack one")),
1017                 OPT_BOOL(0, "full-tree", &full_tree,
1018                          N_("Output full tree for each commit")),
1019                 OPT_BOOL(0, "use-done-feature", &use_done_feature,
1020                              N_("Use the done feature to terminate the stream")),
1021                 OPT_BOOL(0, "no-data", &no_data, N_("Skip output of blob data")),
1022                 OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
1023                              N_("Apply refspec to exported refs")),
1024                 OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
1025                 OPT_END()
1026         };
1027
1028         if (argc == 1)
1029                 usage_with_options (fast_export_usage, options);
1030
1031         /* we handle encodings */
1032         git_config(git_default_config, NULL);
1033
1034         init_revisions(&revs, prefix);
1035         init_revision_sources(&revision_sources);
1036         revs.topo_order = 1;
1037         revs.sources = &revision_sources;
1038         revs.rewrite_parents = 1;
1039         argc = parse_options(argc, argv, prefix, options, fast_export_usage,
1040                         PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN);
1041         argc = setup_revisions(argc, argv, &revs, NULL);
1042         if (argc > 1)
1043                 usage_with_options (fast_export_usage, options);
1044
1045         if (refspecs_list.nr) {
1046                 int i;
1047
1048                 for (i = 0; i < refspecs_list.nr; i++)
1049                         refspec_append(&refspecs, refspecs_list.items[i].string);
1050
1051                 string_list_clear(&refspecs_list, 1);
1052         }
1053
1054         if (use_done_feature)
1055                 printf("feature done\n");
1056
1057         if (import_filename)
1058                 import_marks(import_filename);
1059         lastimportid = last_idnum;
1060
1061         if (import_filename && revs.prune_data.nr)
1062                 full_tree = 1;
1063
1064         get_tags_and_duplicates(&revs.cmdline);
1065
1066         if (prepare_revision_walk(&revs))
1067                 die("revision walk setup failed");
1068         revs.diffopt.format_callback = show_filemodify;
1069         revs.diffopt.format_callback_data = &paths_of_changed_objects;
1070         revs.diffopt.flags.recursive = 1;
1071         while ((commit = get_revision(&revs))) {
1072                 if (has_unshown_parent(commit)) {
1073                         add_object_array(&commit->object, NULL, &commits);
1074                 }
1075                 else {
1076                         handle_commit(commit, &revs, &paths_of_changed_objects);
1077                         handle_tail(&commits, &revs, &paths_of_changed_objects);
1078                 }
1079         }
1080
1081         handle_tags_and_duplicates();
1082         handle_deletes();
1083
1084         if (export_filename && lastimportid != last_idnum)
1085                 export_marks(export_filename);
1086
1087         if (use_done_feature)
1088                 printf("done\n");
1089
1090         refspec_clear(&refspecs);
1091
1092         return 0;
1093 }