vcs-svn: pass paths through to fast-import
[git] / vcs-svn / svndump.c
1 /*
2  * Parse and rearrange a svnadmin dump.
3  * Create the dump with:
4  * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
5  *
6  * Licensed under a two-clause BSD-style license.
7  * See LICENSE for details.
8  */
9
10 #include "cache.h"
11 #include "repo_tree.h"
12 #include "fast_export.h"
13 #include "line_buffer.h"
14 #include "strbuf.h"
15
/*
 * Value handed to fast_export_init() by svndump_init()
 * (presumably a file descriptor number -- confirm against fast_export).
 */
#define REPORT_FILENO 3

/*
 * Compare start of string to literal of equal length;
 * must be guarded by length test.
 */
#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1)

/*
 * Values of node_ctx.action.  svndump_read() sets them from the
 * Node-action header; reset_node_ctx() restores NODEACT_UNKNOWN.
 */
#define NODEACT_REPLACE 4
#define NODEACT_DELETE 3
#define NODEACT_ADD 2
#define NODEACT_CHANGE 1
#define NODEACT_UNKNOWN 0

/* States: */
#define DUMP_CTX 0      /* dump metadata */
#define REV_CTX  1      /* revision metadata */
#define NODE_CTX 2      /* node metadata */
#define INTERNODE_CTX 3 /* between nodes */

/*
 * Sentinel stored in node_ctx.propLength and node_ctx.textLength by
 * reset_node_ctx(); handle_node() treats any other value as "the
 * corresponding content-length header was seen".
 */
#define LENGTH_UNKNOWN (~0)
/* Not referenced in the visible part of this file. */
#define DATE_RFC2822_LEN 31
38
/* The dump being parsed; every read in this file goes through it. */
static struct line_buffer input = LINE_BUFFER_INIT;

/*
 * Headers collected for the node currently being parsed.
 *
 *  action      - one of the NODEACT_* values (Node-action)
 *  propLength  - Prop-content-length, or LENGTH_UNKNOWN if absent
 *  textLength  - Text-content-length, or LENGTH_UNKNOWN if absent
 *  srcRev      - Node-copyfrom-rev; 0 means "no copy source"
 *  type        - REPO_MODE_* value derived from Node-kind and properties
 *  src, dst    - Node-copyfrom-path and Node-path
 *  text_delta,
 *  prop_delta  - 1 when the Text-delta / Prop-delta header is "true"
 */
static struct {
        uint32_t action, propLength, textLength, srcRev, type;
        struct strbuf src, dst;
        uint32_t text_delta, prop_delta;
} node_ctx;

/*
 * Metadata of the revision currently being parsed: its number
 * (Revision-number), the timestamp parsed from svn:date, and the
 * svn:log / svn:author property values.
 */
static struct {
        uint32_t revision;
        unsigned long timestamp;
        struct strbuf log, author;
} rev_ctx;

/*
 * Dump-wide metadata: the SVN-fs-dump-format-version (reset to 1 by
 * reset_dump_ctx()), the UUID header, and the url passed to
 * svndump_read().
 */
static struct {
        uint32_t version;
        struct strbuf uuid, url;
} dump_ctx;
57
58 static void reset_node_ctx(char *fname)
59 {
60         node_ctx.type = 0;
61         node_ctx.action = NODEACT_UNKNOWN;
62         node_ctx.propLength = LENGTH_UNKNOWN;
63         node_ctx.textLength = LENGTH_UNKNOWN;
64         strbuf_reset(&node_ctx.src);
65         node_ctx.srcRev = 0;
66         strbuf_reset(&node_ctx.dst);
67         if (fname)
68                 strbuf_addstr(&node_ctx.dst, fname);
69         node_ctx.text_delta = 0;
70         node_ctx.prop_delta = 0;
71 }
72
73 static void reset_rev_ctx(uint32_t revision)
74 {
75         rev_ctx.revision = revision;
76         rev_ctx.timestamp = 0;
77         strbuf_reset(&rev_ctx.log);
78         strbuf_reset(&rev_ctx.author);
79 }
80
81 static void reset_dump_ctx(const char *url)
82 {
83         strbuf_reset(&dump_ctx.url);
84         if (url)
85                 strbuf_addstr(&dump_ctx.url, url);
86         dump_ctx.version = 1;
87         strbuf_reset(&dump_ctx.uuid);
88 }
89
90 static void handle_property(const struct strbuf *key_buf,
91                                 const char *val, uint32_t len,
92                                 uint32_t *type_set)
93 {
94         const char *key = key_buf->buf;
95         size_t keylen = key_buf->len;
96
97         switch (keylen + 1) {
98         case sizeof("svn:log"):
99                 if (constcmp(key, "svn:log"))
100                         break;
101                 if (!val)
102                         die("invalid dump: unsets svn:log");
103                 strbuf_reset(&rev_ctx.log);
104                 strbuf_add(&rev_ctx.log, val, len);
105                 break;
106         case sizeof("svn:author"):
107                 if (constcmp(key, "svn:author"))
108                         break;
109                 strbuf_reset(&rev_ctx.author);
110                 if (val)
111                         strbuf_add(&rev_ctx.author, val, len);
112                 break;
113         case sizeof("svn:date"):
114                 if (constcmp(key, "svn:date"))
115                         break;
116                 if (!val)
117                         die("invalid dump: unsets svn:date");
118                 if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
119                         warning("invalid timestamp: %s", val);
120                 break;
121         case sizeof("svn:executable"):
122         case sizeof("svn:special"):
123                 if (keylen == strlen("svn:executable") &&
124                     constcmp(key, "svn:executable"))
125                         break;
126                 if (keylen == strlen("svn:special") &&
127                     constcmp(key, "svn:special"))
128                         break;
129                 if (*type_set) {
130                         if (!val)
131                                 return;
132                         die("invalid dump: sets type twice");
133                 }
134                 if (!val) {
135                         node_ctx.type = REPO_MODE_BLB;
136                         return;
137                 }
138                 *type_set = 1;
139                 node_ctx.type = keylen == strlen("svn:executable") ?
140                                 REPO_MODE_EXE :
141                                 REPO_MODE_LNK;
142         }
143 }
144
145 static void die_short_read(void)
146 {
147         if (buffer_ferror(&input))
148                 die_errno("error reading dump file");
149         die("invalid dump: unexpected end of file");
150 }
151
152 static void read_props(void)
153 {
154         static struct strbuf key = STRBUF_INIT;
155         const char *t;
156         /*
157          * NEEDSWORK: to support simple mode changes like
158          *      K 11
159          *      svn:special
160          *      V 1
161          *      *
162          *      D 14
163          *      svn:executable
164          * we keep track of whether a mode has been set and reset to
165          * plain file only if not.  We should be keeping track of the
166          * symlink and executable bits separately instead.
167          */
168         uint32_t type_set = 0;
169         while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
170                 uint32_t len;
171                 const char *val;
172                 const char type = t[0];
173                 int ch;
174
175                 if (!type || t[1] != ' ')
176                         die("invalid property line: %s\n", t);
177                 len = atoi(&t[2]);
178                 val = buffer_read_string(&input, len);
179                 if (!val || strlen(val) != len)
180                         die_short_read();
181
182                 /* Discard trailing newline. */
183                 ch = buffer_read_char(&input);
184                 if (ch == EOF)
185                         die_short_read();
186                 if (ch != '\n')
187                         die("invalid dump: expected newline after %s", val);
188
189                 switch (type) {
190                 case 'K':
191                 case 'D':
192                         strbuf_reset(&key);
193                         if (val)
194                                 strbuf_add(&key, val, len);
195                         if (type == 'K')
196                                 continue;
197                         assert(type == 'D');
198                         val = NULL;
199                         len = 0;
200                         /* fall through */
201                 case 'V':
202                         handle_property(&key, val, len, &type_set);
203                         strbuf_reset(&key);
204                         continue;
205                 default:
206                         die("invalid property line: %s\n", t);
207                 }
208         }
209 }
210
211 static void handle_node(void)
212 {
213         const uint32_t type = node_ctx.type;
214         const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
215         const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;
216         /*
217          * Old text for this node:
218          *  NULL        - directory or bug
219          *  empty_blob  - empty
220          *  "<dataref>" - data retrievable from fast-import
221          */
222         static const char *const empty_blob = "::empty::";
223         const char *old_data = NULL;
224
225         if (node_ctx.text_delta)
226                 die("text deltas not supported");
227
228         if (node_ctx.action == NODEACT_DELETE) {
229                 if (have_text || have_props || node_ctx.srcRev)
230                         die("invalid dump: deletion node has "
231                                 "copyfrom info, text, or properties");
232                 return repo_delete(node_ctx.dst.buf);
233         }
234         if (node_ctx.action == NODEACT_REPLACE) {
235                 repo_delete(node_ctx.dst.buf);
236                 node_ctx.action = NODEACT_ADD;
237         }
238         if (node_ctx.srcRev) {
239                 repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
240                 if (node_ctx.action == NODEACT_ADD)
241                         node_ctx.action = NODEACT_CHANGE;
242         }
243         if (have_text && type == REPO_MODE_DIR)
244                 die("invalid dump: directories cannot have text attached");
245
246         /*
247          * Find old content (old_data) and decide on the new mode.
248          */
249         if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
250                 if (type != REPO_MODE_DIR)
251                         die("invalid dump: root of tree is not a regular file");
252                 old_data = NULL;
253         } else if (node_ctx.action == NODEACT_CHANGE) {
254                 uint32_t mode;
255                 old_data = repo_read_path(node_ctx.dst.buf);
256                 mode = repo_read_mode(node_ctx.dst.buf);
257                 if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
258                         die("invalid dump: cannot modify a directory into a file");
259                 if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
260                         die("invalid dump: cannot modify a file into a directory");
261                 node_ctx.type = mode;
262         } else if (node_ctx.action == NODEACT_ADD) {
263                 if (type == REPO_MODE_DIR)
264                         old_data = NULL;
265                 else if (have_text)
266                         old_data = empty_blob;
267                 else
268                         die("invalid dump: adds node without text");
269         } else {
270                 die("invalid dump: Node-path block lacks Node-action");
271         }
272
273         /*
274          * Adjust mode to reflect properties.
275          */
276         if (have_props) {
277                 if (!node_ctx.prop_delta)
278                         node_ctx.type = type;
279                 if (node_ctx.propLength)
280                         read_props();
281         }
282
283         /*
284          * Save the result.
285          */
286         if (type == REPO_MODE_DIR)      /* directories are not tracked. */
287                 return;
288         assert(old_data);
289         if (old_data == empty_blob)
290                 /* For the fast_export_* functions, NULL means empty. */
291                 old_data = NULL;
292         if (!have_text) {
293                 fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
294                 return;
295         }
296         fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
297         fast_export_data(node_ctx.type, node_ctx.textLength, &input);
298 }
299
300 static void begin_revision(void)
301 {
302         if (!rev_ctx.revision)  /* revision 0 gets no git commit. */
303                 return;
304         fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
305                 rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
306                 rev_ctx.timestamp);
307 }
308
309 static void end_revision(void)
310 {
311         if (rev_ctx.revision)
312                 fast_export_end_commit(rev_ctx.revision);
313 }
314
/*
 * Parse the dump available through the global `input` line buffer.
 *
 * url seeds dump_ctx.url (NULL leaves it empty).  The input is read
 * one header line at a time and dispatched on the header name;
 * active_ctx records which kind of record the headers seen so far
 * belong to.  A pending node or revision is flushed when the next
 * record starts and once more at end of input.
 */
void svndump_read(const char *url)
{
        char *val;
        char *t;
        uint32_t active_ctx = DUMP_CTX;
        uint32_t len;

        reset_dump_ctx(url);
        while ((t = buffer_read_line(&input))) {
                /* Only "Key: value" lines are of interest; skip the rest. */
                val = strstr(t, ": ");
                if (!val)
                        continue;
                val += 2;

                /*
                 * Dispatch on the key length so each case label can be
                 * written as sizeof("Key"); constcmp() then confirms the
                 * text.  A key of matching length but different text
                 * skips the line via `continue`.  Key lengths with no
                 * case label are ignored.
                 */
                /* strlen(key) + 1 */
                switch (val - t - 1) {
                case sizeof("SVN-fs-dump-format-version"):
                        if (constcmp(t, "SVN-fs-dump-format-version"))
                                continue;
                        dump_ctx.version = atoi(val);
                        if (dump_ctx.version > 3)
                                die("expected svn dump format version <= 3, found %"PRIu32,
                                    dump_ctx.version);
                        break;
                case sizeof("UUID"):
                        if (constcmp(t, "UUID"))
                                continue;
                        strbuf_reset(&dump_ctx.uuid);
                        strbuf_addstr(&dump_ctx.uuid, val);
                        break;
                case sizeof("Revision-number"):
                        if (constcmp(t, "Revision-number"))
                                continue;
                        /*
                         * A new revision starts: finish whatever was
                         * pending for the previous one first.
                         */
                        if (active_ctx == NODE_CTX)
                                handle_node();
                        if (active_ctx == REV_CTX)
                                begin_revision();
                        if (active_ctx != DUMP_CTX)
                                end_revision();
                        active_ctx = REV_CTX;
                        reset_rev_ctx(atoi(val));
                        break;
                case sizeof("Node-path"):
                        /* "Node-path" and "Node-kind" have the same length. */
                        if (prefixcmp(t, "Node-"))
                                continue;
                        if (!constcmp(t + strlen("Node-"), "path")) {
                                /*
                                 * A new node starts: flush the previous
                                 * node, or open the commit if this is the
                                 * first node of the revision.
                                 */
                                if (active_ctx == NODE_CTX)
                                        handle_node();
                                if (active_ctx == REV_CTX)
                                        begin_revision();
                                active_ctx = NODE_CTX;
                                reset_node_ctx(val);
                                break;
                        }
                        if (constcmp(t + strlen("Node-"), "kind"))
                                continue;
                        if (!strcmp(val, "dir"))
                                node_ctx.type = REPO_MODE_DIR;
                        else if (!strcmp(val, "file"))
                                node_ctx.type = REPO_MODE_BLB;
                        else
                                fprintf(stderr, "Unknown node-kind: %s\n", val);
                        break;
                case sizeof("Node-action"):
                        if (constcmp(t, "Node-action"))
                                continue;
                        if (!strcmp(val, "delete")) {
                                node_ctx.action = NODEACT_DELETE;
                        } else if (!strcmp(val, "add")) {
                                node_ctx.action = NODEACT_ADD;
                        } else if (!strcmp(val, "change")) {
                                node_ctx.action = NODEACT_CHANGE;
                        } else if (!strcmp(val, "replace")) {
                                node_ctx.action = NODEACT_REPLACE;
                        } else {
                                fprintf(stderr, "Unknown node-action: %s\n", val);
                                node_ctx.action = NODEACT_UNKNOWN;
                        }
                        break;
                case sizeof("Node-copyfrom-path"):
                        if (constcmp(t, "Node-copyfrom-path"))
                                continue;
                        strbuf_reset(&node_ctx.src);
                        strbuf_addstr(&node_ctx.src, val);
                        break;
                case sizeof("Node-copyfrom-rev"):
                        if (constcmp(t, "Node-copyfrom-rev"))
                                continue;
                        node_ctx.srcRev = atoi(val);
                        break;
                case sizeof("Text-content-length"):
                        /* "Prop-content-length" has the same length. */
                        if (!constcmp(t, "Text-content-length")) {
                                node_ctx.textLength = atoi(val);
                                break;
                        }
                        if (constcmp(t, "Prop-content-length"))
                                continue;
                        node_ctx.propLength = atoi(val);
                        break;
                case sizeof("Text-delta"):
                        /* "Prop-delta" has the same length. */
                        if (!constcmp(t, "Text-delta")) {
                                node_ctx.text_delta = !strcmp(val, "true");
                                break;
                        }
                        if (constcmp(t, "Prop-delta"))
                                continue;
                        node_ctx.prop_delta = !strcmp(val, "true");
                        break;
                case sizeof("Content-length"):
                        if (constcmp(t, "Content-length"))
                                continue;
                        len = atoi(val);
                        /* The header block must end with a blank line. */
                        t = buffer_read_line(&input);
                        if (!t)
                                die_short_read();
                        if (*t)
                                die("invalid dump: expected blank line after content length header");
                        if (active_ctx == REV_CTX) {
                                /* Revision content is its property block. */
                                read_props();
                        } else if (active_ctx == NODE_CTX) {
                                /* handle_node() consumes the node's content. */
                                handle_node();
                                active_ctx = INTERNODE_CTX;
                        } else {
                                /* Content with no owner: report and skip it. */
                                fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
                                if (buffer_skip_bytes(&input, len) != len)
                                        die_short_read();
                        }
                }
        }
        /* The loop also ends on a read error; tell that apart from EOF. */
        if (buffer_ferror(&input))
                die_short_read();
        /* End of input: flush whatever is still pending. */
        if (active_ctx == NODE_CTX)
                handle_node();
        if (active_ctx == REV_CTX)
                begin_revision();
        if (active_ctx != DUMP_CTX)
                end_revision();
}
453
454 int svndump_init(const char *filename)
455 {
456         if (buffer_init(&input, filename))
457                 return error("cannot open %s: %s", filename, strerror(errno));
458         fast_export_init(REPORT_FILENO);
459         strbuf_init(&dump_ctx.uuid, 4096);
460         strbuf_init(&dump_ctx.url, 4096);
461         strbuf_init(&rev_ctx.log, 4096);
462         strbuf_init(&rev_ctx.author, 4096);
463         strbuf_init(&node_ctx.src, 4096);
464         strbuf_init(&node_ctx.dst, 4096);
465         reset_dump_ctx(NULL);
466         reset_rev_ctx(0);
467         reset_node_ctx(NULL);
468         return 0;
469 }
470
471 void svndump_deinit(void)
472 {
473         fast_export_deinit();
474         reset_dump_ctx(NULL);
475         reset_rev_ctx(0);
476         reset_node_ctx(NULL);
477         strbuf_release(&rev_ctx.log);
478         strbuf_release(&node_ctx.src);
479         strbuf_release(&node_ctx.dst);
480         if (buffer_deinit(&input))
481                 fprintf(stderr, "Input error\n");
482         if (ferror(stdout))
483                 fprintf(stderr, "Output error\n");
484 }
485
486 void svndump_reset(void)
487 {
488         fast_export_reset();
489         buffer_reset(&input);
490         strbuf_release(&dump_ctx.uuid);
491         strbuf_release(&dump_ctx.url);
492         strbuf_release(&rev_ctx.log);
493         strbuf_release(&rev_ctx.author);
494 }