Merge branch 'db/delta-applier' into db/text-delta
[git] / vcs-svn / svndump.c
1 /*
2  * Parse and rearrange a svnadmin dump.
3  * Create the dump with:
4  * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
5  *
6  * Licensed under a two-clause BSD-style license.
7  * See LICENSE for details.
8  */
9
10 #include "cache.h"
11 #include "repo_tree.h"
12 #include "fast_export.h"
13 #include "line_buffer.h"
14 #include "strbuf.h"
15
/*
 * Compare the leading bytes of s with the string literal ref, excluding
 * ref's terminating NUL.  Evaluates to 0 on a match, like memcmp().
 * The caller must already know that s holds at least that many bytes.
 */
#define constcmp(s, ref) memcmp((s), (ref), sizeof(ref) - 1)
21
/* Descriptor number handed to fast_export_init() by svndump_init(). */
#define REPORT_FILENO 3

/* Values of node_ctx.action, set from the Node-action header. */
#define NODEACT_UNKNOWN 0
#define NODEACT_CHANGE 1
#define NODEACT_ADD 2
#define NODEACT_DELETE 3
#define NODEACT_REPLACE 4

/* Parser states used by svndump_read(): */
#define DUMP_CTX 0      /* dump metadata */
#define REV_CTX  1      /* revision metadata */
#define NODE_CTX 2      /* node metadata */
#define INTERNODE_CTX 3 /* between nodes */

/* Marks a Prop-/Text-content-length that has not been supplied. */
#define LENGTH_UNKNOWN (~0)
#define DATE_RFC2822_LEN 31
38
39 static struct line_buffer input = LINE_BUFFER_INIT;
40
41 static struct {
42         uint32_t action, propLength, textLength, srcRev, type;
43         struct strbuf src, dst;
44         uint32_t text_delta, prop_delta;
45 } node_ctx;
46
47 static struct {
48         uint32_t revision;
49         unsigned long timestamp;
50         struct strbuf log, author;
51 } rev_ctx;
52
53 static struct {
54         uint32_t version;
55         struct strbuf uuid, url;
56 } dump_ctx;
57
58 static void reset_node_ctx(char *fname)
59 {
60         node_ctx.type = 0;
61         node_ctx.action = NODEACT_UNKNOWN;
62         node_ctx.propLength = LENGTH_UNKNOWN;
63         node_ctx.textLength = LENGTH_UNKNOWN;
64         strbuf_reset(&node_ctx.src);
65         node_ctx.srcRev = 0;
66         strbuf_reset(&node_ctx.dst);
67         if (fname)
68                 strbuf_addstr(&node_ctx.dst, fname);
69         node_ctx.text_delta = 0;
70         node_ctx.prop_delta = 0;
71 }
72
73 static void reset_rev_ctx(uint32_t revision)
74 {
75         rev_ctx.revision = revision;
76         rev_ctx.timestamp = 0;
77         strbuf_reset(&rev_ctx.log);
78         strbuf_reset(&rev_ctx.author);
79 }
80
81 static void reset_dump_ctx(const char *url)
82 {
83         strbuf_reset(&dump_ctx.url);
84         if (url)
85                 strbuf_addstr(&dump_ctx.url, url);
86         dump_ctx.version = 1;
87         strbuf_reset(&dump_ctx.uuid);
88 }
89
90 static void handle_property(const struct strbuf *key_buf,
91                                 struct strbuf *val,
92                                 uint32_t *type_set)
93 {
94         const char *key = key_buf->buf;
95         size_t keylen = key_buf->len;
96
97         switch (keylen + 1) {
98         case sizeof("svn:log"):
99                 if (constcmp(key, "svn:log"))
100                         break;
101                 if (!val)
102                         die("invalid dump: unsets svn:log");
103                 strbuf_swap(&rev_ctx.log, val);
104                 break;
105         case sizeof("svn:author"):
106                 if (constcmp(key, "svn:author"))
107                         break;
108                 if (!val)
109                         strbuf_reset(&rev_ctx.author);
110                 else
111                         strbuf_swap(&rev_ctx.author, val);
112                 break;
113         case sizeof("svn:date"):
114                 if (constcmp(key, "svn:date"))
115                         break;
116                 if (!val)
117                         die("invalid dump: unsets svn:date");
118                 if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL))
119                         warning("invalid timestamp: %s", val->buf);
120                 break;
121         case sizeof("svn:executable"):
122         case sizeof("svn:special"):
123                 if (keylen == strlen("svn:executable") &&
124                     constcmp(key, "svn:executable"))
125                         break;
126                 if (keylen == strlen("svn:special") &&
127                     constcmp(key, "svn:special"))
128                         break;
129                 if (*type_set) {
130                         if (!val)
131                                 return;
132                         die("invalid dump: sets type twice");
133                 }
134                 if (!val) {
135                         node_ctx.type = REPO_MODE_BLB;
136                         return;
137                 }
138                 *type_set = 1;
139                 node_ctx.type = keylen == strlen("svn:executable") ?
140                                 REPO_MODE_EXE :
141                                 REPO_MODE_LNK;
142         }
143 }
144
145 static void die_short_read(void)
146 {
147         if (buffer_ferror(&input))
148                 die_errno("error reading dump file");
149         die("invalid dump: unexpected end of file");
150 }
151
152 static void read_props(void)
153 {
154         static struct strbuf key = STRBUF_INIT;
155         static struct strbuf val = STRBUF_INIT;
156         const char *t;
157         /*
158          * NEEDSWORK: to support simple mode changes like
159          *      K 11
160          *      svn:special
161          *      V 1
162          *      *
163          *      D 14
164          *      svn:executable
165          * we keep track of whether a mode has been set and reset to
166          * plain file only if not.  We should be keeping track of the
167          * symlink and executable bits separately instead.
168          */
169         uint32_t type_set = 0;
170         while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
171                 uint32_t len;
172                 const char type = t[0];
173                 int ch;
174
175                 if (!type || t[1] != ' ')
176                         die("invalid property line: %s\n", t);
177                 len = atoi(&t[2]);
178                 strbuf_reset(&val);
179                 buffer_read_binary(&input, &val, len);
180                 if (val.len < len)
181                         die_short_read();
182
183                 /* Discard trailing newline. */
184                 ch = buffer_read_char(&input);
185                 if (ch == EOF)
186                         die_short_read();
187                 if (ch != '\n')
188                         die("invalid dump: expected newline after %s", val.buf);
189
190                 switch (type) {
191                 case 'K':
192                         strbuf_swap(&key, &val);
193                         continue;
194                 case 'D':
195                         handle_property(&val, NULL, &type_set);
196                         continue;
197                 case 'V':
198                         handle_property(&key, &val, &type_set);
199                         strbuf_reset(&key);
200                         continue;
201                 default:
202                         die("invalid property line: %s\n", t);
203                 }
204         }
205 }
206
207 static void handle_node(void)
208 {
209         const uint32_t type = node_ctx.type;
210         const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
211         const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;
212         /*
213          * Old text for this node:
214          *  NULL        - directory or bug
215          *  empty_blob  - empty
216          *  "<dataref>" - data retrievable from fast-import
217          */
218         static const char *const empty_blob = "::empty::";
219         const char *old_data = NULL;
220         uint32_t old_mode = REPO_MODE_BLB;
221
222         if (node_ctx.action == NODEACT_DELETE) {
223                 if (have_text || have_props || node_ctx.srcRev)
224                         die("invalid dump: deletion node has "
225                                 "copyfrom info, text, or properties");
226                 repo_delete(node_ctx.dst.buf);
227                 return;
228         }
229         if (node_ctx.action == NODEACT_REPLACE) {
230                 repo_delete(node_ctx.dst.buf);
231                 node_ctx.action = NODEACT_ADD;
232         }
233         if (node_ctx.srcRev) {
234                 repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf);
235                 if (node_ctx.action == NODEACT_ADD)
236                         node_ctx.action = NODEACT_CHANGE;
237         }
238         if (have_text && type == REPO_MODE_DIR)
239                 die("invalid dump: directories cannot have text attached");
240
241         /*
242          * Find old content (old_data) and decide on the new mode.
243          */
244         if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) {
245                 if (type != REPO_MODE_DIR)
246                         die("invalid dump: root of tree is not a regular file");
247                 old_data = NULL;
248         } else if (node_ctx.action == NODEACT_CHANGE) {
249                 uint32_t mode;
250                 old_data = repo_read_path(node_ctx.dst.buf, &mode);
251                 if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
252                         die("invalid dump: cannot modify a directory into a file");
253                 if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
254                         die("invalid dump: cannot modify a file into a directory");
255                 node_ctx.type = mode;
256                 old_mode = mode;
257         } else if (node_ctx.action == NODEACT_ADD) {
258                 if (type == REPO_MODE_DIR)
259                         old_data = NULL;
260                 else if (have_text)
261                         old_data = empty_blob;
262                 else
263                         die("invalid dump: adds node without text");
264         } else {
265                 die("invalid dump: Node-path block lacks Node-action");
266         }
267
268         /*
269          * Adjust mode to reflect properties.
270          */
271         if (have_props) {
272                 if (!node_ctx.prop_delta)
273                         node_ctx.type = type;
274                 if (node_ctx.propLength)
275                         read_props();
276         }
277
278         /*
279          * Save the result.
280          */
281         if (type == REPO_MODE_DIR)      /* directories are not tracked. */
282                 return;
283         assert(old_data);
284         if (old_data == empty_blob)
285                 /* For the fast_export_* functions, NULL means empty. */
286                 old_data = NULL;
287         if (!have_text) {
288                 fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
289                 return;
290         }
291         if (!node_ctx.text_delta) {
292                 fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
293                 fast_export_data(node_ctx.type, node_ctx.textLength, &input);
294                 return;
295         }
296         fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
297         fast_export_blob_delta(node_ctx.type, old_mode, old_data,
298                                 node_ctx.textLength, &input);
299 }
300
301 static void begin_revision(void)
302 {
303         if (!rev_ctx.revision)  /* revision 0 gets no git commit. */
304                 return;
305         fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf,
306                 &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf,
307                 rev_ctx.timestamp);
308 }
309
310 static void end_revision(void)
311 {
312         if (rev_ctx.revision)
313                 fast_export_end_commit(rev_ctx.revision);
314 }
315
316 void svndump_read(const char *url)
317 {
318         char *val;
319         char *t;
320         uint32_t active_ctx = DUMP_CTX;
321         uint32_t len;
322
323         reset_dump_ctx(url);
324         while ((t = buffer_read_line(&input))) {
325                 val = strchr(t, ':');
326                 if (!val)
327                         continue;
328                 val++;
329                 if (*val != ' ')
330                         continue;
331                 val++;
332
333                 /* strlen(key) + 1 */
334                 switch (val - t - 1) {
335                 case sizeof("SVN-fs-dump-format-version"):
336                         if (constcmp(t, "SVN-fs-dump-format-version"))
337                                 continue;
338                         dump_ctx.version = atoi(val);
339                         if (dump_ctx.version > 3)
340                                 die("expected svn dump format version <= 3, found %"PRIu32,
341                                     dump_ctx.version);
342                         break;
343                 case sizeof("UUID"):
344                         if (constcmp(t, "UUID"))
345                                 continue;
346                         strbuf_reset(&dump_ctx.uuid);
347                         strbuf_addstr(&dump_ctx.uuid, val);
348                         break;
349                 case sizeof("Revision-number"):
350                         if (constcmp(t, "Revision-number"))
351                                 continue;
352                         if (active_ctx == NODE_CTX)
353                                 handle_node();
354                         if (active_ctx == REV_CTX)
355                                 begin_revision();
356                         if (active_ctx != DUMP_CTX)
357                                 end_revision();
358                         active_ctx = REV_CTX;
359                         reset_rev_ctx(atoi(val));
360                         break;
361                 case sizeof("Node-path"):
362                         if (prefixcmp(t, "Node-"))
363                                 continue;
364                         if (!constcmp(t + strlen("Node-"), "path")) {
365                                 if (active_ctx == NODE_CTX)
366                                         handle_node();
367                                 if (active_ctx == REV_CTX)
368                                         begin_revision();
369                                 active_ctx = NODE_CTX;
370                                 reset_node_ctx(val);
371                                 break;
372                         }
373                         if (constcmp(t + strlen("Node-"), "kind"))
374                                 continue;
375                         if (!strcmp(val, "dir"))
376                                 node_ctx.type = REPO_MODE_DIR;
377                         else if (!strcmp(val, "file"))
378                                 node_ctx.type = REPO_MODE_BLB;
379                         else
380                                 fprintf(stderr, "Unknown node-kind: %s\n", val);
381                         break;
382                 case sizeof("Node-action"):
383                         if (constcmp(t, "Node-action"))
384                                 continue;
385                         if (!strcmp(val, "delete")) {
386                                 node_ctx.action = NODEACT_DELETE;
387                         } else if (!strcmp(val, "add")) {
388                                 node_ctx.action = NODEACT_ADD;
389                         } else if (!strcmp(val, "change")) {
390                                 node_ctx.action = NODEACT_CHANGE;
391                         } else if (!strcmp(val, "replace")) {
392                                 node_ctx.action = NODEACT_REPLACE;
393                         } else {
394                                 fprintf(stderr, "Unknown node-action: %s\n", val);
395                                 node_ctx.action = NODEACT_UNKNOWN;
396                         }
397                         break;
398                 case sizeof("Node-copyfrom-path"):
399                         if (constcmp(t, "Node-copyfrom-path"))
400                                 continue;
401                         strbuf_reset(&node_ctx.src);
402                         strbuf_addstr(&node_ctx.src, val);
403                         break;
404                 case sizeof("Node-copyfrom-rev"):
405                         if (constcmp(t, "Node-copyfrom-rev"))
406                                 continue;
407                         node_ctx.srcRev = atoi(val);
408                         break;
409                 case sizeof("Text-content-length"):
410                         if (!constcmp(t, "Text-content-length")) {
411                                 node_ctx.textLength = atoi(val);
412                                 break;
413                         }
414                         if (constcmp(t, "Prop-content-length"))
415                                 continue;
416                         node_ctx.propLength = atoi(val);
417                         break;
418                 case sizeof("Text-delta"):
419                         if (!constcmp(t, "Text-delta")) {
420                                 node_ctx.text_delta = !strcmp(val, "true");
421                                 break;
422                         }
423                         if (constcmp(t, "Prop-delta"))
424                                 continue;
425                         node_ctx.prop_delta = !strcmp(val, "true");
426                         break;
427                 case sizeof("Content-length"):
428                         if (constcmp(t, "Content-length"))
429                                 continue;
430                         len = atoi(val);
431                         t = buffer_read_line(&input);
432                         if (!t)
433                                 die_short_read();
434                         if (*t)
435                                 die("invalid dump: expected blank line after content length header");
436                         if (active_ctx == REV_CTX) {
437                                 read_props();
438                         } else if (active_ctx == NODE_CTX) {
439                                 handle_node();
440                                 active_ctx = INTERNODE_CTX;
441                         } else {
442                                 fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
443                                 if (buffer_skip_bytes(&input, len) != len)
444                                         die_short_read();
445                         }
446                 }
447         }
448         if (buffer_ferror(&input))
449                 die_short_read();
450         if (active_ctx == NODE_CTX)
451                 handle_node();
452         if (active_ctx == REV_CTX)
453                 begin_revision();
454         if (active_ctx != DUMP_CTX)
455                 end_revision();
456 }
457
458 int svndump_init(const char *filename)
459 {
460         if (buffer_init(&input, filename))
461                 return error("cannot open %s: %s", filename, strerror(errno));
462         fast_export_init(REPORT_FILENO);
463         strbuf_init(&dump_ctx.uuid, 4096);
464         strbuf_init(&dump_ctx.url, 4096);
465         strbuf_init(&rev_ctx.log, 4096);
466         strbuf_init(&rev_ctx.author, 4096);
467         strbuf_init(&node_ctx.src, 4096);
468         strbuf_init(&node_ctx.dst, 4096);
469         reset_dump_ctx(NULL);
470         reset_rev_ctx(0);
471         reset_node_ctx(NULL);
472         return 0;
473 }
474
475 void svndump_deinit(void)
476 {
477         fast_export_deinit();
478         reset_dump_ctx(NULL);
479         reset_rev_ctx(0);
480         reset_node_ctx(NULL);
481         strbuf_release(&rev_ctx.log);
482         strbuf_release(&node_ctx.src);
483         strbuf_release(&node_ctx.dst);
484         if (buffer_deinit(&input))
485                 fprintf(stderr, "Input error\n");
486         if (ferror(stdout))
487                 fprintf(stderr, "Output error\n");
488 }
489
490 void svndump_reset(void)
491 {
492         fast_export_reset();
493         buffer_reset(&input);
494         strbuf_release(&dump_ctx.uuid);
495         strbuf_release(&dump_ctx.url);
496         strbuf_release(&rev_ctx.log);
497         strbuf_release(&rev_ctx.author);
498 }