Implemented branch handling and basic tree support in fast-import.
[git] / fast-import.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "delta.h"
6 #include "pack.h"
7 #include "csum-file.h"
8
9 struct object_entry
10 {
11         struct object_entry *next;
12         unsigned long offset;
13         unsigned char sha1[20];
14 };
15
16 struct object_entry_block
17 {
18         struct object_entry_block *next_block;
19         struct object_entry *next_free;
20         struct object_entry *end;
21         struct object_entry entries[FLEX_ARRAY]; /* more */
22 };
23
24 struct last_object
25 {
26         void *data;
27         unsigned int len;
28         unsigned int depth;
29         unsigned char sha1[20];
30 };
31
32 struct tree;
33 struct tree_entry
34 {
35         struct tree *tree;
36         mode_t mode;
37         unsigned char sha1[20];
38         char name[FLEX_ARRAY]; /* more */
39 };
40
41 struct tree
42 {
43         struct last_object last_tree;
44         unsigned long entry_count;
45         struct tree_entry **entries;
46 };
47
48 struct branch
49 {
50         struct branch *next_branch;
51         struct tree_entry tree;
52         unsigned char sha1[20];
53         const char *name;
54 };
55
56 /* Stats and misc. counters. */
57 static int max_depth = 10;
58 static unsigned long alloc_count;
59 static unsigned long branch_count;
60 static unsigned long object_count;
61 static unsigned long duplicate_count;
62 static unsigned long object_count_by_type[9];
63 static unsigned long duplicate_count_by_type[9];
64
65 /* The .pack file */
66 static int pack_fd;
67 static unsigned long pack_offset;
68 static unsigned char pack_sha1[20];
69
70 /* Table of objects we've written. */
71 struct object_entry_block *blocks;
72 struct object_entry *object_table[1 << 16];
73
74 /* Our last blob */
75 struct last_object last_blob;
76
77 /* Branch data */
78 struct branch *branches;
79 struct branch *current_branch;
80
81 static void alloc_objects(int cnt)
82 {
83         struct object_entry_block *b;
84
85         b = xmalloc(sizeof(struct object_entry_block)
86                 + cnt * sizeof(struct object_entry));
87         b->next_block = blocks;
88         b->next_free = b->entries;
89         b->end = b->entries + cnt;
90         blocks = b;
91         alloc_count += cnt;
92 }
93
94 static struct object_entry* new_object(unsigned char *sha1)
95 {
96         struct object_entry *e;
97
98         if (blocks->next_free == blocks->end)
99                 alloc_objects(1000);
100
101         e = blocks->next_free++;
102         memcpy(e->sha1, sha1, sizeof(e->sha1));
103         return e;
104 }
105
106 static struct object_entry* insert_object(unsigned char *sha1)
107 {
108         unsigned int h = sha1[0] << 8 | sha1[1];
109         struct object_entry *e = object_table[h];
110         struct object_entry *p = 0;
111
112         while (e) {
113                 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
114                         return e;
115                 p = e;
116                 e = e->next;
117         }
118
119         e = new_object(sha1);
120         e->next = 0;
121         e->offset = 0;
122         if (p)
123                 p->next = e;
124         else
125                 object_table[h] = e;
126         return e;
127 }
128
129 static ssize_t yread(int fd, void *buffer, size_t length)
130 {
131         ssize_t ret = 0;
132         while (ret < length) {
133                 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
134                 if (size < 0) {
135                         return size;
136                 }
137                 if (size == 0) {
138                         return ret;
139                 }
140                 ret += size;
141         }
142         return ret;
143 }
144
145 static ssize_t ywrite(int fd, void *buffer, size_t length)
146 {
147         ssize_t ret = 0;
148         while (ret < length) {
149                 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
150                 if (size < 0) {
151                         return size;
152                 }
153                 if (size == 0) {
154                         return ret;
155                 }
156                 ret += size;
157         }
158         return ret;
159 }
160
161 static const char* read_string()
162 {
163         static char sn[PATH_MAX];
164         unsigned long slen;
165
166         if (yread(0, &slen, 4) != 4)
167                 die("Can't obtain string");
168         if (!slen)
169                 return 0;
170         if (slen > (PATH_MAX - 1))
171                 die("Can't handle excessive string length %lu", slen);
172
173         if (yread(0, sn, slen) != slen)
174                 die("Can't obtain string of length %lu", slen);
175         sn[slen] = 0;
176         return sn;
177 }
178
179 static const char* read_required_string()
180 {
181         const char *r = read_string();
182         if (!r)
183                 die("Expected string command parameter, didn't find one");
184         return r;
185 }
186
187 static unsigned long encode_header(
188         enum object_type type,
189         unsigned long size,
190         unsigned char *hdr)
191 {
192         int n = 1;
193         unsigned char c;
194
195         if (type < OBJ_COMMIT || type > OBJ_DELTA)
196                 die("bad type %d", type);
197
198         c = (type << 4) | (size & 15);
199         size >>= 4;
200         while (size) {
201                 *hdr++ = c | 0x80;
202                 c = size & 0x7f;
203                 size >>= 7;
204                 n++;
205         }
206         *hdr = c;
207         return n;
208 }
209
210 static int store_object(
211         enum object_type type,
212         void *dat,
213         unsigned long datlen,
214         struct last_object *last,
215         unsigned char *sha1out)
216 {
217         void *out, *delta;
218         struct object_entry *e;
219         unsigned char hdr[96];
220         unsigned char sha1[20];
221         unsigned long hdrlen, deltalen;
222         SHA_CTX c;
223         z_stream s;
224
225         hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
226         SHA1_Init(&c);
227         SHA1_Update(&c, hdr, hdrlen);
228         SHA1_Update(&c, dat, datlen);
229         SHA1_Final(sha1, &c);
230         if (sha1out)
231                 memcpy(sha1out, sha1, sizeof(sha1));
232
233         e = insert_object(sha1);
234         if (e->offset) {
235                 duplicate_count++;
236                 duplicate_count_by_type[type]++;
237                 return 0;
238         }
239         e->offset = pack_offset;
240         object_count++;
241         object_count_by_type[type]++;
242
243         if (last->data && last->depth < max_depth)
244                 delta = diff_delta(last->data, last->len,
245                         dat, datlen,
246                         &deltalen, 0);
247         else
248                 delta = 0;
249
250         memset(&s, 0, sizeof(s));
251         deflateInit(&s, zlib_compression_level);
252
253         if (delta) {
254                 last->depth++;
255                 s.next_in = delta;
256                 s.avail_in = deltalen;
257                 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
258                 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
259                         die("Can't write object header: %s", strerror(errno));
260                 if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
261                         die("Can't write object base: %s", strerror(errno));
262                 pack_offset += hdrlen + sizeof(sha1);
263         } else {
264                 last->depth = 0;
265                 s.next_in = dat;
266                 s.avail_in = datlen;
267                 hdrlen = encode_header(type, datlen, hdr);
268                 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
269                         die("Can't write object header: %s", strerror(errno));
270                 pack_offset += hdrlen;
271         }
272
273         s.avail_out = deflateBound(&s, s.avail_in);
274         s.next_out = out = xmalloc(s.avail_out);
275         while (deflate(&s, Z_FINISH) == Z_OK)
276                 /* nothing */;
277         deflateEnd(&s);
278
279         if (ywrite(pack_fd, out, s.total_out) != s.total_out)
280                 die("Failed writing compressed data %s", strerror(errno));
281         pack_offset += s.total_out;
282
283         free(out);
284         if (delta)
285                 free(delta);
286         if (last->data)
287                 free(last->data);
288         last->data = dat;
289         last->len = datlen;
290         memcpy(last->sha1, sha1, sizeof(sha1));
291         return 1;
292 }
293
294 static void init_pack_header()
295 {
296         const char* magic = "PACK";
297         unsigned long version = 3;
298         unsigned long zero = 0;
299
300         version = htonl(version);
301
302         if (ywrite(pack_fd, (char*)magic, 4) != 4)
303                 die("Can't write pack magic: %s", strerror(errno));
304         if (ywrite(pack_fd, &version, 4) != 4)
305                 die("Can't write pack version: %s", strerror(errno));
306         if (ywrite(pack_fd, &zero, 4) != 4)
307                 die("Can't write 0 object count: %s", strerror(errno));
308         pack_offset = 4 * 3;
309 }
310
311 static void fixup_header_footer()
312 {
313         SHA_CTX c;
314         char hdr[8];
315         unsigned long cnt;
316         char *buf;
317         size_t n;
318
319         if (lseek(pack_fd, 0, SEEK_SET) != 0)
320                 die("Failed seeking to start: %s", strerror(errno));
321
322         SHA1_Init(&c);
323         if (yread(pack_fd, hdr, 8) != 8)
324                 die("Failed reading header: %s", strerror(errno));
325         SHA1_Update(&c, hdr, 8);
326
327         cnt = htonl(object_count);
328         SHA1_Update(&c, &cnt, 4);
329         if (ywrite(pack_fd, &cnt, 4) != 4)
330                 die("Failed writing object count: %s", strerror(errno));
331
332         buf = xmalloc(128 * 1024);
333         for (;;) {
334                 n = xread(pack_fd, buf, 128 * 1024);
335                 if (n <= 0)
336                         break;
337                 SHA1_Update(&c, buf, n);
338         }
339         free(buf);
340
341         SHA1_Final(pack_sha1, &c);
342         if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
343                 die("Failed writing pack checksum: %s", strerror(errno));
344 }
345
346 static int oecmp (const void *_a, const void *_b)
347 {
348         struct object_entry *a = *((struct object_entry**)_a);
349         struct object_entry *b = *((struct object_entry**)_b);
350         return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
351 }
352
353 static void write_index(const char *idx_name)
354 {
355         struct sha1file *f;
356         struct object_entry **idx, **c, **last;
357         struct object_entry *e;
358         struct object_entry_block *o;
359         unsigned int array[256];
360         int i;
361
362         /* Build the sorted table of object IDs. */
363         idx = xmalloc(object_count * sizeof(struct object_entry*));
364         c = idx;
365         for (o = blocks; o; o = o->next_block)
366                 for (e = o->entries; e != o->next_free; e++)
367                         *c++ = e;
368         last = idx + object_count;
369         qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
370
371         /* Generate the fan-out array. */
372         c = idx;
373         for (i = 0; i < 256; i++) {
374                 struct object_entry **next = c;;
375                 while (next < last) {
376                         if ((*next)->sha1[0] != i)
377                                 break;
378                         next++;
379                 }
380                 array[i] = htonl(next - idx);
381                 c = next;
382         }
383
384         f = sha1create("%s", idx_name);
385         sha1write(f, array, 256 * sizeof(int));
386         for (c = idx; c != last; c++) {
387                 unsigned int offset = htonl((*c)->offset);
388                 sha1write(f, &offset, 4);
389                 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
390         }
391         sha1write(f, pack_sha1, sizeof(pack_sha1));
392         sha1close(f, NULL, 1);
393         free(idx);
394 }
395
396 static void new_blob()
397 {
398         unsigned long datlen;
399         void *dat;
400
401         if (yread(0, &datlen, 4) != 4)
402                 die("Can't obtain blob length");
403
404         dat = xmalloc(datlen);
405         if (yread(0, dat, datlen) != datlen)
406                 die("Con't obtain %lu bytes of blob data", datlen);
407
408         if (!store_object(OBJ_BLOB, dat, datlen, &last_blob, 0))
409                 free(dat);
410 }
411
412 static struct branch* lookup_branch(const char *name)
413 {
414         struct branch *b;
415         for (b = branches; b; b = b->next_branch) {
416                 if (!strcmp(name, b->name))
417                         return b;
418         }
419         die("No branch named '%s' has been declared", name);
420 }
421
422 static struct tree* deep_copy_tree (struct tree *t)
423 {
424         struct tree *r = xmalloc(sizeof(struct tree));
425         unsigned long i;
426
427         if (t->last_tree.data) {
428                 r->last_tree.data = xmalloc(t->last_tree.len);
429                 r->last_tree.len = t->last_tree.len;
430                 r->last_tree.depth = t->last_tree.depth;
431                 memcpy(r->last_tree.data, t->last_tree.data, t->last_tree.len);
432                 memcpy(r->last_tree.sha1, t->last_tree.sha1, sizeof(t->last_tree.sha1));
433         }
434
435         r->entry_count = t->entry_count;
436         r->entries = xmalloc(t->entry_count * sizeof(struct tree_entry*));
437         for (i = 0; i < t->entry_count; i++) {
438                 struct tree_entry *a = t->entries[i];
439                 struct tree_entry *b;
440
441                 b = xmalloc(sizeof(struct tree_entry) + strlen(a->name) + 1);
442                 b->tree = a->tree ? deep_copy_tree(a->tree) : 0;
443                 b->mode = a->mode;
444                 memcpy(b->sha1, a->sha1, sizeof(a->sha1));
445                 strcpy(b->name, a->name);
446                 r->entries[i] = b;
447         }
448
449         return r;
450 }
451
452 static void store_tree (struct tree_entry *e)
453 {
454         struct tree *t = e->tree;
455         unsigned long maxlen, i;
456         char *buf, *c;
457
458         if (memcmp(null_sha1, e->sha1, sizeof(e->sha1)))
459                 return;
460
461         maxlen = t->entry_count * 32;
462         for (i = 0; i < t->entry_count; i++)
463                 maxlen += strlen(t->entries[i]->name);
464
465         buf = c = xmalloc(maxlen);
466         for (i = 0; i < t->entry_count; i++) {
467                 struct tree_entry *e = t->entries[i];
468                 c += sprintf(c, "%o %s", e->mode, e->name) + 1;
469                 if (e->tree)
470                         store_tree(e);
471                 memcpy(c, e->sha1, sizeof(e->sha1));
472                 c += sizeof(e->sha1);
473         }
474
475         if (!store_object(OBJ_TREE, buf, c - buf, &t->last_tree, e->sha1))
476                 free(buf);
477 }
478
479 static void new_branch()
480 {
481         struct branch *nb = xcalloc(1, sizeof(struct branch));
482         const char *source_name;
483
484         nb->name = strdup(read_required_string());
485         source_name = read_string();
486         if (source_name) {
487                 struct branch *sb = lookup_branch(source_name);
488                 nb->tree.tree = deep_copy_tree(sb->tree.tree);
489                 memcpy(nb->tree.sha1, sb->tree.sha1, sizeof(sb->tree.sha1));
490                 memcpy(nb->sha1, sb->sha1, sizeof(sb->sha1));
491         } else {
492                 nb->tree.tree = xcalloc(1, sizeof(struct tree));
493                 nb->tree.tree->entries = xmalloc(8*sizeof(struct tree_entry*));
494         }
495         nb->next_branch = branches;
496         branches = nb;
497         branch_count++;
498 }
499
500 static void set_branch()
501 {
502         current_branch = lookup_branch(read_required_string());
503 }
504
505 static void commit()
506 {
507         store_tree(&current_branch->tree);
508 }
509
510 int main(int argc, const char **argv)
511 {
512         const char *base_name = argv[1];
513         int est_obj_cnt = atoi(argv[2]);
514         char *pack_name;
515         char *idx_name;
516         struct stat sb;
517
518         pack_name = xmalloc(strlen(base_name) + 6);
519         sprintf(pack_name, "%s.pack", base_name);
520         idx_name = xmalloc(strlen(base_name) + 5);
521         sprintf(idx_name, "%s.idx", base_name);
522
523         pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
524         if (pack_fd < 0)
525                 die("Can't create %s: %s", pack_name, strerror(errno));
526
527         alloc_objects(est_obj_cnt);
528         init_pack_header();
529         for (;;) {
530                 unsigned long cmd;
531                 if (yread(0, &cmd, 4) != 4)
532                         break;
533
534                 switch (cmd) {
535                 case 'blob': new_blob();   break;
536                 case 'newb': new_branch(); break;
537                 case 'setb': set_branch(); break;
538                 case 'comt': commit();     break;
539                 default:
540                         die("Invalid command %lu", cmd);
541                 }
542         }
543         fixup_header_footer();
544         close(pack_fd);
545         write_index(idx_name);
546
547         fprintf(stderr, "%s statistics:\n", argv[0]);
548         fprintf(stderr, "---------------------------------------------------\n");
549         fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
550         fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
551         fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
552         fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
553         fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
554         fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
555         fprintf(stderr, "Total branches:  %10lu\n", branch_count);
556         fprintf(stderr, "---------------------------------------------------\n");
557
558         stat(pack_name, &sb);
559         fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
560         stat(idx_name, &sb);
561         fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
562
563         fprintf(stderr, "\n");
564
565         return 0;
566 }