Added tree and commit writing to fast-import.
[git] / fast-import.c
1 /*
2 Format of STDIN stream:
3
4   stream ::= cmd*;
5
6   cmd ::= new_blob
7         | new_commit
8         | new_branch
9         | new_tag
10         ;
11
12   new_blob ::= 'blob' blob_data;
13
14   new_commit ::= 'comt' ref_name author_committer_msg
15     file_change*
16     '0';
17
18   new_branch ::= 'brch' dst_ref_name src_ref_name;
19   dst_ref_name ::= ref_name;
20   src_ref_name ::= ref_name | sha1_exp;
21
22   new_tag ::= 'tagg' ref_name tag_name tagger_msg;
23
24   file_change ::= 'M' path_name hexsha1
25                 | 'D' path_name
26                 ;
27
28   author_committer_msg ::= len32
29     'author' sp name '<' email '>' ts tz lf
30     'committer' sp name '<' email '>' ts tz lf
31     lf
32     binary_data;
33
34   tagger_msg ::= len32
35     'tagger' sp name '<' email '>' ts tz lf
36     lf
37     binary_data;
38
39   blob_data ::= len32 binary_data; # max len is 2^32-1
40   path_name ::= len32 path;        # max len is PATH_MAX-1
41   ref_name  ::= len32 ref;         # max len is PATH_MAX-1
42   tag_name  ::= len32 tag;         # max len is PATH_MAX-1
43   sha1_exp  ::= len32 sha1exp;     # max len is PATH_MAX-1
44
45   len32 ::= # unsigned 32 bit value, native format;
46   binary_data ::= # file content, not interpreted;
47   sp ::= # ASCII space character;
48   lf ::= # ASCII newline (LF) character;
49   path ::= # GIT style file path, e.g. "a/b/c";
50   ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
51   tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
52   sha1exp ::= # Any valid GIT SHA1 expression;
53   hexsha1 ::= # SHA1 in hexadecimal format;
54   name ::= # valid GIT author/committer name;
55   email ::= # valid GIT author/committer email;
56   ts ::= # time since the epoch in seconds, ascii decimal;
57   tz ::= # GIT style timezone;
58 */
59
60 #include "builtin.h"
61 #include "cache.h"
62 #include "object.h"
63 #include "blob.h"
64 #include "tree.h"
65 #include "delta.h"
66 #include "pack.h"
67 #include "refs.h"
68 #include "csum-file.h"
69
/* One object known to this run; entries are chained per object_table bucket. */
struct object_entry {
	struct object_entry *next;	/* next entry in the same bucket */
	unsigned long offset;		/* pack offset; 0 until the object is written */
	unsigned char sha1[20];		/* object name */
};
76
77 struct object_entry_pool
78 {
79         struct object_entry_pool *next_pool;
80         struct object_entry *next_free;
81         struct object_entry *end;
82         struct object_entry entries[FLEX_ARRAY]; /* more */
83 };
84
/* The previously stored object that store_object() may delta against. */
struct last_object {
	void *data;		/* its content; freed by store_object() when replaced */
	unsigned int len;	/* number of bytes in data */
	unsigned int depth;	/* delta chain depth reached so far */
	unsigned char sha1[20];	/* its object name */
};
92
93 struct mem_pool
94 {
95         struct mem_pool *next_pool;
96         char *next_free;
97         char *end;
98         char space[FLEX_ARRAY]; /* more */
99 };
100
101 struct atom_str
102 {
103         struct atom_str *next_atom;
104         int str_len;
105         char str_dat[FLEX_ARRAY]; /* more */
106 };
107
struct tree_content;

/* One named entry (file or subdirectory) inside a tree_content. */
struct tree_entry {
	struct tree_content *tree;	/* loaded subtree content, or NULL */
	struct atom_str *name;		/* interned entry name */
	unsigned int mode;		/* file mode bits */
	unsigned char sha1[20];		/* object name; null_sha1 once modified */
};
116
117 struct tree_content
118 {
119         unsigned int entry_capacity; /* must match avail_tree_content */
120         unsigned int entry_count;
121         struct tree_entry *entries[FLEX_ARRAY]; /* more */
122 };
123
/* Free-list view of a released tree_content (see release_tree_content()). */
struct avail_tree_content {
	unsigned int entry_capacity; /* must match tree_content */
	struct avail_tree_content *next_avail;	/* next free block in the bucket */
};
129
130 struct branch
131 {
132         struct branch *table_next_branch;
133         struct branch *active_next_branch;
134         const char *name;
135         unsigned long last_commit;
136         struct tree_entry branch_tree;
137         unsigned char sha1[20];
138 };
139
140
141 /* Stats and misc. counters */
142 static int max_depth = 10;
143 static unsigned long alloc_count;
144 static unsigned long branch_count;
145 static unsigned long object_count;
146 static unsigned long duplicate_count;
147 static unsigned long object_count_by_type[9];
148 static unsigned long duplicate_count_by_type[9];
149
150 /* Memory pools */
151 static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
152 static size_t total_allocd;
153 static struct mem_pool *mem_pool;
154
155 /* atom management */
156 static unsigned int atom_table_sz = 4451;
157 static unsigned int atom_cnt;
158 static struct atom_str **atom_table;
159
160 /* The .pack file being generated */
161 static int pack_fd;
162 static unsigned long pack_offset;
163 static unsigned char pack_sha1[20];
164
165 /* Table of objects we've written. */
166 static unsigned int object_entry_alloc = 1000;
167 static struct object_entry_pool *blocks;
168 static struct object_entry *object_table[1 << 16];
169
170 /* Our last blob */
171 static struct last_object last_blob;
172
173 /* Tree management */
174 static unsigned int tree_entry_alloc = 1000;
175 static void *avail_tree_entry;
176 static unsigned int avail_tree_table_sz = 100;
177 static struct avail_tree_content **avail_tree_table;
178
179 /* Branch data */
180 static unsigned int max_active_branches = 5;
181 static unsigned int cur_active_branches;
182 static unsigned int branch_table_sz = 1039;
183 static struct branch **branch_table;
184 static struct branch *active_branches;
185
186
187 static void alloc_objects(int cnt)
188 {
189         struct object_entry_pool *b;
190
191         b = xmalloc(sizeof(struct object_entry_pool)
192                 + cnt * sizeof(struct object_entry));
193         b->next_pool = blocks;
194         b->next_free = b->entries;
195         b->end = b->entries + cnt;
196         blocks = b;
197         alloc_count += cnt;
198 }
199
200 static struct object_entry* new_object(unsigned char *sha1)
201 {
202         struct object_entry *e;
203
204         if (blocks->next_free == blocks->end)
205                 alloc_objects(object_entry_alloc);
206
207         e = blocks->next_free++;
208         memcpy(e->sha1, sha1, sizeof(e->sha1));
209         return e;
210 }
211
212 static struct object_entry* find_object(unsigned char *sha1)
213 {
214         unsigned int h = sha1[0] << 8 | sha1[1];
215         struct object_entry *e;
216         for (e = object_table[h]; e; e = e->next)
217                 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
218                         return e;
219         return NULL;
220 }
221
222 static struct object_entry* insert_object(unsigned char *sha1)
223 {
224         unsigned int h = sha1[0] << 8 | sha1[1];
225         struct object_entry *e = object_table[h];
226         struct object_entry *p = NULL;
227
228         while (e) {
229                 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
230                         return e;
231                 p = e;
232                 e = e->next;
233         }
234
235         e = new_object(sha1);
236         e->next = NULL;
237         e->offset = 0;
238         if (p)
239                 p->next = e;
240         else
241                 object_table[h] = e;
242         return e;
243 }
244
/* Hash the len bytes at s with the multiply-by-31 string hash. */
static unsigned int hc_str(const char *s, size_t len)
{
	const char *stop = s + len;
	unsigned int hash = 0;

	for (; s != stop; s++)
		hash = hash * 31 + *s;
	return hash;
}
252
253 static void* pool_alloc(size_t len)
254 {
255         struct mem_pool *p;
256         void *r;
257
258         for (p = mem_pool; p; p = p->next_pool)
259                 if ((p->end - p->next_free >= len))
260                         break;
261
262         if (!p) {
263                 if (len >= (mem_pool_alloc/2)) {
264                         total_allocd += len;
265                         return xmalloc(len);
266                 }
267                 total_allocd += sizeof(struct mem_pool) + mem_pool_alloc;
268                 p = xmalloc(sizeof(struct mem_pool) + mem_pool_alloc);
269                 p->next_pool = mem_pool;
270                 p->next_free = p->space;
271                 p->end = p->next_free + mem_pool_alloc;
272                 mem_pool = p;
273         }
274
275         r = p->next_free;
276         p->next_free += len;
277         return r;
278 }
279
/*
 * Allocate a zero-filled array of count elements of size bytes each from
 * the memory pool. Dies if count * size does not fit in a size_t, since
 * a wrapped product would yield a buffer smaller than the caller expects.
 */
static void* pool_calloc(size_t count, size_t size)
{
	size_t len;
	void *r;

	if (size && count > ((size_t)-1) / size)
		die("pool_calloc: %lu elements of %lu bytes is too large",
			(unsigned long)count, (unsigned long)size);

	len = count * size;
	r = pool_alloc(len);
	memset(r, 0, len);
	return r;
}
287
/* Copy the NUL-terminated string s into pool memory and return the copy. */
static char* pool_strdup(const char *s)
{
	size_t bytes = strlen(s) + 1;	/* include the terminator */
	char *copy = pool_alloc(bytes);

	memcpy(copy, s, bytes);
	return copy;
}
294
295 static struct atom_str* to_atom(const char *s, size_t len)
296 {
297         unsigned int hc = hc_str(s, len) % atom_table_sz;
298         struct atom_str *c;
299
300         for (c = atom_table[hc]; c; c = c->next_atom)
301                 if (c->str_len == len && !strncmp(s, c->str_dat, len))
302                         return c;
303
304         c = pool_alloc(sizeof(struct atom_str) + len + 1);
305         c->str_len = len;
306         strncpy(c->str_dat, s, len);
307         c->str_dat[len] = 0;
308         c->next_atom = atom_table[hc];
309         atom_table[hc] = c;
310         atom_cnt++;
311         return c;
312 }
313
314 static struct branch* lookup_branch(const char *name)
315 {
316         unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
317         struct branch *b;
318
319         for (b = branch_table[hc]; b; b = b->table_next_branch)
320                 if (!strcmp(name, b->name))
321                         return b;
322         return NULL;
323 }
324
325 static struct branch* new_branch(const char *name)
326 {
327         unsigned int hc = hc_str(name, strlen(name)) % branch_table_sz;
328         struct branch* b = lookup_branch(name);
329
330         if (b)
331                 die("Invalid attempt to create duplicate branch: %s", name);
332
333         b = pool_calloc(1, sizeof(struct branch));
334         b->name = pool_strdup(name);
335         b->table_next_branch = branch_table[hc];
336         branch_table[hc] = b;
337         branch_count++;
338         return b;
339 }
340
341 static unsigned int hc_entries(unsigned int cnt)
342 {
343         cnt = cnt & 7 ? (cnt / 8) + 1 : cnt / 8;
344         return cnt < avail_tree_table_sz ? cnt : avail_tree_table_sz - 1;
345 }
346
347 static struct tree_content* new_tree_content(unsigned int cnt)
348 {
349         struct avail_tree_content *f, *l = NULL;
350         struct tree_content *t;
351         unsigned int hc = hc_entries(cnt);
352
353         for (f = avail_tree_table[hc]; f; l = f, f = f->next_avail)
354                 if (f->entry_capacity >= cnt)
355                         break;
356
357         if (f) {
358                 if (l)
359                         l->next_avail = f->next_avail;
360                 else
361                         avail_tree_table[hc] = f->next_avail;
362         } else {
363                 cnt = cnt & 7 ? ((cnt / 8) + 1) * 8 : cnt;
364                 f = pool_alloc(sizeof(*t) + sizeof(t->entries[0]) * cnt);
365                 f->entry_capacity = cnt;
366         }
367
368         t = (struct tree_content*)f;
369         t->entry_count = 0;
370         return t;
371 }
372
373 static void release_tree_entry(struct tree_entry *e);
374 static void release_tree_content(struct tree_content *t)
375 {
376         struct avail_tree_content *f = (struct avail_tree_content*)t;
377         unsigned int hc = hc_entries(f->entry_capacity);
378         unsigned int i;
379         for (i = 0; i < t->entry_count; i++)
380                 release_tree_entry(t->entries[i]);
381         f->next_avail = avail_tree_table[hc];
382         avail_tree_table[hc] = f;
383 }
384
385 static struct tree_content* grow_tree_content(
386         struct tree_content *t,
387         int amt)
388 {
389         struct tree_content *r = new_tree_content(t->entry_count + amt);
390         r->entry_count = t->entry_count;
391         memcpy(r->entries,t->entries,t->entry_count*sizeof(t->entries[0]));
392         release_tree_content(t);
393         return r;
394 }
395
396 static struct tree_entry* new_tree_entry()
397 {
398         struct tree_entry *e;
399
400         if (!avail_tree_entry) {
401                 unsigned int n = tree_entry_alloc;
402                 avail_tree_entry = e = xmalloc(n * sizeof(struct tree_entry));
403                 while (n--) {
404                         *((void**)e) = e + 1;
405                         e++;
406                 }
407         }
408
409         e = avail_tree_entry;
410         avail_tree_entry = *((void**)e);
411         return e;
412 }
413
414 static void release_tree_entry(struct tree_entry *e)
415 {
416         if (e->tree)
417                 release_tree_content(e->tree);
418         *((void**)e) = avail_tree_entry;
419         avail_tree_entry = e;
420 }
421
/*
 * Read exactly length bytes from fd into buffer, looping over short
 * reads. Dies on end of stream or on a read error.
 */
static void yread(int fd, void *buffer, size_t length)
{
	char *dst = buffer;
	ssize_t done = 0;

	while (done < length) {
		ssize_t got = xread(fd, dst + done, length - done);
		if (got == 0)
			die("Read from descriptor %i: end of stream", fd);
		if (got < 0)
			die("Read from descriptor %i: %s", fd, strerror(errno));
		done += got;
	}
}
434
/*
 * Like yread(), except that end of stream before the first byte is not an
 * error: returns 1 in that case and 0 once length bytes were read.
 * End of stream in the middle of the data, or a read error, is fatal.
 */
static int optional_read(int fd, void *buffer, size_t length)
{
	char *dst = buffer;
	ssize_t done = 0;

	while (done < length) {
		ssize_t got = xread(fd, dst + done, length - done);
		if (got == 0) {
			if (!done)
				return 1;
			die("Read from descriptor %i: end of stream", fd);
		}
		if (got < 0)
			die("Read from descriptor %i: %s", fd, strerror(errno));
		done += got;
	}
	return 0;
}
450
/*
 * Write exactly length bytes from buffer to fd, looping over short
 * writes. Dies if a write reports zero bytes or fails.
 */
static void ywrite(int fd, void *buffer, size_t length)
{
	char *src = buffer;
	ssize_t done = 0;

	while (done < length) {
		ssize_t put = xwrite(fd, src + done, length - done);
		if (put == 0)
			die("Write to descriptor %i: end of file", fd);
		if (put < 0)
			die("Write to descriptor %i: %s", fd, strerror(errno));
		done += put;
	}
}
463
464 static const char* read_path()
465 {
466         static char sn[PATH_MAX];
467         unsigned long slen;
468
469         yread(0, &slen, 4);
470         if (!slen)
471                 die("Expected string command parameter, didn't find one");
472         if (slen > (PATH_MAX - 1))
473                 die("Can't handle excessive string length %lu", slen);
474         yread(0, sn, slen);
475         sn[slen] = 0;
476         return sn;
477 }
478
479 static unsigned long encode_header(
480         enum object_type type,
481         unsigned long size,
482         unsigned char *hdr)
483 {
484         int n = 1;
485         unsigned char c;
486
487         if (type < OBJ_COMMIT || type > OBJ_DELTA)
488                 die("bad type %d", type);
489
490         c = (type << 4) | (size & 15);
491         size >>= 4;
492         while (size) {
493                 *hdr++ = c | 0x80;
494                 c = size & 0x7f;
495                 size >>= 7;
496                 n++;
497         }
498         *hdr = c;
499         return n;
500 }
501
502 static int store_object(
503         enum object_type type,
504         void *dat,
505         unsigned long datlen,
506         struct last_object *last,
507         unsigned char *sha1out)
508 {
509         void *out, *delta;
510         struct object_entry *e;
511         unsigned char hdr[96];
512         unsigned char sha1[20];
513         unsigned long hdrlen, deltalen;
514         SHA_CTX c;
515         z_stream s;
516
517         hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
518         SHA1_Init(&c);
519         SHA1_Update(&c, hdr, hdrlen);
520         SHA1_Update(&c, dat, datlen);
521         SHA1_Final(sha1, &c);
522         if (sha1out)
523                 memcpy(sha1out, sha1, sizeof(sha1));
524
525         e = insert_object(sha1);
526         if (e->offset) {
527                 duplicate_count++;
528                 duplicate_count_by_type[type]++;
529                 return 1;
530         }
531         e->offset = pack_offset;
532         object_count++;
533         object_count_by_type[type]++;
534
535         if (last && last->data && last->depth < max_depth)
536                 delta = diff_delta(last->data, last->len,
537                         dat, datlen,
538                         &deltalen, 0);
539         else
540                 delta = 0;
541
542         memset(&s, 0, sizeof(s));
543         deflateInit(&s, zlib_compression_level);
544
545         if (delta) {
546                 last->depth++;
547                 s.next_in = delta;
548                 s.avail_in = deltalen;
549                 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
550                 ywrite(pack_fd, hdr, hdrlen);
551                 ywrite(pack_fd, last->sha1, sizeof(sha1));
552                 pack_offset += hdrlen + sizeof(sha1);
553         } else {
554                 if (last)
555                         last->depth = 0;
556                 s.next_in = dat;
557                 s.avail_in = datlen;
558                 hdrlen = encode_header(type, datlen, hdr);
559                 ywrite(pack_fd, hdr, hdrlen);
560                 pack_offset += hdrlen;
561         }
562
563         s.avail_out = deflateBound(&s, s.avail_in);
564         s.next_out = out = xmalloc(s.avail_out);
565         while (deflate(&s, Z_FINISH) == Z_OK)
566                 /* nothing */;
567         deflateEnd(&s);
568
569         ywrite(pack_fd, out, s.total_out);
570         pack_offset += s.total_out;
571
572         free(out);
573         if (delta)
574                 free(delta);
575         if (last) {
576                 if (last->data)
577                         free(last->data);
578                 last->data = dat;
579                 last->len = datlen;
580                 memcpy(last->sha1, sha1, sizeof(sha1));
581         }
582         return 0;
583 }
584
/*
 * Parse the octal mode at the start of str, which must be terminated by a
 * single space. Stores the value in *modep and returns a pointer just
 * past the space. Returns NULL (leaving *modep untouched) when a
 * character other than an octal digit appears before the space.
 */
static const char *get_mode(const char *str, unsigned int *modep)
{
	unsigned int bits = 0;

	for (;;) {
		unsigned char ch = *str++;
		if (ch == ' ')
			break;
		if (ch < '0' || ch > '7')
			return NULL;
		bits = (bits << 3) + (ch - '0');
	}
	*modep = bits;
	return str;
}
598
599 static void load_tree(struct tree_entry *root)
600 {
601         struct object_entry *myoe;
602         struct tree_content *t;
603         unsigned long size;
604         char *buf;
605         const char *c;
606         char type[20];
607
608         root->tree = t = new_tree_content(8);
609         if (!memcmp(root->sha1, null_sha1, 20))
610                 return;
611
612         myoe = find_object(root->sha1);
613         if (myoe) {
614                 die("FIXME");
615         } else {
616                 buf = read_sha1_file(root->sha1, type, &size);
617                 if (!buf || strcmp(type, tree_type))
618                         die("Can't load existing tree %s", sha1_to_hex(root->sha1));
619         }
620
621         c = buf;
622         while (c != (buf + size)) {
623                 struct tree_entry *e = new_tree_entry();
624
625                 if (t->entry_count == t->entry_capacity)
626                         root->tree = t = grow_tree_content(t, 8);
627                 t->entries[t->entry_count++] = e;
628
629                 e->tree = NULL;
630                 c = get_mode(c, &e->mode);
631                 if (!c)
632                         die("Corrupt mode in %s", sha1_to_hex(root->sha1));
633                 e->name = to_atom(c, strlen(c));
634                 c += e->name->str_len + 1;
635                 memcpy(e->sha1, c, sizeof(e->sha1));
636                 c += 20;
637         }
638         free(buf);
639 }
640
641 static int tecmp (const void *_a, const void *_b)
642 {
643         struct tree_entry *a = *((struct tree_entry**)_a);
644         struct tree_entry *b = *((struct tree_entry**)_b);
645         return base_name_compare(
646                 a->name->str_dat, a->name->str_len, a->mode,
647                 b->name->str_dat, b->name->str_len, b->mode);
648 }
649
650 static void store_tree(struct tree_entry *root)
651 {
652         struct tree_content *t = root->tree;
653         unsigned int i;
654         size_t maxlen;
655         char *buf, *c;
656
657         if (memcmp(root->sha1, null_sha1, 20))
658                 return;
659
660         maxlen = 0;
661         for (i = 0; i < t->entry_count; i++) {
662                 maxlen += t->entries[i]->name->str_len + 34;
663                 if (t->entries[i]->tree)
664                         store_tree(t->entries[i]);
665         }
666
667         qsort(t->entries, t->entry_count, sizeof(t->entries[0]), tecmp);
668         buf = c = xmalloc(maxlen);
669         for (i = 0; i < t->entry_count; i++) {
670                 struct tree_entry *e = t->entries[i];
671                 c += sprintf(c, "%o", e->mode);
672                 *c++ = ' ';
673                 strcpy(c, e->name->str_dat);
674                 c += e->name->str_len + 1;
675                 memcpy(c, e->sha1, 20);
676                 c += 20;
677         }
678         store_object(OBJ_TREE, buf, c - buf, NULL, root->sha1);
679         free(buf);
680 }
681
682 static int tree_content_set(
683         struct tree_entry *root,
684         const char *p,
685         const unsigned char *sha1,
686         const unsigned int mode)
687 {
688         struct tree_content *t = root->tree;
689         const char *slash1;
690         unsigned int i, n;
691         struct tree_entry *e;
692
693         slash1 = strchr(p, '/');
694         if (slash1)
695                 n = slash1 - p;
696         else
697                 n = strlen(p);
698
699         for (i = 0; i < t->entry_count; i++) {
700                 e = t->entries[i];
701                 if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
702                         if (!slash1) {
703                                 if (e->mode == mode && !memcmp(e->sha1, sha1, 20))
704                                         return 0;
705                                 e->mode = mode;
706                                 memcpy(e->sha1, sha1, 20);
707                                 if (e->tree) {
708                                         release_tree_content(e->tree);
709                                         e->tree = NULL;
710                                 }
711                                 memcpy(root->sha1, null_sha1, 20);
712                                 return 1;
713                         }
714                         if (!S_ISDIR(e->mode)) {
715                                 e->tree = new_tree_content(8);
716                                 e->mode = 040000;
717                         }
718                         if (!e->tree)
719                                 load_tree(e);
720                         if (tree_content_set(e, slash1 + 1, sha1, mode)) {
721                                 memcpy(root->sha1, null_sha1, 20);
722                                 return 1;
723                         }
724                         return 0;
725                 }
726         }
727
728         if (t->entry_count == t->entry_capacity)
729                 root->tree = t = grow_tree_content(t, 8);
730         e = new_tree_entry();
731         e->name = to_atom(p, n);
732         t->entries[t->entry_count++] = e;
733         if (slash1) {
734                 e->tree = new_tree_content(8);
735                 e->mode = 040000;
736                 tree_content_set(e, slash1 + 1, sha1, mode);
737         } else {
738                 e->tree = NULL;
739                 e->mode = mode;
740                 memcpy(e->sha1, sha1, 20);
741         }
742         memcpy(root->sha1, null_sha1, 20);
743         return 1;
744 }
745
746 static int tree_content_remove(struct tree_entry *root, const char *p)
747 {
748         struct tree_content *t = root->tree;
749         const char *slash1;
750         unsigned int i, n;
751         struct tree_entry *e;
752
753         slash1 = strchr(p, '/');
754         if (slash1)
755                 n = slash1 - p;
756         else
757                 n = strlen(p);
758
759         for (i = 0; i < t->entry_count; i++) {
760                 e = t->entries[i];
761                 if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
762                         if (!slash1 || !S_ISDIR(e->mode))
763                                 goto del_entry;
764                         if (!e->tree)
765                                 load_tree(e);
766                         if (tree_content_remove(e, slash1 + 1)) {
767                                 if (!e->tree->entry_count)
768                                         goto del_entry;
769                                 memcpy(root->sha1, null_sha1, 20);
770                                 return 1;
771                         }
772                         return 0;
773                 }
774         }
775         return 0;
776
777 del_entry:
778         for (i++; i < t->entry_count; i++)
779                 t->entries[i-1] = t->entries[i];
780         t->entry_count--;
781         release_tree_entry(e);
782         memcpy(root->sha1, null_sha1, 20);
783         return 1;
784 }
785
786 static void init_pack_header()
787 {
788         const char* magic = "PACK";
789         unsigned long version = 3;
790         unsigned long zero = 0;
791
792         version = htonl(version);
793         ywrite(pack_fd, (char*)magic, 4);
794         ywrite(pack_fd, &version, 4);
795         ywrite(pack_fd, &zero, 4);
796         pack_offset = 4 * 3;
797 }
798
799 static void fixup_header_footer()
800 {
801         SHA_CTX c;
802         char hdr[8];
803         unsigned long cnt;
804         char *buf;
805         size_t n;
806
807         if (lseek(pack_fd, 0, SEEK_SET) != 0)
808                 die("Failed seeking to start: %s", strerror(errno));
809
810         SHA1_Init(&c);
811         yread(pack_fd, hdr, 8);
812         SHA1_Update(&c, hdr, 8);
813
814         cnt = htonl(object_count);
815         SHA1_Update(&c, &cnt, 4);
816         ywrite(pack_fd, &cnt, 4);
817
818         buf = xmalloc(128 * 1024);
819         for (;;) {
820                 n = xread(pack_fd, buf, 128 * 1024);
821                 if (n <= 0)
822                         break;
823                 SHA1_Update(&c, buf, n);
824         }
825         free(buf);
826
827         SHA1_Final(pack_sha1, &c);
828         ywrite(pack_fd, pack_sha1, sizeof(pack_sha1));
829 }
830
831 static int oecmp (const void *_a, const void *_b)
832 {
833         struct object_entry *a = *((struct object_entry**)_a);
834         struct object_entry *b = *((struct object_entry**)_b);
835         return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
836 }
837
838 static void write_index(const char *idx_name)
839 {
840         struct sha1file *f;
841         struct object_entry **idx, **c, **last;
842         struct object_entry *e;
843         struct object_entry_pool *o;
844         unsigned int array[256];
845         int i;
846
847         /* Build the sorted table of object IDs. */
848         idx = xmalloc(object_count * sizeof(struct object_entry*));
849         c = idx;
850         for (o = blocks; o; o = o->next_pool)
851                 for (e = o->entries; e != o->next_free; e++)
852                         *c++ = e;
853         last = idx + object_count;
854         qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
855
856         /* Generate the fan-out array. */
857         c = idx;
858         for (i = 0; i < 256; i++) {
859                 struct object_entry **next = c;;
860                 while (next < last) {
861                         if ((*next)->sha1[0] != i)
862                                 break;
863                         next++;
864                 }
865                 array[i] = htonl(next - idx);
866                 c = next;
867         }
868
869         f = sha1create("%s", idx_name);
870         sha1write(f, array, 256 * sizeof(int));
871         for (c = idx; c != last; c++) {
872                 unsigned int offset = htonl((*c)->offset);
873                 sha1write(f, &offset, 4);
874                 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
875         }
876         sha1write(f, pack_sha1, sizeof(pack_sha1));
877         sha1close(f, NULL, 1);
878         free(idx);
879 }
880
881 static void dump_branches()
882 {
883         static const char *msg = "fast-import";
884         unsigned int i;
885         struct branch *b;
886         struct ref_lock *lock;
887
888         for (i = 0; i < branch_table_sz; i++) {
889                 for (b = branch_table[i]; b; b = b->table_next_branch) {
890                         lock = lock_any_ref_for_update(b->name, NULL, 0);
891                         if (!lock || write_ref_sha1(lock, b->sha1, msg) < 0)
892                                 die("Can't write %s", b->name);
893                 }
894         }
895 }
896
897 static void cmd_new_blob()
898 {
899         unsigned long datlen;
900         unsigned char sha1[20];
901         void *dat;
902
903         yread(0, &datlen, 4);
904         dat = xmalloc(datlen);
905         yread(0, dat, datlen);
906         if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1))
907                 free(dat);
908 }
909
910 static void unload_one_branch()
911 {
912         while (cur_active_branches >= max_active_branches) {
913                 unsigned long min_commit = ULONG_MAX;
914                 struct branch *e, *l = NULL, *p = NULL;
915
916                 for (e = active_branches; e; e = e->active_next_branch) {
917                         if (e->last_commit < min_commit) {
918                                 p = l;
919                                 min_commit = e->last_commit;
920                         }
921                         l = e;
922                 }
923
924                 if (p) {
925                         e = p->active_next_branch;
926                         p->active_next_branch = e->active_next_branch;
927                 } else {
928                         e = active_branches;
929                         active_branches = e->active_next_branch;
930                 }
931                 e->active_next_branch = NULL;
932                 if (e->branch_tree.tree) {
933                         release_tree_content(e->branch_tree.tree);
934                         e->branch_tree.tree = NULL;
935                 }
936                 cur_active_branches--;
937         }
938 }
939
940 static void load_branch(struct branch *b)
941 {
942         load_tree(&b->branch_tree);
943         b->active_next_branch = active_branches;
944         active_branches = b;
945         cur_active_branches++;
946 }
947
948 static void file_change_m(struct branch *b)
949 {
950         const char *path = read_path();
951         char hexsha1[41];
952         unsigned char sha1[20];
953
954         yread(0, hexsha1, 40);
955         hexsha1[40] = 0;
956
957         if (get_sha1_hex(hexsha1, sha1))
958                 die("Invalid sha1 %s for %s", hexsha1, path);
959
960         tree_content_set(&b->branch_tree, path, sha1, 0100644);
961 }
962
963 static void file_change_d(struct branch *b)
964 {
965         tree_content_remove(&b->branch_tree, read_path());
966 }
967
968 static void cmd_new_commit()
969 {
970         static const unsigned int max_hdr_len = 94;
971         const char *name = read_path();
972         struct branch *b = lookup_branch(name);
973         unsigned int acmsglen;
974         char *body, *c;
975
976         if (!b)
977                 die("Branch not declared: %s", name);
978         if (!b->branch_tree.tree) {
979                 unload_one_branch();
980                 load_branch(b);
981         }
982
983         /* author_committer_msg */
984         yread(0, &acmsglen, 4);
985         body = xmalloc(acmsglen + max_hdr_len);
986         c = body + max_hdr_len;
987         yread(0, c, acmsglen);
988
989         /* file_change* */
990         for (;;) {
991                 unsigned char cmd;
992                 yread(0, &cmd, 1);
993                 if (cmd == '0')
994                         break;
995                 else if (cmd == 'M')
996                         file_change_m(b);
997                 else if (cmd == 'D')
998                         file_change_d(b);
999                 else
1000                         die("Unsupported file_change: %c", cmd);
1001         }
1002
1003         if (memcmp(b->sha1, null_sha1, 20)) {
1004                 sprintf(c - 48, "parent %s", sha1_to_hex(b->sha1));
1005                 *(c - 1) = '\n';
1006                 c -= 48;
1007         }
1008         store_tree(&b->branch_tree);
1009         sprintf(c - 46, "tree %s", sha1_to_hex(b->branch_tree.sha1));
1010         *(c - 1) = '\n';
1011         c -= 46;
1012
1013         store_object(OBJ_COMMIT,
1014                 c, (body + max_hdr_len + acmsglen) - c,
1015                 NULL, b->sha1);
1016         free(body);
1017         b->last_commit = object_count_by_type[OBJ_COMMIT];
1018 }
1019
1020 static void cmd_new_branch()
1021 {
1022         struct branch *b = new_branch(read_path());
1023         const char *base = read_path();
1024         struct branch *s = lookup_branch(base);
1025
1026         if (!strcmp(b->name, base))
1027                 die("Can't create a branch from itself: %s", base);
1028         else if (s) {
1029                 memcpy(b->sha1, s->sha1, 20);
1030                 memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
1031         }
1032         else if (!get_sha1(base, b->sha1)) {
1033                 if (!memcmp(b->sha1, null_sha1, 20))
1034                         memcpy(b->branch_tree.sha1, null_sha1, 20);
1035                 else {
1036                         unsigned long size;
1037                         char *buf;
1038
1039                         buf = read_object_with_reference(b->sha1,
1040                                 type_names[OBJ_COMMIT], &size, b->sha1);
1041                         if (!buf || size < 46)
1042                                 die("Not a valid commit: %s", base);
1043                         if (memcmp("tree ", buf, 5)
1044                                 || get_sha1_hex(buf + 5, b->branch_tree.sha1))
1045                                 die("The commit %s is corrupt", sha1_to_hex(b->sha1));
1046                         free(buf);
1047                 }
1048         } else
1049                 die("Not a SHA1 or branch: %s", base);
1050 }
1051
1052 int main(int argc, const char **argv)
1053 {
1054         const char *base_name = argv[1];
1055         int est_obj_cnt = atoi(argv[2]);
1056         char *pack_name;
1057         char *idx_name;
1058         struct stat sb;
1059
1060         setup_ident();
1061         git_config(git_default_config);
1062
1063         pack_name = xmalloc(strlen(base_name) + 6);
1064         sprintf(pack_name, "%s.pack", base_name);
1065         idx_name = xmalloc(strlen(base_name) + 5);
1066         sprintf(idx_name, "%s.idx", base_name);
1067
1068         pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
1069         if (pack_fd < 0)
1070                 die("Can't create %s: %s", pack_name, strerror(errno));
1071
1072         alloc_objects(est_obj_cnt);
1073
1074         atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
1075         branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
1076         avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
1077
1078         init_pack_header();
1079         for (;;) {
1080                 unsigned long cmd;
1081                 if (optional_read(0, &cmd, 4))
1082                         break;
1083
1084                 switch (ntohl(cmd)) {
1085                 case 'blob': cmd_new_blob();   break;
1086                 case 'comt': cmd_new_commit(); break;
1087                 case 'brch': cmd_new_branch(); break;
1088                 default:
1089                         die("Invalid command %lu", cmd);
1090                 }
1091         }
1092         fixup_header_footer();
1093         close(pack_fd);
1094         write_index(idx_name);
1095         dump_branches();
1096
1097         fprintf(stderr, "%s statistics:\n", argv[0]);
1098         fprintf(stderr, "---------------------------------------------------\n");
1099         fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
1100         fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
1101         fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
1102         fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
1103         fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
1104         fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
1105         fprintf(stderr, "Total branches:  %10lu\n", branch_count);
1106         fprintf(stderr, "Total atoms:     %10u\n", atom_cnt);
1107         fprintf(stderr, "Memory pools:    %10lu MiB\n", total_allocd/(1024*1024));
1108         fprintf(stderr, "---------------------------------------------------\n");
1109
1110         stat(pack_name, &sb);
1111         fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
1112         stat(idx_name, &sb);
1113         fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
1114
1115         fprintf(stderr, "\n");
1116
1117         return 0;
1118 }