Added basic command handler to fast-import.
[git] / fast-import.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "blob.h"
5 #include "delta.h"
6 #include "pack.h"
7 #include "csum-file.h"
8
9 struct object_entry
10 {
11         struct object_entry *next;
12         unsigned long offset;
13         unsigned char sha1[20];
14 };
15
16 struct object_entry_block
17 {
18         struct object_entry_block *next_block;
19         struct object_entry *next_free;
20         struct object_entry *end;
21         struct object_entry entries[FLEX_ARRAY]; /* more */
22 };
23
24 struct last_object
25 {
26         void *data;
27         unsigned long len;
28         int depth;
29         unsigned char sha1[20];
30 };
31
32 /* Stats and misc. counters. */
33 static int max_depth = 10;
34 static unsigned long alloc_count;
35 static unsigned long object_count;
36 static unsigned long duplicate_count;
37 static unsigned long object_count_by_type[9];
38 static unsigned long duplicate_count_by_type[9];
39
40 /* The .pack file */
41 static int pack_fd;
42 static unsigned long pack_offset;
43 static unsigned char pack_sha1[20];
44
45 /* Table of objects we've written. */
46 struct object_entry_block *blocks;
47 struct object_entry *object_table[1 << 16];
48
49 /* Our last blob */
50 struct last_object last_blob;
51
52 static void alloc_objects(int cnt)
53 {
54         struct object_entry_block *b;
55
56         b = xmalloc(sizeof(struct object_entry_block)
57                 + cnt * sizeof(struct object_entry));
58         b->next_block = blocks;
59         b->next_free = b->entries;
60         b->end = b->entries + cnt;
61         blocks = b;
62         alloc_count += cnt;
63 }
64
65 static struct object_entry* new_object(unsigned char *sha1)
66 {
67         struct object_entry *e;
68
69         if (blocks->next_free == blocks->end)
70                 alloc_objects(1000);
71
72         e = blocks->next_free++;
73         memcpy(e->sha1, sha1, sizeof(e->sha1));
74         return e;
75 }
76
77 static struct object_entry* insert_object(unsigned char *sha1)
78 {
79         unsigned int h = sha1[0] << 8 | sha1[1];
80         struct object_entry *e = object_table[h];
81         struct object_entry *p = 0;
82
83         while (e) {
84                 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
85                         return e;
86                 p = e;
87                 e = e->next;
88         }
89
90         e = new_object(sha1);
91         e->next = 0;
92         e->offset = 0;
93         if (p)
94                 p->next = e;
95         else
96                 object_table[h] = e;
97         return e;
98 }
99
100 static ssize_t yread(int fd, void *buffer, size_t length)
101 {
102         ssize_t ret = 0;
103         while (ret < length) {
104                 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
105                 if (size < 0) {
106                         return size;
107                 }
108                 if (size == 0) {
109                         return ret;
110                 }
111                 ret += size;
112         }
113         return ret;
114 }
115
116 static ssize_t ywrite(int fd, void *buffer, size_t length)
117 {
118         ssize_t ret = 0;
119         while (ret < length) {
120                 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
121                 if (size < 0) {
122                         return size;
123                 }
124                 if (size == 0) {
125                         return ret;
126                 }
127                 ret += size;
128         }
129         return ret;
130 }
131
132 static unsigned long encode_header(
133         enum object_type type,
134         unsigned long size,
135         unsigned char *hdr)
136 {
137         int n = 1;
138         unsigned char c;
139
140         if (type < OBJ_COMMIT || type > OBJ_DELTA)
141                 die("bad type %d", type);
142
143         c = (type << 4) | (size & 15);
144         size >>= 4;
145         while (size) {
146                 *hdr++ = c | 0x80;
147                 c = size & 0x7f;
148                 size >>= 7;
149                 n++;
150         }
151         *hdr = c;
152         return n;
153 }
154
155 static int store_object(
156         enum object_type type,
157         void *dat,
158         unsigned long datlen,
159         struct last_object *last)
160 {
161         void *out, *delta;
162         struct object_entry *e;
163         unsigned char hdr[96];
164         unsigned char sha1[20];
165         unsigned long hdrlen, deltalen;
166         SHA_CTX c;
167         z_stream s;
168
169         hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
170         SHA1_Init(&c);
171         SHA1_Update(&c, hdr, hdrlen);
172         SHA1_Update(&c, dat, datlen);
173         SHA1_Final(sha1, &c);
174
175         e = insert_object(sha1);
176         if (e->offset) {
177                 duplicate_count++;
178                 duplicate_count_by_type[type]++;
179                 return 0;
180         }
181         e->offset = pack_offset;
182         object_count++;
183         object_count_by_type[type]++;
184
185         if (last->data && last->depth < max_depth)
186                 delta = diff_delta(last->data, last->len,
187                         dat, datlen,
188                         &deltalen, 0);
189         else
190                 delta = 0;
191
192         memset(&s, 0, sizeof(s));
193         deflateInit(&s, zlib_compression_level);
194
195         if (delta) {
196                 last->depth++;
197                 s.next_in = delta;
198                 s.avail_in = deltalen;
199                 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
200                 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
201                         die("Can't write object header: %s", strerror(errno));
202                 if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
203                         die("Can't write object base: %s", strerror(errno));
204                 pack_offset += hdrlen + sizeof(sha1);
205         } else {
206                 last->depth = 0;
207                 s.next_in = dat;
208                 s.avail_in = datlen;
209                 hdrlen = encode_header(type, datlen, hdr);
210                 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
211                         die("Can't write object header: %s", strerror(errno));
212                 pack_offset += hdrlen;
213         }
214
215         s.avail_out = deflateBound(&s, s.avail_in);
216         s.next_out = out = xmalloc(s.avail_out);
217         while (deflate(&s, Z_FINISH) == Z_OK)
218                 /* nothing */;
219         deflateEnd(&s);
220
221         if (ywrite(pack_fd, out, s.total_out) != s.total_out)
222                 die("Failed writing compressed data %s", strerror(errno));
223         pack_offset += s.total_out;
224
225         free(out);
226         if (delta)
227                 free(delta);
228         if (last->data)
229                 free(last->data);
230         last->data = dat;
231         last->len = datlen;
232         memcpy(last->sha1, sha1, sizeof(sha1));
233         return 1;
234 }
235
236 static void init_pack_header()
237 {
238         const char* magic = "PACK";
239         unsigned long version = 3;
240         unsigned long zero = 0;
241
242         version = htonl(version);
243
244         if (ywrite(pack_fd, (char*)magic, 4) != 4)
245                 die("Can't write pack magic: %s", strerror(errno));
246         if (ywrite(pack_fd, &version, 4) != 4)
247                 die("Can't write pack version: %s", strerror(errno));
248         if (ywrite(pack_fd, &zero, 4) != 4)
249                 die("Can't write 0 object count: %s", strerror(errno));
250         pack_offset = 4 * 3;
251 }
252
253 static void fixup_header_footer()
254 {
255         SHA_CTX c;
256         char hdr[8];
257         unsigned long cnt;
258         char *buf;
259         size_t n;
260
261         if (lseek(pack_fd, 0, SEEK_SET) != 0)
262                 die("Failed seeking to start: %s", strerror(errno));
263
264         SHA1_Init(&c);
265         if (yread(pack_fd, hdr, 8) != 8)
266                 die("Failed reading header: %s", strerror(errno));
267         SHA1_Update(&c, hdr, 8);
268
269         cnt = htonl(object_count);
270         SHA1_Update(&c, &cnt, 4);
271         if (ywrite(pack_fd, &cnt, 4) != 4)
272                 die("Failed writing object count: %s", strerror(errno));
273
274         buf = xmalloc(128 * 1024);
275         for (;;) {
276                 n = xread(pack_fd, buf, 128 * 1024);
277                 if (n <= 0)
278                         break;
279                 SHA1_Update(&c, buf, n);
280         }
281         free(buf);
282
283         SHA1_Final(pack_sha1, &c);
284         if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
285                 die("Failed writing pack checksum: %s", strerror(errno));
286 }
287
288 static int oecmp (const void *_a, const void *_b)
289 {
290         struct object_entry *a = *((struct object_entry**)_a);
291         struct object_entry *b = *((struct object_entry**)_b);
292         return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
293 }
294
295 static void write_index(const char *idx_name)
296 {
297         struct sha1file *f;
298         struct object_entry **idx, **c, **last;
299         struct object_entry *e;
300         struct object_entry_block *o;
301         unsigned int array[256];
302         int i;
303
304         /* Build the sorted table of object IDs. */
305         idx = xmalloc(object_count * sizeof(struct object_entry*));
306         c = idx;
307         for (o = blocks; o; o = o->next_block)
308                 for (e = o->entries; e != o->next_free; e++)
309                         *c++ = e;
310         last = idx + object_count;
311         qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
312
313         /* Generate the fan-out array. */
314         c = idx;
315         for (i = 0; i < 256; i++) {
316                 struct object_entry **next = c;;
317                 while (next < last) {
318                         if ((*next)->sha1[0] != i)
319                                 break;
320                         next++;
321                 }
322                 array[i] = htonl(next - idx);
323                 c = next;
324         }
325
326         f = sha1create("%s", idx_name);
327         sha1write(f, array, 256 * sizeof(int));
328         for (c = idx; c != last; c++) {
329                 unsigned int offset = htonl((*c)->offset);
330                 sha1write(f, &offset, 4);
331                 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
332         }
333         sha1write(f, pack_sha1, sizeof(pack_sha1));
334         sha1close(f, NULL, 1);
335         free(idx);
336 }
337
338 static void new_blob()
339 {
340         unsigned long datlen;
341         void *dat;
342
343         if (yread(0, &datlen, 4) != 4)
344                 die("Can't obtain blob length");
345
346         dat = xmalloc(datlen);
347         if (yread(0, dat, datlen) != datlen)
348                 die("Con't obtain %lu bytes of blob data", datlen);
349
350         if (!store_object(OBJ_BLOB, dat, datlen, &last_blob))
351                 free(dat);
352 }
353
354 int main(int argc, const char **argv)
355 {
356         const char *base_name = argv[1];
357         int est_obj_cnt = atoi(argv[2]);
358         char *pack_name;
359         char *idx_name;
360         struct stat sb;
361
362         pack_name = xmalloc(strlen(base_name) + 6);
363         sprintf(pack_name, "%s.pack", base_name);
364         idx_name = xmalloc(strlen(base_name) + 5);
365         sprintf(idx_name, "%s.idx", base_name);
366
367         pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
368         if (pack_fd < 0)
369                 die("Can't create %s: %s", pack_name, strerror(errno));
370
371         alloc_objects(est_obj_cnt);
372         init_pack_header();
373         for (;;) {
374                 unsigned long cmd;
375                 if (yread(0, &cmd, 4) != 4)
376                         break;
377
378                 switch (cmd) {
379                 case 'blob': new_blob(); break;
380                 default:
381                         die("Invalid command %lu", cmd);
382                 }
383         }
384         fixup_header_footer();
385         close(pack_fd);
386         write_index(idx_name);
387
388         fprintf(stderr, "%s statistics:\n", argv[0]);
389         fprintf(stderr, "---------------------------------------------------\n");
390         fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
391         fprintf(stderr, "Total objects:   %10lu (%10lu duplicates)\n", object_count, duplicate_count);
392         fprintf(stderr, "      blobs  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
393         fprintf(stderr, "      trees  :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
394         fprintf(stderr, "      commits:   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
395         fprintf(stderr, "      tags   :   %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
396         fprintf(stderr, "---------------------------------------------------\n");
397
398         stat(pack_name, &sb);
399         fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
400         stat(idx_name, &sb);
401         fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
402
403         fprintf(stderr, "\n");
404
405         return 0;
406 }