[PATCH] create tar archives of tree on the fly
[git] / tar-tree.c
1 #include <time.h>
2 #include "cache.h"
3
/*
 * Output geometry: the archive is made of RECORDSIZE-byte records, and
 * data is handed to write(2) in blocks of 20 records.
 */
enum {
        RECORDSIZE = 512,
        BLOCKSIZE = RECORDSIZE * 20
};

/* synopsis handed to usage() when the command line is not understood */
static const char tar_tree_usage[] = "tar-tree <key> [basedir]";

/* staging area for one output block and the number of bytes queued in it */
static char block[BLOCKSIZE];
static unsigned long offset;

/* optional leading directory for all archive members (argv[2]), or NULL */
static const char *basedir;
/* timestamp written into every header */
static time_t archive_time;
14
/*
 * One directory component on the way down to the tree currently being
 * traversed.  The components form a singly linked list that runs from
 * the innermost directory back towards the top; the outermost component
 * has a NULL `prev`.
 */
struct path_prefix {
        struct path_prefix *prev;       /* enclosing directory, NULL at the top */
        const char *name;               /* name of this directory, without slash */
};
19
/*
 * Tries hard to write `size` bytes starting at `buf` to standard output
 * (fd 1); either succeeds or dies in the attempt.
 *
 * Short writes are continued where they stopped.  EAGAIN and EINTR are
 * treated as transient and simply retried.  EPIPE means the reader has
 * gone away, so the program exits quietly with status 0.  Any other
 * error, or a write that makes no progress at all, is fatal.
 */
static void reliable_write(void *buf, unsigned long size)
{
        /* byte cursor; arithmetic on void pointers is not standard C */
        const char *cursor = buf;

        while (size > 0) {
                long ret = write(1, cursor, size);
                if (ret < 0) {
                        /* an interrupting signal is no reason to give up */
                        if (errno == EAGAIN || errno == EINTR)
                                continue;
                        if (errno == EPIPE)
                                exit(0);
                        die("tar-tree: %s", strerror(errno));
                } else if (!ret) {
                        die("tar-tree: disk full?");
                }
                size -= ret;
                cursor += ret;
        }
}
38
39 /* writes out the whole block, but only if it is full */
40 static void write_if_needed(void)
41 {
42         if (offset == BLOCKSIZE) {
43                 reliable_write(block, BLOCKSIZE);
44                 offset = 0;
45         }
46 }
47
48 /*
49  * The end of tar archives is marked by 1024 nul bytes and after that
50  * follows the rest of the block (if any).
51  */
52 static void write_trailer(void)
53 {
54         memset(block + offset, 0, RECORDSIZE);
55         offset += RECORDSIZE;
56         write_if_needed();
57         memset(block + offset, 0, RECORDSIZE);
58         offset += RECORDSIZE;
59         write_if_needed();
60         if (offset) {
61                 memset(block + offset, 0, BLOCKSIZE - offset);
62                 reliable_write(block, BLOCKSIZE);
63                 offset = 0;
64         }
65 }
66
67 /*
68  * queues up writes, so that all our write(2) calls write exactly one
69  * full block; pads writes to RECORDSIZE
70  */
71 static void write_blocked(void *buf, unsigned long size)
72 {
73         unsigned long tail;
74
75         if (offset) {
76                 unsigned long chunk = BLOCKSIZE - offset;
77                 if (size < chunk)
78                         chunk = size;
79                 memcpy(block + offset, buf, chunk);
80                 size -= chunk;
81                 offset += chunk;
82                 buf += chunk;
83                 write_if_needed();
84         }
85         while (size >= BLOCKSIZE) {
86                 reliable_write(buf, BLOCKSIZE);
87                 size -= BLOCKSIZE;
88                 buf += BLOCKSIZE;
89         }
90         if (size) {
91                 memcpy(block + offset, buf, size);
92                 buf += size;
93                 offset += size;
94         }
95         tail = offset % RECORDSIZE;
96         if (tail)  {
97                 memset(block + offset, 0, RECORDSIZE - tail);
98                 offset += RECORDSIZE - tail;
99         }
100         write_if_needed();
101 }
102
/*
 * Copies the characters of `s` (without the terminating nul) to *p and
 * advances *p past them.  The caller guarantees there is enough room.
 */
static void append_string(char **p, const char *s)
{
        /* size_t, so the length is never truncated on its way to memcpy */
        size_t len = strlen(s);

        memcpy(*p, s, len);
        *p += len;
}
109
/* Stores the single character `c` at *p and moves *p one byte forward. */
static void append_char(char **p, char c)
{
        *(*p)++ = c;
}
115
/*
 * Formats `n` as a decimal number at *p and advances *p past the digits.
 * sprintf() also stores a terminating nul, which *p is left pointing at.
 */
static void append_long(char **p, long n)
{
        *p += sprintf(*p, "%ld", n);
}
121
122 static void append_path_prefix(char **buffer, struct path_prefix *prefix)
123 {
124         if (!prefix)
125                 return;
126         append_path_prefix(buffer, prefix->prev);
127         append_string(buffer, prefix->name);
128         append_char(buffer, '/');
129 }
130
131 static unsigned int path_prefix_len(struct path_prefix *prefix)
132 {
133         if (!prefix)
134                 return 0;
135         return path_prefix_len(prefix->prev) + strlen(prefix->name) + 1;
136 }
137
/*
 * Appends the full name of an archive member to *p and advances *p:
 * "<basepath>/" if basepath is not NULL, then the directories of
 * `prefix`, then `path`, and a trailing slash for directories.
 * No terminating nul is written.
 */
static void append_path(char **p, int is_dir, const char *basepath,
                        struct path_prefix *prefix, const char *path)
{
        char *out = *p;

        if (basepath) {
                append_string(&out, basepath);
                append_char(&out, '/');
        }

        append_path_prefix(&out, prefix);
        append_string(&out, path);

        if (is_dir)
                append_char(&out, '/');

        *p = out;
}
150
/*
 * Returns the number of bytes append_path() writes for the same
 * arguments.
 */
static unsigned int path_len(int is_dir, const char *basepath,
                             struct path_prefix *prefix, const char *path)
{
        unsigned int total = path_prefix_len(prefix) + strlen(path);

        /* the base directory is followed by a slash */
        if (basepath)
                total += strlen(basepath) + 1;
        /* and so is the name of a directory */
        if (is_dir)
                total++;

        return total;
}
162
163 static void write_header(const char *, const char *, struct path_prefix *,
164                          const char *, unsigned int, unsigned long);
165
166 /* stores a pax extended header directly in the block buffer */
167 static void write_extended_header(const char *headerfilename, int is_dir,
168                                   const char *basepath,
169                                   struct path_prefix *prefix,
170                                   const char *path, unsigned int namelen)
171 {
172         char *records, *p;
173         unsigned int size = 1 + 6 + namelen + 1;
174         if (size > 9)
175                 size++;
176         if (size > 99)
177                 size++;
178         if (size > RECORDSIZE)
179                 die("tar-tree: extended header too big, wtf?");
180         write_header(NULL, NULL, NULL, headerfilename, 0100600, size);
181
182         records = block + offset;
183         memset(records, 0, RECORDSIZE);
184         offset += RECORDSIZE;
185         p = records;
186         append_long(&p, size);
187         append_string(&p, " path=");
188         append_path(&p, is_dir, basepath, prefix, path);
189         append_char(&p, '\n');
190         write_if_needed();
191 }
192
193 /* stores a ustar header directly in the block buffer */
194 static void write_header(const char *sha1, const char *basepath,
195                          struct path_prefix *prefix, const char *path,
196                          unsigned int mode, unsigned long size)
197 {
198         unsigned int namelen; 
199         char *p, *header = NULL;
200         unsigned int checksum = 0;
201         int i;
202
203         namelen = path_len(S_ISDIR(mode), basepath, prefix, path);
204         if (namelen > 500) {
205                 die("tar-tree: name too log of object %s\n", sha1_to_hex(sha1));
206         } else if (namelen > 100) {
207                 char *sha1_hex = sha1_to_hex(sha1);
208                 char headerfilename[51];
209                 sprintf(headerfilename, "%s.paxheader", sha1_hex);
210                 /* the extended header must be written before the normal one */
211                 write_extended_header(headerfilename, S_ISDIR(mode), basepath,
212                                       prefix, path, namelen);
213
214                 header = block + offset;
215                 memset(header, 0, RECORDSIZE);
216                 offset += RECORDSIZE;
217                 sprintf(header, "%s.data", sha1_hex);
218         } else {
219                 header = block + offset;
220                 memset(header, 0, RECORDSIZE);
221                 offset += RECORDSIZE;
222                 p = header;
223                 append_path(&p, S_ISDIR(mode), basepath, prefix, path);
224         }
225
226         if (S_ISDIR(mode))
227                 mode |= 0755;   /* GIT doesn't store permissions of dirs */
228         sprintf(&header[100], "%07o", mode & 07777);
229
230         /* XXX: should we provide more meaningful info here? */
231         sprintf(&header[108], "%07o", 0);       /* uid */
232         sprintf(&header[116], "%07o", 0);       /* gid */
233         strncpy(&header[265], "git", 31);       /* uname */
234         strncpy(&header[297], "git", 31);       /* gname */
235
236         sprintf(&header[124], "%011lo", S_ISDIR(mode) ? 0 : size);
237         sprintf(&header[136], "%011lo", archive_time);
238
239         /* typeflag */
240         if (!sha1)
241                 header[156] = 'x';      /* extended header */
242         else
243                 header[156] = S_ISDIR(mode) ? '5' : '0';
244
245         memcpy(&header[257], "ustar", 6);
246         memcpy(&header[263], "00", 2);
247
248         printf(&header[329], "%07o", 0);        /* devmajor */
249         printf(&header[337], "%07o", 0);        /* devminor */
250
251         memset(&header[148], ' ', 8);
252         for (i = 0; i < RECORDSIZE; i++)
253                 checksum += header[i];
254         sprintf(&header[148], "%07o", checksum & 0x1fffff);
255
256         write_if_needed();
257 }
258
259 static void traverse_tree(void *buffer, unsigned long size,
260                           struct path_prefix *prefix)
261 {
262         struct path_prefix this_prefix;
263         this_prefix.prev = prefix;
264
265         while (size) {
266                 int namelen = strlen(buffer)+1;
267                 void *eltbuf;
268                 char elttype[20];
269                 unsigned long eltsize;
270                 unsigned char *sha1 = buffer + namelen;
271                 char *path = strchr(buffer, ' ') + 1;
272                 unsigned int mode;
273
274                 if (size < namelen + 20 || sscanf(buffer, "%o", &mode) != 1)
275                         die("corrupt 'tree' file");
276                 buffer = sha1 + 20;
277                 size -= namelen + 20;
278
279                 eltbuf = read_sha1_file(sha1, elttype, &eltsize);
280                 if (!eltbuf)
281                         die("cannot read %s", sha1_to_hex(sha1));
282                 write_header(sha1, basedir, prefix, path, mode, eltsize);
283                 if (!strcmp(elttype, "tree")) {
284                         this_prefix.name = path;
285                         traverse_tree(eltbuf, eltsize, &this_prefix);
286                 } else if (!strcmp(elttype, "blob")) {
287                         write_blocked(eltbuf, eltsize);
288                 }
289                 free(eltbuf);
290         }
291 }
292
/*
 * Gets the commit time from the committer line of a commit object.
 *
 * Reads the object `sha1` and scans it line by line up to the first
 * empty line.  The time is the decimal number after the last '>' of the
 * line starting with "committer "; it has to be followed by a blank.
 *
 * Returns 0 if the object cannot be read or no such time is found.
 */
time_t commit_time(const unsigned char *sha1)
{
        char type[20];
        void *buffer;
        unsigned long size;
        time_t result = 0;
        char *p;

        buffer = read_sha1_file(sha1, type, &size);
        if (!buffer)
                return 0;

        p = buffer;
        while (size > 0) {
                char *endp = memchr(p, '\n', size);
                if (!endp || endp == p)
                        break;
                /* terminate the line so the string functions stop there */
                *endp = '\0';
                if (endp - p > 10 && !memcmp(p, "committer ", 10)) {
                        char *nump = strrchr(p, '>');
                        if (!nump)
                                break;
                        nump++;
                        result = strtoul(nump, &endp, 10);
                        if (*endp != ' ')
                                result = 0;
                        break;
                }
                /* the line and its newline have been consumed */
                size -= endp - p + 1;
                p = endp + 1;
        }
        free(buffer);
        return result;
}
326
327 int main(int argc, char **argv)
328 {
329         unsigned char sha1[20];
330         void *buffer;
331         unsigned long size;
332         unsigned char tree_sha1[20];
333
334         switch (argc) {
335         case 3:
336                 basedir = argv[2];
337                 /* FALLTHROUGH */
338         case 2:
339                 if (get_sha1_hex(argv[1], sha1) < 0)
340                         usage(tar_tree_usage);
341                 break;
342         default:
343                 usage(tar_tree_usage);
344         }
345
346         sha1_file_directory = getenv(DB_ENVIRONMENT);
347         if (!sha1_file_directory)
348                 sha1_file_directory = DEFAULT_DB_ENVIRONMENT;
349
350         buffer = read_tree_with_tree_or_commit_sha1(sha1, &size, tree_sha1);
351         if (!buffer)
352                 die("unable to read sha1 file");
353         if (memcmp(sha1, tree_sha1, 20))        /* is sha1 a commit object? */
354                 archive_time = commit_time(sha1);
355         if (!archive_time)
356                 archive_time = time(NULL);
357         if (basedir)
358                 write_header("0", NULL, NULL, basedir, 040755, 0);
359         traverse_tree(buffer, size, NULL);
360         free(buffer);
361         write_trailer();
362         return 0;
363 }