git-cherry: document limit and add diagram
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10
11 #include <sys/time.h>
12
13 static int dry_run, quiet, recover, has_errors;
14 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
15
16 /* We always read in 4kB chunks. */
17 static unsigned char buffer[4096];
18 static unsigned long offset, len;
19 static SHA_CTX ctx;
20
21 /*
22  * Make sure at least "min" bytes are available in the buffer, and
23  * return the pointer to the buffer.
24  */
25 static void * fill(int min)
26 {
27         if (min <= len)
28                 return buffer + offset;
29         if (min > sizeof(buffer))
30                 die("cannot fill %d bytes", min);
31         if (offset) {
32                 SHA1_Update(&ctx, buffer, offset);
33                 memcpy(buffer, buffer + offset, len);
34                 offset = 0;
35         }
36         do {
37                 int ret = xread(0, buffer + len, sizeof(buffer) - len);
38                 if (ret <= 0) {
39                         if (!ret)
40                                 die("early EOF");
41                         die("read error on input: %s", strerror(errno));
42                 }
43                 len += ret;
44         } while (len < min);
45         return buffer;
46 }
47
48 static void use(int bytes)
49 {
50         if (bytes > len)
51                 die("used more bytes than were available");
52         len -= bytes;
53         offset += bytes;
54 }
55
56 static void *get_data(unsigned long size)
57 {
58         z_stream stream;
59         void *buf = xmalloc(size);
60
61         memset(&stream, 0, sizeof(stream));
62
63         stream.next_out = buf;
64         stream.avail_out = size;
65         stream.next_in = fill(1);
66         stream.avail_in = len;
67         inflateInit(&stream);
68
69         for (;;) {
70                 int ret = inflate(&stream, 0);
71                 use(len - stream.avail_in);
72                 if (stream.total_out == size && ret == Z_STREAM_END)
73                         break;
74                 if (ret != Z_OK) {
75                         error("inflate returned %d\n", ret);
76                         free(buf);
77                         buf = NULL;
78                         if (!recover)
79                                 exit(1);
80                         has_errors = 1;
81                         break;
82                 }
83                 stream.next_in = fill(1);
84                 stream.avail_in = len;
85         }
86         inflateEnd(&stream);
87         return buf;
88 }
89
/*
 * A delta whose base object was not available when the delta was read.
 * Entries are kept on a singly linked list until an object with a
 * matching SHA-1 gets written.
 */
struct delta_info {
	unsigned char base_sha1[20];	/* SHA-1 of the base this delta needs */
	unsigned long size;		/* number of bytes in "delta" */
	void *delta;			/* inflated delta data, owned by the entry */
	struct delta_info *next;	/* next pending delta, or NULL */
};

/* Head of the list of deltas still waiting for their base. */
static struct delta_info *delta_list;
98
99 static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size)
100 {
101         struct delta_info *info = xmalloc(sizeof(*info));
102
103         hashcpy(info->base_sha1, base_sha1);
104         info->size = size;
105         info->delta = delta;
106         info->next = delta_list;
107         delta_list = info;
108 }
109
/* Defined further down; write_object() and added_object() call each other. */
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size);

/*
 * Store "buf" ("size" bytes) as an object of the given type, then let
 * added_object() resolve any pending deltas that were waiting for it.
 * Dies if the object cannot be written.
 */
static void write_object(void *buf, unsigned long size, const char *type)
{
	unsigned char obj_sha1[20];
	int rc = write_sha1_file(buf, size, type, obj_sha1);

	if (rc < 0)
		die("failed to write object");
	added_object(obj_sha1, type, buf, size);
}
119
/*
 * Apply "delta" to "base" and write the result out as an object of the
 * given type.  The delta buffer is freed here; the base buffer is left
 * for the caller.  Dies if the delta does not apply.
 */
static void resolve_delta(const char *type,
			  void *base, unsigned long base_size,
			  void *delta, unsigned long delta_size)
{
	unsigned long patched_size;
	void *patched = patch_delta(base, base_size,
				    delta, delta_size,
				    &patched_size);

	if (!patched)
		die("failed to apply delta");
	free(delta);
	write_object(patched, patched_size, type);
	free(patched);
}
136
137 static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size)
138 {
139         struct delta_info **p = &delta_list;
140         struct delta_info *info;
141
142         while ((info = *p) != NULL) {
143                 if (!hashcmp(info->base_sha1, sha1)) {
144                         *p = info->next;
145                         p = &delta_list;
146                         resolve_delta(type, data, size, info->delta, info->size);
147                         free(info);
148                         continue;
149                 }
150                 p = &info->next;
151         }
152 }
153
154 static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
155 {
156         void *buf = get_data(size);
157         const char *type;
158
159         switch (kind) {
160         case OBJ_COMMIT: type = commit_type; break;
161         case OBJ_TREE:   type = tree_type; break;
162         case OBJ_BLOB:   type = blob_type; break;
163         case OBJ_TAG:    type = tag_type; break;
164         default: die("bad type %d", kind);
165         }
166         if (!dry_run && buf)
167                 write_object(buf, size, type);
168         free(buf);
169 }
170
171 static void unpack_delta_entry(unsigned long delta_size)
172 {
173         void *delta_data, *base;
174         unsigned long base_size;
175         char type[20];
176         unsigned char base_sha1[20];
177
178         hashcpy(base_sha1, fill(20));
179         use(20);
180
181         delta_data = get_data(delta_size);
182         if (dry_run || !delta_data) {
183                 free(delta_data);
184                 return;
185         }
186
187         if (!has_sha1_file(base_sha1)) {
188                 add_delta_to_list(base_sha1, delta_data, delta_size);
189                 return;
190         }
191         base = read_sha1_file(base_sha1, type, &base_size);
192         if (!base) {
193                 error("failed to read delta-pack base object %s",
194                       sha1_to_hex(base_sha1));
195                 if (!recover)
196                         exit(1);
197                 has_errors = 1;
198                 return;
199         }
200         resolve_delta(type, base, base_size, delta_data, delta_size);
201         free(base);
202 }
203
204 static void unpack_one(unsigned nr, unsigned total)
205 {
206         unsigned shift;
207         unsigned char *pack, c;
208         unsigned long size;
209         enum object_type type;
210
211         pack = fill(1);
212         c = *pack;
213         use(1);
214         type = (c >> 4) & 7;
215         size = (c & 15);
216         shift = 4;
217         while (c & 0x80) {
218                 pack = fill(1);
219                 c = *pack++;
220                 use(1);
221                 size += (c & 0x7f) << shift;
222                 shift += 7;
223         }
224         if (!quiet) {
225                 static unsigned long last_sec;
226                 static unsigned last_percent;
227                 struct timeval now;
228                 unsigned percentage = (nr * 100) / total;
229
230                 gettimeofday(&now, NULL);
231                 if (percentage != last_percent || now.tv_sec != last_sec) {
232                         last_sec = now.tv_sec;
233                         last_percent = percentage;
234                         fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total);
235                 }
236         }
237         switch (type) {
238         case OBJ_COMMIT:
239         case OBJ_TREE:
240         case OBJ_BLOB:
241         case OBJ_TAG:
242                 unpack_non_delta_entry(type, size);
243                 return;
244         case OBJ_DELTA:
245                 unpack_delta_entry(size);
246                 return;
247         default:
248                 error("bad object type %d", type);
249                 has_errors = 1;
250                 if (recover)
251                         return;
252                 exit(1);
253         }
254 }
255
256 static void unpack_all(void)
257 {
258         int i;
259         struct pack_header *hdr = fill(sizeof(struct pack_header));
260         unsigned nr_objects = ntohl(hdr->hdr_entries);
261
262         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
263                 die("bad pack file");
264         if (!pack_version_ok(hdr->hdr_version))
265                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
266         fprintf(stderr, "Unpacking %d objects\n", nr_objects);
267
268         use(sizeof(struct pack_header));
269         for (i = 0; i < nr_objects; i++)
270                 unpack_one(i+1, nr_objects);
271         if (delta_list)
272                 die("unresolved deltas left after unpacking");
273 }
274
275 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
276 {
277         int i;
278         unsigned char sha1[20];
279
280         git_config(git_default_config);
281
282         quiet = !isatty(2);
283
284         for (i = 1 ; i < argc; i++) {
285                 const char *arg = argv[i];
286
287                 if (*arg == '-') {
288                         if (!strcmp(arg, "-n")) {
289                                 dry_run = 1;
290                                 continue;
291                         }
292                         if (!strcmp(arg, "-q")) {
293                                 quiet = 1;
294                                 continue;
295                         }
296                         if (!strcmp(arg, "-r")) {
297                                 recover = 1;
298                                 continue;
299                         }
300                         usage(unpack_usage);
301                 }
302
303                 /* We don't take any non-flag arguments now.. Maybe some day */
304                 usage(unpack_usage);
305         }
306         SHA1_Init(&ctx);
307         unpack_all();
308         SHA1_Update(&ctx, buffer, offset);
309         SHA1_Final(sha1, &ctx);
310         if (hashcmp(fill(20), sha1))
311                 die("final sha1 did not match");
312         use(20);
313
314         /* Write the last part of the buffer to stdout */
315         while (len) {
316                 int ret = xwrite(1, buffer + offset, len);
317                 if (ret <= 0)
318                         break;
319                 len -= ret;
320                 offset += ret;
321         }
322
323         /* All done */
324         if (!quiet)
325                 fprintf(stderr, "\n");
326         return has_errors;
327 }