diff --binary generates full index on binary files.
[git] / builtin-unpack-objects.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "object.h"
4 #include "delta.h"
5 #include "pack.h"
6 #include "blob.h"
7 #include "commit.h"
8 #include "tag.h"
9 #include "tree.h"
10
11 #include <sys/time.h>
12
13 static int dry_run, quiet;
14 static const char unpack_usage[] = "git-unpack-objects [-n] [-q] < pack-file";
15
16 /* We always read in 4kB chunks. */
17 static unsigned char buffer[4096];
18 static unsigned long offset, len;
19 static SHA_CTX ctx;
20
21 /*
22  * Make sure at least "min" bytes are available in the buffer, and
23  * return the pointer to the buffer.
24  */
25 static void * fill(int min)
26 {
27         if (min <= len)
28                 return buffer + offset;
29         if (min > sizeof(buffer))
30                 die("cannot fill %d bytes", min);
31         if (offset) {
32                 SHA1_Update(&ctx, buffer, offset);
33                 memcpy(buffer, buffer + offset, len);
34                 offset = 0;
35         }
36         do {
37                 int ret = xread(0, buffer + len, sizeof(buffer) - len);
38                 if (ret <= 0) {
39                         if (!ret)
40                                 die("early EOF");
41                         die("read error on input: %s", strerror(errno));
42                 }
43                 len += ret;
44         } while (len < min);
45         return buffer;
46 }
47
48 static void use(int bytes)
49 {
50         if (bytes > len)
51                 die("used more bytes than were available");
52         len -= bytes;
53         offset += bytes;
54 }
55
56 static void *get_data(unsigned long size)
57 {
58         z_stream stream;
59         void *buf = xmalloc(size);
60
61         memset(&stream, 0, sizeof(stream));
62
63         stream.next_out = buf;
64         stream.avail_out = size;
65         stream.next_in = fill(1);
66         stream.avail_in = len;
67         inflateInit(&stream);
68
69         for (;;) {
70                 int ret = inflate(&stream, 0);
71                 use(len - stream.avail_in);
72                 if (stream.total_out == size && ret == Z_STREAM_END)
73                         break;
74                 if (ret != Z_OK)
75                         die("inflate returned %d\n", ret);
76                 stream.next_in = fill(1);
77                 stream.avail_in = len;
78         }
79         inflateEnd(&stream);
80         return buf;
81 }
82
/*
 * A delta whose base object was not available yet when the delta was
 * read; kept until an object with a matching SHA-1 is written.
 */
struct delta_info {
        unsigned char base_sha1[20];    /* SHA-1 of the base being waited for */
        unsigned long size;             /* length of the delta data */
        void *delta;                    /* the inflated delta data */
        struct delta_info *next;
};

/* Head of the singly linked list of pending deltas. */
static struct delta_info *delta_list;

/*
 * Queue a delta until its base shows up.  The list entry keeps the
 * "delta" pointer itself rather than a copy of the data.
 */
static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size)
{
        struct delta_info *entry = xmalloc(sizeof(struct delta_info));

        entry->delta = delta;
        entry->size = size;
        hashcpy(entry->base_sha1, base_sha1);

        /* push onto the front of the pending list */
        entry->next = delta_list;
        delta_list = entry;
}
102
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size);

/*
 * Write "buf" ("size" bytes) out as an object of the given type, then
 * hand the new object to added_object() so that deltas waiting for it
 * can be resolved.  Dies if the object cannot be written.
 */
static void write_object(void *buf, unsigned long size, const char *type)
{
        unsigned char object_sha1[20];
        int status = write_sha1_file(buf, size, type, object_sha1);

        if (status < 0)
                die("failed to write object");
        added_object(object_sha1, type, buf, size);
}
112
/*
 * Apply "delta" to "base" and write the result as an object of the
 * given type.  The delta buffer is freed here; the base buffer is left
 * to the caller.  Dies if the delta cannot be applied; otherwise
 * returns 0.
 */
static int resolve_delta(const char *type,
        void *base, unsigned long base_size,
        void *delta, unsigned long delta_size)
{
        unsigned long patched_size;
        void *patched = patch_delta(base, base_size,
                                    delta, delta_size,
                                    &patched_size);

        if (patched == NULL)
                die("failed to apply delta");
        free(delta);
        write_object(patched, patched_size, type);
        free(patched);
        return 0;
}
130
131 static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size)
132 {
133         struct delta_info **p = &delta_list;
134         struct delta_info *info;
135
136         while ((info = *p) != NULL) {
137                 if (!hashcmp(info->base_sha1, sha1)) {
138                         *p = info->next;
139                         p = &delta_list;
140                         resolve_delta(type, data, size, info->delta, info->size);
141                         free(info);
142                         continue;
143                 }
144                 p = &info->next;
145         }
146 }
147
148 static int unpack_non_delta_entry(enum object_type kind, unsigned long size)
149 {
150         void *buf = get_data(size);
151         const char *type;
152
153         switch (kind) {
154         case OBJ_COMMIT: type = commit_type; break;
155         case OBJ_TREE:   type = tree_type; break;
156         case OBJ_BLOB:   type = blob_type; break;
157         case OBJ_TAG:    type = tag_type; break;
158         default: die("bad type %d", kind);
159         }
160         if (!dry_run)
161                 write_object(buf, size, type);
162         free(buf);
163         return 0;
164 }
165
166 static int unpack_delta_entry(unsigned long delta_size)
167 {
168         void *delta_data, *base;
169         unsigned long base_size;
170         char type[20];
171         unsigned char base_sha1[20];
172         int result;
173
174         hashcpy(base_sha1, fill(20));
175         use(20);
176
177         delta_data = get_data(delta_size);
178         if (dry_run) {
179                 free(delta_data);
180                 return 0;
181         }
182
183         if (!has_sha1_file(base_sha1)) {
184                 add_delta_to_list(base_sha1, delta_data, delta_size);
185                 return 0;
186         }
187         base = read_sha1_file(base_sha1, type, &base_size);
188         if (!base)
189                 die("failed to read delta-pack base object %s", sha1_to_hex(base_sha1));
190         result = resolve_delta(type, base, base_size, delta_data, delta_size);
191         free(base);
192         return result;
193 }
194
195 static void unpack_one(unsigned nr, unsigned total)
196 {
197         unsigned shift;
198         unsigned char *pack, c;
199         unsigned long size;
200         enum object_type type;
201
202         pack = fill(1);
203         c = *pack;
204         use(1);
205         type = (c >> 4) & 7;
206         size = (c & 15);
207         shift = 4;
208         while (c & 0x80) {
209                 pack = fill(1);
210                 c = *pack++;
211                 use(1);
212                 size += (c & 0x7f) << shift;
213                 shift += 7;
214         }
215         if (!quiet) {
216                 static unsigned long last_sec;
217                 static unsigned last_percent;
218                 struct timeval now;
219                 unsigned percentage = (nr * 100) / total;
220
221                 gettimeofday(&now, NULL);
222                 if (percentage != last_percent || now.tv_sec != last_sec) {
223                         last_sec = now.tv_sec;
224                         last_percent = percentage;
225                         fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total);
226                 }
227         }
228         switch (type) {
229         case OBJ_COMMIT:
230         case OBJ_TREE:
231         case OBJ_BLOB:
232         case OBJ_TAG:
233                 unpack_non_delta_entry(type, size);
234                 return;
235         case OBJ_DELTA:
236                 unpack_delta_entry(size);
237                 return;
238         default:
239                 die("bad object type %d", type);
240         }
241 }
242
243 static void unpack_all(void)
244 {
245         int i;
246         struct pack_header *hdr = fill(sizeof(struct pack_header));
247         unsigned nr_objects = ntohl(hdr->hdr_entries);
248
249         if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
250                 die("bad pack file");
251         if (!pack_version_ok(hdr->hdr_version))
252                 die("unknown pack file version %d", ntohl(hdr->hdr_version));
253         fprintf(stderr, "Unpacking %d objects\n", nr_objects);
254
255         use(sizeof(struct pack_header));
256         for (i = 0; i < nr_objects; i++)
257                 unpack_one(i+1, nr_objects);
258         if (delta_list)
259                 die("unresolved deltas left after unpacking");
260 }
261
262 int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
263 {
264         int i;
265         unsigned char sha1[20];
266
267         git_config(git_default_config);
268
269         quiet = !isatty(2);
270
271         for (i = 1 ; i < argc; i++) {
272                 const char *arg = argv[i];
273
274                 if (*arg == '-') {
275                         if (!strcmp(arg, "-n")) {
276                                 dry_run = 1;
277                                 continue;
278                         }
279                         if (!strcmp(arg, "-q")) {
280                                 quiet = 1;
281                                 continue;
282                         }
283                         usage(unpack_usage);
284                 }
285
286                 /* We don't take any non-flag arguments now.. Maybe some day */
287                 usage(unpack_usage);
288         }
289         SHA1_Init(&ctx);
290         unpack_all();
291         SHA1_Update(&ctx, buffer, offset);
292         SHA1_Final(sha1, &ctx);
293         if (hashcmp(fill(20), sha1))
294                 die("final sha1 did not match");
295         use(20);
296
297         /* Write the last part of the buffer to stdout */
298         while (len) {
299                 int ret = xwrite(1, buffer + offset, len);
300                 if (ret <= 0)
301                         break;
302                 len -= ret;
303                 offset += ret;
304         }
305
306         /* All done */
307         if (!quiet)
308                 fprintf(stderr, "\n");
309         return 0;
310 }