Merge branch 'jc/pack'
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20   XML_STATUS_OK = 1,
21   XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK    1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS  (1u << 1)
30 #define RECURSIVE     (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
35
/* Chunk size used when re-reading a partially downloaded object */
#define PREV_BUF_SIZE 4096
/* Size of the stack buffer that holds a "Range: bytes=N-" header */
#define RANGE_HEADER_SIZE 30

static int commits_on_stdin;

/*
 * State of the alternates lookup: -1 = not fetched yet (or the fetch
 * failed), 0 = a fetch is in progress, 1 = alternates have been loaded.
 */
static int got_alternates = -1;
static int corrupt_object_found;

/* Header list installed on object and pack-index requests */
static struct curl_slist *no_pragma_header;
45
/*
 * One remote object store: the primary repository or an alternate it
 * points to.  Entries form a singly linked list headed by "alt".
 */
struct alt_base {
	const char *base;		/* base URL of the repository */
	int path_len;			/* length of the path part of base (from the first '/' after "//") */
	int got_indices;
	struct packed_git *packs;	/* pack indices known for this repository */
	struct alt_base *next;
};

/* Head of the repository list; alternates are appended at the tail */
static struct alt_base *alt;
56
/* Life cycle of a queued object request */
enum object_request_state {
	WAITING,	/* queued by prefetch(), transfer not started */
	ABORTED,	/* the transfer could not be set up or started */
	ACTIVE,		/* handed to a curl slot */
	COMPLETE,	/* response processed, or the object is already present */
};
63
64 struct object_request
65 {
66         unsigned char sha1[20];
67         struct alt_base *repo;
68         char *url;
69         char filename[PATH_MAX];
70         char tmpfile[PATH_MAX];
71         int local;
72         enum object_request_state state;
73         CURLcode curl_result;
74         char errorstr[CURL_ERROR_SIZE];
75         long http_code;
76         unsigned char real_sha1[20];
77         SHA_CTX c;
78         z_stream stream;
79         int zret;
80         int rename;
81         struct active_request_slot *slot;
82         struct object_request *next;
83 };
84
/* Context handed to process_alternates_response() through the slot */
struct alternates_request {
	const char *base;			/* repository whose alternates are fetched */
	char *url;				/* request URL; rewritten for the fallback request */
	struct buffer *buffer;			/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;			/* 1 while asking for http-alternates */
};
92
93 #ifndef NO_EXPAT
/* Parser state shared by the expat callbacks in this file */
struct xml_ctx {
	char *name;	/* dot-joined path of the currently open elements */
	int len;	/* allocated size recorded for name */
	char *cdata;	/* most recent character data, or NULL */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};
102
/* State of one remote directory listing (see remote_ls) */
struct remote_ls_ctx {
	struct alt_base *repo;
	char *path;					/* path being listed */
	void (*userFunc)(struct remote_ls_ctx *ls);	/* called per matching entry */
	void *userData;
	int flags;					/* PROCESS_FILES | PROCESS_DIRS | RECURSIVE */
	char *dentry_name;				/* name of the entry being parsed */
	int dentry_flags;				/* IS_DIR when the entry is a collection */
	int rc;
	struct remote_ls_ctx *parent;
};
115 #endif
116
/* Head of the singly linked queue of object requests (see prefetch) */
static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120                                void *data)
121 {
122         unsigned char expn[4096];
123         size_t size = eltsize * nmemb;
124         int posn = 0;
125         struct object_request *obj_req = (struct object_request *)data;
126         do {
127                 ssize_t retval = write(obj_req->local,
128                                        (char *) ptr + posn, size - posn);
129                 if (retval < 0)
130                         return posn;
131                 posn += retval;
132         } while (posn < size);
133
134         obj_req->stream.avail_in = size;
135         obj_req->stream.next_in = ptr;
136         do {
137                 obj_req->stream.next_out = expn;
138                 obj_req->stream.avail_out = sizeof(expn);
139                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140                 SHA1_Update(&obj_req->c, expn,
141                             sizeof(expn) - obj_req->stream.avail_out);
142         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143         data_received++;
144         return size;
145 }
146
147 static void fetch_alternates(const char *base);
148
149 static void process_object_response(void *callback_data);
150
151 static void start_object_request(struct object_request *obj_req)
152 {
153         char *hex = sha1_to_hex(obj_req->sha1);
154         char prevfile[PATH_MAX];
155         char *url;
156         char *posn;
157         int prevlocal;
158         unsigned char prev_buf[PREV_BUF_SIZE];
159         ssize_t prev_read = 0;
160         long prev_posn = 0;
161         char range[RANGE_HEADER_SIZE];
162         struct curl_slist *range_header = NULL;
163         struct active_request_slot *slot;
164
165         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
166         unlink(prevfile);
167         rename(obj_req->tmpfile, prevfile);
168         unlink(obj_req->tmpfile);
169
170         if (obj_req->local != -1)
171                 error("fd leakage in start: %d", obj_req->local);
172         obj_req->local = open(obj_req->tmpfile,
173                               O_WRONLY | O_CREAT | O_EXCL, 0666);
174         /* This could have failed due to the "lazy directory creation";
175          * try to mkdir the last path component.
176          */
177         if (obj_req->local < 0 && errno == ENOENT) {
178                 char *dir = strrchr(obj_req->tmpfile, '/');
179                 if (dir) {
180                         *dir = 0;
181                         mkdir(obj_req->tmpfile, 0777);
182                         *dir = '/';
183                 }
184                 obj_req->local = open(obj_req->tmpfile,
185                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
186         }
187
188         if (obj_req->local < 0) {
189                 obj_req->state = ABORTED;
190                 error("Couldn't create temporary file %s for %s: %s",
191                       obj_req->tmpfile, obj_req->filename, strerror(errno));
192                 return;
193         }
194
195         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
196
197         inflateInit(&obj_req->stream);
198
199         SHA1_Init(&obj_req->c);
200
201         url = xmalloc(strlen(obj_req->repo->base) + 50);
202         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
203         strcpy(url, obj_req->repo->base);
204         posn = url + strlen(obj_req->repo->base);
205         strcpy(posn, "objects/");
206         posn += 8;
207         memcpy(posn, hex, 2);
208         posn += 2;
209         *(posn++) = '/';
210         strcpy(posn, hex + 2);
211         strcpy(obj_req->url, url);
212
213         /* If a previous temp file is present, process what was already
214            fetched. */
215         prevlocal = open(prevfile, O_RDONLY);
216         if (prevlocal != -1) {
217                 do {
218                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
219                         if (prev_read>0) {
220                                 if (fwrite_sha1_file(prev_buf,
221                                                      1,
222                                                      prev_read,
223                                                      obj_req) == prev_read) {
224                                         prev_posn += prev_read;
225                                 } else {
226                                         prev_read = -1;
227                                 }
228                         }
229                 } while (prev_read > 0);
230                 close(prevlocal);
231         }
232         unlink(prevfile);
233
234         /* Reset inflate/SHA1 if there was an error reading the previous temp
235            file; also rewind to the beginning of the local file. */
236         if (prev_read == -1) {
237                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
238                 inflateInit(&obj_req->stream);
239                 SHA1_Init(&obj_req->c);
240                 if (prev_posn>0) {
241                         prev_posn = 0;
242                         lseek(obj_req->local, SEEK_SET, 0);
243                         ftruncate(obj_req->local, 0);
244                 }
245         }
246
247         slot = get_active_slot();
248         slot->callback_func = process_object_response;
249         slot->callback_data = obj_req;
250         obj_req->slot = slot;
251
252         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
253         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
254         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
255         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
256         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
257
258         /* If we have successfully processed data from a previous fetch
259            attempt, only fetch the data we don't already have. */
260         if (prev_posn>0) {
261                 if (get_verbosely)
262                         fprintf(stderr,
263                                 "Resuming fetch of object %s at byte %ld\n",
264                                 hex, prev_posn);
265                 sprintf(range, "Range: bytes=%ld-", prev_posn);
266                 range_header = curl_slist_append(range_header, range);
267                 curl_easy_setopt(slot->curl,
268                                  CURLOPT_HTTPHEADER, range_header);
269         }
270
271         /* Try to get the request started, abort the request on error */
272         obj_req->state = ACTIVE;
273         if (!start_active_slot(slot)) {
274                 obj_req->state = ABORTED;
275                 obj_req->slot = NULL;
276                 close(obj_req->local); obj_req->local = -1;
277                 free(obj_req->url);
278                 return;
279         }
280 }
281
282 static void finish_object_request(struct object_request *obj_req)
283 {
284         struct stat st;
285
286         fchmod(obj_req->local, 0444);
287         close(obj_req->local); obj_req->local = -1;
288
289         if (obj_req->http_code == 416) {
290                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
291         } else if (obj_req->curl_result != CURLE_OK) {
292                 if (stat(obj_req->tmpfile, &st) == 0)
293                         if (st.st_size == 0)
294                                 unlink(obj_req->tmpfile);
295                 return;
296         }
297
298         inflateEnd(&obj_req->stream);
299         SHA1_Final(obj_req->real_sha1, &obj_req->c);
300         if (obj_req->zret != Z_STREAM_END) {
301                 unlink(obj_req->tmpfile);
302                 return;
303         }
304         if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
305                 unlink(obj_req->tmpfile);
306                 return;
307         }
308         obj_req->rename =
309                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
310
311         if (obj_req->rename == 0)
312                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 }
314
315 static void process_object_response(void *callback_data)
316 {
317         struct object_request *obj_req =
318                 (struct object_request *)callback_data;
319
320         obj_req->curl_result = obj_req->slot->curl_result;
321         obj_req->http_code = obj_req->slot->http_code;
322         obj_req->slot = NULL;
323         obj_req->state = COMPLETE;
324
325         /* Use alternates if necessary */
326         if (obj_req->http_code == 404 ||
327             obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
328                 fetch_alternates(alt->base);
329                 if (obj_req->repo->next != NULL) {
330                         obj_req->repo =
331                                 obj_req->repo->next;
332                         close(obj_req->local);
333                         obj_req->local = -1;
334                         start_object_request(obj_req);
335                         return;
336                 }
337         }
338
339         finish_object_request(obj_req);
340 }
341
342 static void release_object_request(struct object_request *obj_req)
343 {
344         struct object_request *entry = object_queue_head;
345
346         if (obj_req->local != -1)
347                 error("fd leakage in release: %d", obj_req->local);
348         if (obj_req == object_queue_head) {
349                 object_queue_head = obj_req->next;
350         } else {
351                 while (entry->next != NULL && entry->next != obj_req)
352                         entry = entry->next;
353                 if (entry->next == obj_req)
354                         entry->next = entry->next->next;
355         }
356
357         free(obj_req->url);
358         free(obj_req);
359 }
360
#ifdef USE_CURL_MULTI
/*
 * Start queued requests while fewer than max_requests transfers are
 * active, then release the curl handles of slots that are not in use.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	/* sampled before any request is started, as slots may be added below */
	struct active_request_slot *cur = active_queue_head;
	int running;

	for (req = object_queue_head;
	     active_requests < max_requests && req != NULL;
	     req = req->next) {
		if (req->state != WAITING)
			continue;
		/* Objects that showed up locally need no transfer */
		if (has_sha1_file(req->sha1))
			req->state = COMPLETE;
		else
			start_object_request(req);
		curl_multi_perform(curlm, &running);
	}

	for (; cur != NULL; cur = cur->next) {
		if (cur->in_use || cur->curl == NULL)
			continue;
		curl_easy_cleanup(cur->curl);
		cur->curl = NULL;
	}
}
#endif
388
389 void prefetch(unsigned char *sha1)
390 {
391         struct object_request *newreq;
392         struct object_request *tail;
393         char *filename = sha1_file_name(sha1);
394
395         newreq = xmalloc(sizeof(*newreq));
396         hashcpy(newreq->sha1, sha1);
397         newreq->repo = alt;
398         newreq->url = NULL;
399         newreq->local = -1;
400         newreq->state = WAITING;
401         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
402         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
403                  "%s.temp", filename);
404         newreq->slot = NULL;
405         newreq->next = NULL;
406
407         if (object_queue_head == NULL) {
408                 object_queue_head = newreq;
409         } else {
410                 tail = object_queue_head;
411                 while (tail->next != NULL) {
412                         tail = tail->next;
413                 }
414                 tail->next = newreq;
415         }
416
417 #ifdef USE_CURL_MULTI
418         fill_active_slots();
419         step_active_slots();
420 #endif
421 }
422
423 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
424 {
425         char *hex = sha1_to_hex(sha1);
426         char *filename;
427         char *url;
428         char tmpfile[PATH_MAX];
429         long prev_posn = 0;
430         char range[RANGE_HEADER_SIZE];
431         struct curl_slist *range_header = NULL;
432
433         FILE *indexfile;
434         struct active_request_slot *slot;
435         struct slot_results results;
436
437         if (has_pack_index(sha1))
438                 return 0;
439
440         if (get_verbosely)
441                 fprintf(stderr, "Getting index for pack %s\n", hex);
442
443         url = xmalloc(strlen(repo->base) + 64);
444         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
445
446         filename = sha1_pack_index_name(sha1);
447         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
448         indexfile = fopen(tmpfile, "a");
449         if (!indexfile)
450                 return error("Unable to open local file %s for pack index",
451                              filename);
452
453         slot = get_active_slot();
454         slot->results = &results;
455         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
456         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
457         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
458         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
459         slot->local = indexfile;
460
461         /* If there is data present from a previous transfer attempt,
462            resume where it left off */
463         prev_posn = ftell(indexfile);
464         if (prev_posn>0) {
465                 if (get_verbosely)
466                         fprintf(stderr,
467                                 "Resuming fetch of index for pack %s at byte %ld\n",
468                                 hex, prev_posn);
469                 sprintf(range, "Range: bytes=%ld-", prev_posn);
470                 range_header = curl_slist_append(range_header, range);
471                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
472         }
473
474         if (start_active_slot(slot)) {
475                 run_active_slot(slot);
476                 if (results.curl_result != CURLE_OK) {
477                         fclose(indexfile);
478                         return error("Unable to get pack index %s\n%s", url,
479                                      curl_errorstr);
480                 }
481         } else {
482                 fclose(indexfile);
483                 return error("Unable to start request");
484         }
485
486         fclose(indexfile);
487
488         return move_temp_to_file(tmpfile, filename);
489 }
490
491 static int setup_index(struct alt_base *repo, unsigned char *sha1)
492 {
493         struct packed_git *new_pack;
494         if (has_pack_file(sha1))
495                 return 0; /* don't list this as something we can get */
496
497         if (fetch_index(repo, sha1))
498                 return -1;
499
500         new_pack = parse_pack_index(sha1);
501         new_pack->next = repo->packs;
502         repo->packs = new_pack;
503         return 0;
504 }
505
506 static void process_alternates_response(void *callback_data)
507 {
508         struct alternates_request *alt_req =
509                 (struct alternates_request *)callback_data;
510         struct active_request_slot *slot = alt_req->slot;
511         struct alt_base *tail = alt;
512         const char *base = alt_req->base;
513         static const char null_byte = '\0';
514         char *data;
515         int i = 0;
516
517         if (alt_req->http_specific) {
518                 if (slot->curl_result != CURLE_OK ||
519                     !alt_req->buffer->posn) {
520
521                         /* Try reusing the slot to get non-http alternates */
522                         alt_req->http_specific = 0;
523                         sprintf(alt_req->url, "%s/objects/info/alternates",
524                                 base);
525                         curl_easy_setopt(slot->curl, CURLOPT_URL,
526                                          alt_req->url);
527                         active_requests++;
528                         slot->in_use = 1;
529                         if (slot->finished != NULL)
530                                 (*slot->finished) = 0;
531                         if (!start_active_slot(slot)) {
532                                 got_alternates = -1;
533                                 slot->in_use = 0;
534                                 if (slot->finished != NULL)
535                                         (*slot->finished) = 1;
536                         }
537                         return;
538                 }
539         } else if (slot->curl_result != CURLE_OK) {
540                 if (slot->http_code != 404 &&
541                     slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
542                         got_alternates = -1;
543                         return;
544                 }
545         }
546
547         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
548         alt_req->buffer->posn--;
549         data = alt_req->buffer->buffer;
550
551         while (i < alt_req->buffer->posn) {
552                 int posn = i;
553                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
554                         posn++;
555                 if (data[posn] == '\n') {
556                         int okay = 0;
557                         int serverlen = 0;
558                         struct alt_base *newalt;
559                         char *target = NULL;
560                         char *path;
561                         if (data[i] == '/') {
562                                 /* This counts
563                                  * http://git.host/pub/scm/linux.git/
564                                  * -----------here^
565                                  * so memcpy(dst, base, serverlen) will
566                                  * copy up to "...git.host".
567                                  */
568                                 const char *colon_ss = strstr(base,"://");
569                                 if (colon_ss) {
570                                         serverlen = (strchr(colon_ss + 3, '/')
571                                                      - base);
572                                         okay = 1;
573                                 }
574                         } else if (!memcmp(data + i, "../", 3)) {
575                                 /* Relative URL; chop the corresponding
576                                  * number of subpath from base (and ../
577                                  * from data), and concatenate the result.
578                                  *
579                                  * The code first drops ../ from data, and
580                                  * then drops one ../ from data and one path
581                                  * from base.  IOW, one extra ../ is dropped
582                                  * from data than path is dropped from base.
583                                  *
584                                  * This is not wrong.  The alternate in
585                                  *     http://git.host/pub/scm/linux.git/
586                                  * to borrow from
587                                  *     http://git.host/pub/scm/linus.git/
588                                  * is ../../linus.git/objects/.  You need
589                                  * two ../../ to borrow from your direct
590                                  * neighbour.
591                                  */
592                                 i += 3;
593                                 serverlen = strlen(base);
594                                 while (i + 2 < posn &&
595                                        !memcmp(data + i, "../", 3)) {
596                                         do {
597                                                 serverlen--;
598                                         } while (serverlen &&
599                                                  base[serverlen - 1] != '/');
600                                         i += 3;
601                                 }
602                                 /* If the server got removed, give up. */
603                                 okay = strchr(base, ':') - base + 3 <
604                                         serverlen;
605                         } else if (alt_req->http_specific) {
606                                 char *colon = strchr(data + i, ':');
607                                 char *slash = strchr(data + i, '/');
608                                 if (colon && slash && colon < data + posn &&
609                                     slash < data + posn && colon < slash) {
610                                         okay = 1;
611                                 }
612                         }
613                         /* skip "objects\n" at end */
614                         if (okay) {
615                                 target = xmalloc(serverlen + posn - i - 6);
616                                 memcpy(target, base, serverlen);
617                                 memcpy(target + serverlen, data + i,
618                                        posn - i - 7);
619                                 target[serverlen + posn - i - 7] = 0;
620                                 if (get_verbosely)
621                                         fprintf(stderr,
622                                                 "Also look at %s\n", target);
623                                 newalt = xmalloc(sizeof(*newalt));
624                                 newalt->next = NULL;
625                                 newalt->base = target;
626                                 newalt->got_indices = 0;
627                                 newalt->packs = NULL;
628                                 path = strstr(target, "//");
629                                 if (path) {
630                                         path = strchr(path+2, '/');
631                                         if (path)
632                                                 newalt->path_len = strlen(path);
633                                 }
634
635                                 while (tail->next != NULL)
636                                         tail = tail->next;
637                                 tail->next = newalt;
638                         }
639                 }
640                 i = posn + 1;
641         }
642
643         got_alternates = 1;
644 }
645
646 static void fetch_alternates(const char *base)
647 {
648         struct buffer buffer;
649         char *url;
650         char *data;
651         struct active_request_slot *slot;
652         struct alternates_request alt_req;
653
654         /* If another request has already started fetching alternates,
655            wait for them to arrive and return to processing this request's
656            curl message */
657 #ifdef USE_CURL_MULTI
658         while (got_alternates == 0) {
659                 step_active_slots();
660         }
661 #endif
662
663         /* Nothing to do if they've already been fetched */
664         if (got_alternates == 1)
665                 return;
666
667         /* Start the fetch */
668         got_alternates = 0;
669
670         data = xmalloc(4096);
671         buffer.size = 4096;
672         buffer.posn = 0;
673         buffer.buffer = data;
674
675         if (get_verbosely)
676                 fprintf(stderr, "Getting alternates list for %s\n", base);
677
678         url = xmalloc(strlen(base) + 31);
679         sprintf(url, "%s/objects/info/http-alternates", base);
680
681         /* Use a callback to process the result, since another request
682            may fail and need to have alternates loaded before continuing */
683         slot = get_active_slot();
684         slot->callback_func = process_alternates_response;
685         slot->callback_data = &alt_req;
686
687         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
688         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
689         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
690
691         alt_req.base = base;
692         alt_req.url = url;
693         alt_req.buffer = &buffer;
694         alt_req.http_specific = 1;
695         alt_req.slot = slot;
696
697         if (start_active_slot(slot))
698                 run_active_slot(slot);
699         else
700                 got_alternates = -1;
701
702         free(data);
703         free(url);
704 }
705
706 #ifndef NO_EXPAT
707 static void
708 xml_start_tag(void *userData, const char *name, const char **atts)
709 {
710         struct xml_ctx *ctx = (struct xml_ctx *)userData;
711         const char *c = strchr(name, ':');
712         int new_len;
713
714         if (c == NULL)
715                 c = name;
716         else
717                 c++;
718
719         new_len = strlen(ctx->name) + strlen(c) + 2;
720
721         if (new_len > ctx->len) {
722                 ctx->name = xrealloc(ctx->name, new_len);
723                 ctx->len = new_len;
724         }
725         strcat(ctx->name, ".");
726         strcat(ctx->name, c);
727
728         free(ctx->cdata);
729         ctx->cdata = NULL;
730
731         ctx->userFunc(ctx, 0);
732 }
733
734 static void
735 xml_end_tag(void *userData, const char *name)
736 {
737         struct xml_ctx *ctx = (struct xml_ctx *)userData;
738         const char *c = strchr(name, ':');
739         char *ep;
740
741         ctx->userFunc(ctx, 1);
742
743         if (c == NULL)
744                 c = name;
745         else
746                 c++;
747
748         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
749         *ep = 0;
750 }
751
752 static void
753 xml_cdata(void *userData, const XML_Char *s, int len)
754 {
755         struct xml_ctx *ctx = (struct xml_ctx *)userData;
756         free(ctx->cdata);
757         ctx->cdata = xmalloc(len + 1);
758         strlcpy(ctx->cdata, s, len + 1);
759 }
760
761 static int remote_ls(struct alt_base *repo, const char *path, int flags,
762                      void (*userFunc)(struct remote_ls_ctx *ls),
763                      void *userData);
764
765 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
766 {
767         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
768
769         if (tag_closed) {
770                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
771                         if (ls->dentry_flags & IS_DIR) {
772                                 if (ls->flags & PROCESS_DIRS) {
773                                         ls->userFunc(ls);
774                                 }
775                                 if (strcmp(ls->dentry_name, ls->path) &&
776                                     ls->flags & RECURSIVE) {
777                                         ls->rc = remote_ls(ls->repo,
778                                                            ls->dentry_name,
779                                                            ls->flags,
780                                                            ls->userFunc,
781                                                            ls->userData);
782                                 }
783                         } else if (ls->flags & PROCESS_FILES) {
784                                 ls->userFunc(ls);
785                         }
786                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
787                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
788                                                   ls->repo->path_len + 1);
789                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
790                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
791                         ls->dentry_flags |= IS_DIR;
792                 }
793         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
794                 free(ls->dentry_name);
795                 ls->dentry_name = NULL;
796                 ls->dentry_flags = 0;
797         }
798 }
799
800 static int remote_ls(struct alt_base *repo, const char *path, int flags,
801                      void (*userFunc)(struct remote_ls_ctx *ls),
802                      void *userData)
803 {
804         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
805         struct active_request_slot *slot;
806         struct slot_results results;
807         struct buffer in_buffer;
808         struct buffer out_buffer;
809         char *in_data;
810         char *out_data;
811         XML_Parser parser = XML_ParserCreate(NULL);
812         enum XML_Status result;
813         struct curl_slist *dav_headers = NULL;
814         struct xml_ctx ctx;
815         struct remote_ls_ctx ls;
816
817         ls.flags = flags;
818         ls.repo = repo;
819         ls.path = xstrdup(path);
820         ls.dentry_name = NULL;
821         ls.dentry_flags = 0;
822         ls.userData = userData;
823         ls.userFunc = userFunc;
824         ls.rc = 0;
825
826         sprintf(url, "%s%s", repo->base, path);
827
828         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
829         out_data = xmalloc(out_buffer.size + 1);
830         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
831         out_buffer.posn = 0;
832         out_buffer.buffer = out_data;
833
834         in_buffer.size = 4096;
835         in_data = xmalloc(in_buffer.size);
836         in_buffer.posn = 0;
837         in_buffer.buffer = in_data;
838
839         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
840         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
841
842         slot = get_active_slot();
843         slot->results = &results;
844         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
845         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
846         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
847         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
848         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
849         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
850         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
851         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
852         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
853
854         if (start_active_slot(slot)) {
855                 run_active_slot(slot);
856                 if (results.curl_result == CURLE_OK) {
857                         ctx.name = xcalloc(10, 1);
858                         ctx.len = 0;
859                         ctx.cdata = NULL;
860                         ctx.userFunc = handle_remote_ls_ctx;
861                         ctx.userData = &ls;
862                         XML_SetUserData(parser, &ctx);
863                         XML_SetElementHandler(parser, xml_start_tag,
864                                               xml_end_tag);
865                         XML_SetCharacterDataHandler(parser, xml_cdata);
866                         result = XML_Parse(parser, in_buffer.buffer,
867                                            in_buffer.posn, 1);
868                         free(ctx.name);
869
870                         if (result != XML_STATUS_OK) {
871                                 ls.rc = error("XML error: %s",
872                                               XML_ErrorString(
873                                                       XML_GetErrorCode(parser)));
874                         }
875                 } else {
876                         ls.rc = -1;
877                 }
878         } else {
879                 ls.rc = error("Unable to start PROPFIND request");
880         }
881
882         free(ls.path);
883         free(url);
884         free(out_data);
885         free(in_buffer.buffer);
886         curl_slist_free_all(dav_headers);
887
888         return ls.rc;
889 }
890
891 static void process_ls_pack(struct remote_ls_ctx *ls)
892 {
893         unsigned char sha1[20];
894
895         if (strlen(ls->dentry_name) == 63 &&
896             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
897             has_extension(ls->dentry_name, ".pack")) {
898                 get_sha1_hex(ls->dentry_name + 18, sha1);
899                 setup_index(ls->repo, sha1);
900         }
901 }
902 #endif
903
904 static int fetch_indices(struct alt_base *repo)
905 {
906         unsigned char sha1[20];
907         char *url;
908         struct buffer buffer;
909         char *data;
910         int i = 0;
911
912         struct active_request_slot *slot;
913         struct slot_results results;
914
915         if (repo->got_indices)
916                 return 0;
917
918         data = xmalloc(4096);
919         buffer.size = 4096;
920         buffer.posn = 0;
921         buffer.buffer = data;
922
923         if (get_verbosely)
924                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
925
926 #ifndef NO_EXPAT
927         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
928                       process_ls_pack, NULL) == 0)
929                 return 0;
930 #endif
931
932         url = xmalloc(strlen(repo->base) + 21);
933         sprintf(url, "%s/objects/info/packs", repo->base);
934
935         slot = get_active_slot();
936         slot->results = &results;
937         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
938         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
939         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
940         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
941         if (start_active_slot(slot)) {
942                 run_active_slot(slot);
943                 if (results.curl_result != CURLE_OK) {
944                         if (results.http_code == 404 ||
945                             results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
946                                 repo->got_indices = 1;
947                                 free(buffer.buffer);
948                                 return 0;
949                         } else {
950                                 repo->got_indices = 0;
951                                 free(buffer.buffer);
952                                 return error("%s", curl_errorstr);
953                         }
954                 }
955         } else {
956                 repo->got_indices = 0;
957                 free(buffer.buffer);
958                 return error("Unable to start request");
959         }
960
961         data = buffer.buffer;
962         while (i < buffer.posn) {
963                 switch (data[i]) {
964                 case 'P':
965                         i++;
966                         if (i + 52 <= buffer.posn &&
967                             !strncmp(data + i, " pack-", 6) &&
968                             !strncmp(data + i + 46, ".pack\n", 6)) {
969                                 get_sha1_hex(data + i + 6, sha1);
970                                 setup_index(repo, sha1);
971                                 i += 51;
972                                 break;
973                         }
974                 default:
975                         while (i < buffer.posn && data[i] != '\n')
976                                 i++;
977                 }
978                 i++;
979         }
980
981         free(buffer.buffer);
982         repo->got_indices = 1;
983         return 0;
984 }
985
986 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
987 {
988         char *url;
989         struct packed_git *target;
990         struct packed_git **lst;
991         FILE *packfile;
992         char *filename;
993         char tmpfile[PATH_MAX];
994         int ret;
995         long prev_posn = 0;
996         char range[RANGE_HEADER_SIZE];
997         struct curl_slist *range_header = NULL;
998
999         struct active_request_slot *slot;
1000         struct slot_results results;
1001
1002         if (fetch_indices(repo))
1003                 return -1;
1004         target = find_sha1_pack(sha1, repo->packs);
1005         if (!target)
1006                 return -1;
1007
1008         if (get_verbosely) {
1009                 fprintf(stderr, "Getting pack %s\n",
1010                         sha1_to_hex(target->sha1));
1011                 fprintf(stderr, " which contains %s\n",
1012                         sha1_to_hex(sha1));
1013         }
1014
1015         url = xmalloc(strlen(repo->base) + 65);
1016         sprintf(url, "%s/objects/pack/pack-%s.pack",
1017                 repo->base, sha1_to_hex(target->sha1));
1018
1019         filename = sha1_pack_name(target->sha1);
1020         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1021         packfile = fopen(tmpfile, "a");
1022         if (!packfile)
1023                 return error("Unable to open local file %s for pack",
1024                              filename);
1025
1026         slot = get_active_slot();
1027         slot->results = &results;
1028         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1029         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1030         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1031         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1032         slot->local = packfile;
1033
1034         /* If there is data present from a previous transfer attempt,
1035            resume where it left off */
1036         prev_posn = ftell(packfile);
1037         if (prev_posn>0) {
1038                 if (get_verbosely)
1039                         fprintf(stderr,
1040                                 "Resuming fetch of pack %s at byte %ld\n",
1041                                 sha1_to_hex(target->sha1), prev_posn);
1042                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1043                 range_header = curl_slist_append(range_header, range);
1044                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1045         }
1046
1047         if (start_active_slot(slot)) {
1048                 run_active_slot(slot);
1049                 if (results.curl_result != CURLE_OK) {
1050                         fclose(packfile);
1051                         return error("Unable to get pack file %s\n%s", url,
1052                                      curl_errorstr);
1053                 }
1054         } else {
1055                 fclose(packfile);
1056                 return error("Unable to start request");
1057         }
1058
1059         fclose(packfile);
1060
1061         ret = move_temp_to_file(tmpfile, filename);
1062         if (ret)
1063                 return ret;
1064
1065         lst = &repo->packs;
1066         while (*lst != target)
1067                 lst = &((*lst)->next);
1068         *lst = (*lst)->next;
1069
1070         if (verify_pack(target, 0))
1071                 return -1;
1072         install_packed_git(target);
1073
1074         return 0;
1075 }
1076
1077 static void abort_object_request(struct object_request *obj_req)
1078 {
1079         if (obj_req->local >= 0) {
1080                 close(obj_req->local);
1081                 obj_req->local = -1;
1082         }
1083         unlink(obj_req->tmpfile);
1084         if (obj_req->slot) {
1085                 release_active_slot(obj_req->slot);
1086                 obj_req->slot = NULL;
1087         }
1088         release_object_request(obj_req);
1089 }
1090
1091 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1092 {
1093         char *hex = sha1_to_hex(sha1);
1094         int ret = 0;
1095         struct object_request *obj_req = object_queue_head;
1096
1097         while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1098                 obj_req = obj_req->next;
1099         if (obj_req == NULL)
1100                 return error("Couldn't find request for %s in the queue", hex);
1101
1102         if (has_sha1_file(obj_req->sha1)) {
1103                 abort_object_request(obj_req);
1104                 return 0;
1105         }
1106
1107 #ifdef USE_CURL_MULTI
1108         while (obj_req->state == WAITING) {
1109                 step_active_slots();
1110         }
1111 #else
1112         start_object_request(obj_req);
1113 #endif
1114
1115         while (obj_req->state == ACTIVE) {
1116                 run_active_slot(obj_req->slot);
1117         }
1118         if (obj_req->local != -1) {
1119                 close(obj_req->local); obj_req->local = -1;
1120         }
1121
1122         if (obj_req->state == ABORTED) {
1123                 ret = error("Request for %s aborted", hex);
1124         } else if (obj_req->curl_result != CURLE_OK &&
1125                    obj_req->http_code != 416) {
1126                 if (obj_req->http_code == 404 ||
1127                     obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1128                         ret = -1; /* Be silent, it is probably in a pack. */
1129                 else
1130                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1131                                     obj_req->errorstr, obj_req->curl_result,
1132                                     obj_req->http_code, hex);
1133         } else if (obj_req->zret != Z_STREAM_END) {
1134                 corrupt_object_found++;
1135                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1136         } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1137                 ret = error("File %s has bad hash", hex);
1138         } else if (obj_req->rename < 0) {
1139                 ret = error("unable to write sha1 filename %s",
1140                             obj_req->filename);
1141         }
1142
1143         release_object_request(obj_req);
1144         return ret;
1145 }
1146
1147 int fetch(unsigned char *sha1)
1148 {
1149         struct alt_base *altbase = alt;
1150
1151         if (!fetch_object(altbase, sha1))
1152                 return 0;
1153         while (altbase) {
1154                 if (!fetch_pack(altbase, sha1))
1155                         return 0;
1156                 fetch_alternates(alt->base);
1157                 altbase = altbase->next;
1158         }
1159         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1160                      alt->base);
1161 }
1162
/*
 * Return 0 for characters that may appear verbatim in a ref URL
 * (ASCII letters, digits, '/', '-' and '.'), and 1 for everything else.
 */
static inline int needs_quote(int ch)
{
        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                break;
        }

        if ('A' <= ch && ch <= 'Z')
                return 0;
        if ('a' <= ch && ch <= 'z')
                return 0;
        if ('0' <= ch && ch <= '9')
                return 0;

        return 1;
}
1174
/*
 * Map a nibble value to its uppercase hexadecimal digit character:
 * values below 10 give '0'..'9', the rest count up from 'A'.
 */
static inline int hex(int v)
{
        return (v < 10) ? '0' + v : 'A' + v - 10;
}
1180
/*
 * Build "<base>refs/<ref>" with every character of `ref` for which
 * needs_quote() is true replaced by its "%XX" escape (uppercase hex).
 *
 * `base` is copied verbatim.  Bytes of `ref` are read as unsigned char so
 * that bytes >= 0x80 are escaped from their real value rather than from a
 * sign-extended negative int.
 *
 * Returns a newly allocated NUL-terminated string; the caller frees it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *cp;
        char *dp, *qref;
        size_t len, baselen;
        int ch;

        baselen = strlen(base);
        len = baselen + 6; /* "refs/" + NUL */
        for (cp = ref; (ch = (unsigned char)*cp) != 0; cp++, len++)
                if (needs_quote(ch))
                        len += 2; /* extra two hex plus replacement % */
        qref = xmalloc(len);
        memcpy(qref, base, baselen);
        memcpy(qref + baselen, "refs/", 5);
        for (cp = ref, dp = qref + baselen + 5;
             (ch = (unsigned char)*cp) != 0; cp++) {
                if (needs_quote(ch)) {
                        *dp++ = '%';
                        *dp++ = hex((ch >> 4) & 0xF);
                        *dp++ = hex(ch & 0xF);
                }
                else
                        *dp++ = ch;
        }
        *dp = 0;

        return qref;
}
1208
1209 int fetch_ref(char *ref, unsigned char *sha1)
1210 {
1211         char *url;
1212         char hex[42];
1213         struct buffer buffer;
1214         const char *base = alt->base;
1215         struct active_request_slot *slot;
1216         struct slot_results results;
1217         buffer.size = 41;
1218         buffer.posn = 0;
1219         buffer.buffer = hex;
1220         hex[41] = '\0';
1221
1222         url = quote_ref_url(base, ref);
1223         slot = get_active_slot();
1224         slot->results = &results;
1225         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1226         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1227         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1228         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1229         if (start_active_slot(slot)) {
1230                 run_active_slot(slot);
1231                 if (results.curl_result != CURLE_OK)
1232                         return error("Couldn't get %s for %s\n%s",
1233                                      url, ref, curl_errorstr);
1234         } else {
1235                 return error("Unable to start request");
1236         }
1237
1238         hex[40] = '\0';
1239         get_sha1_hex(hex, sha1);
1240         return 0;
1241 }
1242
1243 int main(int argc, const char **argv)
1244 {
1245         int commits;
1246         const char **write_ref = NULL;
1247         char **commit_id;
1248         const char *url;
1249         char *path;
1250         int arg = 1;
1251         int rc = 0;
1252
1253         setup_ident();
1254         setup_git_directory();
1255         git_config(git_default_config);
1256
1257         while (arg < argc && argv[arg][0] == '-') {
1258                 if (argv[arg][1] == 't') {
1259                         get_tree = 1;
1260                 } else if (argv[arg][1] == 'c') {
1261                         get_history = 1;
1262                 } else if (argv[arg][1] == 'a') {
1263                         get_all = 1;
1264                         get_tree = 1;
1265                         get_history = 1;
1266                 } else if (argv[arg][1] == 'v') {
1267                         get_verbosely = 1;
1268                 } else if (argv[arg][1] == 'w') {
1269                         write_ref = &argv[arg + 1];
1270                         arg++;
1271                 } else if (!strcmp(argv[arg], "--recover")) {
1272                         get_recover = 1;
1273                 } else if (!strcmp(argv[arg], "--stdin")) {
1274                         commits_on_stdin = 1;
1275                 }
1276                 arg++;
1277         }
1278         if (argc < arg + 2 - commits_on_stdin) {
1279                 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1280                 return 1;
1281         }
1282         if (commits_on_stdin) {
1283                 commits = pull_targets_stdin(&commit_id, &write_ref);
1284         } else {
1285                 commit_id = (char **) &argv[arg++];
1286                 commits = 1;
1287         }
1288         url = argv[arg];
1289
1290         http_init();
1291
1292         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1293
1294         alt = xmalloc(sizeof(*alt));
1295         alt->base = url;
1296         alt->got_indices = 0;
1297         alt->packs = NULL;
1298         alt->next = NULL;
1299         path = strstr(url, "//");
1300         if (path) {
1301                 path = strchr(path+2, '/');
1302                 if (path)
1303                         alt->path_len = strlen(path);
1304         }
1305
1306         if (pull(commits, commit_id, write_ref, url))
1307                 rc = 1;
1308
1309         http_cleanup();
1310
1311         curl_slist_free_all(no_pragma_header);
1312
1313         if (commits_on_stdin)
1314                 pull_targets_free(commits, commit_id, write_ref);
1315
1316         if (corrupt_object_found) {
1317                 fprintf(stderr,
1318 "Some loose object were found to be corrupt, but they might be just\n"
1319 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1320 "status code.  Suggest running git fsck-objects.\n");
1321         }
1322         return rc;
1323 }