Merge branch 'js/http-mb'
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
/* Definitions for DAV requests */
/* HTTP method name used by remote_ls() via CURLOPT_CUSTOMREQUEST. */
#define DAV_PROPFIND "PROPFIND"
/*
 * Dotted element paths, in the form built by xml_start_tag() (one
 * ".name" per open element, namespace prefix stripped), that
 * handle_remote_ls_ctx() compares against.
 */
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
/* Request body sent by remote_ls(). */
#define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"

/* Definitions for processing XML DAV responses */
/*
 * Fallback definitions used only when <expat.h> did not define
 * XML_STATUS_OK (presumably older expat releases -- TODO confirm).
 */
#ifndef XML_STATUS_OK
enum XML_Status {
  XML_STATUS_OK = 1,
  XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* Flags that control remote_ls processing */
#define PROCESS_FILES (1u << 0)	/* invoke the callback for non-directory entries */
#define PROCESS_DIRS  (1u << 1)	/* invoke the callback for directory entries */
#define RECURSIVE     (1u << 2)	/* descend into directories other than the listed path */

/* Flags that remote_ls passes to callback functions */
#define IS_DIR (1u << 0)	/* entry had a <collection> resourcetype */
34 #endif
35
/* Size of the buffer used to replay a previous partial download. */
#define PREV_BUF_SIZE 4096
/* Size of the buffer that holds a formatted "Range: bytes=N-" header. */
#define RANGE_HEADER_SIZE 30

/* NOTE(review): not referenced in this part of the file. */
static int commits_on_stdin = 0;

/*
 * State of the alternates lookup: -1 = not fetched yet or the fetch
 * failed, 0 = fetch in progress, 1 = alternates have been processed.
 */
static int got_alternates = -1;
/* NOTE(review): not referenced in this part of the file. */
static int corrupt_object_found = 0;

/* Header list installed via CURLOPT_HTTPHEADER for object and index fetches. */
static struct curl_slist *no_pragma_header;
45
/*
 * One repository that objects may be fetched from.  Entries form a
 * singly linked list starting at `alt`; alternates discovered at run
 * time are appended at the tail.
 */
struct alt_base
{
        const char *base;               /* URL prefix of the repository */
        int path_len;                   /* strlen() of the part of base starting at the
                                         * first '/' after "//"; stripped from DAV hrefs */
        int got_indices;                /* initialised to 0 for new alternates */
        struct packed_git *packs;       /* pack indices known for this repository */
        struct alt_base *next;
};
54
/* Head of the repository list; new requests start with this entry. */
static struct alt_base *alt = NULL;
56
/* Lifecycle of an object_request. */
enum object_request_state {
        WAITING,        /* queued by prefetch(), transfer not started */
        ABORTED,        /* temp file or transfer could not be started */
        ACTIVE,         /* transfer handed to an active request slot */
        COMPLETE,       /* response processed, or object already present locally */
};
63
/*
 * Bookkeeping for the download of a single loose object.  Requests are
 * kept in a singly linked queue starting at object_queue_head.
 */
struct object_request
{
        unsigned char sha1[20];         /* id of the object being requested */
        struct alt_base *repo;          /* repository currently being tried */
        char *url;                      /* heap-allocated request URL, or NULL */
        char filename[PATH_MAX];        /* final path of the object file */
        char tmpfile[PATH_MAX];         /* filename + ".temp", download target */
        int local;                      /* fd open on tmpfile, or -1 */
        enum object_request_state state;
        CURLcode curl_result;           /* copied from the slot when the response arrives */
        char errorstr[CURL_ERROR_SIZE]; /* passed to curl as CURLOPT_ERRORBUFFER */
        long http_code;                 /* copied from the slot when the response arrives */
        unsigned char real_sha1[20];    /* SHA-1 computed over the inflated data */
        SHA_CTX c;                      /* running SHA-1 state */
        z_stream stream;                /* zlib inflate state */
        int zret;                       /* most recent inflate() return value */
        int rename;                     /* result of move_temp_to_file() */
        struct active_request_slot *slot;       /* slot in use, or NULL */
        struct object_request *next;
};
84
/* Context handed to process_alternates_response() via the slot callback. */
struct alternates_request {
        const char *base;               /* URL prefix the alternates are relative to */
        char *url;                      /* request URL; rewritten in place for the fallback */
        struct buffer *buffer;          /* receives the response body */
        struct active_request_slot *slot;
        int http_specific;              /* 1 while asking for http-alternates,
                                         * 0 after falling back to alternates */
};
92
93 #ifndef NO_EXPAT
/* Parser state shared by the expat callbacks in this file. */
struct xml_ctx
{
        char *name;     /* dotted path of the currently open elements */
        int len;        /* size last passed to xrealloc() for name */
        char *cdata;    /* most recent character data, or NULL */
        void (*userFunc)(struct xml_ctx *ctx, int tag_closed);  /* called with 0 on
                                                                 * open, 1 on close */
        void *userData; /* opaque pointer for userFunc */
};
102
/* State of one remote_ls() listing, passed to the per-entry callback. */
struct remote_ls_ctx
{
        struct alt_base *repo;          /* repository being listed */
        char *path;                     /* path that was requested */
        void (*userFunc)(struct remote_ls_ctx *ls);     /* per-entry callback */
        void *userData;                 /* opaque pointer for userFunc */
        int flags;                      /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
        char *dentry_name;              /* name of the current entry, or NULL */
        int dentry_flags;               /* IS_DIR when the entry is a collection */
        int rc;                         /* result returned by remote_ls() */
        struct remote_ls_ctx *parent;   /* NOTE(review): not used in this part of the file */
};
115 #endif
116
/* Head of the queue of object requests; prefetch() appends at the tail. */
static struct object_request *object_queue_head = NULL;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120                                void *data)
121 {
122         unsigned char expn[4096];
123         size_t size = eltsize * nmemb;
124         int posn = 0;
125         struct object_request *obj_req = (struct object_request *)data;
126         do {
127                 ssize_t retval = write(obj_req->local,
128                                        (char *) ptr + posn, size - posn);
129                 if (retval < 0)
130                         return posn;
131                 posn += retval;
132         } while (posn < size);
133
134         obj_req->stream.avail_in = size;
135         obj_req->stream.next_in = ptr;
136         do {
137                 obj_req->stream.next_out = expn;
138                 obj_req->stream.avail_out = sizeof(expn);
139                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140                 SHA1_Update(&obj_req->c, expn,
141                             sizeof(expn) - obj_req->stream.avail_out);
142         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143         data_received++;
144         return size;
145 }
146
147 static void fetch_alternates(const char *base);
148
149 static void process_object_response(void *callback_data);
150
151 static void start_object_request(struct object_request *obj_req)
152 {
153         char *hex = sha1_to_hex(obj_req->sha1);
154         char prevfile[PATH_MAX];
155         char *url;
156         char *posn;
157         int prevlocal;
158         unsigned char prev_buf[PREV_BUF_SIZE];
159         ssize_t prev_read = 0;
160         long prev_posn = 0;
161         char range[RANGE_HEADER_SIZE];
162         struct curl_slist *range_header = NULL;
163         struct active_request_slot *slot;
164
165         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
166         unlink(prevfile);
167         rename(obj_req->tmpfile, prevfile);
168         unlink(obj_req->tmpfile);
169
170         if (obj_req->local != -1)
171                 error("fd leakage in start: %d", obj_req->local);
172         obj_req->local = open(obj_req->tmpfile,
173                               O_WRONLY | O_CREAT | O_EXCL, 0666);
174         /* This could have failed due to the "lazy directory creation";
175          * try to mkdir the last path component.
176          */
177         if (obj_req->local < 0 && errno == ENOENT) {
178                 char *dir = strrchr(obj_req->tmpfile, '/');
179                 if (dir) {
180                         *dir = 0;
181                         mkdir(obj_req->tmpfile, 0777);
182                         *dir = '/';
183                 }
184                 obj_req->local = open(obj_req->tmpfile,
185                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
186         }
187
188         if (obj_req->local < 0) {
189                 obj_req->state = ABORTED;
190                 error("Couldn't create temporary file %s for %s: %s",
191                       obj_req->tmpfile, obj_req->filename, strerror(errno));
192                 return;
193         }
194
195         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
196
197         inflateInit(&obj_req->stream);
198
199         SHA1_Init(&obj_req->c);
200
201         url = xmalloc(strlen(obj_req->repo->base) + 50);
202         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
203         strcpy(url, obj_req->repo->base);
204         posn = url + strlen(obj_req->repo->base);
205         strcpy(posn, "objects/");
206         posn += 8;
207         memcpy(posn, hex, 2);
208         posn += 2;
209         *(posn++) = '/';
210         strcpy(posn, hex + 2);
211         strcpy(obj_req->url, url);
212
213         /* If a previous temp file is present, process what was already
214            fetched. */
215         prevlocal = open(prevfile, O_RDONLY);
216         if (prevlocal != -1) {
217                 do {
218                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
219                         if (prev_read>0) {
220                                 if (fwrite_sha1_file(prev_buf,
221                                                      1,
222                                                      prev_read,
223                                                      obj_req) == prev_read) {
224                                         prev_posn += prev_read;
225                                 } else {
226                                         prev_read = -1;
227                                 }
228                         }
229                 } while (prev_read > 0);
230                 close(prevlocal);
231         }
232         unlink(prevfile);
233
234         /* Reset inflate/SHA1 if there was an error reading the previous temp
235            file; also rewind to the beginning of the local file. */
236         if (prev_read == -1) {
237                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
238                 inflateInit(&obj_req->stream);
239                 SHA1_Init(&obj_req->c);
240                 if (prev_posn>0) {
241                         prev_posn = 0;
242                         lseek(obj_req->local, SEEK_SET, 0);
243                         ftruncate(obj_req->local, 0);
244                 }
245         }
246
247         slot = get_active_slot();
248         slot->callback_func = process_object_response;
249         slot->callback_data = obj_req;
250         obj_req->slot = slot;
251
252         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
253         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
254         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
255         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
256         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
257
258         /* If we have successfully processed data from a previous fetch
259            attempt, only fetch the data we don't already have. */
260         if (prev_posn>0) {
261                 if (get_verbosely)
262                         fprintf(stderr,
263                                 "Resuming fetch of object %s at byte %ld\n",
264                                 hex, prev_posn);
265                 sprintf(range, "Range: bytes=%ld-", prev_posn);
266                 range_header = curl_slist_append(range_header, range);
267                 curl_easy_setopt(slot->curl,
268                                  CURLOPT_HTTPHEADER, range_header);
269         }
270
271         /* Try to get the request started, abort the request on error */
272         obj_req->state = ACTIVE;
273         if (!start_active_slot(slot)) {
274                 obj_req->state = ABORTED;
275                 obj_req->slot = NULL;
276                 close(obj_req->local); obj_req->local = -1;
277                 free(obj_req->url);
278                 return;
279         }
280 }
281
282 static void finish_object_request(struct object_request *obj_req)
283 {
284         struct stat st;
285
286         fchmod(obj_req->local, 0444);
287         close(obj_req->local); obj_req->local = -1;
288
289         if (obj_req->http_code == 416) {
290                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
291         } else if (obj_req->curl_result != CURLE_OK) {
292                 if (stat(obj_req->tmpfile, &st) == 0)
293                         if (st.st_size == 0)
294                                 unlink(obj_req->tmpfile);
295                 return;
296         }
297
298         inflateEnd(&obj_req->stream);
299         SHA1_Final(obj_req->real_sha1, &obj_req->c);
300         if (obj_req->zret != Z_STREAM_END) {
301                 unlink(obj_req->tmpfile);
302                 return;
303         }
304         if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
305                 unlink(obj_req->tmpfile);
306                 return;
307         }
308         obj_req->rename =
309                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
310
311         if (obj_req->rename == 0)
312                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
313 }
314
315 static void process_object_response(void *callback_data)
316 {
317         struct object_request *obj_req =
318                 (struct object_request *)callback_data;
319
320         obj_req->curl_result = obj_req->slot->curl_result;
321         obj_req->http_code = obj_req->slot->http_code;
322         obj_req->slot = NULL;
323         obj_req->state = COMPLETE;
324
325         /* Use alternates if necessary */
326         if (obj_req->http_code == 404 ||
327             obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
328                 fetch_alternates(alt->base);
329                 if (obj_req->repo->next != NULL) {
330                         obj_req->repo =
331                                 obj_req->repo->next;
332                         close(obj_req->local);
333                         obj_req->local = -1;
334                         start_object_request(obj_req);
335                         return;
336                 }
337         }
338
339         finish_object_request(obj_req);
340 }
341
342 static void release_object_request(struct object_request *obj_req)
343 {
344         struct object_request *entry = object_queue_head;
345
346         if (obj_req->local != -1)
347                 error("fd leakage in release: %d", obj_req->local);
348         if (obj_req == object_queue_head) {
349                 object_queue_head = obj_req->next;
350         } else {
351                 while (entry->next != NULL && entry->next != obj_req)
352                         entry = entry->next;
353                 if (entry->next == obj_req)
354                         entry->next = entry->next->next;
355         }
356
357         free(obj_req->url);
358         free(obj_req);
359 }
360
361 #ifdef USE_CURL_MULTI
362 void fill_active_slots(void)
363 {
364         struct object_request *obj_req = object_queue_head;
365         struct active_request_slot *slot = active_queue_head;
366         int num_transfers;
367
368         while (active_requests < max_requests && obj_req != NULL) {
369                 if (obj_req->state == WAITING) {
370                         if (has_sha1_file(obj_req->sha1))
371                                 obj_req->state = COMPLETE;
372                         else
373                                 start_object_request(obj_req);
374                         curl_multi_perform(curlm, &num_transfers);
375                 }
376                 obj_req = obj_req->next;
377         }
378
379         while (slot != NULL) {
380                 if (!slot->in_use && slot->curl != NULL) {
381                         curl_easy_cleanup(slot->curl);
382                         slot->curl = NULL;
383                 }
384                 slot = slot->next;
385         }
386 }
387 #endif
388
389 void prefetch(unsigned char *sha1)
390 {
391         struct object_request *newreq;
392         struct object_request *tail;
393         char *filename = sha1_file_name(sha1);
394
395         newreq = xmalloc(sizeof(*newreq));
396         memcpy(newreq->sha1, sha1, 20);
397         newreq->repo = alt;
398         newreq->url = NULL;
399         newreq->local = -1;
400         newreq->state = WAITING;
401         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
402         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
403                  "%s.temp", filename);
404         newreq->slot = NULL;
405         newreq->next = NULL;
406
407         if (object_queue_head == NULL) {
408                 object_queue_head = newreq;
409         } else {
410                 tail = object_queue_head;
411                 while (tail->next != NULL) {
412                         tail = tail->next;
413                 }
414                 tail->next = newreq;
415         }
416
417 #ifdef USE_CURL_MULTI
418         fill_active_slots();
419         step_active_slots();
420 #endif
421 }
422
423 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
424 {
425         char *hex = sha1_to_hex(sha1);
426         char *filename;
427         char *url;
428         char tmpfile[PATH_MAX];
429         long prev_posn = 0;
430         char range[RANGE_HEADER_SIZE];
431         struct curl_slist *range_header = NULL;
432
433         FILE *indexfile;
434         struct active_request_slot *slot;
435         struct slot_results results;
436
437         if (has_pack_index(sha1))
438                 return 0;
439
440         if (get_verbosely)
441                 fprintf(stderr, "Getting index for pack %s\n", hex);
442
443         url = xmalloc(strlen(repo->base) + 64);
444         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
445
446         filename = sha1_pack_index_name(sha1);
447         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
448         indexfile = fopen(tmpfile, "a");
449         if (!indexfile)
450                 return error("Unable to open local file %s for pack index",
451                              filename);
452
453         slot = get_active_slot();
454         slot->results = &results;
455         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
456         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
457         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
458         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
459         slot->local = indexfile;
460
461         /* If there is data present from a previous transfer attempt,
462            resume where it left off */
463         prev_posn = ftell(indexfile);
464         if (prev_posn>0) {
465                 if (get_verbosely)
466                         fprintf(stderr,
467                                 "Resuming fetch of index for pack %s at byte %ld\n",
468                                 hex, prev_posn);
469                 sprintf(range, "Range: bytes=%ld-", prev_posn);
470                 range_header = curl_slist_append(range_header, range);
471                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
472         }
473
474         if (start_active_slot(slot)) {
475                 run_active_slot(slot);
476                 if (results.curl_result != CURLE_OK) {
477                         fclose(indexfile);
478                         return error("Unable to get pack index %s\n%s", url,
479                                      curl_errorstr);
480                 }
481         } else {
482                 fclose(indexfile);
483                 return error("Unable to start request");
484         }
485
486         fclose(indexfile);
487
488         return move_temp_to_file(tmpfile, filename);
489 }
490
491 static int setup_index(struct alt_base *repo, unsigned char *sha1)
492 {
493         struct packed_git *new_pack;
494         if (has_pack_file(sha1))
495                 return 0; /* don't list this as something we can get */
496
497         if (fetch_index(repo, sha1))
498                 return -1;
499
500         new_pack = parse_pack_index(sha1);
501         new_pack->next = repo->packs;
502         repo->packs = new_pack;
503         return 0;
504 }
505
506 static void process_alternates_response(void *callback_data)
507 {
508         struct alternates_request *alt_req =
509                 (struct alternates_request *)callback_data;
510         struct active_request_slot *slot = alt_req->slot;
511         struct alt_base *tail = alt;
512         const char *base = alt_req->base;
513         static const char null_byte = '\0';
514         char *data;
515         int i = 0;
516
517         if (alt_req->http_specific) {
518                 if (slot->curl_result != CURLE_OK ||
519                     !alt_req->buffer->posn) {
520
521                         /* Try reusing the slot to get non-http alternates */
522                         alt_req->http_specific = 0;
523                         sprintf(alt_req->url, "%s/objects/info/alternates",
524                                 base);
525                         curl_easy_setopt(slot->curl, CURLOPT_URL,
526                                          alt_req->url);
527                         active_requests++;
528                         slot->in_use = 1;
529                         if (slot->finished != NULL)
530                                 (*slot->finished) = 0;
531                         if (!start_active_slot(slot)) {
532                                 got_alternates = -1;
533                                 slot->in_use = 0;
534                                 if (slot->finished != NULL)
535                                         (*slot->finished) = 1;
536                         }
537                         return;
538                 }
539         } else if (slot->curl_result != CURLE_OK) {
540                 if (slot->http_code != 404 &&
541                     slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
542                         got_alternates = -1;
543                         return;
544                 }
545         }
546
547         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
548         alt_req->buffer->posn--;
549         data = alt_req->buffer->buffer;
550
551         while (i < alt_req->buffer->posn) {
552                 int posn = i;
553                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
554                         posn++;
555                 if (data[posn] == '\n') {
556                         int okay = 0;
557                         int serverlen = 0;
558                         struct alt_base *newalt;
559                         char *target = NULL;
560                         char *path;
561                         if (data[i] == '/') {
562                                 serverlen = strchr(base + 8, '/') - base;
563                                 okay = 1;
564                         } else if (!memcmp(data + i, "../", 3)) {
565                                 i += 3;
566                                 serverlen = strlen(base);
567                                 while (i + 2 < posn &&
568                                        !memcmp(data + i, "../", 3)) {
569                                         do {
570                                                 serverlen--;
571                                         } while (serverlen &&
572                                                  base[serverlen - 1] != '/');
573                                         i += 3;
574                                 }
575                                 /* If the server got removed, give up. */
576                                 okay = strchr(base, ':') - base + 3 <
577                                         serverlen;
578                         } else if (alt_req->http_specific) {
579                                 char *colon = strchr(data + i, ':');
580                                 char *slash = strchr(data + i, '/');
581                                 if (colon && slash && colon < data + posn &&
582                                     slash < data + posn && colon < slash) {
583                                         okay = 1;
584                                 }
585                         }
586                         /* skip 'objects' at end */
587                         if (okay) {
588                                 target = xmalloc(serverlen + posn - i - 6);
589                                 strlcpy(target, base, serverlen);
590                                 strlcpy(target + serverlen, data + i, posn - i - 6);
591                                 if (get_verbosely)
592                                         fprintf(stderr,
593                                                 "Also look at %s\n", target);
594                                 newalt = xmalloc(sizeof(*newalt));
595                                 newalt->next = NULL;
596                                 newalt->base = target;
597                                 newalt->got_indices = 0;
598                                 newalt->packs = NULL;
599                                 path = strstr(target, "//");
600                                 if (path) {
601                                         path = strchr(path+2, '/');
602                                         if (path)
603                                                 newalt->path_len = strlen(path);
604                                 }
605
606                                 while (tail->next != NULL)
607                                         tail = tail->next;
608                                 tail->next = newalt;
609                         }
610                 }
611                 i = posn + 1;
612         }
613
614         got_alternates = 1;
615 }
616
617 static void fetch_alternates(const char *base)
618 {
619         struct buffer buffer;
620         char *url;
621         char *data;
622         struct active_request_slot *slot;
623         struct alternates_request alt_req;
624
625         /* If another request has already started fetching alternates,
626            wait for them to arrive and return to processing this request's
627            curl message */
628 #ifdef USE_CURL_MULTI
629         while (got_alternates == 0) {
630                 step_active_slots();
631         }
632 #endif
633
634         /* Nothing to do if they've already been fetched */
635         if (got_alternates == 1)
636                 return;
637
638         /* Start the fetch */
639         got_alternates = 0;
640
641         data = xmalloc(4096);
642         buffer.size = 4096;
643         buffer.posn = 0;
644         buffer.buffer = data;
645
646         if (get_verbosely)
647                 fprintf(stderr, "Getting alternates list for %s\n", base);
648
649         url = xmalloc(strlen(base) + 31);
650         sprintf(url, "%s/objects/info/http-alternates", base);
651
652         /* Use a callback to process the result, since another request
653            may fail and need to have alternates loaded before continuing */
654         slot = get_active_slot();
655         slot->callback_func = process_alternates_response;
656         slot->callback_data = &alt_req;
657
658         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
659         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
660         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
661
662         alt_req.base = base;
663         alt_req.url = url;
664         alt_req.buffer = &buffer;
665         alt_req.http_specific = 1;
666         alt_req.slot = slot;
667
668         if (start_active_slot(slot))
669                 run_active_slot(slot);
670         else
671                 got_alternates = -1;
672
673         free(data);
674         free(url);
675 }
676
677 #ifndef NO_EXPAT
678 static void
679 xml_start_tag(void *userData, const char *name, const char **atts)
680 {
681         struct xml_ctx *ctx = (struct xml_ctx *)userData;
682         const char *c = strchr(name, ':');
683         int new_len;
684
685         if (c == NULL)
686                 c = name;
687         else
688                 c++;
689
690         new_len = strlen(ctx->name) + strlen(c) + 2;
691
692         if (new_len > ctx->len) {
693                 ctx->name = xrealloc(ctx->name, new_len);
694                 ctx->len = new_len;
695         }
696         strcat(ctx->name, ".");
697         strcat(ctx->name, c);
698
699         if (ctx->cdata) {
700                 free(ctx->cdata);
701                 ctx->cdata = NULL;
702         }
703
704         ctx->userFunc(ctx, 0);
705 }
706
707 static void
708 xml_end_tag(void *userData, const char *name)
709 {
710         struct xml_ctx *ctx = (struct xml_ctx *)userData;
711         const char *c = strchr(name, ':');
712         char *ep;
713
714         ctx->userFunc(ctx, 1);
715
716         if (c == NULL)
717                 c = name;
718         else
719                 c++;
720
721         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
722         *ep = 0;
723 }
724
725 static void
726 xml_cdata(void *userData, const XML_Char *s, int len)
727 {
728         struct xml_ctx *ctx = (struct xml_ctx *)userData;
729         if (ctx->cdata)
730                 free(ctx->cdata);
731         ctx->cdata = xmalloc(len + 1);
732         strlcpy(ctx->cdata, s, len + 1);
733 }
734
735 static int remote_ls(struct alt_base *repo, const char *path, int flags,
736                      void (*userFunc)(struct remote_ls_ctx *ls),
737                      void *userData);
738
739 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
740 {
741         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
742
743         if (tag_closed) {
744                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
745                         if (ls->dentry_flags & IS_DIR) {
746                                 if (ls->flags & PROCESS_DIRS) {
747                                         ls->userFunc(ls);
748                                 }
749                                 if (strcmp(ls->dentry_name, ls->path) &&
750                                     ls->flags & RECURSIVE) {
751                                         ls->rc = remote_ls(ls->repo,
752                                                            ls->dentry_name,
753                                                            ls->flags,
754                                                            ls->userFunc,
755                                                            ls->userData);
756                                 }
757                         } else if (ls->flags & PROCESS_FILES) {
758                                 ls->userFunc(ls);
759                         }
760                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
761                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
762                                                   ls->repo->path_len + 1);
763                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
764                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
765                         ls->dentry_flags |= IS_DIR;
766                 }
767         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
768                 if (ls->dentry_name) {
769                         free(ls->dentry_name);
770                 }
771                 ls->dentry_name = NULL;
772                 ls->dentry_flags = 0;
773         }
774 }
775
776 static int remote_ls(struct alt_base *repo, const char *path, int flags,
777                      void (*userFunc)(struct remote_ls_ctx *ls),
778                      void *userData)
779 {
780         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
781         struct active_request_slot *slot;
782         struct slot_results results;
783         struct buffer in_buffer;
784         struct buffer out_buffer;
785         char *in_data;
786         char *out_data;
787         XML_Parser parser = XML_ParserCreate(NULL);
788         enum XML_Status result;
789         struct curl_slist *dav_headers = NULL;
790         struct xml_ctx ctx;
791         struct remote_ls_ctx ls;
792
793         ls.flags = flags;
794         ls.repo = repo;
795         ls.path = strdup(path);
796         ls.dentry_name = NULL;
797         ls.dentry_flags = 0;
798         ls.userData = userData;
799         ls.userFunc = userFunc;
800         ls.rc = 0;
801
802         sprintf(url, "%s%s", repo->base, path);
803
804         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
805         out_data = xmalloc(out_buffer.size + 1);
806         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
807         out_buffer.posn = 0;
808         out_buffer.buffer = out_data;
809
810         in_buffer.size = 4096;
811         in_data = xmalloc(in_buffer.size);
812         in_buffer.posn = 0;
813         in_buffer.buffer = in_data;
814
815         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
816         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
817
818         slot = get_active_slot();
819         slot->results = &results;
820         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
821         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
822         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
823         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
824         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
825         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
826         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
827         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
828         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
829
830         if (start_active_slot(slot)) {
831                 run_active_slot(slot);
832                 if (results.curl_result == CURLE_OK) {
833                         ctx.name = xcalloc(10, 1);
834                         ctx.len = 0;
835                         ctx.cdata = NULL;
836                         ctx.userFunc = handle_remote_ls_ctx;
837                         ctx.userData = &ls;
838                         XML_SetUserData(parser, &ctx);
839                         XML_SetElementHandler(parser, xml_start_tag,
840                                               xml_end_tag);
841                         XML_SetCharacterDataHandler(parser, xml_cdata);
842                         result = XML_Parse(parser, in_buffer.buffer,
843                                            in_buffer.posn, 1);
844                         free(ctx.name);
845
846                         if (result != XML_STATUS_OK) {
847                                 ls.rc = error("XML error: %s",
848                                               XML_ErrorString(
849                                                       XML_GetErrorCode(parser)));
850                         }
851                 } else {
852                         ls.rc = -1;
853                 }
854         } else {
855                 ls.rc = error("Unable to start PROPFIND request");
856         }
857
858         free(ls.path);
859         free(url);
860         free(out_data);
861         free(in_buffer.buffer);
862         curl_slist_free_all(dav_headers);
863
864         return ls.rc;
865 }
866
867 static void process_ls_pack(struct remote_ls_ctx *ls)
868 {
869         unsigned char sha1[20];
870
871         if (strlen(ls->dentry_name) == 63 &&
872             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
873             has_extension(ls->dentry_name, ".pack")) {
874                 get_sha1_hex(ls->dentry_name + 18, sha1);
875                 setup_index(ls->repo, sha1);
876         }
877 }
878 #endif
879
880 static int fetch_indices(struct alt_base *repo)
881 {
882         unsigned char sha1[20];
883         char *url;
884         struct buffer buffer;
885         char *data;
886         int i = 0;
887
888         struct active_request_slot *slot;
889         struct slot_results results;
890
891         if (repo->got_indices)
892                 return 0;
893
894         data = xmalloc(4096);
895         buffer.size = 4096;
896         buffer.posn = 0;
897         buffer.buffer = data;
898
899         if (get_verbosely)
900                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
901
902 #ifndef NO_EXPAT
903         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
904                       process_ls_pack, NULL) == 0)
905                 return 0;
906 #endif
907
908         url = xmalloc(strlen(repo->base) + 21);
909         sprintf(url, "%s/objects/info/packs", repo->base);
910
911         slot = get_active_slot();
912         slot->results = &results;
913         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
914         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
915         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
916         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
917         if (start_active_slot(slot)) {
918                 run_active_slot(slot);
919                 if (results.curl_result != CURLE_OK) {
920                         if (results.http_code == 404 ||
921                             results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
922                                 repo->got_indices = 1;
923                                 free(buffer.buffer);
924                                 return 0;
925                         } else {
926                                 repo->got_indices = 0;
927                                 free(buffer.buffer);
928                                 return error("%s", curl_errorstr);
929                         }
930                 }
931         } else {
932                 repo->got_indices = 0;
933                 free(buffer.buffer);
934                 return error("Unable to start request");
935         }
936
937         data = buffer.buffer;
938         while (i < buffer.posn) {
939                 switch (data[i]) {
940                 case 'P':
941                         i++;
942                         if (i + 52 <= buffer.posn &&
943                             !strncmp(data + i, " pack-", 6) &&
944                             !strncmp(data + i + 46, ".pack\n", 6)) {
945                                 get_sha1_hex(data + i + 6, sha1);
946                                 setup_index(repo, sha1);
947                                 i += 51;
948                                 break;
949                         }
950                 default:
951                         while (i < buffer.posn && data[i] != '\n')
952                                 i++;
953                 }
954                 i++;
955         }
956
957         free(buffer.buffer);
958         repo->got_indices = 1;
959         return 0;
960 }
961
962 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
963 {
964         char *url;
965         struct packed_git *target;
966         struct packed_git **lst;
967         FILE *packfile;
968         char *filename;
969         char tmpfile[PATH_MAX];
970         int ret;
971         long prev_posn = 0;
972         char range[RANGE_HEADER_SIZE];
973         struct curl_slist *range_header = NULL;
974
975         struct active_request_slot *slot;
976         struct slot_results results;
977
978         if (fetch_indices(repo))
979                 return -1;
980         target = find_sha1_pack(sha1, repo->packs);
981         if (!target)
982                 return -1;
983
984         if (get_verbosely) {
985                 fprintf(stderr, "Getting pack %s\n",
986                         sha1_to_hex(target->sha1));
987                 fprintf(stderr, " which contains %s\n",
988                         sha1_to_hex(sha1));
989         }
990
991         url = xmalloc(strlen(repo->base) + 65);
992         sprintf(url, "%s/objects/pack/pack-%s.pack",
993                 repo->base, sha1_to_hex(target->sha1));
994
995         filename = sha1_pack_name(target->sha1);
996         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
997         packfile = fopen(tmpfile, "a");
998         if (!packfile)
999                 return error("Unable to open local file %s for pack",
1000                              filename);
1001
1002         slot = get_active_slot();
1003         slot->results = &results;
1004         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1005         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1006         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1007         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1008         slot->local = packfile;
1009
1010         /* If there is data present from a previous transfer attempt,
1011            resume where it left off */
1012         prev_posn = ftell(packfile);
1013         if (prev_posn>0) {
1014                 if (get_verbosely)
1015                         fprintf(stderr,
1016                                 "Resuming fetch of pack %s at byte %ld\n",
1017                                 sha1_to_hex(target->sha1), prev_posn);
1018                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1019                 range_header = curl_slist_append(range_header, range);
1020                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1021         }
1022
1023         if (start_active_slot(slot)) {
1024                 run_active_slot(slot);
1025                 if (results.curl_result != CURLE_OK) {
1026                         fclose(packfile);
1027                         return error("Unable to get pack file %s\n%s", url,
1028                                      curl_errorstr);
1029                 }
1030         } else {
1031                 fclose(packfile);
1032                 return error("Unable to start request");
1033         }
1034
1035         fclose(packfile);
1036
1037         ret = move_temp_to_file(tmpfile, filename);
1038         if (ret)
1039                 return ret;
1040
1041         lst = &repo->packs;
1042         while (*lst != target)
1043                 lst = &((*lst)->next);
1044         *lst = (*lst)->next;
1045
1046         if (verify_pack(target, 0))
1047                 return -1;
1048         install_packed_git(target);
1049
1050         return 0;
1051 }
1052
1053 static void abort_object_request(struct object_request *obj_req)
1054 {
1055         if (obj_req->local >= 0) {
1056                 close(obj_req->local);
1057                 obj_req->local = -1;
1058         }
1059         unlink(obj_req->tmpfile);
1060         if (obj_req->slot) {
1061                 release_active_slot(obj_req->slot);
1062                 obj_req->slot = NULL;
1063         }
1064         release_object_request(obj_req);
1065 }
1066
1067 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1068 {
1069         char *hex = sha1_to_hex(sha1);
1070         int ret = 0;
1071         struct object_request *obj_req = object_queue_head;
1072
1073         while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1074                 obj_req = obj_req->next;
1075         if (obj_req == NULL)
1076                 return error("Couldn't find request for %s in the queue", hex);
1077
1078         if (has_sha1_file(obj_req->sha1)) {
1079                 abort_object_request(obj_req);
1080                 return 0;
1081         }
1082
1083 #ifdef USE_CURL_MULTI
1084         while (obj_req->state == WAITING) {
1085                 step_active_slots();
1086         }
1087 #else
1088         start_object_request(obj_req);
1089 #endif
1090
1091         while (obj_req->state == ACTIVE) {
1092                 run_active_slot(obj_req->slot);
1093         }
1094         if (obj_req->local != -1) {
1095                 close(obj_req->local); obj_req->local = -1;
1096         }
1097
1098         if (obj_req->state == ABORTED) {
1099                 ret = error("Request for %s aborted", hex);
1100         } else if (obj_req->curl_result != CURLE_OK &&
1101                    obj_req->http_code != 416) {
1102                 if (obj_req->http_code == 404 ||
1103                     obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1104                         ret = -1; /* Be silent, it is probably in a pack. */
1105                 else
1106                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1107                                     obj_req->errorstr, obj_req->curl_result,
1108                                     obj_req->http_code, hex);
1109         } else if (obj_req->zret != Z_STREAM_END) {
1110                 corrupt_object_found++;
1111                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1112         } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1113                 ret = error("File %s has bad hash", hex);
1114         } else if (obj_req->rename < 0) {
1115                 ret = error("unable to write sha1 filename %s",
1116                             obj_req->filename);
1117         }
1118
1119         release_object_request(obj_req);
1120         return ret;
1121 }
1122
1123 int fetch(unsigned char *sha1)
1124 {
1125         struct alt_base *altbase = alt;
1126
1127         if (!fetch_object(altbase, sha1))
1128                 return 0;
1129         while (altbase) {
1130                 if (!fetch_pack(altbase, sha1))
1131                         return 0;
1132                 fetch_alternates(alt->base);
1133                 altbase = altbase->next;
1134         }
1135         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1136                      alt->base);
1137 }
1138
/*
 * Return 0 if `ch` may appear unescaped in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 if it must be percent-encoded.
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1150
/*
 * Map a value to an uppercase hexadecimal digit character: values below
 * 10 become '0' + v, everything else becomes 'A' + (v - 10).
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1156
/*
 * Build "<base>refs/<ref>" in a newly allocated string, replacing every
 * character of `ref` for which needs_quote() is true with "%XX" (two
 * uppercase hex digits).  `base` is copied verbatim.  The caller owns the
 * returned buffer.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int total, base_len, c;

        base_len = strlen(base);

        /* First pass: compute the size, "refs/" + NUL included. */
        total = base_len + 6;
        for (src = ref; (c = *src) != 0; src++)
                total += needs_quote(c) ? 3 : 1;

        result = xmalloc(total);
        memcpy(result, base, base_len);
        memcpy(result + base_len, "refs/", 5);

        /* Second pass: copy the ref, escaping where required. */
        out = result + base_len + 5;
        src = ref;
        while ((c = *src) != 0) {
                src++;
                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return result;
}
1184
1185 int fetch_ref(char *ref, unsigned char *sha1)
1186 {
1187         char *url;
1188         char hex[42];
1189         struct buffer buffer;
1190         const char *base = alt->base;
1191         struct active_request_slot *slot;
1192         struct slot_results results;
1193         buffer.size = 41;
1194         buffer.posn = 0;
1195         buffer.buffer = hex;
1196         hex[41] = '\0';
1197
1198         url = quote_ref_url(base, ref);
1199         slot = get_active_slot();
1200         slot->results = &results;
1201         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1202         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1203         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1204         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1205         if (start_active_slot(slot)) {
1206                 run_active_slot(slot);
1207                 if (results.curl_result != CURLE_OK)
1208                         return error("Couldn't get %s for %s\n%s",
1209                                      url, ref, curl_errorstr);
1210         } else {
1211                 return error("Unable to start request");
1212         }
1213
1214         hex[40] = '\0';
1215         get_sha1_hex(hex, sha1);
1216         return 0;
1217 }
1218
1219 int main(int argc, const char **argv)
1220 {
1221         int commits;
1222         const char **write_ref = NULL;
1223         char **commit_id;
1224         const char *url;
1225         char *path;
1226         int arg = 1;
1227         int rc = 0;
1228
1229         setup_ident();
1230         setup_git_directory();
1231         git_config(git_default_config);
1232
1233         while (arg < argc && argv[arg][0] == '-') {
1234                 if (argv[arg][1] == 't') {
1235                         get_tree = 1;
1236                 } else if (argv[arg][1] == 'c') {
1237                         get_history = 1;
1238                 } else if (argv[arg][1] == 'a') {
1239                         get_all = 1;
1240                         get_tree = 1;
1241                         get_history = 1;
1242                 } else if (argv[arg][1] == 'v') {
1243                         get_verbosely = 1;
1244                 } else if (argv[arg][1] == 'w') {
1245                         write_ref = &argv[arg + 1];
1246                         arg++;
1247                 } else if (!strcmp(argv[arg], "--recover")) {
1248                         get_recover = 1;
1249                 } else if (!strcmp(argv[arg], "--stdin")) {
1250                         commits_on_stdin = 1;
1251                 }
1252                 arg++;
1253         }
1254         if (argc < arg + 2 - commits_on_stdin) {
1255                 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1256                 return 1;
1257         }
1258         if (commits_on_stdin) {
1259                 commits = pull_targets_stdin(&commit_id, &write_ref);
1260         } else {
1261                 commit_id = (char **) &argv[arg++];
1262                 commits = 1;
1263         }
1264         url = argv[arg];
1265
1266         http_init();
1267
1268         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1269
1270         alt = xmalloc(sizeof(*alt));
1271         alt->base = url;
1272         alt->got_indices = 0;
1273         alt->packs = NULL;
1274         alt->next = NULL;
1275         path = strstr(url, "//");
1276         if (path) {
1277                 path = strchr(path+2, '/');
1278                 if (path)
1279                         alt->path_len = strlen(path);
1280         }
1281
1282         if (pull(commits, commit_id, write_ref, url))
1283                 rc = 1;
1284
1285         http_cleanup();
1286
1287         curl_slist_free_all(no_pragma_header);
1288
1289         if (commits_on_stdin)
1290                 pull_targets_free(commits, commit_id, write_ref);
1291
1292         if (corrupt_object_found) {
1293                 fprintf(stderr,
1294 "Some loose object were found to be corrupt, but they might be just\n"
1295 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1296 "status code.  Suggest running git fsck-objects.\n");
1297         }
1298         return rc;
1299 }