Fix a bug in email extraction used in git-send-email.
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT /* the DAV/XML support below is only compiled when expat is available */
8 #include <expat.h>
9
10 /* Definitions for DAV requests: method name, dotted element paths matched by handle_remote_ls_ctx, and the request body */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses; supplied here only when the included expat header does not define XML_STATUS_OK */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20   XML_STATUS_OK = 1,
21   XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK    1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0) /* invoke the callback for entries that are not collections */
29 #define PROCESS_DIRS  (1u << 1) /* invoke the callback for collection (directory) entries */
30 #define RECURSIVE     (1u << 2) /* list collections other than the listed path itself recursively */
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0) /* set in dentry_flags when the entry's resourcetype contains a collection element */
34 #endif
35
36 #define PREV_BUF_SIZE 4096 /* size of the buffer used to replay a previous partial download */
37 #define RANGE_HEADER_SIZE 30 /* size of the buffers that hold a "Range: bytes=N-" header line */
38
39 static int got_alternates = -1; /* -1: not fetched yet or fetch failed, 0: fetch in progress, 1: alternates loaded */
40 static int corrupt_object_found = 0; /* not referenced in this part of the file */
41
42 static struct curl_slist *no_pragma_header; /* header list installed with CURLOPT_HTTPHEADER on object and index requests */
43
44 struct alt_base /* one repository (primary or alternate) that objects can be fetched from */
45 {
46         char *base; /* base URL; object, pack and info paths are appended to it */
47         int path_len; /* strlen of the path part of base (from the first '/' after "//"); skipped at the start of DAV hrefs */
48         int got_indices; /* set to 0 for newly added alternates; not otherwise used in this part of the file */
49         struct packed_git *packs; /* head of the list of pack indices fetched from this repository */
50         struct alt_base *next; /* next repository to try, or NULL */
51 };
52
53 static struct alt_base *alt = NULL; /* head of the repository list; new object requests start with it */
54
55 enum object_request_state { /* life cycle of a struct object_request */
56         WAITING, /* queued by prefetch(), transfer not started yet */
57         ABORTED, /* the temp file could not be created or the transfer could not be started */
58         ACTIVE, /* handed to an active request slot */
59         COMPLETE, /* response received, or the object was found to be present already */
60 };
61
62 struct object_request /* state of one loose-object download */
63 {
64         unsigned char sha1[20]; /* id of the requested object */
65         struct alt_base *repo; /* repository currently being tried */
66         char *url; /* heap-allocated request URL; freed in release_object_request */
67         char filename[PATH_MAX]; /* final path of the object (from sha1_file_name) */
68         char tmpfile[PATH_MAX]; /* filename + ".temp"; the download is written here */
69         int local; /* descriptor of tmpfile, -1 while no file is open */
70         enum object_request_state state;
71         CURLcode curl_result; /* copied from the slot in process_object_response */
72         char errorstr[CURL_ERROR_SIZE]; /* registered as CURLOPT_ERRORBUFFER */
73         long http_code; /* copied from the slot in process_object_response */
74         unsigned char real_sha1[20]; /* SHA-1 computed over the inflated data */
75         SHA_CTX c; /* running SHA-1 context */
76         z_stream stream; /* zlib inflate state */
77         int zret; /* last return value of inflate() */
78         int rename; /* result of move_temp_to_file() */
79         struct active_request_slot *slot; /* slot carrying the transfer; NULL once the response was processed */
80         struct object_request *next; /* next request in the queue */
81 };
82
83 struct alternates_request { /* state shared between fetch_alternates and its slot callback */
84         char *base; /* base URL of the repository whose alternates are requested */
85         char *url; /* request URL buffer; rewritten in place for the fallback request */
86         struct buffer *buffer; /* receives the response body */
87         struct active_request_slot *slot; /* slot carrying the request */
88         int http_specific; /* 1 while asking for http-alternates, 0 for the plain alternates file */
89 };
90
91 #ifndef NO_EXPAT
92 struct xml_ctx /* parser state handed to the expat callbacks as user data */
93 {
94         char *name; /* dotted path of the currently open elements, e.g. ".multistatus.response" */
95         int len; /* size recorded for the name allocation */
96         char *cdata; /* most recent character data, or NULL */
97         void (*userFunc)(struct xml_ctx *ctx, int tag_closed); /* called with 0 when a tag opens and 1 when it closes */
98         void *userData; /* opaque pointer for userFunc */
99 };
100
101 struct remote_ls_ctx /* state of one PROPFIND directory listing */
102 {
103         struct alt_base *repo; /* repository being listed */
104         char *path; /* copy of the path that was requested */
105         void (*userFunc)(struct remote_ls_ctx *ls); /* callback invoked per selected entry */
106         void *userData; /* opaque pointer for userFunc */
107         int flags; /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
108         char *dentry_name; /* href of the current entry with the repository path prefix removed */
109         int dentry_flags; /* IS_DIR for collections */
110         int rc; /* result returned by remote_ls */
111         struct remote_ls_ctx *parent; /* not referenced in this part of the file */
112 };
113 #endif
114
115 static struct object_request *object_queue_head = NULL; /* head of the singly linked queue of object requests; prefetch() appends at the tail */
116
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118                                void *data)
119 {
120         unsigned char expn[4096];
121         size_t size = eltsize * nmemb;
122         int posn = 0;
123         struct object_request *obj_req = (struct object_request *)data;
124         do {
125                 ssize_t retval = write(obj_req->local,
126                                        ptr + posn, size - posn);
127                 if (retval < 0)
128                         return posn;
129                 posn += retval;
130         } while (posn < size);
131
132         obj_req->stream.avail_in = size;
133         obj_req->stream.next_in = ptr;
134         do {
135                 obj_req->stream.next_out = expn;
136                 obj_req->stream.avail_out = sizeof(expn);
137                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138                 SHA1_Update(&obj_req->c, expn,
139                             sizeof(expn) - obj_req->stream.avail_out);
140         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141         data_received++;
142         return size;
143 }
144
145 static void fetch_alternates(char *base); /* forward declaration: called by process_object_response before its definition */
146
147 static void process_object_response(void *callback_data); /* forward declaration: installed as slot callback by start_object_request */
148
149 static void start_object_request(struct object_request *obj_req)
150 {
151         char *hex = sha1_to_hex(obj_req->sha1);
152         char prevfile[PATH_MAX];
153         char *url;
154         char *posn;
155         int prevlocal;
156         unsigned char prev_buf[PREV_BUF_SIZE];
157         ssize_t prev_read = 0;
158         long prev_posn = 0;
159         char range[RANGE_HEADER_SIZE];
160         struct curl_slist *range_header = NULL;
161         struct active_request_slot *slot;
162
163         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164         unlink(prevfile);
165         rename(obj_req->tmpfile, prevfile);
166         unlink(obj_req->tmpfile);
167
168         if (obj_req->local != -1)
169                 error("fd leakage in start: %d", obj_req->local);
170         obj_req->local = open(obj_req->tmpfile,
171                               O_WRONLY | O_CREAT | O_EXCL, 0666);
172         /* This could have failed due to the "lazy directory creation";
173          * try to mkdir the last path component.
174          */
175         if (obj_req->local < 0 && errno == ENOENT) {
176                 char *dir = strrchr(obj_req->tmpfile, '/');
177                 if (dir) {
178                         *dir = 0;
179                         mkdir(obj_req->tmpfile, 0777);
180                         *dir = '/';
181                 }
182                 obj_req->local = open(obj_req->tmpfile,
183                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
184         }
185
186         if (obj_req->local < 0) {
187                 obj_req->state = ABORTED;
188                 error("Couldn't create temporary file %s for %s: %s",
189                       obj_req->tmpfile, obj_req->filename, strerror(errno));
190                 return;
191         }
192
193         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
194
195         inflateInit(&obj_req->stream);
196
197         SHA1_Init(&obj_req->c);
198
199         url = xmalloc(strlen(obj_req->repo->base) + 50);
200         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201         strcpy(url, obj_req->repo->base);
202         posn = url + strlen(obj_req->repo->base);
203         strcpy(posn, "objects/");
204         posn += 8;
205         memcpy(posn, hex, 2);
206         posn += 2;
207         *(posn++) = '/';
208         strcpy(posn, hex + 2);
209         strcpy(obj_req->url, url);
210
211         /* If a previous temp file is present, process what was already
212            fetched. */
213         prevlocal = open(prevfile, O_RDONLY);
214         if (prevlocal != -1) {
215                 do {
216                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217                         if (prev_read>0) {
218                                 if (fwrite_sha1_file(prev_buf,
219                                                      1,
220                                                      prev_read,
221                                                      obj_req) == prev_read) {
222                                         prev_posn += prev_read;
223                                 } else {
224                                         prev_read = -1;
225                                 }
226                         }
227                 } while (prev_read > 0);
228                 close(prevlocal);
229         }
230         unlink(prevfile);
231
232         /* Reset inflate/SHA1 if there was an error reading the previous temp
233            file; also rewind to the beginning of the local file. */
234         if (prev_read == -1) {
235                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236                 inflateInit(&obj_req->stream);
237                 SHA1_Init(&obj_req->c);
238                 if (prev_posn>0) {
239                         prev_posn = 0;
240                         lseek(obj_req->local, SEEK_SET, 0);
241                         ftruncate(obj_req->local, 0);
242                 }
243         }
244
245         slot = get_active_slot();
246         slot->callback_func = process_object_response;
247         slot->callback_data = obj_req;
248         obj_req->slot = slot;
249
250         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
255
256         /* If we have successfully processed data from a previous fetch
257            attempt, only fetch the data we don't already have. */
258         if (prev_posn>0) {
259                 if (get_verbosely)
260                         fprintf(stderr,
261                                 "Resuming fetch of object %s at byte %ld\n",
262                                 hex, prev_posn);
263                 sprintf(range, "Range: bytes=%ld-", prev_posn);
264                 range_header = curl_slist_append(range_header, range);
265                 curl_easy_setopt(slot->curl,
266                                  CURLOPT_HTTPHEADER, range_header);
267         }
268
269         /* Try to get the request started, abort the request on error */
270         obj_req->state = ACTIVE;
271         if (!start_active_slot(slot)) {
272                 obj_req->state = ABORTED;
273                 obj_req->slot = NULL;
274                 close(obj_req->local); obj_req->local = -1;
275                 free(obj_req->url);
276                 return;
277         }
278 }
279
280 static void finish_object_request(struct object_request *obj_req)
281 {
282         struct stat st;
283
284         fchmod(obj_req->local, 0444);
285         close(obj_req->local); obj_req->local = -1;
286
287         if (obj_req->http_code == 416) {
288                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289         } else if (obj_req->curl_result != CURLE_OK) {
290                 if (stat(obj_req->tmpfile, &st) == 0)
291                         if (st.st_size == 0)
292                                 unlink(obj_req->tmpfile);
293                 return;
294         }
295
296         inflateEnd(&obj_req->stream);
297         SHA1_Final(obj_req->real_sha1, &obj_req->c);
298         if (obj_req->zret != Z_STREAM_END) {
299                 unlink(obj_req->tmpfile);
300                 return;
301         }
302         if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303                 unlink(obj_req->tmpfile);
304                 return;
305         }
306         obj_req->rename =
307                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
308
309         if (obj_req->rename == 0)
310                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
311 }
312
313 static void process_object_response(void *callback_data)
314 {
315         struct object_request *obj_req =
316                 (struct object_request *)callback_data;
317
318         obj_req->curl_result = obj_req->slot->curl_result;
319         obj_req->http_code = obj_req->slot->http_code;
320         obj_req->slot = NULL;
321         obj_req->state = COMPLETE;
322
323         /* Use alternates if necessary */
324         if (obj_req->http_code == 404 ||
325             obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326                 fetch_alternates(alt->base);
327                 if (obj_req->repo->next != NULL) {
328                         obj_req->repo =
329                                 obj_req->repo->next;
330                         close(obj_req->local);
331                         obj_req->local = -1;
332                         start_object_request(obj_req);
333                         return;
334                 }
335         }
336
337         finish_object_request(obj_req);
338 }
339
340 static void release_object_request(struct object_request *obj_req)
341 {
342         struct object_request *entry = object_queue_head;
343
344         if (obj_req->local != -1)
345                 error("fd leakage in release: %d", obj_req->local);
346         if (obj_req == object_queue_head) {
347                 object_queue_head = obj_req->next;
348         } else {
349                 while (entry->next != NULL && entry->next != obj_req)
350                         entry = entry->next;
351                 if (entry->next == obj_req)
352                         entry->next = entry->next->next;
353         }
354
355         free(obj_req->url);
356         free(obj_req);
357 }
358
#ifdef USE_CURL_MULTI
/*
 * Start queued object requests while there is room for more active
 * requests, then clean up the curl handles of slots that are not in use.
 *
 * A waiting request whose object is already present is marked COMPLETE
 * without a transfer.
 */
void fill_active_slots(void)
{
	struct object_request *req;
	struct active_request_slot *s;
	int running;

	for (req = object_queue_head;
	     req != NULL && active_requests < max_requests;
	     req = req->next) {
		if (req->state != WAITING)
			continue;

		if (!has_sha1_file(req->sha1))
			start_object_request(req);
		else
			req->state = COMPLETE;
		curl_multi_perform(curlm, &running);
	}

	for (s = active_queue_head; s != NULL; s = s->next) {
		if (s->in_use || s->curl == NULL)
			continue;
		curl_easy_cleanup(s->curl);
		s->curl = NULL;
	}
}
#endif
386
387 void prefetch(unsigned char *sha1)
388 {
389         struct object_request *newreq;
390         struct object_request *tail;
391         char *filename = sha1_file_name(sha1);
392
393         newreq = xmalloc(sizeof(*newreq));
394         memcpy(newreq->sha1, sha1, 20);
395         newreq->repo = alt;
396         newreq->url = NULL;
397         newreq->local = -1;
398         newreq->state = WAITING;
399         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401                  "%s.temp", filename);
402         newreq->next = NULL;
403
404         if (object_queue_head == NULL) {
405                 object_queue_head = newreq;
406         } else {
407                 tail = object_queue_head;
408                 while (tail->next != NULL) {
409                         tail = tail->next;
410                 }
411                 tail->next = newreq;
412         }
413
414 #ifdef USE_CURL_MULTI
415         fill_active_slots();
416         step_active_slots();
417 #endif
418 }
419
420 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
421 {
422         char *hex = sha1_to_hex(sha1);
423         char *filename;
424         char *url;
425         char tmpfile[PATH_MAX];
426         long prev_posn = 0;
427         char range[RANGE_HEADER_SIZE];
428         struct curl_slist *range_header = NULL;
429
430         FILE *indexfile;
431         struct active_request_slot *slot;
432         struct slot_results results;
433
434         if (has_pack_index(sha1))
435                 return 0;
436
437         if (get_verbosely)
438                 fprintf(stderr, "Getting index for pack %s\n", hex);
439
440         url = xmalloc(strlen(repo->base) + 64);
441         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
442
443         filename = sha1_pack_index_name(sha1);
444         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
445         indexfile = fopen(tmpfile, "a");
446         if (!indexfile)
447                 return error("Unable to open local file %s for pack index",
448                              filename);
449
450         slot = get_active_slot();
451         slot->results = &results;
452         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
453         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
454         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
455         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
456         slot->local = indexfile;
457
458         /* If there is data present from a previous transfer attempt,
459            resume where it left off */
460         prev_posn = ftell(indexfile);
461         if (prev_posn>0) {
462                 if (get_verbosely)
463                         fprintf(stderr,
464                                 "Resuming fetch of index for pack %s at byte %ld\n",
465                                 hex, prev_posn);
466                 sprintf(range, "Range: bytes=%ld-", prev_posn);
467                 range_header = curl_slist_append(range_header, range);
468                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
469         }
470
471         if (start_active_slot(slot)) {
472                 run_active_slot(slot);
473                 if (results.curl_result != CURLE_OK) {
474                         fclose(indexfile);
475                         return error("Unable to get pack index %s\n%s", url,
476                                      curl_errorstr);
477                 }
478         } else {
479                 fclose(indexfile);
480                 return error("Unable to start request");
481         }
482
483         fclose(indexfile);
484
485         return move_temp_to_file(tmpfile, filename);
486 }
487
488 static int setup_index(struct alt_base *repo, unsigned char *sha1)
489 {
490         struct packed_git *new_pack;
491         if (has_pack_file(sha1))
492                 return 0; // don't list this as something we can get
493
494         if (fetch_index(repo, sha1))
495                 return -1;
496
497         new_pack = parse_pack_index(sha1);
498         new_pack->next = repo->packs;
499         repo->packs = new_pack;
500         return 0;
501 }
502
503 static void process_alternates_response(void *callback_data)
504 {
505         struct alternates_request *alt_req =
506                 (struct alternates_request *)callback_data;
507         struct active_request_slot *slot = alt_req->slot;
508         struct alt_base *tail = alt;
509         char *base = alt_req->base;
510         static const char null_byte = '\0';
511         char *data;
512         int i = 0;
513
514         if (alt_req->http_specific) {
515                 if (slot->curl_result != CURLE_OK ||
516                     !alt_req->buffer->posn) {
517
518                         /* Try reusing the slot to get non-http alternates */
519                         alt_req->http_specific = 0;
520                         sprintf(alt_req->url, "%s/objects/info/alternates",
521                                 base);
522                         curl_easy_setopt(slot->curl, CURLOPT_URL,
523                                          alt_req->url);
524                         active_requests++;
525                         slot->in_use = 1;
526                         if (slot->finished != NULL)
527                                 (*slot->finished) = 0;
528                         if (!start_active_slot(slot)) {
529                                 got_alternates = -1;
530                                 slot->in_use = 0;
531                                 if (slot->finished != NULL)
532                                         (*slot->finished) = 1;
533                         }
534                         return;
535                 }
536         } else if (slot->curl_result != CURLE_OK) {
537                 if (slot->http_code != 404 &&
538                     slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
539                         got_alternates = -1;
540                         return;
541                 }
542         }
543
544         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
545         alt_req->buffer->posn--;
546         data = alt_req->buffer->buffer;
547
548         while (i < alt_req->buffer->posn) {
549                 int posn = i;
550                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
551                         posn++;
552                 if (data[posn] == '\n') {
553                         int okay = 0;
554                         int serverlen = 0;
555                         struct alt_base *newalt;
556                         char *target = NULL;
557                         char *path;
558                         if (data[i] == '/') {
559                                 serverlen = strchr(base + 8, '/') - base;
560                                 okay = 1;
561                         } else if (!memcmp(data + i, "../", 3)) {
562                                 i += 3;
563                                 serverlen = strlen(base);
564                                 while (i + 2 < posn &&
565                                        !memcmp(data + i, "../", 3)) {
566                                         do {
567                                                 serverlen--;
568                                         } while (serverlen &&
569                                                  base[serverlen - 1] != '/');
570                                         i += 3;
571                                 }
572                                 // If the server got removed, give up.
573                                 okay = strchr(base, ':') - base + 3 <
574                                         serverlen;
575                         } else if (alt_req->http_specific) {
576                                 char *colon = strchr(data + i, ':');
577                                 char *slash = strchr(data + i, '/');
578                                 if (colon && slash && colon < data + posn &&
579                                     slash < data + posn && colon < slash) {
580                                         okay = 1;
581                                 }
582                         }
583                         // skip 'objects' at end
584                         if (okay) {
585                                 target = xmalloc(serverlen + posn - i - 6);
586                                 strncpy(target, base, serverlen);
587                                 strncpy(target + serverlen, data + i,
588                                         posn - i - 7);
589                                 target[serverlen + posn - i - 7] = '\0';
590                                 if (get_verbosely)
591                                         fprintf(stderr,
592                                                 "Also look at %s\n", target);
593                                 newalt = xmalloc(sizeof(*newalt));
594                                 newalt->next = NULL;
595                                 newalt->base = target;
596                                 newalt->got_indices = 0;
597                                 newalt->packs = NULL;
598                                 path = strstr(target, "//");
599                                 if (path) {
600                                         path = strchr(path+2, '/');
601                                         if (path)
602                                                 newalt->path_len = strlen(path);
603                                 }
604
605                                 while (tail->next != NULL)
606                                         tail = tail->next;
607                                 tail->next = newalt;
608                         }
609                 }
610                 i = posn + 1;
611         }
612
613         got_alternates = 1;
614 }
615
616 static void fetch_alternates(char *base)
617 {
618         struct buffer buffer;
619         char *url;
620         char *data;
621         struct active_request_slot *slot;
622         struct alternates_request alt_req;
623
624         /* If another request has already started fetching alternates,
625            wait for them to arrive and return to processing this request's
626            curl message */
627 #ifdef USE_CURL_MULTI
628         while (got_alternates == 0) {
629                 step_active_slots();
630         }
631 #endif
632
633         /* Nothing to do if they've already been fetched */
634         if (got_alternates == 1)
635                 return;
636
637         /* Start the fetch */
638         got_alternates = 0;
639
640         data = xmalloc(4096);
641         buffer.size = 4096;
642         buffer.posn = 0;
643         buffer.buffer = data;
644
645         if (get_verbosely)
646                 fprintf(stderr, "Getting alternates list for %s\n", base);
647
648         url = xmalloc(strlen(base) + 31);
649         sprintf(url, "%s/objects/info/http-alternates", base);
650
651         /* Use a callback to process the result, since another request
652            may fail and need to have alternates loaded before continuing */
653         slot = get_active_slot();
654         slot->callback_func = process_alternates_response;
655         slot->callback_data = &alt_req;
656
657         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
658         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
659         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
660
661         alt_req.base = base;
662         alt_req.url = url;
663         alt_req.buffer = &buffer;
664         alt_req.http_specific = 1;
665         alt_req.slot = slot;
666
667         if (start_active_slot(slot))
668                 run_active_slot(slot);
669         else
670                 got_alternates = -1;
671
672         free(data);
673         free(url);
674 }
675
676 #ifndef NO_EXPAT
677 static void
678 xml_start_tag(void *userData, const char *name, const char **atts)
679 {
680         struct xml_ctx *ctx = (struct xml_ctx *)userData;
681         const char *c = strchr(name, ':');
682         int new_len;
683
684         if (c == NULL)
685                 c = name;
686         else
687                 c++;
688
689         new_len = strlen(ctx->name) + strlen(c) + 2;
690
691         if (new_len > ctx->len) {
692                 ctx->name = xrealloc(ctx->name, new_len);
693                 ctx->len = new_len;
694         }
695         strcat(ctx->name, ".");
696         strcat(ctx->name, c);
697
698         if (ctx->cdata) {
699                 free(ctx->cdata);
700                 ctx->cdata = NULL;
701         }
702
703         ctx->userFunc(ctx, 0);
704 }
705
706 static void
707 xml_end_tag(void *userData, const char *name)
708 {
709         struct xml_ctx *ctx = (struct xml_ctx *)userData;
710         const char *c = strchr(name, ':');
711         char *ep;
712
713         ctx->userFunc(ctx, 1);
714
715         if (c == NULL)
716                 c = name;
717         else
718                 c++;
719
720         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
721         *ep = 0;
722 }
723
724 static void
725 xml_cdata(void *userData, const XML_Char *s, int len)
726 {
727         struct xml_ctx *ctx = (struct xml_ctx *)userData;
728         if (ctx->cdata)
729                 free(ctx->cdata);
730         ctx->cdata = xcalloc(len+1, 1);
731         strncpy(ctx->cdata, s, len);
732 }
733
734 static int remote_ls(struct alt_base *repo, const char *path, int flags, /* forward declaration: handle_remote_ls_ctx calls remote_ls for sub-collections */
735                      void (*userFunc)(struct remote_ls_ctx *ls),
736                      void *userData);
737
738 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
739 {
740         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
741
742         if (tag_closed) {
743                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
744                         if (ls->dentry_flags & IS_DIR) {
745                                 if (ls->flags & PROCESS_DIRS) {
746                                         ls->userFunc(ls);
747                                 }
748                                 if (strcmp(ls->dentry_name, ls->path) &&
749                                     ls->flags & RECURSIVE) {
750                                         ls->rc = remote_ls(ls->repo,
751                                                            ls->dentry_name,
752                                                            ls->flags,
753                                                            ls->userFunc,
754                                                            ls->userData);
755                                 }
756                         } else if (ls->flags & PROCESS_FILES) {
757                                 ls->userFunc(ls);
758                         }
759                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
760                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
761                                                   ls->repo->path_len + 1);
762                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
763                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
764                         ls->dentry_flags |= IS_DIR;
765                 }
766         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
767                 if (ls->dentry_name) {
768                         free(ls->dentry_name);
769                 }
770                 ls->dentry_name = NULL;
771                 ls->dentry_flags = 0;
772         }
773 }
774
775 static int remote_ls(struct alt_base *repo, const char *path, int flags,
776                      void (*userFunc)(struct remote_ls_ctx *ls),
777                      void *userData)
778 {
779         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
780         struct active_request_slot *slot;
781         struct slot_results results;
782         struct buffer in_buffer;
783         struct buffer out_buffer;
784         char *in_data;
785         char *out_data;
786         XML_Parser parser = XML_ParserCreate(NULL);
787         enum XML_Status result;
788         struct curl_slist *dav_headers = NULL;
789         struct xml_ctx ctx;
790         struct remote_ls_ctx ls;
791
792         ls.flags = flags;
793         ls.repo = repo;
794         ls.path = strdup(path);
795         ls.dentry_name = NULL;
796         ls.dentry_flags = 0;
797         ls.userData = userData;
798         ls.userFunc = userFunc;
799         ls.rc = 0;
800
801         sprintf(url, "%s%s", repo->base, path);
802
803         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
804         out_data = xmalloc(out_buffer.size + 1);
805         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
806         out_buffer.posn = 0;
807         out_buffer.buffer = out_data;
808
809         in_buffer.size = 4096;
810         in_data = xmalloc(in_buffer.size);
811         in_buffer.posn = 0;
812         in_buffer.buffer = in_data;
813
814         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
815         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
816
817         slot = get_active_slot();
818         slot->results = &results;
819         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
820         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
821         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
822         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
823         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
824         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
825         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
826         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
827         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
828
829         if (start_active_slot(slot)) {
830                 run_active_slot(slot);
831                 if (results.curl_result == CURLE_OK) {
832                         ctx.name = xcalloc(10, 1);
833                         ctx.len = 0;
834                         ctx.cdata = NULL;
835                         ctx.userFunc = handle_remote_ls_ctx;
836                         ctx.userData = &ls;
837                         XML_SetUserData(parser, &ctx);
838                         XML_SetElementHandler(parser, xml_start_tag,
839                                               xml_end_tag);
840                         XML_SetCharacterDataHandler(parser, xml_cdata);
841                         result = XML_Parse(parser, in_buffer.buffer,
842                                            in_buffer.posn, 1);
843                         free(ctx.name);
844
845                         if (result != XML_STATUS_OK) {
846                                 ls.rc = error("XML error: %s",
847                                               XML_ErrorString(
848                                                       XML_GetErrorCode(parser)));
849                         }
850                 } else {
851                         ls.rc = -1;
852                 }
853         } else {
854                 ls.rc = error("Unable to start PROPFIND request");
855         }
856
857         free(ls.path);
858         free(url);
859         free(out_data);
860         free(in_buffer.buffer);
861         curl_slist_free_all(dav_headers);
862
863         return ls.rc;
864 }
865
866 static void process_ls_pack(struct remote_ls_ctx *ls)
867 {
868         unsigned char sha1[20];
869
870         if (strlen(ls->dentry_name) == 63 &&
871             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
872             !strncmp(ls->dentry_name+58, ".pack", 5)) {
873                 get_sha1_hex(ls->dentry_name + 18, sha1);
874                 setup_index(ls->repo, sha1);
875         }
876 }
877 #endif
878
879 static int fetch_indices(struct alt_base *repo)
880 {
881         unsigned char sha1[20];
882         char *url;
883         struct buffer buffer;
884         char *data;
885         int i = 0;
886
887         struct active_request_slot *slot;
888         struct slot_results results;
889
890         if (repo->got_indices)
891                 return 0;
892
893         data = xmalloc(4096);
894         buffer.size = 4096;
895         buffer.posn = 0;
896         buffer.buffer = data;
897
898         if (get_verbosely)
899                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
900
901 #ifndef NO_EXPAT
902         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
903                       process_ls_pack, NULL) == 0)
904                 return 0;
905 #endif
906
907         url = xmalloc(strlen(repo->base) + 21);
908         sprintf(url, "%s/objects/info/packs", repo->base);
909
910         slot = get_active_slot();
911         slot->results = &results;
912         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
913         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
914         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
915         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
916         if (start_active_slot(slot)) {
917                 run_active_slot(slot);
918                 if (results.curl_result != CURLE_OK) {
919                         if (results.http_code == 404 ||
920                             results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
921                                 repo->got_indices = 1;
922                                 free(buffer.buffer);
923                                 return 0;
924                         } else {
925                                 repo->got_indices = 0;
926                                 free(buffer.buffer);
927                                 return error("%s", curl_errorstr);
928                         }
929                 }
930         } else {
931                 repo->got_indices = 0;
932                 free(buffer.buffer);
933                 return error("Unable to start request");
934         }
935
936         data = buffer.buffer;
937         while (i < buffer.posn) {
938                 switch (data[i]) {
939                 case 'P':
940                         i++;
941                         if (i + 52 <= buffer.posn &&
942                             !strncmp(data + i, " pack-", 6) &&
943                             !strncmp(data + i + 46, ".pack\n", 6)) {
944                                 get_sha1_hex(data + i + 6, sha1);
945                                 setup_index(repo, sha1);
946                                 i += 51;
947                                 break;
948                         }
949                 default:
950                         while (i < buffer.posn && data[i] != '\n')
951                                 i++;
952                 }
953                 i++;
954         }
955
956         free(buffer.buffer);
957         repo->got_indices = 1;
958         return 0;
959 }
960
961 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
962 {
963         char *url;
964         struct packed_git *target;
965         struct packed_git **lst;
966         FILE *packfile;
967         char *filename;
968         char tmpfile[PATH_MAX];
969         int ret;
970         long prev_posn = 0;
971         char range[RANGE_HEADER_SIZE];
972         struct curl_slist *range_header = NULL;
973
974         struct active_request_slot *slot;
975         struct slot_results results;
976
977         if (fetch_indices(repo))
978                 return -1;
979         target = find_sha1_pack(sha1, repo->packs);
980         if (!target)
981                 return -1;
982
983         if (get_verbosely) {
984                 fprintf(stderr, "Getting pack %s\n",
985                         sha1_to_hex(target->sha1));
986                 fprintf(stderr, " which contains %s\n",
987                         sha1_to_hex(sha1));
988         }
989
990         url = xmalloc(strlen(repo->base) + 65);
991         sprintf(url, "%s/objects/pack/pack-%s.pack",
992                 repo->base, sha1_to_hex(target->sha1));
993
994         filename = sha1_pack_name(target->sha1);
995         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
996         packfile = fopen(tmpfile, "a");
997         if (!packfile)
998                 return error("Unable to open local file %s for pack",
999                              filename);
1000
1001         slot = get_active_slot();
1002         slot->results = &results;
1003         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1004         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1005         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1006         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1007         slot->local = packfile;
1008
1009         /* If there is data present from a previous transfer attempt,
1010            resume where it left off */
1011         prev_posn = ftell(packfile);
1012         if (prev_posn>0) {
1013                 if (get_verbosely)
1014                         fprintf(stderr,
1015                                 "Resuming fetch of pack %s at byte %ld\n",
1016                                 sha1_to_hex(target->sha1), prev_posn);
1017                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1018                 range_header = curl_slist_append(range_header, range);
1019                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1020         }
1021
1022         if (start_active_slot(slot)) {
1023                 run_active_slot(slot);
1024                 if (results.curl_result != CURLE_OK) {
1025                         fclose(packfile);
1026                         return error("Unable to get pack file %s\n%s", url,
1027                                      curl_errorstr);
1028                 }
1029         } else {
1030                 fclose(packfile);
1031                 return error("Unable to start request");
1032         }
1033
1034         fclose(packfile);
1035
1036         ret = move_temp_to_file(tmpfile, filename);
1037         if (ret)
1038                 return ret;
1039
1040         lst = &repo->packs;
1041         while (*lst != target)
1042                 lst = &((*lst)->next);
1043         *lst = (*lst)->next;
1044
1045         if (verify_pack(target, 0))
1046                 return -1;
1047         install_packed_git(target);
1048
1049         return 0;
1050 }
1051
1052 static void abort_object_request(struct object_request *obj_req)
1053 {
1054         if (obj_req->local >= 0) {
1055                 close(obj_req->local);
1056                 obj_req->local = -1;
1057         }
1058         unlink(obj_req->tmpfile);
1059         if (obj_req->slot) {
1060                 release_active_slot(obj_req->slot);
1061                 obj_req->slot = NULL;
1062         }
1063         release_object_request(obj_req);
1064 }
1065
1066 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1067 {
1068         char *hex = sha1_to_hex(sha1);
1069         int ret = 0;
1070         struct object_request *obj_req = object_queue_head;
1071
1072         while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1073                 obj_req = obj_req->next;
1074         if (obj_req == NULL)
1075                 return error("Couldn't find request for %s in the queue", hex);
1076
1077         if (has_sha1_file(obj_req->sha1)) {
1078                 abort_object_request(obj_req);
1079                 return 0;
1080         }
1081
1082 #ifdef USE_CURL_MULTI
1083         while (obj_req->state == WAITING) {
1084                 step_active_slots();
1085         }
1086 #else
1087         start_object_request(obj_req);
1088 #endif
1089
1090         while (obj_req->state == ACTIVE) {
1091                 run_active_slot(obj_req->slot);
1092         }
1093         if (obj_req->local != -1) {
1094                 close(obj_req->local); obj_req->local = -1;
1095         }
1096
1097         if (obj_req->state == ABORTED) {
1098                 ret = error("Request for %s aborted", hex);
1099         } else if (obj_req->curl_result != CURLE_OK &&
1100                    obj_req->http_code != 416) {
1101                 if (obj_req->http_code == 404 ||
1102                     obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1103                         ret = -1; /* Be silent, it is probably in a pack. */
1104                 else
1105                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1106                                     obj_req->errorstr, obj_req->curl_result,
1107                                     obj_req->http_code, hex);
1108         } else if (obj_req->zret != Z_STREAM_END) {
1109                 corrupt_object_found++;
1110                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1111         } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1112                 ret = error("File %s has bad hash", hex);
1113         } else if (obj_req->rename < 0) {
1114                 ret = error("unable to write sha1 filename %s",
1115                             obj_req->filename);
1116         }
1117
1118         release_object_request(obj_req);
1119         return ret;
1120 }
1121
1122 int fetch(unsigned char *sha1)
1123 {
1124         struct alt_base *altbase = alt;
1125
1126         if (!fetch_object(altbase, sha1))
1127                 return 0;
1128         while (altbase) {
1129                 if (!fetch_pack(altbase, sha1))
1130                         return 0;
1131                 fetch_alternates(alt->base);
1132                 altbase = altbase->next;
1133         }
1134         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1135                      alt->base);
1136 }
1137
/*
 * Tell whether ch has to be %-escaped when it appears in a ref URL.
 *
 * ASCII letters, digits, '/', '-' and '.' are passed through unchanged;
 * every other value is quoted.  Returns 0 for pass-through characters and
 * 1 otherwise.
 *
 * Uses plain comparisons instead of the GNU "case low ... high" range
 * extension so that it builds with any C compiler.
 */
static inline int needs_quote(int ch)
{
	if ((ch >= 'A' && ch <= 'Z') ||
	    (ch >= 'a' && ch <= 'z') ||
	    (ch >= '0' && ch <= '9') ||
	    ch == '/' || ch == '-' || ch == '.')
		return 0;
	return 1;
}
1148
/*
 * Map a nibble value to its upper-case hexadecimal digit:
 * values below 10 give '0'..'9', 10 and above give 'A' onwards.
 */
static inline int hex(int v)
{
	return (v < 10) ? '0' + v : 'A' + v - 10;
}
1154
/*
 * Build the URL "<base>refs/<ref>" in newly allocated memory, replacing
 * every character of ref for which needs_quote() is true with a "%XX"
 * escape.  base is copied verbatim.  The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *result;
	int total, base_len, c;

	base_len = strlen(base);

	/* base + "refs/" + NUL, then 1 byte per plain char, 3 per escape */
	total = base_len + 6;
	for (src = ref; (c = *src) != 0; src++)
		total += needs_quote(c) ? 3 : 1;

	result = xmalloc(total);
	memcpy(result, base, base_len);
	memcpy(result + base_len, "refs/", 5);
	out = result + base_len + 5;

	for (src = ref; (c = *src) != 0; src++) {
		if (!needs_quote(c)) {
			*out++ = c;
			continue;
		}
		*out++ = '%';
		*out++ = hex((c >> 4) & 0xF);
		*out++ = hex(c & 0xF);
	}
	*out = 0;

	return result;
}
1182
1183 int fetch_ref(char *ref, unsigned char *sha1)
1184 {
1185         char *url;
1186         char hex[42];
1187         struct buffer buffer;
1188         char *base = alt->base;
1189         struct active_request_slot *slot;
1190         struct slot_results results;
1191         buffer.size = 41;
1192         buffer.posn = 0;
1193         buffer.buffer = hex;
1194         hex[41] = '\0';
1195
1196         url = quote_ref_url(base, ref);
1197         slot = get_active_slot();
1198         slot->results = &results;
1199         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203         if (start_active_slot(slot)) {
1204                 run_active_slot(slot);
1205                 if (results.curl_result != CURLE_OK)
1206                         return error("Couldn't get %s for %s\n%s",
1207                                      url, ref, curl_errorstr);
1208         } else {
1209                 return error("Unable to start request");
1210         }
1211
1212         hex[40] = '\0';
1213         get_sha1_hex(hex, sha1);
1214         return 0;
1215 }
1216
1217 int main(int argc, char **argv)
1218 {
1219         char *commit_id;
1220         char *url;
1221         char *path;
1222         int arg = 1;
1223         int rc = 0;
1224
1225         setup_git_directory();
1226
1227         while (arg < argc && argv[arg][0] == '-') {
1228                 if (argv[arg][1] == 't') {
1229                         get_tree = 1;
1230                 } else if (argv[arg][1] == 'c') {
1231                         get_history = 1;
1232                 } else if (argv[arg][1] == 'a') {
1233                         get_all = 1;
1234                         get_tree = 1;
1235                         get_history = 1;
1236                 } else if (argv[arg][1] == 'v') {
1237                         get_verbosely = 1;
1238                 } else if (argv[arg][1] == 'w') {
1239                         write_ref = argv[arg + 1];
1240                         arg++;
1241                 } else if (!strcmp(argv[arg], "--recover")) {
1242                         get_recover = 1;
1243                 }
1244                 arg++;
1245         }
1246         if (argc < arg + 2) {
1247                 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1248                 return 1;
1249         }
1250         commit_id = argv[arg];
1251         url = argv[arg + 1];
1252
1253         http_init();
1254
1255         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1256
1257         alt = xmalloc(sizeof(*alt));
1258         alt->base = url;
1259         alt->got_indices = 0;
1260         alt->packs = NULL;
1261         alt->next = NULL;
1262         path = strstr(url, "//");
1263         if (path) {
1264                 path = strchr(path+2, '/');
1265                 if (path)
1266                         alt->path_len = strlen(path);
1267         }
1268
1269         if (pull(commit_id))
1270                 rc = 1;
1271
1272         curl_slist_free_all(no_pragma_header);
1273
1274         http_cleanup();
1275
1276         if (corrupt_object_found) {
1277                 fprintf(stderr,
1278 "Some loose object were found to be corrupt, but they might be just\n"
1279 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1280 "status code.  Suggest running git fsck-objects.\n");
1281         }
1282         return rc;
1283 }