Merge branch 'master' into next
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
/* Everything up to the matching #endif is compiled only when NO_EXPAT is not defined. */
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* DAV request method, the dotted element paths matched while parsing a
   PROPFIND response, and the XML body sent with the request */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Fallback definitions of the XML parse status values, used only when
   the included expat header does not define XML_STATUS_OK */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20   XML_STATUS_OK = 1,
21   XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK    1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing: invoke the callback for
   non-directory entries, invoke it for directories, and descend into
   directories other than the one being listed */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS  (1u << 1)
30 #define RECURSIVE     (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions (via dentry_flags) */
33 #define IS_DIR (1u << 0)
34 #endif
35
36 #define PREV_BUF_SIZE 4096 /* chunk size used when replaying a previous partial download */
37 #define RANGE_HEADER_SIZE 30 /* size of the buffer holding a "Range: bytes=N-" header */
38
/* Alternates state: -1 = not fetched yet or the fetch failed,
   0 = a fetch is in progress, 1 = alternates have been loaded */
39 static int got_alternates = -1;
/* NOTE(review): not referenced in this part of the file; presumably counts
   objects that failed verification - confirm against the rest of the file */
40 static int corrupt_object_found = 0;
41
/* Header list installed as CURLOPT_HTTPHEADER for object and index requests;
   it is initialised outside this part of the file */
42 static struct curl_slist *no_pragma_header;
43
/* One remote repository objects can be fetched from; alternates are
   chained through "next". */
44 struct alt_base
45 {
46         char *base; /* base URL that request URLs are built from */
47         int path_len; /* length of the path part of base; DAV hrefs are stripped by this many bytes */
48         int got_indices; /* set to 0 for new alternates; NOTE(review): presumably marks that pack indices were listed - set outside this view */
49         struct packed_git *packs; /* pack indices fetched from this repository (setup_index prepends) */
50         struct alt_base *next; /* next alternate, or NULL */
51 };
52
/* Head of the repository list: new requests start here and newly
   discovered alternates are appended at the tail. */
53 static struct alt_base *alt = NULL;
54
/* Lifecycle of an object_request. */
55 enum object_request_state {
56         WAITING, /* queued by prefetch, not started yet */
57         ABORTED, /* the temp file could not be created or the slot could not be started */
58         ACTIVE, /* transfer handed to an active slot */
59         COMPLETE, /* response processed, or the object was already present locally */
60 };
61
/* State of the download of a single loose object; requests form a
   singly linked queue headed by object_queue_head. */
62 struct object_request
63 {
64         unsigned char sha1[20]; /* object name being requested */
65         struct alt_base *repo; /* repository currently being tried */
66         char *url; /* heap-allocated request URL; freed on release */
67         char filename[PATH_MAX]; /* final path of the loose object */
68         char tmpfile[PATH_MAX]; /* "<filename>.temp", the download target */
69         int local; /* fd open on tmpfile, -1 when none */
70         enum object_request_state state;
71         CURLcode curl_result; /* copied from the slot when the response is processed */
72         char errorstr[CURL_ERROR_SIZE]; /* installed as CURLOPT_ERRORBUFFER */
73         long http_code; /* copied from the slot when the response is processed */
74         unsigned char real_sha1[20]; /* SHA-1 computed over the inflated data */
75         SHA_CTX c; /* running SHA-1 context */
76         z_stream stream; /* zlib inflate state */
77         int zret; /* most recent inflate() return value */
78         int rename; /* result of move_temp_to_file() */
79         struct active_request_slot *slot; /* slot running the transfer, NULL when none */
80         struct object_request *next;
81 };
82
/* Context handed to process_alternates_response through the slot callback. */
83 struct alternates_request {
84         char *base; /* base URL whose alternates are being fetched */
85         char *url; /* request URL buffer; rewritten in place for the fallback request */
86         struct buffer *buffer; /* receives the response body */
87         struct active_request_slot *slot; /* slot performing the request */
88         int http_specific; /* 1 while requesting http-alternates, 0 for the plain alternates fallback */
89 };
90
91 #ifndef NO_EXPAT
/* Parser state shared by the expat callbacks. */
92 struct xml_ctx
93 {
94         char *name; /* dotted path of the currently open elements, e.g. ".multistatus.response" */
95         int len; /* size that name was last reallocated to */
96         char *cdata; /* most recent character data, or NULL */
97         void (*userFunc)(struct xml_ctx *ctx, int tag_closed); /* called with 0 on tag open and 1 on tag close */
98         void *userData; /* opaque pointer for userFunc */
99 };
100
/* State of one remote_ls (PROPFIND) listing. */
101 struct remote_ls_ctx
102 {
103         struct alt_base *repo; /* repository being listed */
104         char *path; /* copy of the path being listed */
105         void (*userFunc)(struct remote_ls_ctx *ls); /* callback invoked per selected entry */
106         void *userData; /* opaque pointer for userFunc */
107         int flags; /* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
108         char *dentry_name; /* name of the current entry with the repository path prefix removed */
109         int dentry_flags; /* IS_DIR when the current entry is a collection */
110         int rc; /* result returned by remote_ls */
111         struct remote_ls_ctx *parent; /* not assigned anywhere in the visible code */
112 };
113 #endif
114
/* Head of the queue of object requests; prefetch appends at the tail. */
115 static struct object_request *object_queue_head = NULL;
116
117 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
118                                void *data)
119 {
120         unsigned char expn[4096];
121         size_t size = eltsize * nmemb;
122         int posn = 0;
123         struct object_request *obj_req = (struct object_request *)data;
124         do {
125                 ssize_t retval = write(obj_req->local,
126                                        (char *) ptr + posn, size - posn);
127                 if (retval < 0)
128                         return posn;
129                 posn += retval;
130         } while (posn < size);
131
132         obj_req->stream.avail_in = size;
133         obj_req->stream.next_in = ptr;
134         do {
135                 obj_req->stream.next_out = expn;
136                 obj_req->stream.avail_out = sizeof(expn);
137                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
138                 SHA1_Update(&obj_req->c, expn,
139                             sizeof(expn) - obj_req->stream.avail_out);
140         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
141         data_received++;
142         return size;
143 }
144
145 static void fetch_alternates(char *base);
146
147 static void process_object_response(void *callback_data);
148
149 static void start_object_request(struct object_request *obj_req)
150 {
151         char *hex = sha1_to_hex(obj_req->sha1);
152         char prevfile[PATH_MAX];
153         char *url;
154         char *posn;
155         int prevlocal;
156         unsigned char prev_buf[PREV_BUF_SIZE];
157         ssize_t prev_read = 0;
158         long prev_posn = 0;
159         char range[RANGE_HEADER_SIZE];
160         struct curl_slist *range_header = NULL;
161         struct active_request_slot *slot;
162
163         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
164         unlink(prevfile);
165         rename(obj_req->tmpfile, prevfile);
166         unlink(obj_req->tmpfile);
167
168         if (obj_req->local != -1)
169                 error("fd leakage in start: %d", obj_req->local);
170         obj_req->local = open(obj_req->tmpfile,
171                               O_WRONLY | O_CREAT | O_EXCL, 0666);
172         /* This could have failed due to the "lazy directory creation";
173          * try to mkdir the last path component.
174          */
175         if (obj_req->local < 0 && errno == ENOENT) {
176                 char *dir = strrchr(obj_req->tmpfile, '/');
177                 if (dir) {
178                         *dir = 0;
179                         mkdir(obj_req->tmpfile, 0777);
180                         *dir = '/';
181                 }
182                 obj_req->local = open(obj_req->tmpfile,
183                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
184         }
185
186         if (obj_req->local < 0) {
187                 obj_req->state = ABORTED;
188                 error("Couldn't create temporary file %s for %s: %s",
189                       obj_req->tmpfile, obj_req->filename, strerror(errno));
190                 return;
191         }
192
193         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
194
195         inflateInit(&obj_req->stream);
196
197         SHA1_Init(&obj_req->c);
198
199         url = xmalloc(strlen(obj_req->repo->base) + 50);
200         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
201         strcpy(url, obj_req->repo->base);
202         posn = url + strlen(obj_req->repo->base);
203         strcpy(posn, "objects/");
204         posn += 8;
205         memcpy(posn, hex, 2);
206         posn += 2;
207         *(posn++) = '/';
208         strcpy(posn, hex + 2);
209         strcpy(obj_req->url, url);
210
211         /* If a previous temp file is present, process what was already
212            fetched. */
213         prevlocal = open(prevfile, O_RDONLY);
214         if (prevlocal != -1) {
215                 do {
216                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
217                         if (prev_read>0) {
218                                 if (fwrite_sha1_file(prev_buf,
219                                                      1,
220                                                      prev_read,
221                                                      obj_req) == prev_read) {
222                                         prev_posn += prev_read;
223                                 } else {
224                                         prev_read = -1;
225                                 }
226                         }
227                 } while (prev_read > 0);
228                 close(prevlocal);
229         }
230         unlink(prevfile);
231
232         /* Reset inflate/SHA1 if there was an error reading the previous temp
233            file; also rewind to the beginning of the local file. */
234         if (prev_read == -1) {
235                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
236                 inflateInit(&obj_req->stream);
237                 SHA1_Init(&obj_req->c);
238                 if (prev_posn>0) {
239                         prev_posn = 0;
240                         lseek(obj_req->local, SEEK_SET, 0);
241                         ftruncate(obj_req->local, 0);
242                 }
243         }
244
245         slot = get_active_slot();
246         slot->callback_func = process_object_response;
247         slot->callback_data = obj_req;
248         obj_req->slot = slot;
249
250         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
251         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
252         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
253         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
254         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
255
256         /* If we have successfully processed data from a previous fetch
257            attempt, only fetch the data we don't already have. */
258         if (prev_posn>0) {
259                 if (get_verbosely)
260                         fprintf(stderr,
261                                 "Resuming fetch of object %s at byte %ld\n",
262                                 hex, prev_posn);
263                 sprintf(range, "Range: bytes=%ld-", prev_posn);
264                 range_header = curl_slist_append(range_header, range);
265                 curl_easy_setopt(slot->curl,
266                                  CURLOPT_HTTPHEADER, range_header);
267         }
268
269         /* Try to get the request started, abort the request on error */
270         obj_req->state = ACTIVE;
271         if (!start_active_slot(slot)) {
272                 obj_req->state = ABORTED;
273                 obj_req->slot = NULL;
274                 close(obj_req->local); obj_req->local = -1;
275                 free(obj_req->url);
276                 return;
277         }
278 }
279
280 static void finish_object_request(struct object_request *obj_req)
281 {
282         struct stat st;
283
284         fchmod(obj_req->local, 0444);
285         close(obj_req->local); obj_req->local = -1;
286
287         if (obj_req->http_code == 416) {
288                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
289         } else if (obj_req->curl_result != CURLE_OK) {
290                 if (stat(obj_req->tmpfile, &st) == 0)
291                         if (st.st_size == 0)
292                                 unlink(obj_req->tmpfile);
293                 return;
294         }
295
296         inflateEnd(&obj_req->stream);
297         SHA1_Final(obj_req->real_sha1, &obj_req->c);
298         if (obj_req->zret != Z_STREAM_END) {
299                 unlink(obj_req->tmpfile);
300                 return;
301         }
302         if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
303                 unlink(obj_req->tmpfile);
304                 return;
305         }
306         obj_req->rename =
307                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
308
309         if (obj_req->rename == 0)
310                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
311 }
312
313 static void process_object_response(void *callback_data)
314 {
315         struct object_request *obj_req =
316                 (struct object_request *)callback_data;
317
318         obj_req->curl_result = obj_req->slot->curl_result;
319         obj_req->http_code = obj_req->slot->http_code;
320         obj_req->slot = NULL;
321         obj_req->state = COMPLETE;
322
323         /* Use alternates if necessary */
324         if (obj_req->http_code == 404 ||
325             obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
326                 fetch_alternates(alt->base);
327                 if (obj_req->repo->next != NULL) {
328                         obj_req->repo =
329                                 obj_req->repo->next;
330                         close(obj_req->local);
331                         obj_req->local = -1;
332                         start_object_request(obj_req);
333                         return;
334                 }
335         }
336
337         finish_object_request(obj_req);
338 }
339
340 static void release_object_request(struct object_request *obj_req)
341 {
342         struct object_request *entry = object_queue_head;
343
344         if (obj_req->local != -1)
345                 error("fd leakage in release: %d", obj_req->local);
346         if (obj_req == object_queue_head) {
347                 object_queue_head = obj_req->next;
348         } else {
349                 while (entry->next != NULL && entry->next != obj_req)
350                         entry = entry->next;
351                 if (entry->next == obj_req)
352                         entry->next = entry->next->next;
353         }
354
355         free(obj_req->url);
356         free(obj_req);
357 }
358
359 #ifdef USE_CURL_MULTI
360 void fill_active_slots(void)
361 {
362         struct object_request *obj_req = object_queue_head;
363         struct active_request_slot *slot = active_queue_head;
364         int num_transfers;
365
366         while (active_requests < max_requests && obj_req != NULL) {
367                 if (obj_req->state == WAITING) {
368                         if (has_sha1_file(obj_req->sha1))
369                                 obj_req->state = COMPLETE;
370                         else
371                                 start_object_request(obj_req);
372                         curl_multi_perform(curlm, &num_transfers);
373                 }
374                 obj_req = obj_req->next;
375         }
376
377         while (slot != NULL) {
378                 if (!slot->in_use && slot->curl != NULL) {
379                         curl_easy_cleanup(slot->curl);
380                         slot->curl = NULL;
381                 }
382                 slot = slot->next;
383         }
384 }
385 #endif
386
387 void prefetch(unsigned char *sha1)
388 {
389         struct object_request *newreq;
390         struct object_request *tail;
391         char *filename = sha1_file_name(sha1);
392
393         newreq = xmalloc(sizeof(*newreq));
394         memcpy(newreq->sha1, sha1, 20);
395         newreq->repo = alt;
396         newreq->url = NULL;
397         newreq->local = -1;
398         newreq->state = WAITING;
399         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
400         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
401                  "%s.temp", filename);
402         newreq->slot = NULL;
403         newreq->next = NULL;
404
405         if (object_queue_head == NULL) {
406                 object_queue_head = newreq;
407         } else {
408                 tail = object_queue_head;
409                 while (tail->next != NULL) {
410                         tail = tail->next;
411                 }
412                 tail->next = newreq;
413         }
414
415 #ifdef USE_CURL_MULTI
416         fill_active_slots();
417         step_active_slots();
418 #endif
419 }
420
421 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
422 {
423         char *hex = sha1_to_hex(sha1);
424         char *filename;
425         char *url;
426         char tmpfile[PATH_MAX];
427         long prev_posn = 0;
428         char range[RANGE_HEADER_SIZE];
429         struct curl_slist *range_header = NULL;
430
431         FILE *indexfile;
432         struct active_request_slot *slot;
433         struct slot_results results;
434
435         if (has_pack_index(sha1))
436                 return 0;
437
438         if (get_verbosely)
439                 fprintf(stderr, "Getting index for pack %s\n", hex);
440
441         url = xmalloc(strlen(repo->base) + 64);
442         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
443
444         filename = sha1_pack_index_name(sha1);
445         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
446         indexfile = fopen(tmpfile, "a");
447         if (!indexfile)
448                 return error("Unable to open local file %s for pack index",
449                              filename);
450
451         slot = get_active_slot();
452         slot->results = &results;
453         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
454         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
455         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
456         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
457         slot->local = indexfile;
458
459         /* If there is data present from a previous transfer attempt,
460            resume where it left off */
461         prev_posn = ftell(indexfile);
462         if (prev_posn>0) {
463                 if (get_verbosely)
464                         fprintf(stderr,
465                                 "Resuming fetch of index for pack %s at byte %ld\n",
466                                 hex, prev_posn);
467                 sprintf(range, "Range: bytes=%ld-", prev_posn);
468                 range_header = curl_slist_append(range_header, range);
469                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
470         }
471
472         if (start_active_slot(slot)) {
473                 run_active_slot(slot);
474                 if (results.curl_result != CURLE_OK) {
475                         fclose(indexfile);
476                         return error("Unable to get pack index %s\n%s", url,
477                                      curl_errorstr);
478                 }
479         } else {
480                 fclose(indexfile);
481                 return error("Unable to start request");
482         }
483
484         fclose(indexfile);
485
486         return move_temp_to_file(tmpfile, filename);
487 }
488
489 static int setup_index(struct alt_base *repo, unsigned char *sha1)
490 {
491         struct packed_git *new_pack;
492         if (has_pack_file(sha1))
493                 return 0; // don't list this as something we can get
494
495         if (fetch_index(repo, sha1))
496                 return -1;
497
498         new_pack = parse_pack_index(sha1);
499         new_pack->next = repo->packs;
500         repo->packs = new_pack;
501         return 0;
502 }
503
504 static void process_alternates_response(void *callback_data)
505 {
506         struct alternates_request *alt_req =
507                 (struct alternates_request *)callback_data;
508         struct active_request_slot *slot = alt_req->slot;
509         struct alt_base *tail = alt;
510         char *base = alt_req->base;
511         static const char null_byte = '\0';
512         char *data;
513         int i = 0;
514
515         if (alt_req->http_specific) {
516                 if (slot->curl_result != CURLE_OK ||
517                     !alt_req->buffer->posn) {
518
519                         /* Try reusing the slot to get non-http alternates */
520                         alt_req->http_specific = 0;
521                         sprintf(alt_req->url, "%s/objects/info/alternates",
522                                 base);
523                         curl_easy_setopt(slot->curl, CURLOPT_URL,
524                                          alt_req->url);
525                         active_requests++;
526                         slot->in_use = 1;
527                         if (slot->finished != NULL)
528                                 (*slot->finished) = 0;
529                         if (!start_active_slot(slot)) {
530                                 got_alternates = -1;
531                                 slot->in_use = 0;
532                                 if (slot->finished != NULL)
533                                         (*slot->finished) = 1;
534                         }
535                         return;
536                 }
537         } else if (slot->curl_result != CURLE_OK) {
538                 if (slot->http_code != 404 &&
539                     slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
540                         got_alternates = -1;
541                         return;
542                 }
543         }
544
545         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
546         alt_req->buffer->posn--;
547         data = alt_req->buffer->buffer;
548
549         while (i < alt_req->buffer->posn) {
550                 int posn = i;
551                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
552                         posn++;
553                 if (data[posn] == '\n') {
554                         int okay = 0;
555                         int serverlen = 0;
556                         struct alt_base *newalt;
557                         char *target = NULL;
558                         char *path;
559                         if (data[i] == '/') {
560                                 serverlen = strchr(base + 8, '/') - base;
561                                 okay = 1;
562                         } else if (!memcmp(data + i, "../", 3)) {
563                                 i += 3;
564                                 serverlen = strlen(base);
565                                 while (i + 2 < posn &&
566                                        !memcmp(data + i, "../", 3)) {
567                                         do {
568                                                 serverlen--;
569                                         } while (serverlen &&
570                                                  base[serverlen - 1] != '/');
571                                         i += 3;
572                                 }
573                                 // If the server got removed, give up.
574                                 okay = strchr(base, ':') - base + 3 <
575                                         serverlen;
576                         } else if (alt_req->http_specific) {
577                                 char *colon = strchr(data + i, ':');
578                                 char *slash = strchr(data + i, '/');
579                                 if (colon && slash && colon < data + posn &&
580                                     slash < data + posn && colon < slash) {
581                                         okay = 1;
582                                 }
583                         }
584                         // skip 'objects' at end
585                         if (okay) {
586                                 target = xmalloc(serverlen + posn - i - 6);
587                                 safe_strncpy(target, base, serverlen);
588                                 safe_strncpy(target + serverlen, data + i, posn - i - 6);
589                                 if (get_verbosely)
590                                         fprintf(stderr,
591                                                 "Also look at %s\n", target);
592                                 newalt = xmalloc(sizeof(*newalt));
593                                 newalt->next = NULL;
594                                 newalt->base = target;
595                                 newalt->got_indices = 0;
596                                 newalt->packs = NULL;
597                                 path = strstr(target, "//");
598                                 if (path) {
599                                         path = strchr(path+2, '/');
600                                         if (path)
601                                                 newalt->path_len = strlen(path);
602                                 }
603
604                                 while (tail->next != NULL)
605                                         tail = tail->next;
606                                 tail->next = newalt;
607                         }
608                 }
609                 i = posn + 1;
610         }
611
612         got_alternates = 1;
613 }
614
615 static void fetch_alternates(char *base)
616 {
617         struct buffer buffer;
618         char *url;
619         char *data;
620         struct active_request_slot *slot;
621         struct alternates_request alt_req;
622
623         /* If another request has already started fetching alternates,
624            wait for them to arrive and return to processing this request's
625            curl message */
626 #ifdef USE_CURL_MULTI
627         while (got_alternates == 0) {
628                 step_active_slots();
629         }
630 #endif
631
632         /* Nothing to do if they've already been fetched */
633         if (got_alternates == 1)
634                 return;
635
636         /* Start the fetch */
637         got_alternates = 0;
638
639         data = xmalloc(4096);
640         buffer.size = 4096;
641         buffer.posn = 0;
642         buffer.buffer = data;
643
644         if (get_verbosely)
645                 fprintf(stderr, "Getting alternates list for %s\n", base);
646
647         url = xmalloc(strlen(base) + 31);
648         sprintf(url, "%s/objects/info/http-alternates", base);
649
650         /* Use a callback to process the result, since another request
651            may fail and need to have alternates loaded before continuing */
652         slot = get_active_slot();
653         slot->callback_func = process_alternates_response;
654         slot->callback_data = &alt_req;
655
656         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
657         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
658         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
659
660         alt_req.base = base;
661         alt_req.url = url;
662         alt_req.buffer = &buffer;
663         alt_req.http_specific = 1;
664         alt_req.slot = slot;
665
666         if (start_active_slot(slot))
667                 run_active_slot(slot);
668         else
669                 got_alternates = -1;
670
671         free(data);
672         free(url);
673 }
674
675 #ifndef NO_EXPAT
676 static void
677 xml_start_tag(void *userData, const char *name, const char **atts)
678 {
679         struct xml_ctx *ctx = (struct xml_ctx *)userData;
680         const char *c = strchr(name, ':');
681         int new_len;
682
683         if (c == NULL)
684                 c = name;
685         else
686                 c++;
687
688         new_len = strlen(ctx->name) + strlen(c) + 2;
689
690         if (new_len > ctx->len) {
691                 ctx->name = xrealloc(ctx->name, new_len);
692                 ctx->len = new_len;
693         }
694         strcat(ctx->name, ".");
695         strcat(ctx->name, c);
696
697         if (ctx->cdata) {
698                 free(ctx->cdata);
699                 ctx->cdata = NULL;
700         }
701
702         ctx->userFunc(ctx, 0);
703 }
704
705 static void
706 xml_end_tag(void *userData, const char *name)
707 {
708         struct xml_ctx *ctx = (struct xml_ctx *)userData;
709         const char *c = strchr(name, ':');
710         char *ep;
711
712         ctx->userFunc(ctx, 1);
713
714         if (c == NULL)
715                 c = name;
716         else
717                 c++;
718
719         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
720         *ep = 0;
721 }
722
723 static void
724 xml_cdata(void *userData, const XML_Char *s, int len)
725 {
726         struct xml_ctx *ctx = (struct xml_ctx *)userData;
727         if (ctx->cdata)
728                 free(ctx->cdata);
729         ctx->cdata = xmalloc(len + 1);
730         safe_strncpy(ctx->cdata, s, len + 1);
731 }
732
733 static int remote_ls(struct alt_base *repo, const char *path, int flags,
734                      void (*userFunc)(struct remote_ls_ctx *ls),
735                      void *userData);
736
737 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
738 {
739         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
740
741         if (tag_closed) {
742                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
743                         if (ls->dentry_flags & IS_DIR) {
744                                 if (ls->flags & PROCESS_DIRS) {
745                                         ls->userFunc(ls);
746                                 }
747                                 if (strcmp(ls->dentry_name, ls->path) &&
748                                     ls->flags & RECURSIVE) {
749                                         ls->rc = remote_ls(ls->repo,
750                                                            ls->dentry_name,
751                                                            ls->flags,
752                                                            ls->userFunc,
753                                                            ls->userData);
754                                 }
755                         } else if (ls->flags & PROCESS_FILES) {
756                                 ls->userFunc(ls);
757                         }
758                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
759                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
760                                                   ls->repo->path_len + 1);
761                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
762                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
763                         ls->dentry_flags |= IS_DIR;
764                 }
765         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
766                 if (ls->dentry_name) {
767                         free(ls->dentry_name);
768                 }
769                 ls->dentry_name = NULL;
770                 ls->dentry_flags = 0;
771         }
772 }
773
774 static int remote_ls(struct alt_base *repo, const char *path, int flags,
775                      void (*userFunc)(struct remote_ls_ctx *ls),
776                      void *userData)
777 {
778         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
779         struct active_request_slot *slot;
780         struct slot_results results;
781         struct buffer in_buffer;
782         struct buffer out_buffer;
783         char *in_data;
784         char *out_data;
785         XML_Parser parser = XML_ParserCreate(NULL);
786         enum XML_Status result;
787         struct curl_slist *dav_headers = NULL;
788         struct xml_ctx ctx;
789         struct remote_ls_ctx ls;
790
791         ls.flags = flags;
792         ls.repo = repo;
793         ls.path = strdup(path);
794         ls.dentry_name = NULL;
795         ls.dentry_flags = 0;
796         ls.userData = userData;
797         ls.userFunc = userFunc;
798         ls.rc = 0;
799
800         sprintf(url, "%s%s", repo->base, path);
801
802         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
803         out_data = xmalloc(out_buffer.size + 1);
804         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
805         out_buffer.posn = 0;
806         out_buffer.buffer = out_data;
807
808         in_buffer.size = 4096;
809         in_data = xmalloc(in_buffer.size);
810         in_buffer.posn = 0;
811         in_buffer.buffer = in_data;
812
813         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
814         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
815
816         slot = get_active_slot();
817         slot->results = &results;
818         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
819         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
820         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
821         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
822         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
823         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
824         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
825         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
826         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
827
828         if (start_active_slot(slot)) {
829                 run_active_slot(slot);
830                 if (results.curl_result == CURLE_OK) {
831                         ctx.name = xcalloc(10, 1);
832                         ctx.len = 0;
833                         ctx.cdata = NULL;
834                         ctx.userFunc = handle_remote_ls_ctx;
835                         ctx.userData = &ls;
836                         XML_SetUserData(parser, &ctx);
837                         XML_SetElementHandler(parser, xml_start_tag,
838                                               xml_end_tag);
839                         XML_SetCharacterDataHandler(parser, xml_cdata);
840                         result = XML_Parse(parser, in_buffer.buffer,
841                                            in_buffer.posn, 1);
842                         free(ctx.name);
843
844                         if (result != XML_STATUS_OK) {
845                                 ls.rc = error("XML error: %s",
846                                               XML_ErrorString(
847                                                       XML_GetErrorCode(parser)));
848                         }
849                 } else {
850                         ls.rc = -1;
851                 }
852         } else {
853                 ls.rc = error("Unable to start PROPFIND request");
854         }
855
856         free(ls.path);
857         free(url);
858         free(out_data);
859         free(in_buffer.buffer);
860         curl_slist_free_all(dav_headers);
861
862         return ls.rc;
863 }
864
865 static void process_ls_pack(struct remote_ls_ctx *ls)
866 {
867         unsigned char sha1[20];
868
869         if (strlen(ls->dentry_name) == 63 &&
870             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
871             !strncmp(ls->dentry_name+58, ".pack", 5)) {
872                 get_sha1_hex(ls->dentry_name + 18, sha1);
873                 setup_index(ls->repo, sha1);
874         }
875 }
876 #endif
877
878 static int fetch_indices(struct alt_base *repo)
879 {
880         unsigned char sha1[20];
881         char *url;
882         struct buffer buffer;
883         char *data;
884         int i = 0;
885
886         struct active_request_slot *slot;
887         struct slot_results results;
888
889         if (repo->got_indices)
890                 return 0;
891
892         data = xmalloc(4096);
893         buffer.size = 4096;
894         buffer.posn = 0;
895         buffer.buffer = data;
896
897         if (get_verbosely)
898                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
899
900 #ifndef NO_EXPAT
901         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
902                       process_ls_pack, NULL) == 0)
903                 return 0;
904 #endif
905
906         url = xmalloc(strlen(repo->base) + 21);
907         sprintf(url, "%s/objects/info/packs", repo->base);
908
909         slot = get_active_slot();
910         slot->results = &results;
911         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
912         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
913         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
914         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
915         if (start_active_slot(slot)) {
916                 run_active_slot(slot);
917                 if (results.curl_result != CURLE_OK) {
918                         if (results.http_code == 404 ||
919                             results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
920                                 repo->got_indices = 1;
921                                 free(buffer.buffer);
922                                 return 0;
923                         } else {
924                                 repo->got_indices = 0;
925                                 free(buffer.buffer);
926                                 return error("%s", curl_errorstr);
927                         }
928                 }
929         } else {
930                 repo->got_indices = 0;
931                 free(buffer.buffer);
932                 return error("Unable to start request");
933         }
934
935         data = buffer.buffer;
936         while (i < buffer.posn) {
937                 switch (data[i]) {
938                 case 'P':
939                         i++;
940                         if (i + 52 <= buffer.posn &&
941                             !strncmp(data + i, " pack-", 6) &&
942                             !strncmp(data + i + 46, ".pack\n", 6)) {
943                                 get_sha1_hex(data + i + 6, sha1);
944                                 setup_index(repo, sha1);
945                                 i += 51;
946                                 break;
947                         }
948                 default:
949                         while (i < buffer.posn && data[i] != '\n')
950                                 i++;
951                 }
952                 i++;
953         }
954
955         free(buffer.buffer);
956         repo->got_indices = 1;
957         return 0;
958 }
959
960 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
961 {
962         char *url;
963         struct packed_git *target;
964         struct packed_git **lst;
965         FILE *packfile;
966         char *filename;
967         char tmpfile[PATH_MAX];
968         int ret;
969         long prev_posn = 0;
970         char range[RANGE_HEADER_SIZE];
971         struct curl_slist *range_header = NULL;
972
973         struct active_request_slot *slot;
974         struct slot_results results;
975
976         if (fetch_indices(repo))
977                 return -1;
978         target = find_sha1_pack(sha1, repo->packs);
979         if (!target)
980                 return -1;
981
982         if (get_verbosely) {
983                 fprintf(stderr, "Getting pack %s\n",
984                         sha1_to_hex(target->sha1));
985                 fprintf(stderr, " which contains %s\n",
986                         sha1_to_hex(sha1));
987         }
988
989         url = xmalloc(strlen(repo->base) + 65);
990         sprintf(url, "%s/objects/pack/pack-%s.pack",
991                 repo->base, sha1_to_hex(target->sha1));
992
993         filename = sha1_pack_name(target->sha1);
994         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
995         packfile = fopen(tmpfile, "a");
996         if (!packfile)
997                 return error("Unable to open local file %s for pack",
998                              filename);
999
1000         slot = get_active_slot();
1001         slot->results = &results;
1002         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1003         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1004         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1005         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1006         slot->local = packfile;
1007
1008         /* If there is data present from a previous transfer attempt,
1009            resume where it left off */
1010         prev_posn = ftell(packfile);
1011         if (prev_posn>0) {
1012                 if (get_verbosely)
1013                         fprintf(stderr,
1014                                 "Resuming fetch of pack %s at byte %ld\n",
1015                                 sha1_to_hex(target->sha1), prev_posn);
1016                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1017                 range_header = curl_slist_append(range_header, range);
1018                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1019         }
1020
1021         if (start_active_slot(slot)) {
1022                 run_active_slot(slot);
1023                 if (results.curl_result != CURLE_OK) {
1024                         fclose(packfile);
1025                         return error("Unable to get pack file %s\n%s", url,
1026                                      curl_errorstr);
1027                 }
1028         } else {
1029                 fclose(packfile);
1030                 return error("Unable to start request");
1031         }
1032
1033         fclose(packfile);
1034
1035         ret = move_temp_to_file(tmpfile, filename);
1036         if (ret)
1037                 return ret;
1038
1039         lst = &repo->packs;
1040         while (*lst != target)
1041                 lst = &((*lst)->next);
1042         *lst = (*lst)->next;
1043
1044         if (verify_pack(target, 0))
1045                 return -1;
1046         install_packed_git(target);
1047
1048         return 0;
1049 }
1050
1051 static void abort_object_request(struct object_request *obj_req)
1052 {
1053         if (obj_req->local >= 0) {
1054                 close(obj_req->local);
1055                 obj_req->local = -1;
1056         }
1057         unlink(obj_req->tmpfile);
1058         if (obj_req->slot) {
1059                 release_active_slot(obj_req->slot);
1060                 obj_req->slot = NULL;
1061         }
1062         release_object_request(obj_req);
1063 }
1064
1065 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1066 {
1067         char *hex = sha1_to_hex(sha1);
1068         int ret = 0;
1069         struct object_request *obj_req = object_queue_head;
1070
1071         while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
1072                 obj_req = obj_req->next;
1073         if (obj_req == NULL)
1074                 return error("Couldn't find request for %s in the queue", hex);
1075
1076         if (has_sha1_file(obj_req->sha1)) {
1077                 abort_object_request(obj_req);
1078                 return 0;
1079         }
1080
1081 #ifdef USE_CURL_MULTI
1082         while (obj_req->state == WAITING) {
1083                 step_active_slots();
1084         }
1085 #else
1086         start_object_request(obj_req);
1087 #endif
1088
1089         while (obj_req->state == ACTIVE) {
1090                 run_active_slot(obj_req->slot);
1091         }
1092         if (obj_req->local != -1) {
1093                 close(obj_req->local); obj_req->local = -1;
1094         }
1095
1096         if (obj_req->state == ABORTED) {
1097                 ret = error("Request for %s aborted", hex);
1098         } else if (obj_req->curl_result != CURLE_OK &&
1099                    obj_req->http_code != 416) {
1100                 if (obj_req->http_code == 404 ||
1101                     obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
1102                         ret = -1; /* Be silent, it is probably in a pack. */
1103                 else
1104                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1105                                     obj_req->errorstr, obj_req->curl_result,
1106                                     obj_req->http_code, hex);
1107         } else if (obj_req->zret != Z_STREAM_END) {
1108                 corrupt_object_found++;
1109                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1110         } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
1111                 ret = error("File %s has bad hash", hex);
1112         } else if (obj_req->rename < 0) {
1113                 ret = error("unable to write sha1 filename %s",
1114                             obj_req->filename);
1115         }
1116
1117         release_object_request(obj_req);
1118         return ret;
1119 }
1120
1121 int fetch(unsigned char *sha1)
1122 {
1123         struct alt_base *altbase = alt;
1124
1125         if (!fetch_object(altbase, sha1))
1126                 return 0;
1127         while (altbase) {
1128                 if (!fetch_pack(altbase, sha1))
1129                         return 0;
1130                 fetch_alternates(alt->base);
1131                 altbase = altbase->next;
1132         }
1133         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1134                      alt->base);
1135 }
1136
/*
 * Tell whether `ch` must be percent-encoded in a ref URL.
 *
 * ASCII letters, digits, '/', '-' and '.' are passed through unchanged
 * (result 0); every other value needs quoting (result 1).
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1148
/*
 * Map a nibble value to its uppercase hexadecimal digit: values below 10
 * yield '0'..'9', the remaining ones 'A' onwards.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1154
/*
 * Build the URL "<base>refs/<ref>", percent-encoding every character of
 * `ref` for which needs_quote() is true as "%XX" (uppercase hex).
 *
 * The result is allocated with xmalloc(); the caller owns it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int base_len = strlen(base);
        int total = base_len + 6; /* "refs/" + NUL */

        /* First pass: size the result, three bytes per quoted character. */
        for (src = ref; *src != 0; src++) {
                int c = *src;

                total += needs_quote(c) ? 3 : 1;
        }

        result = xmalloc(total);
        memcpy(result, base, base_len);
        memcpy(result + base_len, "refs/", 5);

        /* Second pass: copy the ref, escaping as we go. */
        out = result + base_len + 5;
        for (src = ref; *src != 0; src++) {
                int c = *src;

                if (!needs_quote(c)) {
                        *out++ = c;
                        continue;
                }
                *out++ = '%';
                *out++ = hex((c >> 4) & 0xF);
                *out++ = hex(c & 0xF);
        }
        *out = 0;

        return result;
}
1182
1183 int fetch_ref(char *ref, unsigned char *sha1)
1184 {
1185         char *url;
1186         char hex[42];
1187         struct buffer buffer;
1188         char *base = alt->base;
1189         struct active_request_slot *slot;
1190         struct slot_results results;
1191         buffer.size = 41;
1192         buffer.posn = 0;
1193         buffer.buffer = hex;
1194         hex[41] = '\0';
1195
1196         url = quote_ref_url(base, ref);
1197         slot = get_active_slot();
1198         slot->results = &results;
1199         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1200         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1201         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1202         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1203         if (start_active_slot(slot)) {
1204                 run_active_slot(slot);
1205                 if (results.curl_result != CURLE_OK)
1206                         return error("Couldn't get %s for %s\n%s",
1207                                      url, ref, curl_errorstr);
1208         } else {
1209                 return error("Unable to start request");
1210         }
1211
1212         hex[40] = '\0';
1213         get_sha1_hex(hex, sha1);
1214         return 0;
1215 }
1216
1217 int main(int argc, char **argv)
1218 {
1219         char *commit_id;
1220         char *url;
1221         char *path;
1222         int arg = 1;
1223         int rc = 0;
1224
1225         setup_git_directory();
1226         git_config(git_default_config);
1227
1228         while (arg < argc && argv[arg][0] == '-') {
1229                 if (argv[arg][1] == 't') {
1230                         get_tree = 1;
1231                 } else if (argv[arg][1] == 'c') {
1232                         get_history = 1;
1233                 } else if (argv[arg][1] == 'a') {
1234                         get_all = 1;
1235                         get_tree = 1;
1236                         get_history = 1;
1237                 } else if (argv[arg][1] == 'v') {
1238                         get_verbosely = 1;
1239                 } else if (argv[arg][1] == 'w') {
1240                         write_ref = argv[arg + 1];
1241                         arg++;
1242                 } else if (!strcmp(argv[arg], "--recover")) {
1243                         get_recover = 1;
1244                 }
1245                 arg++;
1246         }
1247         if (argc < arg + 2) {
1248                 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
1249                 return 1;
1250         }
1251         commit_id = argv[arg];
1252         url = argv[arg + 1];
1253         write_ref_log_details = url;
1254
1255         http_init();
1256
1257         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1258
1259         alt = xmalloc(sizeof(*alt));
1260         alt->base = url;
1261         alt->got_indices = 0;
1262         alt->packs = NULL;
1263         alt->next = NULL;
1264         path = strstr(url, "//");
1265         if (path) {
1266                 path = strchr(path+2, '/');
1267                 if (path)
1268                         alt->path_len = strlen(path);
1269         }
1270
1271         if (pull(commit_id))
1272                 rc = 1;
1273
1274         http_cleanup();
1275
1276         curl_slist_free_all(no_pragma_header);
1277
1278         if (corrupt_object_found) {
1279                 fprintf(stderr,
1280 "Some loose object were found to be corrupt, but they might be just\n"
1281 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1282 "status code.  Suggest running git fsck-objects.\n");
1283         }
1284         return rc;
1285 }