http-fetch.c: consolidate code to detect missing fetch target
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
/* WebDAV request method and the element paths we look for in replies.
 * The paths are in the dotted form that xml_start_tag() builds up in
 * xml_ctx.name (namespace prefixes stripped). */
#define DAV_PROPFIND "PROPFIND"
#define DAV_PROPFIND_RESP ".multistatus.response"
#define DAV_PROPFIND_NAME ".multistatus.response.href"
#define DAV_PROPFIND_COLLECTION \
	".multistatus.response.propstat.prop.resourcetype.collection"

/* Request body sent with every PROPFIND: ask for all properties. */
#define PROPFIND_ALL_REQUEST \
	"<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n" \
	"<D:propfind xmlns:D=\"DAV:\">\n" \
	"<D:allprop/>\n" \
	"</D:propfind>"

/* Fallback for expat headers that do not provide XML_Status */
#ifndef XML_STATUS_OK
enum XML_Status {
	XML_STATUS_OK = 1,
	XML_STATUS_ERROR = 0
};
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* Bits accepted in the "flags" argument of remote_ls() */
#define PROCESS_FILES 0x1u
#define PROCESS_DIRS  0x2u
#define RECURSIVE     0x4u

/* Bits remote_ls reports to its callback in dentry_flags */
#define IS_DIR 0x1u
34 #endif
35
/* Chunk size used when replaying a partially downloaded object file. */
#define PREV_BUF_SIZE 4096

/*
 * Buffer size for a "Range: bytes=<offset>-" header line.  The offset is
 * formatted from a long, which can take 19 digits where long is 64 bits
 * wide: 13 + 19 + 1 characters plus the terminating NUL is 34, so the
 * former value of 30 was not enough in the worst case.
 */
#define RANGE_HEADER_SIZE 40

/* Not referenced in this part of the file; presumably set while parsing
 * the command line -- confirm against the rest of the file. */
static int commits_on_stdin;

/*
 * State of the alternates list for the primary repository:
 *  -1  not fetched yet, or the last attempt failed
 *   0  a fetch is in progress
 *   1  the list has been fetched and processed
 */
static int got_alternates = -1;

/* Not referenced in this part of the file. */
static int corrupt_object_found;

/* Header list installed (CURLOPT_HTTPHEADER) on object and index requests. */
static struct curl_slist *no_pragma_header;
45
/*
 * One repository we can fetch from.  The primary repository is the head
 * of the list; alternates discovered later are appended at the tail.
 */
struct alt_base {
	const char *base;		/* URL prefix all request URLs start with */
	int path_len;			/* length of the path part of base, as computed
					 * in process_alternates_response() */
	int got_indices;		/* cleared when an alternate is created */
	struct packed_git *packs;	/* pack indices known for this repository */
	struct alt_base *next;		/* next repository to try, or NULL */
};

/* Head of the repository list (the primary repository). */
static struct alt_base *alt;
56
/* Life cycle of a queued object request. */
enum object_request_state {
	WAITING = 0,	/* queued by prefetch(), transfer not started */
	ABORTED,	/* could not create the temp file or start the slot */
	ACTIVE,		/* transfer handed to curl */
	COMPLETE	/* response processed, or object already present */
};
63
64 struct object_request
65 {
66         unsigned char sha1[20];
67         struct alt_base *repo;
68         char *url;
69         char filename[PATH_MAX];
70         char tmpfile[PATH_MAX];
71         int local;
72         enum object_request_state state;
73         CURLcode curl_result;
74         char errorstr[CURL_ERROR_SIZE];
75         long http_code;
76         unsigned char real_sha1[20];
77         SHA_CTX c;
78         z_stream stream;
79         int zret;
80         int rename;
81         struct active_request_slot *slot;
82         struct object_request *next;
83 };
84
85 struct alternates_request {
86         const char *base;
87         char *url;
88         struct buffer *buffer;
89         struct active_request_slot *slot;
90         int http_specific;
91 };
92
93 #ifndef NO_EXPAT
/* Parser state shared by the expat callbacks below. */
struct xml_ctx {
	char *name;	/* dotted path of the elements currently open */
	int len;	/* number of bytes allocated for name */
	char *cdata;	/* most recent character data, or NULL */
	/* called with tag_closed == 0 on element start, 1 on element end */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	void *userData;
};

/* State of one remote directory listing (one PROPFIND request). */
struct remote_ls_ctx {
	struct alt_base *repo;	/* repository being listed */
	char *path;		/* path that was requested */
	/* invoked for every entry selected by flags */
	void (*userFunc)(struct remote_ls_ctx *ls);
	void *userData;
	int flags;		/* PROCESS_FILES / PROCESS_DIRS / RECURSIVE */
	char *dentry_name;	/* current entry, repository path stripped */
	int dentry_flags;	/* IS_DIR when the entry is a collection */
	int rc;			/* result of the last recursive remote_ls() */
	struct remote_ls_ctx *parent;	/* not used in this part of the file */
};
115 #endif
116
117 static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120                                void *data)
121 {
122         unsigned char expn[4096];
123         size_t size = eltsize * nmemb;
124         int posn = 0;
125         struct object_request *obj_req = (struct object_request *)data;
126         do {
127                 ssize_t retval = write(obj_req->local,
128                                        (char *) ptr + posn, size - posn);
129                 if (retval < 0)
130                         return posn;
131                 posn += retval;
132         } while (posn < size);
133
134         obj_req->stream.avail_in = size;
135         obj_req->stream.next_in = ptr;
136         do {
137                 obj_req->stream.next_out = expn;
138                 obj_req->stream.avail_out = sizeof(expn);
139                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140                 SHA1_Update(&obj_req->c, expn,
141                             sizeof(expn) - obj_req->stream.avail_out);
142         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143         data_received++;
144         return size;
145 }
146
147 static int missing__target(int code, int result)
148 {
149         return  /* file:// URL -- do we ever use one??? */
150                 (result == CURLE_FILE_COULDNT_READ_FILE) ||
151                 /* http:// and https:// URL */
152                 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR)
153                 ;
154 }
155
156 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
157
158 static void fetch_alternates(const char *base);
159
160 static void process_object_response(void *callback_data);
161
162 static void start_object_request(struct object_request *obj_req)
163 {
164         char *hex = sha1_to_hex(obj_req->sha1);
165         char prevfile[PATH_MAX];
166         char *url;
167         char *posn;
168         int prevlocal;
169         unsigned char prev_buf[PREV_BUF_SIZE];
170         ssize_t prev_read = 0;
171         long prev_posn = 0;
172         char range[RANGE_HEADER_SIZE];
173         struct curl_slist *range_header = NULL;
174         struct active_request_slot *slot;
175
176         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
177         unlink(prevfile);
178         rename(obj_req->tmpfile, prevfile);
179         unlink(obj_req->tmpfile);
180
181         if (obj_req->local != -1)
182                 error("fd leakage in start: %d", obj_req->local);
183         obj_req->local = open(obj_req->tmpfile,
184                               O_WRONLY | O_CREAT | O_EXCL, 0666);
185         /* This could have failed due to the "lazy directory creation";
186          * try to mkdir the last path component.
187          */
188         if (obj_req->local < 0 && errno == ENOENT) {
189                 char *dir = strrchr(obj_req->tmpfile, '/');
190                 if (dir) {
191                         *dir = 0;
192                         mkdir(obj_req->tmpfile, 0777);
193                         *dir = '/';
194                 }
195                 obj_req->local = open(obj_req->tmpfile,
196                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
197         }
198
199         if (obj_req->local < 0) {
200                 obj_req->state = ABORTED;
201                 error("Couldn't create temporary file %s for %s: %s",
202                       obj_req->tmpfile, obj_req->filename, strerror(errno));
203                 return;
204         }
205
206         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
207
208         inflateInit(&obj_req->stream);
209
210         SHA1_Init(&obj_req->c);
211
212         url = xmalloc(strlen(obj_req->repo->base) + 50);
213         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
214         strcpy(url, obj_req->repo->base);
215         posn = url + strlen(obj_req->repo->base);
216         strcpy(posn, "objects/");
217         posn += 8;
218         memcpy(posn, hex, 2);
219         posn += 2;
220         *(posn++) = '/';
221         strcpy(posn, hex + 2);
222         strcpy(obj_req->url, url);
223
224         /* If a previous temp file is present, process what was already
225            fetched. */
226         prevlocal = open(prevfile, O_RDONLY);
227         if (prevlocal != -1) {
228                 do {
229                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
230                         if (prev_read>0) {
231                                 if (fwrite_sha1_file(prev_buf,
232                                                      1,
233                                                      prev_read,
234                                                      obj_req) == prev_read) {
235                                         prev_posn += prev_read;
236                                 } else {
237                                         prev_read = -1;
238                                 }
239                         }
240                 } while (prev_read > 0);
241                 close(prevlocal);
242         }
243         unlink(prevfile);
244
245         /* Reset inflate/SHA1 if there was an error reading the previous temp
246            file; also rewind to the beginning of the local file. */
247         if (prev_read == -1) {
248                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
249                 inflateInit(&obj_req->stream);
250                 SHA1_Init(&obj_req->c);
251                 if (prev_posn>0) {
252                         prev_posn = 0;
253                         lseek(obj_req->local, SEEK_SET, 0);
254                         ftruncate(obj_req->local, 0);
255                 }
256         }
257
258         slot = get_active_slot();
259         slot->callback_func = process_object_response;
260         slot->callback_data = obj_req;
261         obj_req->slot = slot;
262
263         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
264         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
265         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
266         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
267         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
268
269         /* If we have successfully processed data from a previous fetch
270            attempt, only fetch the data we don't already have. */
271         if (prev_posn>0) {
272                 if (get_verbosely)
273                         fprintf(stderr,
274                                 "Resuming fetch of object %s at byte %ld\n",
275                                 hex, prev_posn);
276                 sprintf(range, "Range: bytes=%ld-", prev_posn);
277                 range_header = curl_slist_append(range_header, range);
278                 curl_easy_setopt(slot->curl,
279                                  CURLOPT_HTTPHEADER, range_header);
280         }
281
282         /* Try to get the request started, abort the request on error */
283         obj_req->state = ACTIVE;
284         if (!start_active_slot(slot)) {
285                 obj_req->state = ABORTED;
286                 obj_req->slot = NULL;
287                 close(obj_req->local); obj_req->local = -1;
288                 free(obj_req->url);
289                 return;
290         }
291 }
292
293 static void finish_object_request(struct object_request *obj_req)
294 {
295         struct stat st;
296
297         fchmod(obj_req->local, 0444);
298         close(obj_req->local); obj_req->local = -1;
299
300         if (obj_req->http_code == 416) {
301                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
302         } else if (obj_req->curl_result != CURLE_OK) {
303                 if (stat(obj_req->tmpfile, &st) == 0)
304                         if (st.st_size == 0)
305                                 unlink(obj_req->tmpfile);
306                 return;
307         }
308
309         inflateEnd(&obj_req->stream);
310         SHA1_Final(obj_req->real_sha1, &obj_req->c);
311         if (obj_req->zret != Z_STREAM_END) {
312                 unlink(obj_req->tmpfile);
313                 return;
314         }
315         if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
316                 unlink(obj_req->tmpfile);
317                 return;
318         }
319         obj_req->rename =
320                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
321
322         if (obj_req->rename == 0)
323                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
324 }
325
326 static void process_object_response(void *callback_data)
327 {
328         struct object_request *obj_req =
329                 (struct object_request *)callback_data;
330
331         obj_req->curl_result = obj_req->slot->curl_result;
332         obj_req->http_code = obj_req->slot->http_code;
333         obj_req->slot = NULL;
334         obj_req->state = COMPLETE;
335
336         /* Use alternates if necessary */
337         if (missing_target(obj_req)) {
338                 fetch_alternates(alt->base);
339                 if (obj_req->repo->next != NULL) {
340                         obj_req->repo =
341                                 obj_req->repo->next;
342                         close(obj_req->local);
343                         obj_req->local = -1;
344                         start_object_request(obj_req);
345                         return;
346                 }
347         }
348
349         finish_object_request(obj_req);
350 }
351
352 static void release_object_request(struct object_request *obj_req)
353 {
354         struct object_request *entry = object_queue_head;
355
356         if (obj_req->local != -1)
357                 error("fd leakage in release: %d", obj_req->local);
358         if (obj_req == object_queue_head) {
359                 object_queue_head = obj_req->next;
360         } else {
361                 while (entry->next != NULL && entry->next != obj_req)
362                         entry = entry->next;
363                 if (entry->next == obj_req)
364                         entry->next = entry->next->next;
365         }
366
367         free(obj_req->url);
368         free(obj_req);
369 }
370
371 #ifdef USE_CURL_MULTI
372 void fill_active_slots(void)
373 {
374         struct object_request *obj_req = object_queue_head;
375         struct active_request_slot *slot = active_queue_head;
376         int num_transfers;
377
378         while (active_requests < max_requests && obj_req != NULL) {
379                 if (obj_req->state == WAITING) {
380                         if (has_sha1_file(obj_req->sha1))
381                                 obj_req->state = COMPLETE;
382                         else
383                                 start_object_request(obj_req);
384                         curl_multi_perform(curlm, &num_transfers);
385                 }
386                 obj_req = obj_req->next;
387         }
388
389         while (slot != NULL) {
390                 if (!slot->in_use && slot->curl != NULL) {
391                         curl_easy_cleanup(slot->curl);
392                         slot->curl = NULL;
393                 }
394                 slot = slot->next;
395         }
396 }
397 #endif
398
399 void prefetch(unsigned char *sha1)
400 {
401         struct object_request *newreq;
402         struct object_request *tail;
403         char *filename = sha1_file_name(sha1);
404
405         newreq = xmalloc(sizeof(*newreq));
406         hashcpy(newreq->sha1, sha1);
407         newreq->repo = alt;
408         newreq->url = NULL;
409         newreq->local = -1;
410         newreq->state = WAITING;
411         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
412         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
413                  "%s.temp", filename);
414         newreq->slot = NULL;
415         newreq->next = NULL;
416
417         if (object_queue_head == NULL) {
418                 object_queue_head = newreq;
419         } else {
420                 tail = object_queue_head;
421                 while (tail->next != NULL) {
422                         tail = tail->next;
423                 }
424                 tail->next = newreq;
425         }
426
427 #ifdef USE_CURL_MULTI
428         fill_active_slots();
429         step_active_slots();
430 #endif
431 }
432
433 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
434 {
435         char *hex = sha1_to_hex(sha1);
436         char *filename;
437         char *url;
438         char tmpfile[PATH_MAX];
439         long prev_posn = 0;
440         char range[RANGE_HEADER_SIZE];
441         struct curl_slist *range_header = NULL;
442
443         FILE *indexfile;
444         struct active_request_slot *slot;
445         struct slot_results results;
446
447         if (has_pack_index(sha1))
448                 return 0;
449
450         if (get_verbosely)
451                 fprintf(stderr, "Getting index for pack %s\n", hex);
452
453         url = xmalloc(strlen(repo->base) + 64);
454         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
455
456         filename = sha1_pack_index_name(sha1);
457         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
458         indexfile = fopen(tmpfile, "a");
459         if (!indexfile)
460                 return error("Unable to open local file %s for pack index",
461                              filename);
462
463         slot = get_active_slot();
464         slot->results = &results;
465         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
466         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
467         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
468         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
469         slot->local = indexfile;
470
471         /* If there is data present from a previous transfer attempt,
472            resume where it left off */
473         prev_posn = ftell(indexfile);
474         if (prev_posn>0) {
475                 if (get_verbosely)
476                         fprintf(stderr,
477                                 "Resuming fetch of index for pack %s at byte %ld\n",
478                                 hex, prev_posn);
479                 sprintf(range, "Range: bytes=%ld-", prev_posn);
480                 range_header = curl_slist_append(range_header, range);
481                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
482         }
483
484         if (start_active_slot(slot)) {
485                 run_active_slot(slot);
486                 if (results.curl_result != CURLE_OK) {
487                         fclose(indexfile);
488                         return error("Unable to get pack index %s\n%s", url,
489                                      curl_errorstr);
490                 }
491         } else {
492                 fclose(indexfile);
493                 return error("Unable to start request");
494         }
495
496         fclose(indexfile);
497
498         return move_temp_to_file(tmpfile, filename);
499 }
500
501 static int setup_index(struct alt_base *repo, unsigned char *sha1)
502 {
503         struct packed_git *new_pack;
504         if (has_pack_file(sha1))
505                 return 0; /* don't list this as something we can get */
506
507         if (fetch_index(repo, sha1))
508                 return -1;
509
510         new_pack = parse_pack_index(sha1);
511         new_pack->next = repo->packs;
512         repo->packs = new_pack;
513         return 0;
514 }
515
516 static void process_alternates_response(void *callback_data)
517 {
518         struct alternates_request *alt_req =
519                 (struct alternates_request *)callback_data;
520         struct active_request_slot *slot = alt_req->slot;
521         struct alt_base *tail = alt;
522         const char *base = alt_req->base;
523         static const char null_byte = '\0';
524         char *data;
525         int i = 0;
526
527         if (alt_req->http_specific) {
528                 if (slot->curl_result != CURLE_OK ||
529                     !alt_req->buffer->posn) {
530
531                         /* Try reusing the slot to get non-http alternates */
532                         alt_req->http_specific = 0;
533                         sprintf(alt_req->url, "%s/objects/info/alternates",
534                                 base);
535                         curl_easy_setopt(slot->curl, CURLOPT_URL,
536                                          alt_req->url);
537                         active_requests++;
538                         slot->in_use = 1;
539                         if (slot->finished != NULL)
540                                 (*slot->finished) = 0;
541                         if (!start_active_slot(slot)) {
542                                 got_alternates = -1;
543                                 slot->in_use = 0;
544                                 if (slot->finished != NULL)
545                                         (*slot->finished) = 1;
546                         }
547                         return;
548                 }
549         } else if (slot->curl_result != CURLE_OK) {
550                 if (!missing_target(slot)) {
551                         got_alternates = -1;
552                         return;
553                 }
554         }
555
556         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
557         alt_req->buffer->posn--;
558         data = alt_req->buffer->buffer;
559
560         while (i < alt_req->buffer->posn) {
561                 int posn = i;
562                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
563                         posn++;
564                 if (data[posn] == '\n') {
565                         int okay = 0;
566                         int serverlen = 0;
567                         struct alt_base *newalt;
568                         char *target = NULL;
569                         char *path;
570                         if (data[i] == '/') {
571                                 /* This counts
572                                  * http://git.host/pub/scm/linux.git/
573                                  * -----------here^
574                                  * so memcpy(dst, base, serverlen) will
575                                  * copy up to "...git.host".
576                                  */
577                                 const char *colon_ss = strstr(base,"://");
578                                 if (colon_ss) {
579                                         serverlen = (strchr(colon_ss + 3, '/')
580                                                      - base);
581                                         okay = 1;
582                                 }
583                         } else if (!memcmp(data + i, "../", 3)) {
584                                 /* Relative URL; chop the corresponding
585                                  * number of subpath from base (and ../
586                                  * from data), and concatenate the result.
587                                  *
588                                  * The code first drops ../ from data, and
589                                  * then drops one ../ from data and one path
590                                  * from base.  IOW, one extra ../ is dropped
591                                  * from data than path is dropped from base.
592                                  *
593                                  * This is not wrong.  The alternate in
594                                  *     http://git.host/pub/scm/linux.git/
595                                  * to borrow from
596                                  *     http://git.host/pub/scm/linus.git/
597                                  * is ../../linus.git/objects/.  You need
598                                  * two ../../ to borrow from your direct
599                                  * neighbour.
600                                  */
601                                 i += 3;
602                                 serverlen = strlen(base);
603                                 while (i + 2 < posn &&
604                                        !memcmp(data + i, "../", 3)) {
605                                         do {
606                                                 serverlen--;
607                                         } while (serverlen &&
608                                                  base[serverlen - 1] != '/');
609                                         i += 3;
610                                 }
611                                 /* If the server got removed, give up. */
612                                 okay = strchr(base, ':') - base + 3 <
613                                         serverlen;
614                         } else if (alt_req->http_specific) {
615                                 char *colon = strchr(data + i, ':');
616                                 char *slash = strchr(data + i, '/');
617                                 if (colon && slash && colon < data + posn &&
618                                     slash < data + posn && colon < slash) {
619                                         okay = 1;
620                                 }
621                         }
622                         /* skip "objects\n" at end */
623                         if (okay) {
624                                 target = xmalloc(serverlen + posn - i - 6);
625                                 memcpy(target, base, serverlen);
626                                 memcpy(target + serverlen, data + i,
627                                        posn - i - 7);
628                                 target[serverlen + posn - i - 7] = 0;
629                                 if (get_verbosely)
630                                         fprintf(stderr,
631                                                 "Also look at %s\n", target);
632                                 newalt = xmalloc(sizeof(*newalt));
633                                 newalt->next = NULL;
634                                 newalt->base = target;
635                                 newalt->got_indices = 0;
636                                 newalt->packs = NULL;
637                                 path = strstr(target, "//");
638                                 if (path) {
639                                         path = strchr(path+2, '/');
640                                         if (path)
641                                                 newalt->path_len = strlen(path);
642                                 }
643
644                                 while (tail->next != NULL)
645                                         tail = tail->next;
646                                 tail->next = newalt;
647                         }
648                 }
649                 i = posn + 1;
650         }
651
652         got_alternates = 1;
653 }
654
655 static void fetch_alternates(const char *base)
656 {
657         struct buffer buffer;
658         char *url;
659         char *data;
660         struct active_request_slot *slot;
661         struct alternates_request alt_req;
662
663         /* If another request has already started fetching alternates,
664            wait for them to arrive and return to processing this request's
665            curl message */
666 #ifdef USE_CURL_MULTI
667         while (got_alternates == 0) {
668                 step_active_slots();
669         }
670 #endif
671
672         /* Nothing to do if they've already been fetched */
673         if (got_alternates == 1)
674                 return;
675
676         /* Start the fetch */
677         got_alternates = 0;
678
679         data = xmalloc(4096);
680         buffer.size = 4096;
681         buffer.posn = 0;
682         buffer.buffer = data;
683
684         if (get_verbosely)
685                 fprintf(stderr, "Getting alternates list for %s\n", base);
686
687         url = xmalloc(strlen(base) + 31);
688         sprintf(url, "%s/objects/info/http-alternates", base);
689
690         /* Use a callback to process the result, since another request
691            may fail and need to have alternates loaded before continuing */
692         slot = get_active_slot();
693         slot->callback_func = process_alternates_response;
694         slot->callback_data = &alt_req;
695
696         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
697         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
698         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
699
700         alt_req.base = base;
701         alt_req.url = url;
702         alt_req.buffer = &buffer;
703         alt_req.http_specific = 1;
704         alt_req.slot = slot;
705
706         if (start_active_slot(slot))
707                 run_active_slot(slot);
708         else
709                 got_alternates = -1;
710
711         free(data);
712         free(url);
713 }
714
715 #ifndef NO_EXPAT
716 static void
717 xml_start_tag(void *userData, const char *name, const char **atts)
718 {
719         struct xml_ctx *ctx = (struct xml_ctx *)userData;
720         const char *c = strchr(name, ':');
721         int new_len;
722
723         if (c == NULL)
724                 c = name;
725         else
726                 c++;
727
728         new_len = strlen(ctx->name) + strlen(c) + 2;
729
730         if (new_len > ctx->len) {
731                 ctx->name = xrealloc(ctx->name, new_len);
732                 ctx->len = new_len;
733         }
734         strcat(ctx->name, ".");
735         strcat(ctx->name, c);
736
737         free(ctx->cdata);
738         ctx->cdata = NULL;
739
740         ctx->userFunc(ctx, 0);
741 }
742
743 static void
744 xml_end_tag(void *userData, const char *name)
745 {
746         struct xml_ctx *ctx = (struct xml_ctx *)userData;
747         const char *c = strchr(name, ':');
748         char *ep;
749
750         ctx->userFunc(ctx, 1);
751
752         if (c == NULL)
753                 c = name;
754         else
755                 c++;
756
757         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
758         *ep = 0;
759 }
760
761 static void
762 xml_cdata(void *userData, const XML_Char *s, int len)
763 {
764         struct xml_ctx *ctx = (struct xml_ctx *)userData;
765         free(ctx->cdata);
766         ctx->cdata = xmalloc(len + 1);
767         strlcpy(ctx->cdata, s, len + 1);
768 }
769
770 static int remote_ls(struct alt_base *repo, const char *path, int flags,
771                      void (*userFunc)(struct remote_ls_ctx *ls),
772                      void *userData);
773
774 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
775 {
776         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
777
778         if (tag_closed) {
779                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
780                         if (ls->dentry_flags & IS_DIR) {
781                                 if (ls->flags & PROCESS_DIRS) {
782                                         ls->userFunc(ls);
783                                 }
784                                 if (strcmp(ls->dentry_name, ls->path) &&
785                                     ls->flags & RECURSIVE) {
786                                         ls->rc = remote_ls(ls->repo,
787                                                            ls->dentry_name,
788                                                            ls->flags,
789                                                            ls->userFunc,
790                                                            ls->userData);
791                                 }
792                         } else if (ls->flags & PROCESS_FILES) {
793                                 ls->userFunc(ls);
794                         }
795                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
796                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
797                                                   ls->repo->path_len + 1);
798                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
799                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
800                         ls->dentry_flags |= IS_DIR;
801                 }
802         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
803                 free(ls->dentry_name);
804                 ls->dentry_name = NULL;
805                 ls->dentry_flags = 0;
806         }
807 }
808
809 static int remote_ls(struct alt_base *repo, const char *path, int flags,
810                      void (*userFunc)(struct remote_ls_ctx *ls),
811                      void *userData)
812 {
813         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
814         struct active_request_slot *slot;
815         struct slot_results results;
816         struct buffer in_buffer;
817         struct buffer out_buffer;
818         char *in_data;
819         char *out_data;
820         XML_Parser parser = XML_ParserCreate(NULL);
821         enum XML_Status result;
822         struct curl_slist *dav_headers = NULL;
823         struct xml_ctx ctx;
824         struct remote_ls_ctx ls;
825
826         ls.flags = flags;
827         ls.repo = repo;
828         ls.path = xstrdup(path);
829         ls.dentry_name = NULL;
830         ls.dentry_flags = 0;
831         ls.userData = userData;
832         ls.userFunc = userFunc;
833         ls.rc = 0;
834
835         sprintf(url, "%s%s", repo->base, path);
836
837         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
838         out_data = xmalloc(out_buffer.size + 1);
839         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
840         out_buffer.posn = 0;
841         out_buffer.buffer = out_data;
842
843         in_buffer.size = 4096;
844         in_data = xmalloc(in_buffer.size);
845         in_buffer.posn = 0;
846         in_buffer.buffer = in_data;
847
848         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
849         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
850
851         slot = get_active_slot();
852         slot->results = &results;
853         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
854         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
855         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
856         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
857         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
858         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
859         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
860         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
861         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
862
863         if (start_active_slot(slot)) {
864                 run_active_slot(slot);
865                 if (results.curl_result == CURLE_OK) {
866                         ctx.name = xcalloc(10, 1);
867                         ctx.len = 0;
868                         ctx.cdata = NULL;
869                         ctx.userFunc = handle_remote_ls_ctx;
870                         ctx.userData = &ls;
871                         XML_SetUserData(parser, &ctx);
872                         XML_SetElementHandler(parser, xml_start_tag,
873                                               xml_end_tag);
874                         XML_SetCharacterDataHandler(parser, xml_cdata);
875                         result = XML_Parse(parser, in_buffer.buffer,
876                                            in_buffer.posn, 1);
877                         free(ctx.name);
878
879                         if (result != XML_STATUS_OK) {
880                                 ls.rc = error("XML error: %s",
881                                               XML_ErrorString(
882                                                       XML_GetErrorCode(parser)));
883                         }
884                 } else {
885                         ls.rc = -1;
886                 }
887         } else {
888                 ls.rc = error("Unable to start PROPFIND request");
889         }
890
891         free(ls.path);
892         free(url);
893         free(out_data);
894         free(in_buffer.buffer);
895         curl_slist_free_all(dav_headers);
896
897         return ls.rc;
898 }
899
900 static void process_ls_pack(struct remote_ls_ctx *ls)
901 {
902         unsigned char sha1[20];
903
904         if (strlen(ls->dentry_name) == 63 &&
905             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
906             has_extension(ls->dentry_name, ".pack")) {
907                 get_sha1_hex(ls->dentry_name + 18, sha1);
908                 setup_index(ls->repo, sha1);
909         }
910 }
911 #endif
912
913 static int fetch_indices(struct alt_base *repo)
914 {
915         unsigned char sha1[20];
916         char *url;
917         struct buffer buffer;
918         char *data;
919         int i = 0;
920
921         struct active_request_slot *slot;
922         struct slot_results results;
923
924         if (repo->got_indices)
925                 return 0;
926
927         data = xmalloc(4096);
928         buffer.size = 4096;
929         buffer.posn = 0;
930         buffer.buffer = data;
931
932         if (get_verbosely)
933                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
934
935 #ifndef NO_EXPAT
936         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
937                       process_ls_pack, NULL) == 0)
938                 return 0;
939 #endif
940
941         url = xmalloc(strlen(repo->base) + 21);
942         sprintf(url, "%s/objects/info/packs", repo->base);
943
944         slot = get_active_slot();
945         slot->results = &results;
946         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
947         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
948         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
949         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
950         if (start_active_slot(slot)) {
951                 run_active_slot(slot);
952                 if (results.curl_result != CURLE_OK) {
953                         if (missing_target(&results)) {
954                                 repo->got_indices = 1;
955                                 free(buffer.buffer);
956                                 return 0;
957                         } else {
958                                 repo->got_indices = 0;
959                                 free(buffer.buffer);
960                                 return error("%s", curl_errorstr);
961                         }
962                 }
963         } else {
964                 repo->got_indices = 0;
965                 free(buffer.buffer);
966                 return error("Unable to start request");
967         }
968
969         data = buffer.buffer;
970         while (i < buffer.posn) {
971                 switch (data[i]) {
972                 case 'P':
973                         i++;
974                         if (i + 52 <= buffer.posn &&
975                             !strncmp(data + i, " pack-", 6) &&
976                             !strncmp(data + i + 46, ".pack\n", 6)) {
977                                 get_sha1_hex(data + i + 6, sha1);
978                                 setup_index(repo, sha1);
979                                 i += 51;
980                                 break;
981                         }
982                 default:
983                         while (i < buffer.posn && data[i] != '\n')
984                                 i++;
985                 }
986                 i++;
987         }
988
989         free(buffer.buffer);
990         repo->got_indices = 1;
991         return 0;
992 }
993
994 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
995 {
996         char *url;
997         struct packed_git *target;
998         struct packed_git **lst;
999         FILE *packfile;
1000         char *filename;
1001         char tmpfile[PATH_MAX];
1002         int ret;
1003         long prev_posn = 0;
1004         char range[RANGE_HEADER_SIZE];
1005         struct curl_slist *range_header = NULL;
1006
1007         struct active_request_slot *slot;
1008         struct slot_results results;
1009
1010         if (fetch_indices(repo))
1011                 return -1;
1012         target = find_sha1_pack(sha1, repo->packs);
1013         if (!target)
1014                 return -1;
1015
1016         if (get_verbosely) {
1017                 fprintf(stderr, "Getting pack %s\n",
1018                         sha1_to_hex(target->sha1));
1019                 fprintf(stderr, " which contains %s\n",
1020                         sha1_to_hex(sha1));
1021         }
1022
1023         url = xmalloc(strlen(repo->base) + 65);
1024         sprintf(url, "%s/objects/pack/pack-%s.pack",
1025                 repo->base, sha1_to_hex(target->sha1));
1026
1027         filename = sha1_pack_name(target->sha1);
1028         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1029         packfile = fopen(tmpfile, "a");
1030         if (!packfile)
1031                 return error("Unable to open local file %s for pack",
1032                              filename);
1033
1034         slot = get_active_slot();
1035         slot->results = &results;
1036         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1037         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1038         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1039         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1040         slot->local = packfile;
1041
1042         /* If there is data present from a previous transfer attempt,
1043            resume where it left off */
1044         prev_posn = ftell(packfile);
1045         if (prev_posn>0) {
1046                 if (get_verbosely)
1047                         fprintf(stderr,
1048                                 "Resuming fetch of pack %s at byte %ld\n",
1049                                 sha1_to_hex(target->sha1), prev_posn);
1050                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1051                 range_header = curl_slist_append(range_header, range);
1052                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1053         }
1054
1055         if (start_active_slot(slot)) {
1056                 run_active_slot(slot);
1057                 if (results.curl_result != CURLE_OK) {
1058                         fclose(packfile);
1059                         return error("Unable to get pack file %s\n%s", url,
1060                                      curl_errorstr);
1061                 }
1062         } else {
1063                 fclose(packfile);
1064                 return error("Unable to start request");
1065         }
1066
1067         fclose(packfile);
1068
1069         ret = move_temp_to_file(tmpfile, filename);
1070         if (ret)
1071                 return ret;
1072
1073         lst = &repo->packs;
1074         while (*lst != target)
1075                 lst = &((*lst)->next);
1076         *lst = (*lst)->next;
1077
1078         if (verify_pack(target, 0))
1079                 return -1;
1080         install_packed_git(target);
1081
1082         return 0;
1083 }
1084
1085 static void abort_object_request(struct object_request *obj_req)
1086 {
1087         if (obj_req->local >= 0) {
1088                 close(obj_req->local);
1089                 obj_req->local = -1;
1090         }
1091         unlink(obj_req->tmpfile);
1092         if (obj_req->slot) {
1093                 release_active_slot(obj_req->slot);
1094                 obj_req->slot = NULL;
1095         }
1096         release_object_request(obj_req);
1097 }
1098
1099 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1100 {
1101         char *hex = sha1_to_hex(sha1);
1102         int ret = 0;
1103         struct object_request *obj_req = object_queue_head;
1104
1105         while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1106                 obj_req = obj_req->next;
1107         if (obj_req == NULL)
1108                 return error("Couldn't find request for %s in the queue", hex);
1109
1110         if (has_sha1_file(obj_req->sha1)) {
1111                 abort_object_request(obj_req);
1112                 return 0;
1113         }
1114
1115 #ifdef USE_CURL_MULTI
1116         while (obj_req->state == WAITING) {
1117                 step_active_slots();
1118         }
1119 #else
1120         start_object_request(obj_req);
1121 #endif
1122
1123         while (obj_req->state == ACTIVE) {
1124                 run_active_slot(obj_req->slot);
1125         }
1126         if (obj_req->local != -1) {
1127                 close(obj_req->local); obj_req->local = -1;
1128         }
1129
1130         if (obj_req->state == ABORTED) {
1131                 ret = error("Request for %s aborted", hex);
1132         } else if (obj_req->curl_result != CURLE_OK &&
1133                    obj_req->http_code != 416) {
1134                 if (missing_target(obj_req))
1135                         ret = -1; /* Be silent, it is probably in a pack. */
1136                 else
1137                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1138                                     obj_req->errorstr, obj_req->curl_result,
1139                                     obj_req->http_code, hex);
1140         } else if (obj_req->zret != Z_STREAM_END) {
1141                 corrupt_object_found++;
1142                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1143         } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1144                 ret = error("File %s has bad hash", hex);
1145         } else if (obj_req->rename < 0) {
1146                 ret = error("unable to write sha1 filename %s",
1147                             obj_req->filename);
1148         }
1149
1150         release_object_request(obj_req);
1151         return ret;
1152 }
1153
1154 int fetch(unsigned char *sha1)
1155 {
1156         struct alt_base *altbase = alt;
1157
1158         if (!fetch_object(altbase, sha1))
1159                 return 0;
1160         while (altbase) {
1161                 if (!fetch_pack(altbase, sha1))
1162                         return 0;
1163                 fetch_alternates(alt->base);
1164                 altbase = altbase->next;
1165         }
1166         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1167                      alt->base);
1168 }
1169
/*
 * Return 0 when `ch` may appear verbatim in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 when it has to be %-escaped.
 */
static inline int needs_quote(int ch)
{
	if (ch >= 'a' && ch <= 'z')
		return 0;
	if (ch >= 'A' && ch <= 'Z')
		return 0;
	if (ch >= '0' && ch <= '9')
		return 0;

	switch (ch) {
	case '/':
	case '-':
	case '.':
		return 0;
	default:
		return 1;
	}
}
1181
/*
 * Map a nibble value to its upper-case hexadecimal digit: values below 10
 * become '0'..'9', everything else is offset from 'A'.
 */
static inline int hex(int v)
{
	return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1187
/*
 * Build the URL "<base>refs/<ref>" in a freshly allocated buffer,
 * replacing every character of `ref` for which needs_quote() is true by
 * "%XX" (two upper-case hex digits).  `base` is copied unchanged.
 *
 * The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
	const char *src;
	char *out, *qref;
	int baselen = strlen(base);
	int len = baselen + 6; /* "refs/" + NUL */

	/* First pass: one byte per plain character, three per escape. */
	for (src = ref; *src; src++)
		len += needs_quote(*src) ? 3 : 1;

	qref = xmalloc(len);
	memcpy(qref, base, baselen);
	memcpy(qref + baselen, "refs/", 5);

	/* Second pass: emit the (possibly escaped) ref name. */
	out = qref + baselen + 5;
	for (src = ref; *src; src++) {
		int ch = *src;

		if (!needs_quote(ch)) {
			*out++ = ch;
			continue;
		}
		*out++ = '%';
		*out++ = hex((ch >> 4) & 0xF);
		*out++ = hex(ch & 0xF);
	}
	*out = 0;

	return qref;
}
1215
1216 int fetch_ref(char *ref, unsigned char *sha1)
1217 {
1218         char *url;
1219         char hex[42];
1220         struct buffer buffer;
1221         const char *base = alt->base;
1222         struct active_request_slot *slot;
1223         struct slot_results results;
1224         buffer.size = 41;
1225         buffer.posn = 0;
1226         buffer.buffer = hex;
1227         hex[41] = '\0';
1228
1229         url = quote_ref_url(base, ref);
1230         slot = get_active_slot();
1231         slot->results = &results;
1232         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1233         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1234         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1235         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1236         if (start_active_slot(slot)) {
1237                 run_active_slot(slot);
1238                 if (results.curl_result != CURLE_OK)
1239                         return error("Couldn't get %s for %s\n%s",
1240                                      url, ref, curl_errorstr);
1241         } else {
1242                 return error("Unable to start request");
1243         }
1244
1245         hex[40] = '\0';
1246         get_sha1_hex(hex, sha1);
1247         return 0;
1248 }
1249
1250 int main(int argc, const char **argv)
1251 {
1252         int commits;
1253         const char **write_ref = NULL;
1254         char **commit_id;
1255         const char *url;
1256         char *path;
1257         int arg = 1;
1258         int rc = 0;
1259
1260         setup_ident();
1261         setup_git_directory();
1262         git_config(git_default_config);
1263
1264         while (arg < argc && argv[arg][0] == '-') {
1265                 if (argv[arg][1] == 't') {
1266                         get_tree = 1;
1267                 } else if (argv[arg][1] == 'c') {
1268                         get_history = 1;
1269                 } else if (argv[arg][1] == 'a') {
1270                         get_all = 1;
1271                         get_tree = 1;
1272                         get_history = 1;
1273                 } else if (argv[arg][1] == 'v') {
1274                         get_verbosely = 1;
1275                 } else if (argv[arg][1] == 'w') {
1276                         write_ref = &argv[arg + 1];
1277                         arg++;
1278                 } else if (!strcmp(argv[arg], "--recover")) {
1279                         get_recover = 1;
1280                 } else if (!strcmp(argv[arg], "--stdin")) {
1281                         commits_on_stdin = 1;
1282                 }
1283                 arg++;
1284         }
1285         if (argc < arg + 2 - commits_on_stdin) {
1286                 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1287                 return 1;
1288         }
1289         if (commits_on_stdin) {
1290                 commits = pull_targets_stdin(&commit_id, &write_ref);
1291         } else {
1292                 commit_id = (char **) &argv[arg++];
1293                 commits = 1;
1294         }
1295         url = argv[arg];
1296
1297         http_init();
1298
1299         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1300
1301         alt = xmalloc(sizeof(*alt));
1302         alt->base = url;
1303         alt->got_indices = 0;
1304         alt->packs = NULL;
1305         alt->next = NULL;
1306         path = strstr(url, "//");
1307         if (path) {
1308                 path = strchr(path+2, '/');
1309                 if (path)
1310                         alt->path_len = strlen(path);
1311         }
1312
1313         if (pull(commits, commit_id, write_ref, url))
1314                 rc = 1;
1315
1316         http_cleanup();
1317
1318         curl_slist_free_all(no_pragma_header);
1319
1320         if (commits_on_stdin)
1321                 pull_targets_free(commits, commit_id, write_ref);
1322
1323         if (corrupt_object_found) {
1324                 fprintf(stderr,
1325 "Some loose object were found to be corrupt, but they might be just\n"
1326 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1327 "status code.  Suggest running git fsck-objects.\n");
1328         }
1329         return rc;
1330 }