git-pack-refs --prune
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #ifndef NO_EXPAT
8 #include <expat.h>
9
10 /* Definitions for DAV requests */
11 #define DAV_PROPFIND "PROPFIND"
12 #define DAV_PROPFIND_RESP ".multistatus.response"
13 #define DAV_PROPFIND_NAME ".multistatus.response.href"
14 #define DAV_PROPFIND_COLLECTION ".multistatus.response.propstat.prop.resourcetype.collection"
15 #define PROPFIND_ALL_REQUEST "<?xml version=\"1.0\" encoding=\"utf-8\" ?>\n<D:propfind xmlns:D=\"DAV:\">\n<D:allprop/>\n</D:propfind>"
16
17 /* Definitions for processing XML DAV responses */
18 #ifndef XML_STATUS_OK
19 enum XML_Status {
20   XML_STATUS_OK = 1,
21   XML_STATUS_ERROR = 0
22 };
23 #define XML_STATUS_OK    1
24 #define XML_STATUS_ERROR 0
25 #endif
26
27 /* Flags that control remote_ls processing */
28 #define PROCESS_FILES (1u << 0)
29 #define PROCESS_DIRS  (1u << 1)
30 #define RECURSIVE     (1u << 2)
31
32 /* Flags that remote_ls passes to callback functions */
33 #define IS_DIR (1u << 0)
34 #endif
35
36 #define PREV_BUF_SIZE 4096
37 #define RANGE_HEADER_SIZE 30
38
39 static int commits_on_stdin;
40
41 static int got_alternates = -1;
42 static int corrupt_object_found;
43
44 static struct curl_slist *no_pragma_header;
45
/*
 * One repository that objects can be fetched from.  Entries are chained
 * through "next"; the file-scope pointer "alt" is the head of the chain.
 */
struct alt_base {
	/* URL prefix; request URLs are built by appending to it */
	const char *base;
	/* strlen() of the path part of "base", i.e. from the first '/' after the host */
	int path_len;
	/* set to 0 when an alternate is added; NOTE(review): presumably flags that the pack list was fetched -- confirm */
	int got_indices;
	/* packs of this repository whose index has been fetched and parsed */
	struct packed_git *packs;
	/* next repository to try, or NULL at the end of the chain */
	struct alt_base *next;
};
54
55 static struct alt_base *alt;
56
/* Life cycle of a queued object_request. */
enum object_request_state {
	WAITING = 0,	/* queued; no transfer has been started yet */
	ABORTED = 1,	/* the temp file or the transfer could not be set up */
	ACTIVE = 2,	/* a transfer for the object is in progress */
	COMPLETE = 3	/* transfer finished, or the object was already present */
};
63
64 struct object_request
65 {
66         unsigned char sha1[20];
67         struct alt_base *repo;
68         char *url;
69         char filename[PATH_MAX];
70         char tmpfile[PATH_MAX];
71         int local;
72         enum object_request_state state;
73         CURLcode curl_result;
74         char errorstr[CURL_ERROR_SIZE];
75         long http_code;
76         unsigned char real_sha1[20];
77         SHA_CTX c;
78         z_stream stream;
79         int zret;
80         int rename;
81         struct active_request_slot *slot;
82         struct object_request *next;
83 };
84
/* Bookkeeping for one fetch of a repository's list of alternates. */
struct alternates_request {
	/* base URL of the repository whose alternates are being fetched */
	const char *base;
	/* buffer holding the request URL; rewritten for the fallback request */
	char *url;
	/* accumulates the body of the response */
	struct buffer *buffer;
	/* slot carrying the request */
	struct active_request_slot *slot;
	/* non-zero while "http-alternates" is requested, 0 for plain "alternates" */
	int http_specific;
};
92
93 #ifndef NO_EXPAT
/* Parser state shared by the expat callbacks in this file. */
struct xml_ctx {
	/* dotted path of the elements currently open, e.g. ".multistatus.response" */
	char *name;
	/* number of bytes allocated for "name" */
	int len;
	/* most recent character data; dropped whenever a new element starts */
	char *cdata;
	/* called with tag_closed == 0 after an element opens and 1 before it closes */
	void (*userFunc)(struct xml_ctx *ctx, int tag_closed);
	/* opaque pointer for userFunc */
	void *userData;
};
102
/* State of one remote directory listing (DAV PROPFIND). */
struct remote_ls_ctx {
	/* repository being listed */
	struct alt_base *repo;
	/* private copy of the path that is being listed */
	char *path;
	/* called for every entry selected by "flags" */
	void (*userFunc)(struct remote_ls_ctx *ls);
	/* opaque pointer for userFunc */
	void *userData;
	/* combination of PROCESS_FILES, PROCESS_DIRS and RECURSIVE */
	int flags;
	/* name of the entry being reported, with the repository path stripped */
	char *dentry_name;
	/* IS_DIR when the current entry is a collection */
	int dentry_flags;
	/* result of the most recent recursive listing */
	int rc;
	/* NOTE(review): not referenced by the code visible here -- presumably the enclosing listing */
	struct remote_ls_ctx *parent;
};
115 #endif
116
117 static struct object_request *object_queue_head;
118
119 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
120                                void *data)
121 {
122         unsigned char expn[4096];
123         size_t size = eltsize * nmemb;
124         int posn = 0;
125         struct object_request *obj_req = (struct object_request *)data;
126         do {
127                 ssize_t retval = write(obj_req->local,
128                                        (char *) ptr + posn, size - posn);
129                 if (retval < 0)
130                         return posn;
131                 posn += retval;
132         } while (posn < size);
133
134         obj_req->stream.avail_in = size;
135         obj_req->stream.next_in = ptr;
136         do {
137                 obj_req->stream.next_out = expn;
138                 obj_req->stream.avail_out = sizeof(expn);
139                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
140                 SHA1_Update(&obj_req->c, expn,
141                             sizeof(expn) - obj_req->stream.avail_out);
142         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
143         data_received++;
144         return size;
145 }
146
147 static int missing__target(int code, int result)
148 {
149         return  /* file:// URL -- do we ever use one??? */
150                 (result == CURLE_FILE_COULDNT_READ_FILE) ||
151                 /* http:// and https:// URL */
152                 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR) ||
153                 /* ftp:// URL */
154                 (code == 550 && result == CURLE_FTP_COULDNT_RETR_FILE)
155                 ;
156 }
157
158 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
159
160 static void fetch_alternates(const char *base);
161
162 static void process_object_response(void *callback_data);
163
164 static void start_object_request(struct object_request *obj_req)
165 {
166         char *hex = sha1_to_hex(obj_req->sha1);
167         char prevfile[PATH_MAX];
168         char *url;
169         char *posn;
170         int prevlocal;
171         unsigned char prev_buf[PREV_BUF_SIZE];
172         ssize_t prev_read = 0;
173         long prev_posn = 0;
174         char range[RANGE_HEADER_SIZE];
175         struct curl_slist *range_header = NULL;
176         struct active_request_slot *slot;
177
178         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
179         unlink(prevfile);
180         rename(obj_req->tmpfile, prevfile);
181         unlink(obj_req->tmpfile);
182
183         if (obj_req->local != -1)
184                 error("fd leakage in start: %d", obj_req->local);
185         obj_req->local = open(obj_req->tmpfile,
186                               O_WRONLY | O_CREAT | O_EXCL, 0666);
187         /* This could have failed due to the "lazy directory creation";
188          * try to mkdir the last path component.
189          */
190         if (obj_req->local < 0 && errno == ENOENT) {
191                 char *dir = strrchr(obj_req->tmpfile, '/');
192                 if (dir) {
193                         *dir = 0;
194                         mkdir(obj_req->tmpfile, 0777);
195                         *dir = '/';
196                 }
197                 obj_req->local = open(obj_req->tmpfile,
198                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
199         }
200
201         if (obj_req->local < 0) {
202                 obj_req->state = ABORTED;
203                 error("Couldn't create temporary file %s for %s: %s",
204                       obj_req->tmpfile, obj_req->filename, strerror(errno));
205                 return;
206         }
207
208         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
209
210         inflateInit(&obj_req->stream);
211
212         SHA1_Init(&obj_req->c);
213
214         url = xmalloc(strlen(obj_req->repo->base) + 50);
215         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
216         strcpy(url, obj_req->repo->base);
217         posn = url + strlen(obj_req->repo->base);
218         strcpy(posn, "objects/");
219         posn += 8;
220         memcpy(posn, hex, 2);
221         posn += 2;
222         *(posn++) = '/';
223         strcpy(posn, hex + 2);
224         strcpy(obj_req->url, url);
225
226         /* If a previous temp file is present, process what was already
227            fetched. */
228         prevlocal = open(prevfile, O_RDONLY);
229         if (prevlocal != -1) {
230                 do {
231                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
232                         if (prev_read>0) {
233                                 if (fwrite_sha1_file(prev_buf,
234                                                      1,
235                                                      prev_read,
236                                                      obj_req) == prev_read) {
237                                         prev_posn += prev_read;
238                                 } else {
239                                         prev_read = -1;
240                                 }
241                         }
242                 } while (prev_read > 0);
243                 close(prevlocal);
244         }
245         unlink(prevfile);
246
247         /* Reset inflate/SHA1 if there was an error reading the previous temp
248            file; also rewind to the beginning of the local file. */
249         if (prev_read == -1) {
250                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
251                 inflateInit(&obj_req->stream);
252                 SHA1_Init(&obj_req->c);
253                 if (prev_posn>0) {
254                         prev_posn = 0;
255                         lseek(obj_req->local, SEEK_SET, 0);
256                         ftruncate(obj_req->local, 0);
257                 }
258         }
259
260         slot = get_active_slot();
261         slot->callback_func = process_object_response;
262         slot->callback_data = obj_req;
263         obj_req->slot = slot;
264
265         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
266         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
267         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
268         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
269         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
270
271         /* If we have successfully processed data from a previous fetch
272            attempt, only fetch the data we don't already have. */
273         if (prev_posn>0) {
274                 if (get_verbosely)
275                         fprintf(stderr,
276                                 "Resuming fetch of object %s at byte %ld\n",
277                                 hex, prev_posn);
278                 sprintf(range, "Range: bytes=%ld-", prev_posn);
279                 range_header = curl_slist_append(range_header, range);
280                 curl_easy_setopt(slot->curl,
281                                  CURLOPT_HTTPHEADER, range_header);
282         }
283
284         /* Try to get the request started, abort the request on error */
285         obj_req->state = ACTIVE;
286         if (!start_active_slot(slot)) {
287                 obj_req->state = ABORTED;
288                 obj_req->slot = NULL;
289                 close(obj_req->local); obj_req->local = -1;
290                 free(obj_req->url);
291                 return;
292         }
293 }
294
295 static void finish_object_request(struct object_request *obj_req)
296 {
297         struct stat st;
298
299         fchmod(obj_req->local, 0444);
300         close(obj_req->local); obj_req->local = -1;
301
302         if (obj_req->http_code == 416) {
303                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
304         } else if (obj_req->curl_result != CURLE_OK) {
305                 if (stat(obj_req->tmpfile, &st) == 0)
306                         if (st.st_size == 0)
307                                 unlink(obj_req->tmpfile);
308                 return;
309         }
310
311         inflateEnd(&obj_req->stream);
312         SHA1_Final(obj_req->real_sha1, &obj_req->c);
313         if (obj_req->zret != Z_STREAM_END) {
314                 unlink(obj_req->tmpfile);
315                 return;
316         }
317         if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
318                 unlink(obj_req->tmpfile);
319                 return;
320         }
321         obj_req->rename =
322                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
323
324         if (obj_req->rename == 0)
325                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
326 }
327
328 static void process_object_response(void *callback_data)
329 {
330         struct object_request *obj_req =
331                 (struct object_request *)callback_data;
332
333         obj_req->curl_result = obj_req->slot->curl_result;
334         obj_req->http_code = obj_req->slot->http_code;
335         obj_req->slot = NULL;
336         obj_req->state = COMPLETE;
337
338         /* Use alternates if necessary */
339         if (missing_target(obj_req)) {
340                 fetch_alternates(alt->base);
341                 if (obj_req->repo->next != NULL) {
342                         obj_req->repo =
343                                 obj_req->repo->next;
344                         close(obj_req->local);
345                         obj_req->local = -1;
346                         start_object_request(obj_req);
347                         return;
348                 }
349         }
350
351         finish_object_request(obj_req);
352 }
353
354 static void release_object_request(struct object_request *obj_req)
355 {
356         struct object_request *entry = object_queue_head;
357
358         if (obj_req->local != -1)
359                 error("fd leakage in release: %d", obj_req->local);
360         if (obj_req == object_queue_head) {
361                 object_queue_head = obj_req->next;
362         } else {
363                 while (entry->next != NULL && entry->next != obj_req)
364                         entry = entry->next;
365                 if (entry->next == obj_req)
366                         entry->next = entry->next->next;
367         }
368
369         free(obj_req->url);
370         free(obj_req);
371 }
372
373 #ifdef USE_CURL_MULTI
374 void fill_active_slots(void)
375 {
376         struct object_request *obj_req = object_queue_head;
377         struct active_request_slot *slot = active_queue_head;
378         int num_transfers;
379
380         while (active_requests < max_requests && obj_req != NULL) {
381                 if (obj_req->state == WAITING) {
382                         if (has_sha1_file(obj_req->sha1))
383                                 obj_req->state = COMPLETE;
384                         else
385                                 start_object_request(obj_req);
386                         curl_multi_perform(curlm, &num_transfers);
387                 }
388                 obj_req = obj_req->next;
389         }
390
391         while (slot != NULL) {
392                 if (!slot->in_use && slot->curl != NULL) {
393                         curl_easy_cleanup(slot->curl);
394                         slot->curl = NULL;
395                 }
396                 slot = slot->next;
397         }
398 }
399 #endif
400
401 void prefetch(unsigned char *sha1)
402 {
403         struct object_request *newreq;
404         struct object_request *tail;
405         char *filename = sha1_file_name(sha1);
406
407         newreq = xmalloc(sizeof(*newreq));
408         hashcpy(newreq->sha1, sha1);
409         newreq->repo = alt;
410         newreq->url = NULL;
411         newreq->local = -1;
412         newreq->state = WAITING;
413         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
414         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
415                  "%s.temp", filename);
416         newreq->slot = NULL;
417         newreq->next = NULL;
418
419         if (object_queue_head == NULL) {
420                 object_queue_head = newreq;
421         } else {
422                 tail = object_queue_head;
423                 while (tail->next != NULL) {
424                         tail = tail->next;
425                 }
426                 tail->next = newreq;
427         }
428
429 #ifdef USE_CURL_MULTI
430         fill_active_slots();
431         step_active_slots();
432 #endif
433 }
434
435 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
436 {
437         char *hex = sha1_to_hex(sha1);
438         char *filename;
439         char *url;
440         char tmpfile[PATH_MAX];
441         long prev_posn = 0;
442         char range[RANGE_HEADER_SIZE];
443         struct curl_slist *range_header = NULL;
444
445         FILE *indexfile;
446         struct active_request_slot *slot;
447         struct slot_results results;
448
449         if (has_pack_index(sha1))
450                 return 0;
451
452         if (get_verbosely)
453                 fprintf(stderr, "Getting index for pack %s\n", hex);
454
455         url = xmalloc(strlen(repo->base) + 64);
456         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
457
458         filename = sha1_pack_index_name(sha1);
459         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
460         indexfile = fopen(tmpfile, "a");
461         if (!indexfile)
462                 return error("Unable to open local file %s for pack index",
463                              filename);
464
465         slot = get_active_slot();
466         slot->results = &results;
467         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
468         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
469         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
470         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
471         slot->local = indexfile;
472
473         /* If there is data present from a previous transfer attempt,
474            resume where it left off */
475         prev_posn = ftell(indexfile);
476         if (prev_posn>0) {
477                 if (get_verbosely)
478                         fprintf(stderr,
479                                 "Resuming fetch of index for pack %s at byte %ld\n",
480                                 hex, prev_posn);
481                 sprintf(range, "Range: bytes=%ld-", prev_posn);
482                 range_header = curl_slist_append(range_header, range);
483                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
484         }
485
486         if (start_active_slot(slot)) {
487                 run_active_slot(slot);
488                 if (results.curl_result != CURLE_OK) {
489                         fclose(indexfile);
490                         return error("Unable to get pack index %s\n%s", url,
491                                      curl_errorstr);
492                 }
493         } else {
494                 fclose(indexfile);
495                 return error("Unable to start request");
496         }
497
498         fclose(indexfile);
499
500         return move_temp_to_file(tmpfile, filename);
501 }
502
503 static int setup_index(struct alt_base *repo, unsigned char *sha1)
504 {
505         struct packed_git *new_pack;
506         if (has_pack_file(sha1))
507                 return 0; /* don't list this as something we can get */
508
509         if (fetch_index(repo, sha1))
510                 return -1;
511
512         new_pack = parse_pack_index(sha1);
513         new_pack->next = repo->packs;
514         repo->packs = new_pack;
515         return 0;
516 }
517
518 static void process_alternates_response(void *callback_data)
519 {
520         struct alternates_request *alt_req =
521                 (struct alternates_request *)callback_data;
522         struct active_request_slot *slot = alt_req->slot;
523         struct alt_base *tail = alt;
524         const char *base = alt_req->base;
525         static const char null_byte = '\0';
526         char *data;
527         int i = 0;
528
529         if (alt_req->http_specific) {
530                 if (slot->curl_result != CURLE_OK ||
531                     !alt_req->buffer->posn) {
532
533                         /* Try reusing the slot to get non-http alternates */
534                         alt_req->http_specific = 0;
535                         sprintf(alt_req->url, "%s/objects/info/alternates",
536                                 base);
537                         curl_easy_setopt(slot->curl, CURLOPT_URL,
538                                          alt_req->url);
539                         active_requests++;
540                         slot->in_use = 1;
541                         if (slot->finished != NULL)
542                                 (*slot->finished) = 0;
543                         if (!start_active_slot(slot)) {
544                                 got_alternates = -1;
545                                 slot->in_use = 0;
546                                 if (slot->finished != NULL)
547                                         (*slot->finished) = 1;
548                         }
549                         return;
550                 }
551         } else if (slot->curl_result != CURLE_OK) {
552                 if (!missing_target(slot)) {
553                         got_alternates = -1;
554                         return;
555                 }
556         }
557
558         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
559         alt_req->buffer->posn--;
560         data = alt_req->buffer->buffer;
561
562         while (i < alt_req->buffer->posn) {
563                 int posn = i;
564                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
565                         posn++;
566                 if (data[posn] == '\n') {
567                         int okay = 0;
568                         int serverlen = 0;
569                         struct alt_base *newalt;
570                         char *target = NULL;
571                         char *path;
572                         if (data[i] == '/') {
573                                 /* This counts
574                                  * http://git.host/pub/scm/linux.git/
575                                  * -----------here^
576                                  * so memcpy(dst, base, serverlen) will
577                                  * copy up to "...git.host".
578                                  */
579                                 const char *colon_ss = strstr(base,"://");
580                                 if (colon_ss) {
581                                         serverlen = (strchr(colon_ss + 3, '/')
582                                                      - base);
583                                         okay = 1;
584                                 }
585                         } else if (!memcmp(data + i, "../", 3)) {
586                                 /* Relative URL; chop the corresponding
587                                  * number of subpath from base (and ../
588                                  * from data), and concatenate the result.
589                                  *
590                                  * The code first drops ../ from data, and
591                                  * then drops one ../ from data and one path
592                                  * from base.  IOW, one extra ../ is dropped
593                                  * from data than path is dropped from base.
594                                  *
595                                  * This is not wrong.  The alternate in
596                                  *     http://git.host/pub/scm/linux.git/
597                                  * to borrow from
598                                  *     http://git.host/pub/scm/linus.git/
599                                  * is ../../linus.git/objects/.  You need
600                                  * two ../../ to borrow from your direct
601                                  * neighbour.
602                                  */
603                                 i += 3;
604                                 serverlen = strlen(base);
605                                 while (i + 2 < posn &&
606                                        !memcmp(data + i, "../", 3)) {
607                                         do {
608                                                 serverlen--;
609                                         } while (serverlen &&
610                                                  base[serverlen - 1] != '/');
611                                         i += 3;
612                                 }
613                                 /* If the server got removed, give up. */
614                                 okay = strchr(base, ':') - base + 3 <
615                                         serverlen;
616                         } else if (alt_req->http_specific) {
617                                 char *colon = strchr(data + i, ':');
618                                 char *slash = strchr(data + i, '/');
619                                 if (colon && slash && colon < data + posn &&
620                                     slash < data + posn && colon < slash) {
621                                         okay = 1;
622                                 }
623                         }
624                         /* skip "objects\n" at end */
625                         if (okay) {
626                                 target = xmalloc(serverlen + posn - i - 6);
627                                 memcpy(target, base, serverlen);
628                                 memcpy(target + serverlen, data + i,
629                                        posn - i - 7);
630                                 target[serverlen + posn - i - 7] = 0;
631                                 if (get_verbosely)
632                                         fprintf(stderr,
633                                                 "Also look at %s\n", target);
634                                 newalt = xmalloc(sizeof(*newalt));
635                                 newalt->next = NULL;
636                                 newalt->base = target;
637                                 newalt->got_indices = 0;
638                                 newalt->packs = NULL;
639                                 path = strstr(target, "//");
640                                 if (path) {
641                                         path = strchr(path+2, '/');
642                                         if (path)
643                                                 newalt->path_len = strlen(path);
644                                 }
645
646                                 while (tail->next != NULL)
647                                         tail = tail->next;
648                                 tail->next = newalt;
649                         }
650                 }
651                 i = posn + 1;
652         }
653
654         got_alternates = 1;
655 }
656
657 static void fetch_alternates(const char *base)
658 {
659         struct buffer buffer;
660         char *url;
661         char *data;
662         struct active_request_slot *slot;
663         struct alternates_request alt_req;
664
665         /* If another request has already started fetching alternates,
666            wait for them to arrive and return to processing this request's
667            curl message */
668 #ifdef USE_CURL_MULTI
669         while (got_alternates == 0) {
670                 step_active_slots();
671         }
672 #endif
673
674         /* Nothing to do if they've already been fetched */
675         if (got_alternates == 1)
676                 return;
677
678         /* Start the fetch */
679         got_alternates = 0;
680
681         data = xmalloc(4096);
682         buffer.size = 4096;
683         buffer.posn = 0;
684         buffer.buffer = data;
685
686         if (get_verbosely)
687                 fprintf(stderr, "Getting alternates list for %s\n", base);
688
689         url = xmalloc(strlen(base) + 31);
690         sprintf(url, "%s/objects/info/http-alternates", base);
691
692         /* Use a callback to process the result, since another request
693            may fail and need to have alternates loaded before continuing */
694         slot = get_active_slot();
695         slot->callback_func = process_alternates_response;
696         slot->callback_data = &alt_req;
697
698         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
699         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
700         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
701
702         alt_req.base = base;
703         alt_req.url = url;
704         alt_req.buffer = &buffer;
705         alt_req.http_specific = 1;
706         alt_req.slot = slot;
707
708         if (start_active_slot(slot))
709                 run_active_slot(slot);
710         else
711                 got_alternates = -1;
712
713         free(data);
714         free(url);
715 }
716
717 #ifndef NO_EXPAT
718 static void
719 xml_start_tag(void *userData, const char *name, const char **atts)
720 {
721         struct xml_ctx *ctx = (struct xml_ctx *)userData;
722         const char *c = strchr(name, ':');
723         int new_len;
724
725         if (c == NULL)
726                 c = name;
727         else
728                 c++;
729
730         new_len = strlen(ctx->name) + strlen(c) + 2;
731
732         if (new_len > ctx->len) {
733                 ctx->name = xrealloc(ctx->name, new_len);
734                 ctx->len = new_len;
735         }
736         strcat(ctx->name, ".");
737         strcat(ctx->name, c);
738
739         free(ctx->cdata);
740         ctx->cdata = NULL;
741
742         ctx->userFunc(ctx, 0);
743 }
744
745 static void
746 xml_end_tag(void *userData, const char *name)
747 {
748         struct xml_ctx *ctx = (struct xml_ctx *)userData;
749         const char *c = strchr(name, ':');
750         char *ep;
751
752         ctx->userFunc(ctx, 1);
753
754         if (c == NULL)
755                 c = name;
756         else
757                 c++;
758
759         ep = ctx->name + strlen(ctx->name) - strlen(c) - 1;
760         *ep = 0;
761 }
762
763 static void
764 xml_cdata(void *userData, const XML_Char *s, int len)
765 {
766         struct xml_ctx *ctx = (struct xml_ctx *)userData;
767         free(ctx->cdata);
768         ctx->cdata = xmalloc(len + 1);
769         strlcpy(ctx->cdata, s, len + 1);
770 }
771
772 static int remote_ls(struct alt_base *repo, const char *path, int flags,
773                      void (*userFunc)(struct remote_ls_ctx *ls),
774                      void *userData);
775
776 static void handle_remote_ls_ctx(struct xml_ctx *ctx, int tag_closed)
777 {
778         struct remote_ls_ctx *ls = (struct remote_ls_ctx *)ctx->userData;
779
780         if (tag_closed) {
781                 if (!strcmp(ctx->name, DAV_PROPFIND_RESP) && ls->dentry_name) {
782                         if (ls->dentry_flags & IS_DIR) {
783                                 if (ls->flags & PROCESS_DIRS) {
784                                         ls->userFunc(ls);
785                                 }
786                                 if (strcmp(ls->dentry_name, ls->path) &&
787                                     ls->flags & RECURSIVE) {
788                                         ls->rc = remote_ls(ls->repo,
789                                                            ls->dentry_name,
790                                                            ls->flags,
791                                                            ls->userFunc,
792                                                            ls->userData);
793                                 }
794                         } else if (ls->flags & PROCESS_FILES) {
795                                 ls->userFunc(ls);
796                         }
797                 } else if (!strcmp(ctx->name, DAV_PROPFIND_NAME) && ctx->cdata) {
798                         ls->dentry_name = xmalloc(strlen(ctx->cdata) -
799                                                   ls->repo->path_len + 1);
800                         strcpy(ls->dentry_name, ctx->cdata + ls->repo->path_len);
801                 } else if (!strcmp(ctx->name, DAV_PROPFIND_COLLECTION)) {
802                         ls->dentry_flags |= IS_DIR;
803                 }
804         } else if (!strcmp(ctx->name, DAV_PROPFIND_RESP)) {
805                 free(ls->dentry_name);
806                 ls->dentry_name = NULL;
807                 ls->dentry_flags = 0;
808         }
809 }
810
811 static int remote_ls(struct alt_base *repo, const char *path, int flags,
812                      void (*userFunc)(struct remote_ls_ctx *ls),
813                      void *userData)
814 {
815         char *url = xmalloc(strlen(repo->base) + strlen(path) + 1);
816         struct active_request_slot *slot;
817         struct slot_results results;
818         struct buffer in_buffer;
819         struct buffer out_buffer;
820         char *in_data;
821         char *out_data;
822         XML_Parser parser = XML_ParserCreate(NULL);
823         enum XML_Status result;
824         struct curl_slist *dav_headers = NULL;
825         struct xml_ctx ctx;
826         struct remote_ls_ctx ls;
827
828         ls.flags = flags;
829         ls.repo = repo;
830         ls.path = xstrdup(path);
831         ls.dentry_name = NULL;
832         ls.dentry_flags = 0;
833         ls.userData = userData;
834         ls.userFunc = userFunc;
835         ls.rc = 0;
836
837         sprintf(url, "%s%s", repo->base, path);
838
839         out_buffer.size = strlen(PROPFIND_ALL_REQUEST);
840         out_data = xmalloc(out_buffer.size + 1);
841         snprintf(out_data, out_buffer.size + 1, PROPFIND_ALL_REQUEST);
842         out_buffer.posn = 0;
843         out_buffer.buffer = out_data;
844
845         in_buffer.size = 4096;
846         in_data = xmalloc(in_buffer.size);
847         in_buffer.posn = 0;
848         in_buffer.buffer = in_data;
849
850         dav_headers = curl_slist_append(dav_headers, "Depth: 1");
851         dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
852
853         slot = get_active_slot();
854         slot->results = &results;
855         curl_easy_setopt(slot->curl, CURLOPT_INFILE, &out_buffer);
856         curl_easy_setopt(slot->curl, CURLOPT_INFILESIZE, out_buffer.size);
857         curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, fread_buffer);
858         curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
859         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
860         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
861         curl_easy_setopt(slot->curl, CURLOPT_UPLOAD, 1);
862         curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, DAV_PROPFIND);
863         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
864
865         if (start_active_slot(slot)) {
866                 run_active_slot(slot);
867                 if (results.curl_result == CURLE_OK) {
868                         ctx.name = xcalloc(10, 1);
869                         ctx.len = 0;
870                         ctx.cdata = NULL;
871                         ctx.userFunc = handle_remote_ls_ctx;
872                         ctx.userData = &ls;
873                         XML_SetUserData(parser, &ctx);
874                         XML_SetElementHandler(parser, xml_start_tag,
875                                               xml_end_tag);
876                         XML_SetCharacterDataHandler(parser, xml_cdata);
877                         result = XML_Parse(parser, in_buffer.buffer,
878                                            in_buffer.posn, 1);
879                         free(ctx.name);
880
881                         if (result != XML_STATUS_OK) {
882                                 ls.rc = error("XML error: %s",
883                                               XML_ErrorString(
884                                                       XML_GetErrorCode(parser)));
885                         }
886                 } else {
887                         ls.rc = -1;
888                 }
889         } else {
890                 ls.rc = error("Unable to start PROPFIND request");
891         }
892
893         free(ls.path);
894         free(url);
895         free(out_data);
896         free(in_buffer.buffer);
897         curl_slist_free_all(dav_headers);
898
899         return ls.rc;
900 }
901
902 static void process_ls_pack(struct remote_ls_ctx *ls)
903 {
904         unsigned char sha1[20];
905
906         if (strlen(ls->dentry_name) == 63 &&
907             !strncmp(ls->dentry_name, "objects/pack/pack-", 18) &&
908             has_extension(ls->dentry_name, ".pack")) {
909                 get_sha1_hex(ls->dentry_name + 18, sha1);
910                 setup_index(ls->repo, sha1);
911         }
912 }
913 #endif
914
915 static int fetch_indices(struct alt_base *repo)
916 {
917         unsigned char sha1[20];
918         char *url;
919         struct buffer buffer;
920         char *data;
921         int i = 0;
922
923         struct active_request_slot *slot;
924         struct slot_results results;
925
926         if (repo->got_indices)
927                 return 0;
928
929         data = xmalloc(4096);
930         buffer.size = 4096;
931         buffer.posn = 0;
932         buffer.buffer = data;
933
934         if (get_verbosely)
935                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
936
937 #ifndef NO_EXPAT
938         if (remote_ls(repo, "objects/pack/", PROCESS_FILES,
939                       process_ls_pack, NULL) == 0)
940                 return 0;
941 #endif
942
943         url = xmalloc(strlen(repo->base) + 21);
944         sprintf(url, "%s/objects/info/packs", repo->base);
945
946         slot = get_active_slot();
947         slot->results = &results;
948         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
949         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
950         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
951         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
952         if (start_active_slot(slot)) {
953                 run_active_slot(slot);
954                 if (results.curl_result != CURLE_OK) {
955                         if (missing_target(&results)) {
956                                 repo->got_indices = 1;
957                                 free(buffer.buffer);
958                                 return 0;
959                         } else {
960                                 repo->got_indices = 0;
961                                 free(buffer.buffer);
962                                 return error("%s", curl_errorstr);
963                         }
964                 }
965         } else {
966                 repo->got_indices = 0;
967                 free(buffer.buffer);
968                 return error("Unable to start request");
969         }
970
971         data = buffer.buffer;
972         while (i < buffer.posn) {
973                 switch (data[i]) {
974                 case 'P':
975                         i++;
976                         if (i + 52 <= buffer.posn &&
977                             !strncmp(data + i, " pack-", 6) &&
978                             !strncmp(data + i + 46, ".pack\n", 6)) {
979                                 get_sha1_hex(data + i + 6, sha1);
980                                 setup_index(repo, sha1);
981                                 i += 51;
982                                 break;
983                         }
984                 default:
985                         while (i < buffer.posn && data[i] != '\n')
986                                 i++;
987                 }
988                 i++;
989         }
990
991         free(buffer.buffer);
992         repo->got_indices = 1;
993         return 0;
994 }
995
996 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
997 {
998         char *url;
999         struct packed_git *target;
1000         struct packed_git **lst;
1001         FILE *packfile;
1002         char *filename;
1003         char tmpfile[PATH_MAX];
1004         int ret;
1005         long prev_posn = 0;
1006         char range[RANGE_HEADER_SIZE];
1007         struct curl_slist *range_header = NULL;
1008
1009         struct active_request_slot *slot;
1010         struct slot_results results;
1011
1012         if (fetch_indices(repo))
1013                 return -1;
1014         target = find_sha1_pack(sha1, repo->packs);
1015         if (!target)
1016                 return -1;
1017
1018         if (get_verbosely) {
1019                 fprintf(stderr, "Getting pack %s\n",
1020                         sha1_to_hex(target->sha1));
1021                 fprintf(stderr, " which contains %s\n",
1022                         sha1_to_hex(sha1));
1023         }
1024
1025         url = xmalloc(strlen(repo->base) + 65);
1026         sprintf(url, "%s/objects/pack/pack-%s.pack",
1027                 repo->base, sha1_to_hex(target->sha1));
1028
1029         filename = sha1_pack_name(target->sha1);
1030         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
1031         packfile = fopen(tmpfile, "a");
1032         if (!packfile)
1033                 return error("Unable to open local file %s for pack",
1034                              filename);
1035
1036         slot = get_active_slot();
1037         slot->results = &results;
1038         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
1039         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
1040         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1041         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
1042         slot->local = packfile;
1043
1044         /* If there is data present from a previous transfer attempt,
1045            resume where it left off */
1046         prev_posn = ftell(packfile);
1047         if (prev_posn>0) {
1048                 if (get_verbosely)
1049                         fprintf(stderr,
1050                                 "Resuming fetch of pack %s at byte %ld\n",
1051                                 sha1_to_hex(target->sha1), prev_posn);
1052                 sprintf(range, "Range: bytes=%ld-", prev_posn);
1053                 range_header = curl_slist_append(range_header, range);
1054                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
1055         }
1056
1057         if (start_active_slot(slot)) {
1058                 run_active_slot(slot);
1059                 if (results.curl_result != CURLE_OK) {
1060                         fclose(packfile);
1061                         return error("Unable to get pack file %s\n%s", url,
1062                                      curl_errorstr);
1063                 }
1064         } else {
1065                 fclose(packfile);
1066                 return error("Unable to start request");
1067         }
1068
1069         fclose(packfile);
1070
1071         ret = move_temp_to_file(tmpfile, filename);
1072         if (ret)
1073                 return ret;
1074
1075         lst = &repo->packs;
1076         while (*lst != target)
1077                 lst = &((*lst)->next);
1078         *lst = (*lst)->next;
1079
1080         if (verify_pack(target, 0))
1081                 return -1;
1082         install_packed_git(target);
1083
1084         return 0;
1085 }
1086
1087 static void abort_object_request(struct object_request *obj_req)
1088 {
1089         if (obj_req->local >= 0) {
1090                 close(obj_req->local);
1091                 obj_req->local = -1;
1092         }
1093         unlink(obj_req->tmpfile);
1094         if (obj_req->slot) {
1095                 release_active_slot(obj_req->slot);
1096                 obj_req->slot = NULL;
1097         }
1098         release_object_request(obj_req);
1099 }
1100
1101 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
1102 {
1103         char *hex = sha1_to_hex(sha1);
1104         int ret = 0;
1105         struct object_request *obj_req = object_queue_head;
1106
1107         while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
1108                 obj_req = obj_req->next;
1109         if (obj_req == NULL)
1110                 return error("Couldn't find request for %s in the queue", hex);
1111
1112         if (has_sha1_file(obj_req->sha1)) {
1113                 abort_object_request(obj_req);
1114                 return 0;
1115         }
1116
1117 #ifdef USE_CURL_MULTI
1118         while (obj_req->state == WAITING) {
1119                 step_active_slots();
1120         }
1121 #else
1122         start_object_request(obj_req);
1123 #endif
1124
1125         while (obj_req->state == ACTIVE) {
1126                 run_active_slot(obj_req->slot);
1127         }
1128         if (obj_req->local != -1) {
1129                 close(obj_req->local); obj_req->local = -1;
1130         }
1131
1132         if (obj_req->state == ABORTED) {
1133                 ret = error("Request for %s aborted", hex);
1134         } else if (obj_req->curl_result != CURLE_OK &&
1135                    obj_req->http_code != 416) {
1136                 if (missing_target(obj_req))
1137                         ret = -1; /* Be silent, it is probably in a pack. */
1138                 else
1139                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
1140                                     obj_req->errorstr, obj_req->curl_result,
1141                                     obj_req->http_code, hex);
1142         } else if (obj_req->zret != Z_STREAM_END) {
1143                 corrupt_object_found++;
1144                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
1145         } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
1146                 ret = error("File %s has bad hash", hex);
1147         } else if (obj_req->rename < 0) {
1148                 ret = error("unable to write sha1 filename %s",
1149                             obj_req->filename);
1150         }
1151
1152         release_object_request(obj_req);
1153         return ret;
1154 }
1155
1156 int fetch(unsigned char *sha1)
1157 {
1158         struct alt_base *altbase = alt;
1159
1160         if (!fetch_object(altbase, sha1))
1161                 return 0;
1162         while (altbase) {
1163                 if (!fetch_pack(altbase, sha1))
1164                         return 0;
1165                 fetch_alternates(alt->base);
1166                 altbase = altbase->next;
1167         }
1168         return error("Unable to find %s under %s", sha1_to_hex(sha1),
1169                      alt->base);
1170 }
1171
/*
 * Return 0 when ch may appear literally in a ref URL (ASCII letters,
 * digits, '/', '-' and '.'), and 1 when it must be percent-encoded.
 */
static inline int needs_quote(int ch)
{
        if (ch >= 'A' && ch <= 'Z')
                return 0;
        if (ch >= 'a' && ch <= 'z')
                return 0;
        if (ch >= '0' && ch <= '9')
                return 0;

        switch (ch) {
        case '/':
        case '-':
        case '.':
                return 0;
        default:
                return 1;
        }
}
1183
/*
 * Map v to a hexadecimal digit character: '0' + v for values below 10,
 * otherwise an upper-case letter counted from 'A' for 10.
 */
static inline int hex(int v)
{
        return (v < 10) ? ('0' + v) : ('A' + v - 10);
}
1189
/*
 * Build "<base>refs/<ref>" in a freshly allocated string, replacing every
 * character of ref for which needs_quote() is true with '%' followed by
 * its two upper-case hex digits.  The caller owns the returned string.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *src;
        char *out, *result;
        int total, base_len, c;

        base_len = strlen(base);

        /* First pass: size the result ("refs/" + NUL account for 6) */
        total = base_len + 6;
        for (src = ref; (c = *src) != 0; src++)
                total += needs_quote(c) ? 3 : 1;

        result = xmalloc(total);
        memcpy(result, base, base_len);
        memcpy(result + base_len, "refs/", 5);

        /* Second pass: copy ref, escaping as we go */
        out = result + base_len + 5;
        src = ref;
        while ((c = *src) != 0) {
                if (!needs_quote(c)) {
                        *out++ = c;
                } else {
                        *out++ = '%';
                        *out++ = hex((c >> 4) & 0xF);
                        *out++ = hex(c & 0xF);
                }
                src++;
        }
        *out = 0;

        return result;
}
1217
1218 int fetch_ref(char *ref, unsigned char *sha1)
1219 {
1220         char *url;
1221         char hex[42];
1222         struct buffer buffer;
1223         const char *base = alt->base;
1224         struct active_request_slot *slot;
1225         struct slot_results results;
1226         buffer.size = 41;
1227         buffer.posn = 0;
1228         buffer.buffer = hex;
1229         hex[41] = '\0';
1230
1231         url = quote_ref_url(base, ref);
1232         slot = get_active_slot();
1233         slot->results = &results;
1234         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
1235         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
1236         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
1237         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
1238         if (start_active_slot(slot)) {
1239                 run_active_slot(slot);
1240                 if (results.curl_result != CURLE_OK)
1241                         return error("Couldn't get %s for %s\n%s",
1242                                      url, ref, curl_errorstr);
1243         } else {
1244                 return error("Unable to start request");
1245         }
1246
1247         hex[40] = '\0';
1248         get_sha1_hex(hex, sha1);
1249         return 0;
1250 }
1251
1252 int main(int argc, const char **argv)
1253 {
1254         int commits;
1255         const char **write_ref = NULL;
1256         char **commit_id;
1257         const char *url;
1258         char *path;
1259         int arg = 1;
1260         int rc = 0;
1261
1262         setup_ident();
1263         setup_git_directory();
1264         git_config(git_default_config);
1265
1266         while (arg < argc && argv[arg][0] == '-') {
1267                 if (argv[arg][1] == 't') {
1268                         get_tree = 1;
1269                 } else if (argv[arg][1] == 'c') {
1270                         get_history = 1;
1271                 } else if (argv[arg][1] == 'a') {
1272                         get_all = 1;
1273                         get_tree = 1;
1274                         get_history = 1;
1275                 } else if (argv[arg][1] == 'v') {
1276                         get_verbosely = 1;
1277                 } else if (argv[arg][1] == 'w') {
1278                         write_ref = &argv[arg + 1];
1279                         arg++;
1280                 } else if (!strcmp(argv[arg], "--recover")) {
1281                         get_recover = 1;
1282                 } else if (!strcmp(argv[arg], "--stdin")) {
1283                         commits_on_stdin = 1;
1284                 }
1285                 arg++;
1286         }
1287         if (argc < arg + 2 - commits_on_stdin) {
1288                 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1289                 return 1;
1290         }
1291         if (commits_on_stdin) {
1292                 commits = pull_targets_stdin(&commit_id, &write_ref);
1293         } else {
1294                 commit_id = (char **) &argv[arg++];
1295                 commits = 1;
1296         }
1297         url = argv[arg];
1298
1299         http_init();
1300
1301         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1302
1303         alt = xmalloc(sizeof(*alt));
1304         alt->base = url;
1305         alt->got_indices = 0;
1306         alt->packs = NULL;
1307         alt->next = NULL;
1308         path = strstr(url, "//");
1309         if (path) {
1310                 path = strchr(path+2, '/');
1311                 if (path)
1312                         alt->path_len = strlen(path);
1313         }
1314
1315         if (pull(commits, commit_id, write_ref, url))
1316                 rc = 1;
1317
1318         http_cleanup();
1319
1320         curl_slist_free_all(no_pragma_header);
1321
1322         if (commits_on_stdin)
1323                 pull_targets_free(commits, commit_id, write_ref);
1324
1325         if (corrupt_object_found) {
1326                 fprintf(stderr,
1327 "Some loose object were found to be corrupt, but they might be just\n"
1328 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1329 "status code.  Suggest running git fsck-objects.\n");
1330         }
1331         return rc;
1332 }