gitweb: Use rev-list --skip option.
[git] / http-fetch.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "pack.h"
4 #include "fetch.h"
5 #include "http.h"
6
7 #define PREV_BUF_SIZE 4096
8 #define RANGE_HEADER_SIZE 30
9
10 static int commits_on_stdin;
11
12 static int got_alternates = -1;
13 static int corrupt_object_found;
14
15 static struct curl_slist *no_pragma_header;
16
/*
 * One remote object store objects may be downloaded from.  Entries are
 * chained through "next"; alternates discovered at run time are appended
 * to the end of the chain.
 */
struct alt_base {
        const char *base;               /* base URL of the repository */
        int path_len;                   /* strlen() of the path part of base */
        int got_indices;                /* non-zero once the pack list was fetched */
        struct packed_git *packs;       /* packs whose index was fetched here */
        struct alt_base *next;          /* next store to try, or NULL */
};
25
26 static struct alt_base *alt;
27
/* Life cycle of a queued loose-object download. */
enum object_request_state {
        WAITING,        /* queued by prefetch(), transfer not started yet */
        ABORTED,        /* the transfer could not be set up or started */
        ACTIVE,         /* handed over to curl */
        COMPLETE,       /* response processed, or object already present */
};
34
35 struct object_request
36 {
37         unsigned char sha1[20];
38         struct alt_base *repo;
39         char *url;
40         char filename[PATH_MAX];
41         char tmpfile[PATH_MAX];
42         int local;
43         enum object_request_state state;
44         CURLcode curl_result;
45         char errorstr[CURL_ERROR_SIZE];
46         long http_code;
47         unsigned char real_sha1[20];
48         SHA_CTX c;
49         z_stream stream;
50         int zret;
51         int rename;
52         struct active_request_slot *slot;
53         struct object_request *next;
54 };
55
/* Context handed to process_alternates_response() through the slot. */
struct alternates_request {
        const char *base;               /* repository whose alternates are wanted */
        char *url;                      /* request URL; rewritten for the fallback */
        struct buffer *buffer;          /* receives the response body */
        struct active_request_slot *slot; /* slot carrying the request */
        int http_specific;              /* 1: http-alternates, 0: plain alternates */
};
63
64 static struct object_request *object_queue_head;
65
66 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
67                                void *data)
68 {
69         unsigned char expn[4096];
70         size_t size = eltsize * nmemb;
71         int posn = 0;
72         struct object_request *obj_req = (struct object_request *)data;
73         do {
74                 ssize_t retval = write(obj_req->local,
75                                        (char *) ptr + posn, size - posn);
76                 if (retval < 0)
77                         return posn;
78                 posn += retval;
79         } while (posn < size);
80
81         obj_req->stream.avail_in = size;
82         obj_req->stream.next_in = ptr;
83         do {
84                 obj_req->stream.next_out = expn;
85                 obj_req->stream.avail_out = sizeof(expn);
86                 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
87                 SHA1_Update(&obj_req->c, expn,
88                             sizeof(expn) - obj_req->stream.avail_out);
89         } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
90         data_received++;
91         return size;
92 }
93
94 static int missing__target(int code, int result)
95 {
96         return  /* file:// URL -- do we ever use one??? */
97                 (result == CURLE_FILE_COULDNT_READ_FILE) ||
98                 /* http:// and https:// URL */
99                 (code == 404 && result == CURLE_HTTP_RETURNED_ERROR) ||
100                 /* ftp:// URL */
101                 (code == 550 && result == CURLE_FTP_COULDNT_RETR_FILE)
102                 ;
103 }
104
105 #define missing_target(a) missing__target((a)->http_code, (a)->curl_result)
106
107 static void fetch_alternates(const char *base);
108
109 static void process_object_response(void *callback_data);
110
111 static void start_object_request(struct object_request *obj_req)
112 {
113         char *hex = sha1_to_hex(obj_req->sha1);
114         char prevfile[PATH_MAX];
115         char *url;
116         char *posn;
117         int prevlocal;
118         unsigned char prev_buf[PREV_BUF_SIZE];
119         ssize_t prev_read = 0;
120         long prev_posn = 0;
121         char range[RANGE_HEADER_SIZE];
122         struct curl_slist *range_header = NULL;
123         struct active_request_slot *slot;
124
125         snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
126         unlink(prevfile);
127         rename(obj_req->tmpfile, prevfile);
128         unlink(obj_req->tmpfile);
129
130         if (obj_req->local != -1)
131                 error("fd leakage in start: %d", obj_req->local);
132         obj_req->local = open(obj_req->tmpfile,
133                               O_WRONLY | O_CREAT | O_EXCL, 0666);
134         /* This could have failed due to the "lazy directory creation";
135          * try to mkdir the last path component.
136          */
137         if (obj_req->local < 0 && errno == ENOENT) {
138                 char *dir = strrchr(obj_req->tmpfile, '/');
139                 if (dir) {
140                         *dir = 0;
141                         mkdir(obj_req->tmpfile, 0777);
142                         *dir = '/';
143                 }
144                 obj_req->local = open(obj_req->tmpfile,
145                                       O_WRONLY | O_CREAT | O_EXCL, 0666);
146         }
147
148         if (obj_req->local < 0) {
149                 obj_req->state = ABORTED;
150                 error("Couldn't create temporary file %s for %s: %s",
151                       obj_req->tmpfile, obj_req->filename, strerror(errno));
152                 return;
153         }
154
155         memset(&obj_req->stream, 0, sizeof(obj_req->stream));
156
157         inflateInit(&obj_req->stream);
158
159         SHA1_Init(&obj_req->c);
160
161         url = xmalloc(strlen(obj_req->repo->base) + 50);
162         obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
163         strcpy(url, obj_req->repo->base);
164         posn = url + strlen(obj_req->repo->base);
165         strcpy(posn, "objects/");
166         posn += 8;
167         memcpy(posn, hex, 2);
168         posn += 2;
169         *(posn++) = '/';
170         strcpy(posn, hex + 2);
171         strcpy(obj_req->url, url);
172
173         /* If a previous temp file is present, process what was already
174            fetched. */
175         prevlocal = open(prevfile, O_RDONLY);
176         if (prevlocal != -1) {
177                 do {
178                         prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
179                         if (prev_read>0) {
180                                 if (fwrite_sha1_file(prev_buf,
181                                                      1,
182                                                      prev_read,
183                                                      obj_req) == prev_read) {
184                                         prev_posn += prev_read;
185                                 } else {
186                                         prev_read = -1;
187                                 }
188                         }
189                 } while (prev_read > 0);
190                 close(prevlocal);
191         }
192         unlink(prevfile);
193
194         /* Reset inflate/SHA1 if there was an error reading the previous temp
195            file; also rewind to the beginning of the local file. */
196         if (prev_read == -1) {
197                 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
198                 inflateInit(&obj_req->stream);
199                 SHA1_Init(&obj_req->c);
200                 if (prev_posn>0) {
201                         prev_posn = 0;
202                         lseek(obj_req->local, SEEK_SET, 0);
203                         ftruncate(obj_req->local, 0);
204                 }
205         }
206
207         slot = get_active_slot();
208         slot->callback_func = process_object_response;
209         slot->callback_data = obj_req;
210         obj_req->slot = slot;
211
212         curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
213         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
214         curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
215         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
216         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
217
218         /* If we have successfully processed data from a previous fetch
219            attempt, only fetch the data we don't already have. */
220         if (prev_posn>0) {
221                 if (get_verbosely)
222                         fprintf(stderr,
223                                 "Resuming fetch of object %s at byte %ld\n",
224                                 hex, prev_posn);
225                 sprintf(range, "Range: bytes=%ld-", prev_posn);
226                 range_header = curl_slist_append(range_header, range);
227                 curl_easy_setopt(slot->curl,
228                                  CURLOPT_HTTPHEADER, range_header);
229         }
230
231         /* Try to get the request started, abort the request on error */
232         obj_req->state = ACTIVE;
233         if (!start_active_slot(slot)) {
234                 obj_req->state = ABORTED;
235                 obj_req->slot = NULL;
236                 close(obj_req->local); obj_req->local = -1;
237                 free(obj_req->url);
238                 return;
239         }
240 }
241
242 static void finish_object_request(struct object_request *obj_req)
243 {
244         struct stat st;
245
246         fchmod(obj_req->local, 0444);
247         close(obj_req->local); obj_req->local = -1;
248
249         if (obj_req->http_code == 416) {
250                 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
251         } else if (obj_req->curl_result != CURLE_OK) {
252                 if (stat(obj_req->tmpfile, &st) == 0)
253                         if (st.st_size == 0)
254                                 unlink(obj_req->tmpfile);
255                 return;
256         }
257
258         inflateEnd(&obj_req->stream);
259         SHA1_Final(obj_req->real_sha1, &obj_req->c);
260         if (obj_req->zret != Z_STREAM_END) {
261                 unlink(obj_req->tmpfile);
262                 return;
263         }
264         if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
265                 unlink(obj_req->tmpfile);
266                 return;
267         }
268         obj_req->rename =
269                 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
270
271         if (obj_req->rename == 0)
272                 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
273 }
274
275 static void process_object_response(void *callback_data)
276 {
277         struct object_request *obj_req =
278                 (struct object_request *)callback_data;
279
280         obj_req->curl_result = obj_req->slot->curl_result;
281         obj_req->http_code = obj_req->slot->http_code;
282         obj_req->slot = NULL;
283         obj_req->state = COMPLETE;
284
285         /* Use alternates if necessary */
286         if (missing_target(obj_req)) {
287                 fetch_alternates(alt->base);
288                 if (obj_req->repo->next != NULL) {
289                         obj_req->repo =
290                                 obj_req->repo->next;
291                         close(obj_req->local);
292                         obj_req->local = -1;
293                         start_object_request(obj_req);
294                         return;
295                 }
296         }
297
298         finish_object_request(obj_req);
299 }
300
301 static void release_object_request(struct object_request *obj_req)
302 {
303         struct object_request *entry = object_queue_head;
304
305         if (obj_req->local != -1)
306                 error("fd leakage in release: %d", obj_req->local);
307         if (obj_req == object_queue_head) {
308                 object_queue_head = obj_req->next;
309         } else {
310                 while (entry->next != NULL && entry->next != obj_req)
311                         entry = entry->next;
312                 if (entry->next == obj_req)
313                         entry->next = entry->next->next;
314         }
315
316         free(obj_req->url);
317         free(obj_req);
318 }
319
320 #ifdef USE_CURL_MULTI
321 void fill_active_slots(void)
322 {
323         struct object_request *obj_req = object_queue_head;
324         struct active_request_slot *slot = active_queue_head;
325         int num_transfers;
326
327         while (active_requests < max_requests && obj_req != NULL) {
328                 if (obj_req->state == WAITING) {
329                         if (has_sha1_file(obj_req->sha1))
330                                 obj_req->state = COMPLETE;
331                         else
332                                 start_object_request(obj_req);
333                         curl_multi_perform(curlm, &num_transfers);
334                 }
335                 obj_req = obj_req->next;
336         }
337
338         while (slot != NULL) {
339                 if (!slot->in_use && slot->curl != NULL) {
340                         curl_easy_cleanup(slot->curl);
341                         slot->curl = NULL;
342                 }
343                 slot = slot->next;
344         }
345 }
346 #endif
347
348 void prefetch(unsigned char *sha1)
349 {
350         struct object_request *newreq;
351         struct object_request *tail;
352         char *filename = sha1_file_name(sha1);
353
354         newreq = xmalloc(sizeof(*newreq));
355         hashcpy(newreq->sha1, sha1);
356         newreq->repo = alt;
357         newreq->url = NULL;
358         newreq->local = -1;
359         newreq->state = WAITING;
360         snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
361         snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
362                  "%s.temp", filename);
363         newreq->slot = NULL;
364         newreq->next = NULL;
365
366         if (object_queue_head == NULL) {
367                 object_queue_head = newreq;
368         } else {
369                 tail = object_queue_head;
370                 while (tail->next != NULL) {
371                         tail = tail->next;
372                 }
373                 tail->next = newreq;
374         }
375
376 #ifdef USE_CURL_MULTI
377         fill_active_slots();
378         step_active_slots();
379 #endif
380 }
381
382 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
383 {
384         char *hex = sha1_to_hex(sha1);
385         char *filename;
386         char *url;
387         char tmpfile[PATH_MAX];
388         long prev_posn = 0;
389         char range[RANGE_HEADER_SIZE];
390         struct curl_slist *range_header = NULL;
391
392         FILE *indexfile;
393         struct active_request_slot *slot;
394         struct slot_results results;
395
396         if (has_pack_index(sha1))
397                 return 0;
398
399         if (get_verbosely)
400                 fprintf(stderr, "Getting index for pack %s\n", hex);
401
402         url = xmalloc(strlen(repo->base) + 64);
403         sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
404
405         filename = sha1_pack_index_name(sha1);
406         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
407         indexfile = fopen(tmpfile, "a");
408         if (!indexfile)
409                 return error("Unable to open local file %s for pack index",
410                              filename);
411
412         slot = get_active_slot();
413         slot->results = &results;
414         curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
415         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
416         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
417         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
418         slot->local = indexfile;
419
420         /* If there is data present from a previous transfer attempt,
421            resume where it left off */
422         prev_posn = ftell(indexfile);
423         if (prev_posn>0) {
424                 if (get_verbosely)
425                         fprintf(stderr,
426                                 "Resuming fetch of index for pack %s at byte %ld\n",
427                                 hex, prev_posn);
428                 sprintf(range, "Range: bytes=%ld-", prev_posn);
429                 range_header = curl_slist_append(range_header, range);
430                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
431         }
432
433         if (start_active_slot(slot)) {
434                 run_active_slot(slot);
435                 if (results.curl_result != CURLE_OK) {
436                         fclose(indexfile);
437                         return error("Unable to get pack index %s\n%s", url,
438                                      curl_errorstr);
439                 }
440         } else {
441                 fclose(indexfile);
442                 return error("Unable to start request");
443         }
444
445         fclose(indexfile);
446
447         return move_temp_to_file(tmpfile, filename);
448 }
449
450 static int setup_index(struct alt_base *repo, unsigned char *sha1)
451 {
452         struct packed_git *new_pack;
453         if (has_pack_file(sha1))
454                 return 0; /* don't list this as something we can get */
455
456         if (fetch_index(repo, sha1))
457                 return -1;
458
459         new_pack = parse_pack_index(sha1);
460         new_pack->next = repo->packs;
461         repo->packs = new_pack;
462         return 0;
463 }
464
465 static void process_alternates_response(void *callback_data)
466 {
467         struct alternates_request *alt_req =
468                 (struct alternates_request *)callback_data;
469         struct active_request_slot *slot = alt_req->slot;
470         struct alt_base *tail = alt;
471         const char *base = alt_req->base;
472         static const char null_byte = '\0';
473         char *data;
474         int i = 0;
475
476         if (alt_req->http_specific) {
477                 if (slot->curl_result != CURLE_OK ||
478                     !alt_req->buffer->posn) {
479
480                         /* Try reusing the slot to get non-http alternates */
481                         alt_req->http_specific = 0;
482                         sprintf(alt_req->url, "%s/objects/info/alternates",
483                                 base);
484                         curl_easy_setopt(slot->curl, CURLOPT_URL,
485                                          alt_req->url);
486                         active_requests++;
487                         slot->in_use = 1;
488                         if (slot->finished != NULL)
489                                 (*slot->finished) = 0;
490                         if (!start_active_slot(slot)) {
491                                 got_alternates = -1;
492                                 slot->in_use = 0;
493                                 if (slot->finished != NULL)
494                                         (*slot->finished) = 1;
495                         }
496                         return;
497                 }
498         } else if (slot->curl_result != CURLE_OK) {
499                 if (!missing_target(slot)) {
500                         got_alternates = -1;
501                         return;
502                 }
503         }
504
505         fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
506         alt_req->buffer->posn--;
507         data = alt_req->buffer->buffer;
508
509         while (i < alt_req->buffer->posn) {
510                 int posn = i;
511                 while (posn < alt_req->buffer->posn && data[posn] != '\n')
512                         posn++;
513                 if (data[posn] == '\n') {
514                         int okay = 0;
515                         int serverlen = 0;
516                         struct alt_base *newalt;
517                         char *target = NULL;
518                         char *path;
519                         if (data[i] == '/') {
520                                 /* This counts
521                                  * http://git.host/pub/scm/linux.git/
522                                  * -----------here^
523                                  * so memcpy(dst, base, serverlen) will
524                                  * copy up to "...git.host".
525                                  */
526                                 const char *colon_ss = strstr(base,"://");
527                                 if (colon_ss) {
528                                         serverlen = (strchr(colon_ss + 3, '/')
529                                                      - base);
530                                         okay = 1;
531                                 }
532                         } else if (!memcmp(data + i, "../", 3)) {
533                                 /* Relative URL; chop the corresponding
534                                  * number of subpath from base (and ../
535                                  * from data), and concatenate the result.
536                                  *
537                                  * The code first drops ../ from data, and
538                                  * then drops one ../ from data and one path
539                                  * from base.  IOW, one extra ../ is dropped
540                                  * from data than path is dropped from base.
541                                  *
542                                  * This is not wrong.  The alternate in
543                                  *     http://git.host/pub/scm/linux.git/
544                                  * to borrow from
545                                  *     http://git.host/pub/scm/linus.git/
546                                  * is ../../linus.git/objects/.  You need
547                                  * two ../../ to borrow from your direct
548                                  * neighbour.
549                                  */
550                                 i += 3;
551                                 serverlen = strlen(base);
552                                 while (i + 2 < posn &&
553                                        !memcmp(data + i, "../", 3)) {
554                                         do {
555                                                 serverlen--;
556                                         } while (serverlen &&
557                                                  base[serverlen - 1] != '/');
558                                         i += 3;
559                                 }
560                                 /* If the server got removed, give up. */
561                                 okay = strchr(base, ':') - base + 3 <
562                                         serverlen;
563                         } else if (alt_req->http_specific) {
564                                 char *colon = strchr(data + i, ':');
565                                 char *slash = strchr(data + i, '/');
566                                 if (colon && slash && colon < data + posn &&
567                                     slash < data + posn && colon < slash) {
568                                         okay = 1;
569                                 }
570                         }
571                         /* skip "objects\n" at end */
572                         if (okay) {
573                                 target = xmalloc(serverlen + posn - i - 6);
574                                 memcpy(target, base, serverlen);
575                                 memcpy(target + serverlen, data + i,
576                                        posn - i - 7);
577                                 target[serverlen + posn - i - 7] = 0;
578                                 if (get_verbosely)
579                                         fprintf(stderr,
580                                                 "Also look at %s\n", target);
581                                 newalt = xmalloc(sizeof(*newalt));
582                                 newalt->next = NULL;
583                                 newalt->base = target;
584                                 newalt->got_indices = 0;
585                                 newalt->packs = NULL;
586                                 path = strstr(target, "//");
587                                 if (path) {
588                                         path = strchr(path+2, '/');
589                                         if (path)
590                                                 newalt->path_len = strlen(path);
591                                 }
592
593                                 while (tail->next != NULL)
594                                         tail = tail->next;
595                                 tail->next = newalt;
596                         }
597                 }
598                 i = posn + 1;
599         }
600
601         got_alternates = 1;
602 }
603
604 static void fetch_alternates(const char *base)
605 {
606         struct buffer buffer;
607         char *url;
608         char *data;
609         struct active_request_slot *slot;
610         struct alternates_request alt_req;
611
612         /* If another request has already started fetching alternates,
613            wait for them to arrive and return to processing this request's
614            curl message */
615 #ifdef USE_CURL_MULTI
616         while (got_alternates == 0) {
617                 step_active_slots();
618         }
619 #endif
620
621         /* Nothing to do if they've already been fetched */
622         if (got_alternates == 1)
623                 return;
624
625         /* Start the fetch */
626         got_alternates = 0;
627
628         data = xmalloc(4096);
629         buffer.size = 4096;
630         buffer.posn = 0;
631         buffer.buffer = data;
632
633         if (get_verbosely)
634                 fprintf(stderr, "Getting alternates list for %s\n", base);
635
636         url = xmalloc(strlen(base) + 31);
637         sprintf(url, "%s/objects/info/http-alternates", base);
638
639         /* Use a callback to process the result, since another request
640            may fail and need to have alternates loaded before continuing */
641         slot = get_active_slot();
642         slot->callback_func = process_alternates_response;
643         slot->callback_data = &alt_req;
644
645         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
646         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
647         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
648
649         alt_req.base = base;
650         alt_req.url = url;
651         alt_req.buffer = &buffer;
652         alt_req.http_specific = 1;
653         alt_req.slot = slot;
654
655         if (start_active_slot(slot))
656                 run_active_slot(slot);
657         else
658                 got_alternates = -1;
659
660         free(data);
661         free(url);
662 }
663
664 static int fetch_indices(struct alt_base *repo)
665 {
666         unsigned char sha1[20];
667         char *url;
668         struct buffer buffer;
669         char *data;
670         int i = 0;
671
672         struct active_request_slot *slot;
673         struct slot_results results;
674
675         if (repo->got_indices)
676                 return 0;
677
678         data = xmalloc(4096);
679         buffer.size = 4096;
680         buffer.posn = 0;
681         buffer.buffer = data;
682
683         if (get_verbosely)
684                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
685
686         url = xmalloc(strlen(repo->base) + 21);
687         sprintf(url, "%s/objects/info/packs", repo->base);
688
689         slot = get_active_slot();
690         slot->results = &results;
691         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
692         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
693         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
694         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
695         if (start_active_slot(slot)) {
696                 run_active_slot(slot);
697                 if (results.curl_result != CURLE_OK) {
698                         if (missing_target(&results)) {
699                                 repo->got_indices = 1;
700                                 free(buffer.buffer);
701                                 return 0;
702                         } else {
703                                 repo->got_indices = 0;
704                                 free(buffer.buffer);
705                                 return error("%s", curl_errorstr);
706                         }
707                 }
708         } else {
709                 repo->got_indices = 0;
710                 free(buffer.buffer);
711                 return error("Unable to start request");
712         }
713
714         data = buffer.buffer;
715         while (i < buffer.posn) {
716                 switch (data[i]) {
717                 case 'P':
718                         i++;
719                         if (i + 52 <= buffer.posn &&
720                             !strncmp(data + i, " pack-", 6) &&
721                             !strncmp(data + i + 46, ".pack\n", 6)) {
722                                 get_sha1_hex(data + i + 6, sha1);
723                                 setup_index(repo, sha1);
724                                 i += 51;
725                                 break;
726                         }
727                 default:
728                         while (i < buffer.posn && data[i] != '\n')
729                                 i++;
730                 }
731                 i++;
732         }
733
734         free(buffer.buffer);
735         repo->got_indices = 1;
736         return 0;
737 }
738
739 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
740 {
741         char *url;
742         struct packed_git *target;
743         struct packed_git **lst;
744         FILE *packfile;
745         char *filename;
746         char tmpfile[PATH_MAX];
747         int ret;
748         long prev_posn = 0;
749         char range[RANGE_HEADER_SIZE];
750         struct curl_slist *range_header = NULL;
751
752         struct active_request_slot *slot;
753         struct slot_results results;
754
755         if (fetch_indices(repo))
756                 return -1;
757         target = find_sha1_pack(sha1, repo->packs);
758         if (!target)
759                 return -1;
760
761         if (get_verbosely) {
762                 fprintf(stderr, "Getting pack %s\n",
763                         sha1_to_hex(target->sha1));
764                 fprintf(stderr, " which contains %s\n",
765                         sha1_to_hex(sha1));
766         }
767
768         url = xmalloc(strlen(repo->base) + 65);
769         sprintf(url, "%s/objects/pack/pack-%s.pack",
770                 repo->base, sha1_to_hex(target->sha1));
771
772         filename = sha1_pack_name(target->sha1);
773         snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
774         packfile = fopen(tmpfile, "a");
775         if (!packfile)
776                 return error("Unable to open local file %s for pack",
777                              filename);
778
779         slot = get_active_slot();
780         slot->results = &results;
781         curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
782         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
783         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
784         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
785         slot->local = packfile;
786
787         /* If there is data present from a previous transfer attempt,
788            resume where it left off */
789         prev_posn = ftell(packfile);
790         if (prev_posn>0) {
791                 if (get_verbosely)
792                         fprintf(stderr,
793                                 "Resuming fetch of pack %s at byte %ld\n",
794                                 sha1_to_hex(target->sha1), prev_posn);
795                 sprintf(range, "Range: bytes=%ld-", prev_posn);
796                 range_header = curl_slist_append(range_header, range);
797                 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
798         }
799
800         if (start_active_slot(slot)) {
801                 run_active_slot(slot);
802                 if (results.curl_result != CURLE_OK) {
803                         fclose(packfile);
804                         return error("Unable to get pack file %s\n%s", url,
805                                      curl_errorstr);
806                 }
807         } else {
808                 fclose(packfile);
809                 return error("Unable to start request");
810         }
811
812         fclose(packfile);
813
814         ret = move_temp_to_file(tmpfile, filename);
815         if (ret)
816                 return ret;
817
818         lst = &repo->packs;
819         while (*lst != target)
820                 lst = &((*lst)->next);
821         *lst = (*lst)->next;
822
823         if (verify_pack(target, 0))
824                 return -1;
825         install_packed_git(target);
826
827         return 0;
828 }
829
830 static void abort_object_request(struct object_request *obj_req)
831 {
832         if (obj_req->local >= 0) {
833                 close(obj_req->local);
834                 obj_req->local = -1;
835         }
836         unlink(obj_req->tmpfile);
837         if (obj_req->slot) {
838                 release_active_slot(obj_req->slot);
839                 obj_req->slot = NULL;
840         }
841         release_object_request(obj_req);
842 }
843
844 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
845 {
846         char *hex = sha1_to_hex(sha1);
847         int ret = 0;
848         struct object_request *obj_req = object_queue_head;
849
850         while (obj_req != NULL && hashcmp(obj_req->sha1, sha1))
851                 obj_req = obj_req->next;
852         if (obj_req == NULL)
853                 return error("Couldn't find request for %s in the queue", hex);
854
855         if (has_sha1_file(obj_req->sha1)) {
856                 abort_object_request(obj_req);
857                 return 0;
858         }
859
860 #ifdef USE_CURL_MULTI
861         while (obj_req->state == WAITING) {
862                 step_active_slots();
863         }
864 #else
865         start_object_request(obj_req);
866 #endif
867
868         while (obj_req->state == ACTIVE) {
869                 run_active_slot(obj_req->slot);
870         }
871         if (obj_req->local != -1) {
872                 close(obj_req->local); obj_req->local = -1;
873         }
874
875         if (obj_req->state == ABORTED) {
876                 ret = error("Request for %s aborted", hex);
877         } else if (obj_req->curl_result != CURLE_OK &&
878                    obj_req->http_code != 416) {
879                 if (missing_target(obj_req))
880                         ret = -1; /* Be silent, it is probably in a pack. */
881                 else
882                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
883                                     obj_req->errorstr, obj_req->curl_result,
884                                     obj_req->http_code, hex);
885         } else if (obj_req->zret != Z_STREAM_END) {
886                 corrupt_object_found++;
887                 ret = error("File %s (%s) corrupt", hex, obj_req->url);
888         } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) {
889                 ret = error("File %s has bad hash", hex);
890         } else if (obj_req->rename < 0) {
891                 ret = error("unable to write sha1 filename %s",
892                             obj_req->filename);
893         }
894
895         release_object_request(obj_req);
896         return ret;
897 }
898
899 int fetch(unsigned char *sha1)
900 {
901         struct alt_base *altbase = alt;
902
903         if (!fetch_object(altbase, sha1))
904                 return 0;
905         while (altbase) {
906                 if (!fetch_pack(altbase, sha1))
907                         return 0;
908                 fetch_alternates(alt->base);
909                 altbase = altbase->next;
910         }
911         return error("Unable to find %s under %s", sha1_to_hex(sha1),
912                      alt->base);
913 }
914
/*
 * Tell whether character `ch` has to be percent-quoted in a ref URL.
 *
 * ASCII letters, decimal digits, '/', '-' and '.' are passed through
 * unchanged (return 0); every other value must be quoted (return 1).
 */
static inline int needs_quote(int ch)
{
        int upper = (ch >= 'A') && (ch <= 'Z');
        int lower = (ch >= 'a') && (ch <= 'z');
        int digit = (ch >= '0') && (ch <= '9');
        int plain = (ch == '/') || (ch == '-') || (ch == '.');

        return !(upper || lower || digit || plain);
}
926
/*
 * Map a nibble value to its upper-case hexadecimal digit character:
 * values below 10 give '0'..'9', larger values give 'A' onwards.
 * Callers are expected to pass a value in the range 0..15.
 */
static inline int hex(int v)
{
        return (v < 10) ? '0' + v : 'A' + v - 10;
}
932
/*
 * Build the URL of `ref` below the repository at `base`.
 *
 * The result is "<base>/refs/<ref>", where every character of `ref` for
 * which needs_quote() is true is written as '%' followed by two upper-case
 * hex digits.  A '/' is inserted between `base` and "refs/" only when
 * `base` is non-empty and does not already end in one, so bases with and
 * without a trailing slash both produce a well-formed URL (the pack URL
 * built elsewhere in this file likewise separates base and path with '/').
 *
 * Returns a NUL-terminated string obtained from xmalloc(); the caller owns
 * it.
 */
static char *quote_ref_url(const char *base, const char *ref)
{
        const char *cp;
        char *dp, *qref;
        int len, baselen, ch, need_slash;

        baselen = strlen(base);
        need_slash = (baselen > 0 && base[baselen - 1] != '/');

        /* base + optional '/' + "refs/" + NUL, then the quoted ref. */
        len = baselen + need_slash + 6;
        for (cp = ref; (ch = (unsigned char) *cp) != 0; cp++, len++)
                if (needs_quote(ch))
                        len += 2; /* extra two hex plus replacement % */

        qref = xmalloc(len);
        dp = qref;
        memcpy(dp, base, baselen);
        dp += baselen;
        if (need_slash)
                *dp++ = '/';
        memcpy(dp, "refs/", 5);
        dp += 5;

        /*
         * Characters are read as unsigned char so that extracting the
         * high nibble never right-shifts a negative value.
         */
        for (cp = ref; (ch = (unsigned char) *cp) != 0; cp++) {
                if (needs_quote(ch)) {
                        *dp++ = '%';
                        *dp++ = hex((ch >> 4) & 0xF);
                        *dp++ = hex(ch & 0xF);
                }
                else
                        *dp++ = ch;
        }
        *dp = 0;

        return qref;
}
960
961 int fetch_ref(char *ref, unsigned char *sha1)
962 {
963         char *url;
964         char hex[42];
965         struct buffer buffer;
966         const char *base = alt->base;
967         struct active_request_slot *slot;
968         struct slot_results results;
969         buffer.size = 41;
970         buffer.posn = 0;
971         buffer.buffer = hex;
972         hex[41] = '\0';
973
974         url = quote_ref_url(base, ref);
975         slot = get_active_slot();
976         slot->results = &results;
977         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
978         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
979         curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
980         curl_easy_setopt(slot->curl, CURLOPT_URL, url);
981         if (start_active_slot(slot)) {
982                 run_active_slot(slot);
983                 if (results.curl_result != CURLE_OK)
984                         return error("Couldn't get %s for %s\n%s",
985                                      url, ref, curl_errorstr);
986         } else {
987                 return error("Unable to start request");
988         }
989
990         hex[40] = '\0';
991         get_sha1_hex(hex, sha1);
992         return 0;
993 }
994
995 int main(int argc, const char **argv)
996 {
997         int commits;
998         const char **write_ref = NULL;
999         char **commit_id;
1000         const char *url;
1001         char *path;
1002         int arg = 1;
1003         int rc = 0;
1004
1005         setup_ident();
1006         setup_git_directory();
1007         git_config(git_default_config);
1008
1009         while (arg < argc && argv[arg][0] == '-') {
1010                 if (argv[arg][1] == 't') {
1011                         get_tree = 1;
1012                 } else if (argv[arg][1] == 'c') {
1013                         get_history = 1;
1014                 } else if (argv[arg][1] == 'a') {
1015                         get_all = 1;
1016                         get_tree = 1;
1017                         get_history = 1;
1018                 } else if (argv[arg][1] == 'v') {
1019                         get_verbosely = 1;
1020                 } else if (argv[arg][1] == 'w') {
1021                         write_ref = &argv[arg + 1];
1022                         arg++;
1023                 } else if (!strcmp(argv[arg], "--recover")) {
1024                         get_recover = 1;
1025                 } else if (!strcmp(argv[arg], "--stdin")) {
1026                         commits_on_stdin = 1;
1027                 }
1028                 arg++;
1029         }
1030         if (argc < arg + 2 - commits_on_stdin) {
1031                 usage("git-http-fetch [-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url");
1032                 return 1;
1033         }
1034         if (commits_on_stdin) {
1035                 commits = pull_targets_stdin(&commit_id, &write_ref);
1036         } else {
1037                 commit_id = (char **) &argv[arg++];
1038                 commits = 1;
1039         }
1040         url = argv[arg];
1041
1042         http_init();
1043
1044         no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
1045
1046         alt = xmalloc(sizeof(*alt));
1047         alt->base = url;
1048         alt->got_indices = 0;
1049         alt->packs = NULL;
1050         alt->next = NULL;
1051         path = strstr(url, "//");
1052         if (path) {
1053                 path = strchr(path+2, '/');
1054                 if (path)
1055                         alt->path_len = strlen(path);
1056         }
1057
1058         if (pull(commits, commit_id, write_ref, url))
1059                 rc = 1;
1060
1061         http_cleanup();
1062
1063         curl_slist_free_all(no_pragma_header);
1064
1065         if (commits_on_stdin)
1066                 pull_targets_free(commits, commit_id, write_ref);
1067
1068         if (corrupt_object_found) {
1069                 fprintf(stderr,
1070 "Some loose object were found to be corrupt, but they might be just\n"
1071 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1072 "status code.  Suggest running git fsck-objects.\n");
1073         }
1074         return rc;
1075 }