Merge branch 'cb/pcre2-chartables-leakfix'
[git] / http-walker.c
1 #include "cache.h"
2 #include "repository.h"
3 #include "commit.h"
4 #include "walker.h"
5 #include "http.h"
6 #include "list.h"
7 #include "transport.h"
8 #include "packfile.h"
9 #include "object-store.h"
10
/*
 * One object store the walker may fetch from.  The walker keeps these in
 * a singly linked list whose head is created by get_http_walker(); further
 * entries are appended by process_alternates_response().
 */
struct alt_base {
        /* Base URL of the store; heap-allocated and freed in cleanup(). */
        char *base;
        /* Set to 1 by fetch_indices() once the pack list request succeeded. */
        int got_indices;
        /* Pack list filled in through http_get_info_packs(). */
        struct packed_git *packs;
        /* Next store in the list, or NULL at the tail. */
        struct alt_base *next;
};
17
/* Life cycle of a queued object request. */
enum object_request_state {
        /* Queued by prefetch(), no HTTP request started yet. */
        WAITING,
        /* start_object_request() could not create or start the request. */
        ABORTED,
        /* An HTTP request has been started and has not finished yet. */
        ACTIVE,
        /* The response was processed, or the object already exists locally. */
        COMPLETE
};
24
/* A single object the walker wants, linked into object_queue_head. */
struct object_request {
        /* Walker that queued this request. */
        struct walker *walker;
        /* Id of the wanted object. */
        struct object_id oid;
        /* Store currently being tried; advanced along the alternates list on a miss. */
        struct alt_base *repo;
        enum object_request_state state;
        /* Underlying HTTP request; NULL until start_object_request() creates one. */
        struct http_object_request *req;
        /* Link in object_queue_head. */
        struct list_head node;
};
33
/*
 * State shared between fetch_alternates() and its slot callback,
 * process_alternates_response().
 */
struct alternates_request {
        struct walker *walker;
        /* Base URL whose alternates are being fetched. */
        const char *base;
        /* URL being requested; rewritten when falling back to the plain alternates file. */
        struct strbuf *url;
        /* Receives the body of the response. */
        struct strbuf *buffer;
        /* Slot carrying the request; reused for the fallback request. */
        struct active_request_slot *slot;
        /* 1 while requesting info/http-alternates, 0 for info/alternates. */
        int http_specific;
};
42
/* HTTP-specific state hung off walker->data. */
struct walker_data {
        /* NOTE(review): not assigned or read anywhere in this file - confirm it is still needed. */
        const char *url;
        /*
         * -1: alternates not fetched yet (or the fetch failed),
         *  0: a fetch is in progress,
         *  1: alternates have been fetched.
         */
        int got_alternates;
        /* Head of the object store list; the first entry is the URL given to get_http_walker(). */
        struct alt_base *alt;
};
48
/* Queue of outstanding object requests; prefetch() appends to its tail. */
static LIST_HEAD(object_queue_head);

/* Forward declaration: process_object_response() calls this before its definition. */
static void fetch_alternates(struct walker *walker, const char *base);

/* Forward declaration: start_object_request() installs this as a slot callback. */
static void process_object_response(void *callback_data);
54
55 static void start_object_request(struct walker *walker,
56                                  struct object_request *obj_req)
57 {
58         struct active_request_slot *slot;
59         struct http_object_request *req;
60
61         req = new_http_object_request(obj_req->repo->base, &obj_req->oid);
62         if (req == NULL) {
63                 obj_req->state = ABORTED;
64                 return;
65         }
66         obj_req->req = req;
67
68         slot = req->slot;
69         slot->callback_func = process_object_response;
70         slot->callback_data = obj_req;
71
72         /* Try to get the request started, abort the request on error */
73         obj_req->state = ACTIVE;
74         if (!start_active_slot(slot)) {
75                 obj_req->state = ABORTED;
76                 release_http_object_request(req);
77                 return;
78         }
79 }
80
81 static void finish_object_request(struct object_request *obj_req)
82 {
83         if (finish_http_object_request(obj_req->req))
84                 return;
85
86         if (obj_req->req->rename == 0)
87                 walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
88 }
89
90 static void process_object_response(void *callback_data)
91 {
92         struct object_request *obj_req =
93                 (struct object_request *)callback_data;
94         struct walker *walker = obj_req->walker;
95         struct walker_data *data = walker->data;
96         struct alt_base *alt = data->alt;
97
98         process_http_object_request(obj_req->req);
99         obj_req->state = COMPLETE;
100
101         normalize_curl_result(&obj_req->req->curl_result,
102                               obj_req->req->http_code,
103                               obj_req->req->errorstr,
104                               sizeof(obj_req->req->errorstr));
105
106         /* Use alternates if necessary */
107         if (missing_target(obj_req->req)) {
108                 fetch_alternates(walker, alt->base);
109                 if (obj_req->repo->next != NULL) {
110                         obj_req->repo =
111                                 obj_req->repo->next;
112                         release_http_object_request(obj_req->req);
113                         start_object_request(walker, obj_req);
114                         return;
115                 }
116         }
117
118         finish_object_request(obj_req);
119 }
120
121 static void release_object_request(struct object_request *obj_req)
122 {
123         if (obj_req->req !=NULL && obj_req->req->localfile != -1)
124                 error("fd leakage in release: %d", obj_req->req->localfile);
125
126         list_del(&obj_req->node);
127         free(obj_req);
128 }
129
#ifdef USE_CURL_MULTI
/*
 * Fill function registered with the HTTP layer: start the first WAITING
 * request whose object is not available locally.  WAITING requests whose
 * object already exists are marked COMPLETE along the way.
 *
 * Returns 1 if a request was started, 0 if there was nothing to start.
 */
static int fill_active_slot(struct walker *walker)
{
        struct list_head *cur, *next;

        list_for_each_safe(cur, next, &object_queue_head) {
                struct object_request *candidate =
                        list_entry(cur, struct object_request, node);

                if (candidate->state != WAITING)
                        continue;

                if (!has_object_file(&candidate->oid)) {
                        start_object_request(walker, candidate);
                        return 1;
                }
                candidate->state = COMPLETE;
        }
        return 0;
}
#endif
150
151 static void prefetch(struct walker *walker, unsigned char *sha1)
152 {
153         struct object_request *newreq;
154         struct walker_data *data = walker->data;
155
156         newreq = xmalloc(sizeof(*newreq));
157         newreq->walker = walker;
158         hashcpy(newreq->oid.hash, sha1);
159         newreq->repo = data->alt;
160         newreq->state = WAITING;
161         newreq->req = NULL;
162
163         http_is_verbose = walker->get_verbosely;
164         list_add_tail(&newreq->node, &object_queue_head);
165
166 #ifdef USE_CURL_MULTI
167         fill_active_slots();
168         step_active_slots();
169 #endif
170 }
171
172 static int is_alternate_allowed(const char *url)
173 {
174         const char *protocols[] = {
175                 "http", "https", "ftp", "ftps"
176         };
177         int i;
178
179         if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
180                 warning("alternate disabled by http.followRedirects: %s", url);
181                 return 0;
182         }
183
184         for (i = 0; i < ARRAY_SIZE(protocols); i++) {
185                 const char *end;
186                 if (skip_prefix(url, protocols[i], &end) &&
187                     starts_with(end, "://"))
188                         break;
189         }
190
191         if (i >= ARRAY_SIZE(protocols)) {
192                 warning("ignoring alternate with unknown protocol: %s", url);
193                 return 0;
194         }
195         if (!is_transport_allowed(protocols[i], 0)) {
196                 warning("ignoring alternate with restricted protocol: %s", url);
197                 return 0;
198         }
199
200         return 1;
201 }
202
203 static void process_alternates_response(void *callback_data)
204 {
205         struct alternates_request *alt_req =
206                 (struct alternates_request *)callback_data;
207         struct walker *walker = alt_req->walker;
208         struct walker_data *cdata = walker->data;
209         struct active_request_slot *slot = alt_req->slot;
210         struct alt_base *tail = cdata->alt;
211         const char *base = alt_req->base;
212         const char null_byte = '\0';
213         char *data;
214         int i = 0;
215
216         normalize_curl_result(&slot->curl_result, slot->http_code,
217                               curl_errorstr, sizeof(curl_errorstr));
218
219         if (alt_req->http_specific) {
220                 if (slot->curl_result != CURLE_OK ||
221                     !alt_req->buffer->len) {
222
223                         /* Try reusing the slot to get non-http alternates */
224                         alt_req->http_specific = 0;
225                         strbuf_reset(alt_req->url);
226                         strbuf_addf(alt_req->url, "%s/objects/info/alternates",
227                                     base);
228                         curl_easy_setopt(slot->curl, CURLOPT_URL,
229                                          alt_req->url->buf);
230                         active_requests++;
231                         slot->in_use = 1;
232                         if (slot->finished != NULL)
233                                 (*slot->finished) = 0;
234                         if (!start_active_slot(slot)) {
235                                 cdata->got_alternates = -1;
236                                 slot->in_use = 0;
237                                 if (slot->finished != NULL)
238                                         (*slot->finished) = 1;
239                         }
240                         return;
241                 }
242         } else if (slot->curl_result != CURLE_OK) {
243                 if (!missing_target(slot)) {
244                         cdata->got_alternates = -1;
245                         return;
246                 }
247         }
248
249         fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
250         alt_req->buffer->len--;
251         data = alt_req->buffer->buf;
252
253         while (i < alt_req->buffer->len) {
254                 int posn = i;
255                 while (posn < alt_req->buffer->len && data[posn] != '\n')
256                         posn++;
257                 if (data[posn] == '\n') {
258                         int okay = 0;
259                         int serverlen = 0;
260                         struct alt_base *newalt;
261                         if (data[i] == '/') {
262                                 /*
263                                  * This counts
264                                  * http://git.host/pub/scm/linux.git/
265                                  * -----------here^
266                                  * so memcpy(dst, base, serverlen) will
267                                  * copy up to "...git.host".
268                                  */
269                                 const char *colon_ss = strstr(base,"://");
270                                 if (colon_ss) {
271                                         serverlen = (strchr(colon_ss + 3, '/')
272                                                      - base);
273                                         okay = 1;
274                                 }
275                         } else if (!memcmp(data + i, "../", 3)) {
276                                 /*
277                                  * Relative URL; chop the corresponding
278                                  * number of subpath from base (and ../
279                                  * from data), and concatenate the result.
280                                  *
281                                  * The code first drops ../ from data, and
282                                  * then drops one ../ from data and one path
283                                  * from base.  IOW, one extra ../ is dropped
284                                  * from data than path is dropped from base.
285                                  *
286                                  * This is not wrong.  The alternate in
287                                  *     http://git.host/pub/scm/linux.git/
288                                  * to borrow from
289                                  *     http://git.host/pub/scm/linus.git/
290                                  * is ../../linus.git/objects/.  You need
291                                  * two ../../ to borrow from your direct
292                                  * neighbour.
293                                  */
294                                 i += 3;
295                                 serverlen = strlen(base);
296                                 while (i + 2 < posn &&
297                                        !memcmp(data + i, "../", 3)) {
298                                         do {
299                                                 serverlen--;
300                                         } while (serverlen &&
301                                                  base[serverlen - 1] != '/');
302                                         i += 3;
303                                 }
304                                 /* If the server got removed, give up. */
305                                 okay = strchr(base, ':') - base + 3 <
306                                        serverlen;
307                         } else if (alt_req->http_specific) {
308                                 char *colon = strchr(data + i, ':');
309                                 char *slash = strchr(data + i, '/');
310                                 if (colon && slash && colon < data + posn &&
311                                     slash < data + posn && colon < slash) {
312                                         okay = 1;
313                                 }
314                         }
315                         if (okay) {
316                                 struct strbuf target = STRBUF_INIT;
317                                 strbuf_add(&target, base, serverlen);
318                                 strbuf_add(&target, data + i, posn - i);
319                                 if (!strbuf_strip_suffix(&target, "objects")) {
320                                         warning("ignoring alternate that does"
321                                                 " not end in 'objects': %s",
322                                                 target.buf);
323                                         strbuf_release(&target);
324                                 } else if (is_alternate_allowed(target.buf)) {
325                                         warning("adding alternate object store: %s",
326                                                 target.buf);
327                                         newalt = xmalloc(sizeof(*newalt));
328                                         newalt->next = NULL;
329                                         newalt->base = strbuf_detach(&target, NULL);
330                                         newalt->got_indices = 0;
331                                         newalt->packs = NULL;
332
333                                         while (tail->next != NULL)
334                                                 tail = tail->next;
335                                         tail->next = newalt;
336                                 } else {
337                                         strbuf_release(&target);
338                                 }
339                         }
340                 }
341                 i = posn + 1;
342         }
343
344         cdata->got_alternates = 1;
345 }
346
347 static void fetch_alternates(struct walker *walker, const char *base)
348 {
349         struct strbuf buffer = STRBUF_INIT;
350         struct strbuf url = STRBUF_INIT;
351         struct active_request_slot *slot;
352         struct alternates_request alt_req;
353         struct walker_data *cdata = walker->data;
354
355         /*
356          * If another request has already started fetching alternates,
357          * wait for them to arrive and return to processing this request's
358          * curl message
359          */
360 #ifdef USE_CURL_MULTI
361         while (cdata->got_alternates == 0) {
362                 step_active_slots();
363         }
364 #endif
365
366         /* Nothing to do if they've already been fetched */
367         if (cdata->got_alternates == 1)
368                 return;
369
370         /* Start the fetch */
371         cdata->got_alternates = 0;
372
373         if (walker->get_verbosely)
374                 fprintf(stderr, "Getting alternates list for %s\n", base);
375
376         strbuf_addf(&url, "%s/objects/info/http-alternates", base);
377
378         /*
379          * Use a callback to process the result, since another request
380          * may fail and need to have alternates loaded before continuing
381          */
382         slot = get_active_slot();
383         slot->callback_func = process_alternates_response;
384         alt_req.walker = walker;
385         slot->callback_data = &alt_req;
386
387         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
388         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
389         curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
390
391         alt_req.base = base;
392         alt_req.url = &url;
393         alt_req.buffer = &buffer;
394         alt_req.http_specific = 1;
395         alt_req.slot = slot;
396
397         if (start_active_slot(slot))
398                 run_active_slot(slot);
399         else
400                 cdata->got_alternates = -1;
401
402         strbuf_release(&buffer);
403         strbuf_release(&url);
404 }
405
406 static int fetch_indices(struct walker *walker, struct alt_base *repo)
407 {
408         int ret;
409
410         if (repo->got_indices)
411                 return 0;
412
413         if (walker->get_verbosely)
414                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
415
416         switch (http_get_info_packs(repo->base, &repo->packs)) {
417         case HTTP_OK:
418         case HTTP_MISSING_TARGET:
419                 repo->got_indices = 1;
420                 ret = 0;
421                 break;
422         default:
423                 repo->got_indices = 0;
424                 ret = -1;
425         }
426
427         return ret;
428 }
429
430 static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
431 {
432         struct packed_git *target;
433         int ret;
434         struct slot_results results;
435         struct http_pack_request *preq;
436
437         if (fetch_indices(walker, repo))
438                 return -1;
439         target = find_sha1_pack(sha1, repo->packs);
440         if (!target)
441                 return -1;
442
443         if (walker->get_verbosely) {
444                 fprintf(stderr, "Getting pack %s\n",
445                         hash_to_hex(target->hash));
446                 fprintf(stderr, " which contains %s\n",
447                         hash_to_hex(sha1));
448         }
449
450         preq = new_http_pack_request(target, repo->base);
451         if (preq == NULL)
452                 goto abort;
453         preq->lst = &repo->packs;
454         preq->slot->results = &results;
455
456         if (start_active_slot(preq->slot)) {
457                 run_active_slot(preq->slot);
458                 if (results.curl_result != CURLE_OK) {
459                         error("Unable to get pack file %s\n%s", preq->url,
460                               curl_errorstr);
461                         goto abort;
462                 }
463         } else {
464                 error("Unable to start request");
465                 goto abort;
466         }
467
468         ret = finish_http_pack_request(preq);
469         release_http_pack_request(preq);
470         if (ret)
471                 return ret;
472
473         return 0;
474
475 abort:
476         return -1;
477 }
478
/*
 * Drop an object request that is no longer needed.  This is currently
 * nothing more than release_object_request() under a name that states
 * the caller's intent.
 */
static void abort_object_request(struct object_request *obj_req)
{
        release_object_request(obj_req);
}
483
484 static int fetch_object(struct walker *walker, unsigned char *hash)
485 {
486         char *hex = hash_to_hex(hash);
487         int ret = 0;
488         struct object_request *obj_req = NULL;
489         struct http_object_request *req;
490         struct list_head *pos, *head = &object_queue_head;
491
492         list_for_each(pos, head) {
493                 obj_req = list_entry(pos, struct object_request, node);
494                 if (hasheq(obj_req->oid.hash, hash))
495                         break;
496         }
497         if (obj_req == NULL)
498                 return error("Couldn't find request for %s in the queue", hex);
499
500         if (has_object_file(&obj_req->oid)) {
501                 if (obj_req->req != NULL)
502                         abort_http_object_request(obj_req->req);
503                 abort_object_request(obj_req);
504                 return 0;
505         }
506
507 #ifdef USE_CURL_MULTI
508         while (obj_req->state == WAITING)
509                 step_active_slots();
510 #else
511         start_object_request(walker, obj_req);
512 #endif
513
514         /*
515          * obj_req->req might change when fetching alternates in the callback
516          * process_object_response; therefore, the "shortcut" variable, req,
517          * is used only after we're done with slots.
518          */
519         while (obj_req->state == ACTIVE)
520                 run_active_slot(obj_req->req->slot);
521
522         req = obj_req->req;
523
524         if (req->localfile != -1) {
525                 close(req->localfile);
526                 req->localfile = -1;
527         }
528
529         normalize_curl_result(&req->curl_result, req->http_code,
530                               req->errorstr, sizeof(req->errorstr));
531
532         if (obj_req->state == ABORTED) {
533                 ret = error("Request for %s aborted", hex);
534         } else if (req->curl_result != CURLE_OK &&
535                    req->http_code != 416) {
536                 if (missing_target(req))
537                         ret = -1; /* Be silent, it is probably in a pack. */
538                 else
539                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
540                                     req->errorstr, req->curl_result,
541                                     req->http_code, hex);
542         } else if (req->zret != Z_STREAM_END) {
543                 walker->corrupt_object_found++;
544                 ret = error("File %s (%s) corrupt", hex, req->url);
545         } else if (!oideq(&obj_req->oid, &req->real_oid)) {
546                 ret = error("File %s has bad hash", hex);
547         } else if (req->rename < 0) {
548                 struct strbuf buf = STRBUF_INIT;
549                 loose_object_path(the_repository, &buf, &req->oid);
550                 ret = error("unable to write sha1 filename %s", buf.buf);
551                 strbuf_release(&buf);
552         }
553
554         release_http_object_request(req);
555         release_object_request(obj_req);
556         return ret;
557 }
558
559 static int fetch(struct walker *walker, unsigned char *hash)
560 {
561         struct walker_data *data = walker->data;
562         struct alt_base *altbase = data->alt;
563
564         if (!fetch_object(walker, hash))
565                 return 0;
566         while (altbase) {
567                 if (!http_fetch_pack(walker, altbase, hash))
568                         return 0;
569                 fetch_alternates(walker, data->alt->base);
570                 altbase = altbase->next;
571         }
572         return error("Unable to find %s under %s", hash_to_hex(hash),
573                      data->alt->base);
574 }
575
576 static int fetch_ref(struct walker *walker, struct ref *ref)
577 {
578         struct walker_data *data = walker->data;
579         return http_fetch_ref(data->alt->base, ref);
580 }
581
582 static void cleanup(struct walker *walker)
583 {
584         struct walker_data *data = walker->data;
585         struct alt_base *alt, *alt_next;
586
587         if (data) {
588                 alt = data->alt;
589                 while (alt) {
590                         alt_next = alt->next;
591
592                         free(alt->base);
593                         free(alt);
594
595                         alt = alt_next;
596                 }
597                 free(data);
598                 walker->data = NULL;
599         }
600 }
601
602 struct walker *get_http_walker(const char *url)
603 {
604         char *s;
605         struct walker_data *data = xmalloc(sizeof(struct walker_data));
606         struct walker *walker = xmalloc(sizeof(struct walker));
607
608         data->alt = xmalloc(sizeof(*data->alt));
609         data->alt->base = xstrdup(url);
610         for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
611                 *s = 0;
612
613         data->alt->got_indices = 0;
614         data->alt->packs = NULL;
615         data->alt->next = NULL;
616         data->got_alternates = -1;
617
618         walker->corrupt_object_found = 0;
619         walker->fetch = fetch;
620         walker->fetch_ref = fetch_ref;
621         walker->prefetch = prefetch;
622         walker->cleanup = cleanup;
623         walker->data = data;
624
625 #ifdef USE_CURL_MULTI
626         add_fill_function(walker, (int (*)(void *)) fill_active_slot);
627 #endif
628
629         return walker;
630 }