Merge branch 'dt/mailinfo' into maint
[git] / http-walker.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "walker.h"
4 #include "http.h"
5 #include "list.h"
6
/*
 * One repository base URL that objects may be fetched from.  Entries form
 * a singly linked list through "next"; the head is walker_data.alt.
 */
struct alt_base {
        char *base;                     /* base URL; freed by cleanup() */
        int got_indices;                /* 1 once fetch_indices() loaded the pack list */
        struct packed_git *packs;       /* filled in by http_get_info_packs() */
        struct alt_base *next;          /* next alternate, NULL at the tail */
};
13
/* Lifecycle of an object_request queued by prefetch(). */
enum object_request_state {
        WAITING,        /* queued by prefetch(), not started yet */
        ABORTED,        /* start_object_request() could not create or start it */
        ACTIVE,         /* handed to an active slot */
        COMPLETE        /* response processed, or the object already exists */
};
20
/* One loose-object fetch, kept on object_queue_head until released. */
struct object_request {
        struct walker *walker;                  /* walker that queued the request */
        unsigned char sha1[20];                 /* name of the object being fetched */
        struct alt_base *repo;                  /* repository currently being tried */
        enum object_request_state state;
        struct http_object_request *req;        /* NULL until start_object_request() */
        struct list_head node;                  /* linkage in object_queue_head */
};
29
/*
 * State shared between fetch_alternates(), which fills it in, and the
 * slot callback process_alternates_response(), which consumes it.
 */
struct alternates_request {
        struct walker *walker;
        const char *base;                       /* base URL the alternates belong to */
        struct strbuf *url;                     /* URL of the request in flight */
        struct strbuf *buffer;                  /* receives the response body */
        struct active_request_slot *slot;
        int http_specific;                      /* 1 while asking for http-alternates,
                                                   0 after falling back to alternates */
};
38
/* Per-walker private state, stored in walker->data by get_http_walker(). */
struct walker_data {
        const char *url;        /* NOTE(review): never assigned or read in this file */
        int got_alternates;     /* -1: not fetched or failed, 0: in progress, 1: fetched */
        struct alt_base *alt;   /* head of the repository list; first entry is the main URL */
};
44
/* Queue of object requests; prefetch() appends, release_object_request() removes. */
static LIST_HEAD(object_queue_head);

/* Forward declaration: process_object_response() calls this before its definition. */
static void fetch_alternates(struct walker *walker, const char *base);

/* Forward declaration: start_object_request() installs this as the slot callback. */
static void process_object_response(void *callback_data);
50
51 static void start_object_request(struct walker *walker,
52                                  struct object_request *obj_req)
53 {
54         struct active_request_slot *slot;
55         struct http_object_request *req;
56
57         req = new_http_object_request(obj_req->repo->base, obj_req->sha1);
58         if (req == NULL) {
59                 obj_req->state = ABORTED;
60                 return;
61         }
62         obj_req->req = req;
63
64         slot = req->slot;
65         slot->callback_func = process_object_response;
66         slot->callback_data = obj_req;
67
68         /* Try to get the request started, abort the request on error */
69         obj_req->state = ACTIVE;
70         if (!start_active_slot(slot)) {
71                 obj_req->state = ABORTED;
72                 release_http_object_request(req);
73                 return;
74         }
75 }
76
77 static void finish_object_request(struct object_request *obj_req)
78 {
79         if (finish_http_object_request(obj_req->req))
80                 return;
81
82         if (obj_req->req->rename == 0)
83                 walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
84 }
85
86 static void process_object_response(void *callback_data)
87 {
88         struct object_request *obj_req =
89                 (struct object_request *)callback_data;
90         struct walker *walker = obj_req->walker;
91         struct walker_data *data = walker->data;
92         struct alt_base *alt = data->alt;
93
94         process_http_object_request(obj_req->req);
95         obj_req->state = COMPLETE;
96
97         /* Use alternates if necessary */
98         if (missing_target(obj_req->req)) {
99                 fetch_alternates(walker, alt->base);
100                 if (obj_req->repo->next != NULL) {
101                         obj_req->repo =
102                                 obj_req->repo->next;
103                         release_http_object_request(obj_req->req);
104                         start_object_request(walker, obj_req);
105                         return;
106                 }
107         }
108
109         finish_object_request(obj_req);
110 }
111
112 static void release_object_request(struct object_request *obj_req)
113 {
114         if (obj_req->req !=NULL && obj_req->req->localfile != -1)
115                 error("fd leakage in release: %d", obj_req->req->localfile);
116
117         list_del(&obj_req->node);
118         free(obj_req);
119 }
120
#ifdef USE_CURL_MULTI
/*
 * Fill function registered by get_http_walker(): start the first WAITING
 * request in the queue whose object is not already present.  WAITING
 * requests whose object already exists are marked COMPLETE on the way.
 *
 * Returns 1 if a request was started, 0 if nothing was left to start.
 */
static int fill_active_slot(struct walker *walker)
{
        struct list_head *cur, *next;

        list_for_each_safe(cur, next, &object_queue_head) {
                struct object_request *candidate =
                        list_entry(cur, struct object_request, node);

                if (candidate->state != WAITING)
                        continue;
                if (!has_sha1_file(candidate->sha1)) {
                        start_object_request(walker, candidate);
                        return 1;
                }
                candidate->state = COMPLETE;
        }
        return 0;
}
#endif
141
142 static void prefetch(struct walker *walker, unsigned char *sha1)
143 {
144         struct object_request *newreq;
145         struct walker_data *data = walker->data;
146
147         newreq = xmalloc(sizeof(*newreq));
148         newreq->walker = walker;
149         hashcpy(newreq->sha1, sha1);
150         newreq->repo = data->alt;
151         newreq->state = WAITING;
152         newreq->req = NULL;
153
154         http_is_verbose = walker->get_verbosely;
155         list_add_tail(&newreq->node, &object_queue_head);
156
157 #ifdef USE_CURL_MULTI
158         fill_active_slots();
159         step_active_slots();
160 #endif
161 }
162
163 static void process_alternates_response(void *callback_data)
164 {
165         struct alternates_request *alt_req =
166                 (struct alternates_request *)callback_data;
167         struct walker *walker = alt_req->walker;
168         struct walker_data *cdata = walker->data;
169         struct active_request_slot *slot = alt_req->slot;
170         struct alt_base *tail = cdata->alt;
171         const char *base = alt_req->base;
172         const char null_byte = '\0';
173         char *data;
174         int i = 0;
175
176         if (alt_req->http_specific) {
177                 if (slot->curl_result != CURLE_OK ||
178                     !alt_req->buffer->len) {
179
180                         /* Try reusing the slot to get non-http alternates */
181                         alt_req->http_specific = 0;
182                         strbuf_reset(alt_req->url);
183                         strbuf_addf(alt_req->url, "%s/objects/info/alternates",
184                                     base);
185                         curl_easy_setopt(slot->curl, CURLOPT_URL,
186                                          alt_req->url->buf);
187                         active_requests++;
188                         slot->in_use = 1;
189                         if (slot->finished != NULL)
190                                 (*slot->finished) = 0;
191                         if (!start_active_slot(slot)) {
192                                 cdata->got_alternates = -1;
193                                 slot->in_use = 0;
194                                 if (slot->finished != NULL)
195                                         (*slot->finished) = 1;
196                         }
197                         return;
198                 }
199         } else if (slot->curl_result != CURLE_OK) {
200                 if (!missing_target(slot)) {
201                         cdata->got_alternates = -1;
202                         return;
203                 }
204         }
205
206         fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
207         alt_req->buffer->len--;
208         data = alt_req->buffer->buf;
209
210         while (i < alt_req->buffer->len) {
211                 int posn = i;
212                 while (posn < alt_req->buffer->len && data[posn] != '\n')
213                         posn++;
214                 if (data[posn] == '\n') {
215                         int okay = 0;
216                         int serverlen = 0;
217                         struct alt_base *newalt;
218                         if (data[i] == '/') {
219                                 /*
220                                  * This counts
221                                  * http://git.host/pub/scm/linux.git/
222                                  * -----------here^
223                                  * so memcpy(dst, base, serverlen) will
224                                  * copy up to "...git.host".
225                                  */
226                                 const char *colon_ss = strstr(base,"://");
227                                 if (colon_ss) {
228                                         serverlen = (strchr(colon_ss + 3, '/')
229                                                      - base);
230                                         okay = 1;
231                                 }
232                         } else if (!memcmp(data + i, "../", 3)) {
233                                 /*
234                                  * Relative URL; chop the corresponding
235                                  * number of subpath from base (and ../
236                                  * from data), and concatenate the result.
237                                  *
238                                  * The code first drops ../ from data, and
239                                  * then drops one ../ from data and one path
240                                  * from base.  IOW, one extra ../ is dropped
241                                  * from data than path is dropped from base.
242                                  *
243                                  * This is not wrong.  The alternate in
244                                  *     http://git.host/pub/scm/linux.git/
245                                  * to borrow from
246                                  *     http://git.host/pub/scm/linus.git/
247                                  * is ../../linus.git/objects/.  You need
248                                  * two ../../ to borrow from your direct
249                                  * neighbour.
250                                  */
251                                 i += 3;
252                                 serverlen = strlen(base);
253                                 while (i + 2 < posn &&
254                                        !memcmp(data + i, "../", 3)) {
255                                         do {
256                                                 serverlen--;
257                                         } while (serverlen &&
258                                                  base[serverlen - 1] != '/');
259                                         i += 3;
260                                 }
261                                 /* If the server got removed, give up. */
262                                 okay = strchr(base, ':') - base + 3 <
263                                        serverlen;
264                         } else if (alt_req->http_specific) {
265                                 char *colon = strchr(data + i, ':');
266                                 char *slash = strchr(data + i, '/');
267                                 if (colon && slash && colon < data + posn &&
268                                     slash < data + posn && colon < slash) {
269                                         okay = 1;
270                                 }
271                         }
272                         /* skip "objects\n" at end */
273                         if (okay) {
274                                 struct strbuf target = STRBUF_INIT;
275                                 strbuf_add(&target, base, serverlen);
276                                 strbuf_add(&target, data + i, posn - i - 7);
277                                 if (walker->get_verbosely)
278                                         fprintf(stderr, "Also look at %s\n",
279                                                 target.buf);
280                                 newalt = xmalloc(sizeof(*newalt));
281                                 newalt->next = NULL;
282                                 newalt->base = strbuf_detach(&target, NULL);
283                                 newalt->got_indices = 0;
284                                 newalt->packs = NULL;
285
286                                 while (tail->next != NULL)
287                                         tail = tail->next;
288                                 tail->next = newalt;
289                         }
290                 }
291                 i = posn + 1;
292         }
293
294         cdata->got_alternates = 1;
295 }
296
297 static void fetch_alternates(struct walker *walker, const char *base)
298 {
299         struct strbuf buffer = STRBUF_INIT;
300         struct strbuf url = STRBUF_INIT;
301         struct active_request_slot *slot;
302         struct alternates_request alt_req;
303         struct walker_data *cdata = walker->data;
304
305         /*
306          * If another request has already started fetching alternates,
307          * wait for them to arrive and return to processing this request's
308          * curl message
309          */
310 #ifdef USE_CURL_MULTI
311         while (cdata->got_alternates == 0) {
312                 step_active_slots();
313         }
314 #endif
315
316         /* Nothing to do if they've already been fetched */
317         if (cdata->got_alternates == 1)
318                 return;
319
320         /* Start the fetch */
321         cdata->got_alternates = 0;
322
323         if (walker->get_verbosely)
324                 fprintf(stderr, "Getting alternates list for %s\n", base);
325
326         strbuf_addf(&url, "%s/objects/info/http-alternates", base);
327
328         /*
329          * Use a callback to process the result, since another request
330          * may fail and need to have alternates loaded before continuing
331          */
332         slot = get_active_slot();
333         slot->callback_func = process_alternates_response;
334         alt_req.walker = walker;
335         slot->callback_data = &alt_req;
336
337         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
338         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
339         curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
340
341         alt_req.base = base;
342         alt_req.url = &url;
343         alt_req.buffer = &buffer;
344         alt_req.http_specific = 1;
345         alt_req.slot = slot;
346
347         if (start_active_slot(slot))
348                 run_active_slot(slot);
349         else
350                 cdata->got_alternates = -1;
351
352         strbuf_release(&buffer);
353         strbuf_release(&url);
354 }
355
356 static int fetch_indices(struct walker *walker, struct alt_base *repo)
357 {
358         int ret;
359
360         if (repo->got_indices)
361                 return 0;
362
363         if (walker->get_verbosely)
364                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
365
366         switch (http_get_info_packs(repo->base, &repo->packs)) {
367         case HTTP_OK:
368         case HTTP_MISSING_TARGET:
369                 repo->got_indices = 1;
370                 ret = 0;
371                 break;
372         default:
373                 repo->got_indices = 0;
374                 ret = -1;
375         }
376
377         return ret;
378 }
379
380 static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
381 {
382         struct packed_git *target;
383         int ret;
384         struct slot_results results;
385         struct http_pack_request *preq;
386
387         if (fetch_indices(walker, repo))
388                 return -1;
389         target = find_sha1_pack(sha1, repo->packs);
390         if (!target)
391                 return -1;
392
393         if (walker->get_verbosely) {
394                 fprintf(stderr, "Getting pack %s\n",
395                         sha1_to_hex(target->sha1));
396                 fprintf(stderr, " which contains %s\n",
397                         sha1_to_hex(sha1));
398         }
399
400         preq = new_http_pack_request(target, repo->base);
401         if (preq == NULL)
402                 goto abort;
403         preq->lst = &repo->packs;
404         preq->slot->results = &results;
405
406         if (start_active_slot(preq->slot)) {
407                 run_active_slot(preq->slot);
408                 if (results.curl_result != CURLE_OK) {
409                         error("Unable to get pack file %s\n%s", preq->url,
410                               curl_errorstr);
411                         goto abort;
412                 }
413         } else {
414                 error("Unable to start request");
415                 goto abort;
416         }
417
418         ret = finish_http_pack_request(preq);
419         release_http_pack_request(preq);
420         if (ret)
421                 return ret;
422
423         return 0;
424
425 abort:
426         return -1;
427 }
428
/*
 * Drop obj_req from the queue without fetching it.  This only forwards to
 * release_object_request(); the HTTP request, if any, is handled by the
 * caller.
 */
static void abort_object_request(struct object_request *obj_req)
{
        release_object_request(obj_req);
}
433
434 static int fetch_object(struct walker *walker, unsigned char *sha1)
435 {
436         char *hex = sha1_to_hex(sha1);
437         int ret = 0;
438         struct object_request *obj_req = NULL;
439         struct http_object_request *req;
440         struct list_head *pos, *head = &object_queue_head;
441
442         list_for_each(pos, head) {
443                 obj_req = list_entry(pos, struct object_request, node);
444                 if (!hashcmp(obj_req->sha1, sha1))
445                         break;
446         }
447         if (obj_req == NULL)
448                 return error("Couldn't find request for %s in the queue", hex);
449
450         if (has_sha1_file(obj_req->sha1)) {
451                 if (obj_req->req != NULL)
452                         abort_http_object_request(obj_req->req);
453                 abort_object_request(obj_req);
454                 return 0;
455         }
456
457 #ifdef USE_CURL_MULTI
458         while (obj_req->state == WAITING)
459                 step_active_slots();
460 #else
461         start_object_request(walker, obj_req);
462 #endif
463
464         /*
465          * obj_req->req might change when fetching alternates in the callback
466          * process_object_response; therefore, the "shortcut" variable, req,
467          * is used only after we're done with slots.
468          */
469         while (obj_req->state == ACTIVE)
470                 run_active_slot(obj_req->req->slot);
471
472         req = obj_req->req;
473
474         if (req->localfile != -1) {
475                 close(req->localfile);
476                 req->localfile = -1;
477         }
478
479         /*
480          * we turned off CURLOPT_FAILONERROR to avoid losing a
481          * persistent connection and got CURLE_OK.
482          */
483         if (req->http_code == 404 && req->curl_result == CURLE_OK &&
484                         (starts_with(req->url, "http://") ||
485                          starts_with(req->url, "https://")))
486                 req->curl_result = CURLE_HTTP_RETURNED_ERROR;
487
488         if (obj_req->state == ABORTED) {
489                 ret = error("Request for %s aborted", hex);
490         } else if (req->curl_result != CURLE_OK &&
491                    req->http_code != 416) {
492                 if (missing_target(req))
493                         ret = -1; /* Be silent, it is probably in a pack. */
494                 else
495                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
496                                     req->errorstr, req->curl_result,
497                                     req->http_code, hex);
498         } else if (req->zret != Z_STREAM_END) {
499                 walker->corrupt_object_found++;
500                 ret = error("File %s (%s) corrupt", hex, req->url);
501         } else if (hashcmp(obj_req->sha1, req->real_sha1)) {
502                 ret = error("File %s has bad hash", hex);
503         } else if (req->rename < 0) {
504                 ret = error("unable to write sha1 filename %s",
505                             sha1_file_name(req->sha1));
506         }
507
508         release_http_object_request(req);
509         release_object_request(obj_req);
510         return ret;
511 }
512
513 static int fetch(struct walker *walker, unsigned char *sha1)
514 {
515         struct walker_data *data = walker->data;
516         struct alt_base *altbase = data->alt;
517
518         if (!fetch_object(walker, sha1))
519                 return 0;
520         while (altbase) {
521                 if (!http_fetch_pack(walker, altbase, sha1))
522                         return 0;
523                 fetch_alternates(walker, data->alt->base);
524                 altbase = altbase->next;
525         }
526         return error("Unable to find %s under %s", sha1_to_hex(sha1),
527                      data->alt->base);
528 }
529
530 static int fetch_ref(struct walker *walker, struct ref *ref)
531 {
532         struct walker_data *data = walker->data;
533         return http_fetch_ref(data->alt->base, ref);
534 }
535
536 static void cleanup(struct walker *walker)
537 {
538         struct walker_data *data = walker->data;
539         struct alt_base *alt, *alt_next;
540
541         if (data) {
542                 alt = data->alt;
543                 while (alt) {
544                         alt_next = alt->next;
545
546                         free(alt->base);
547                         free(alt);
548
549                         alt = alt_next;
550                 }
551                 free(data);
552                 walker->data = NULL;
553         }
554 }
555
556 struct walker *get_http_walker(const char *url)
557 {
558         char *s;
559         struct walker_data *data = xmalloc(sizeof(struct walker_data));
560         struct walker *walker = xmalloc(sizeof(struct walker));
561
562         data->alt = xmalloc(sizeof(*data->alt));
563         data->alt->base = xstrdup(url);
564         for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
565                 *s = 0;
566
567         data->alt->got_indices = 0;
568         data->alt->packs = NULL;
569         data->alt->next = NULL;
570         data->got_alternates = -1;
571
572         walker->corrupt_object_found = 0;
573         walker->fetch = fetch;
574         walker->fetch_ref = fetch_ref;
575         walker->prefetch = prefetch;
576         walker->cleanup = cleanup;
577         walker->data = data;
578
579 #ifdef USE_CURL_MULTI
580         add_fill_function(walker, (int (*)(void *)) fill_active_slot);
581 #endif
582
583         return walker;
584 }