Merge branch 'nd/shallow-fixup'
[git] / http-walker.c
1 #include "cache.h"
2 #include "commit.h"
3 #include "walker.h"
4 #include "http.h"
5 #include "list.h"
6
/*
 * One object store objects may be downloaded from: the repository URL
 * given to get_http_walker(), or an alternate appended later by
 * process_alternates_response().  Stores form a singly linked list.
 */
struct alt_base {
        /* Base URL of this store; heap-allocated, freed by cleanup(). */
        char *base;
        /* Set to 1 by fetch_indices() once the pack list was obtained. */
        int got_indices;
        /* Pack list filled in through http_get_info_packs(). */
        struct packed_git *packs;
        /* Next store to try, or NULL at the end of the list. */
        struct alt_base *next;
};
13
/* Progress of a single queued object download. */
enum object_request_state {
        /* Queued by prefetch(); no transfer has been started yet. */
        WAITING,
        /* start_object_request() could not create or start the transfer. */
        ABORTED,
        /* The transfer has been handed to an active slot. */
        ACTIVE,
        /* The response was processed, or the object already exists locally. */
        COMPLETE
};
20
21 struct object_request {
22         struct walker *walker;
23         unsigned char sha1[20];
24         struct alt_base *repo;
25         enum object_request_state state;
26         struct http_object_request *req;
27         struct list_head node;
28 };
29
/*
 * State shared between fetch_alternates() and its slot callback,
 * process_alternates_response().
 */
struct alternates_request {
        /* Walker on whose behalf alternates are fetched. */
        struct walker *walker;
        /* Base URL of the repository whose alternates are requested. */
        const char *base;
        /* URL of the request currently in flight. */
        struct strbuf *url;
        /* Receives the response body. */
        struct strbuf *buffer;
        /* Slot carrying the request; reused for the fallback request. */
        struct active_request_slot *slot;
        /*
         * Nonzero while objects/info/http-alternates is being requested;
         * cleared when falling back to objects/info/alternates.
         */
        int http_specific;
};
38
/* Per-walker private state, stored in walker->data. */
struct walker_data {
        /* NOTE(review): never assigned or read in the visible code. */
        const char *url;
        /*
         * -1: alternates not fetched yet, or the fetch failed;
         *  0: a fetch is in progress;
         *  1: alternates have been loaded.
         */
        int got_alternates;
        /* Head of the object store list; the first entry is the main URL. */
        struct alt_base *alt;
};
44
/* All pending object requests, appended in the order prefetch() saw them. */
static LIST_HEAD(object_queue_head);

/* Forward declarations for functions used before their definitions. */
static void fetch_alternates(struct walker *walker, const char *base);
static void process_object_response(void *callback_data);
50
51 static void start_object_request(struct walker *walker,
52                                  struct object_request *obj_req)
53 {
54         struct active_request_slot *slot;
55         struct http_object_request *req;
56
57         req = new_http_object_request(obj_req->repo->base, obj_req->sha1);
58         if (req == NULL) {
59                 obj_req->state = ABORTED;
60                 return;
61         }
62         obj_req->req = req;
63
64         slot = req->slot;
65         slot->callback_func = process_object_response;
66         slot->callback_data = obj_req;
67
68         /* Try to get the request started, abort the request on error */
69         obj_req->state = ACTIVE;
70         if (!start_active_slot(slot)) {
71                 obj_req->state = ABORTED;
72                 release_http_object_request(req);
73                 return;
74         }
75 }
76
77 static void finish_object_request(struct object_request *obj_req)
78 {
79         if (finish_http_object_request(obj_req->req))
80                 return;
81
82         if (obj_req->req->rename == 0)
83                 walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1));
84 }
85
86 static void process_object_response(void *callback_data)
87 {
88         struct object_request *obj_req =
89                 (struct object_request *)callback_data;
90         struct walker *walker = obj_req->walker;
91         struct walker_data *data = walker->data;
92         struct alt_base *alt = data->alt;
93
94         process_http_object_request(obj_req->req);
95         obj_req->state = COMPLETE;
96
97         /* Use alternates if necessary */
98         if (missing_target(obj_req->req)) {
99                 fetch_alternates(walker, alt->base);
100                 if (obj_req->repo->next != NULL) {
101                         obj_req->repo =
102                                 obj_req->repo->next;
103                         release_http_object_request(obj_req->req);
104                         start_object_request(walker, obj_req);
105                         return;
106                 }
107         }
108
109         finish_object_request(obj_req);
110 }
111
112 static void release_object_request(struct object_request *obj_req)
113 {
114         if (obj_req->req !=NULL && obj_req->req->localfile != -1)
115                 error("fd leakage in release: %d", obj_req->req->localfile);
116
117         list_del(&obj_req->node);
118         free(obj_req);
119 }
120
#ifdef USE_CURL_MULTI
/*
 * Fill function registered through add_fill_function().  Starts the first
 * WAITING request whose object is not available locally and returns 1.
 * WAITING requests whose object already exists are marked COMPLETE along
 * the way.  Returns 0 when nothing was started.
 */
static int fill_active_slot(struct walker *walker)
{
        struct list_head *cur, *next;

        list_for_each_safe(cur, next, &object_queue_head) {
                struct object_request *candidate =
                        list_entry(cur, struct object_request, node);

                if (candidate->state != WAITING)
                        continue;

                if (!has_sha1_file(candidate->sha1)) {
                        start_object_request(walker, candidate);
                        return 1;
                }
                /* Already have it; no transfer needed. */
                candidate->state = COMPLETE;
        }
        return 0;
}
#endif
141
142 static void prefetch(struct walker *walker, unsigned char *sha1)
143 {
144         struct object_request *newreq;
145         struct walker_data *data = walker->data;
146
147         newreq = xmalloc(sizeof(*newreq));
148         newreq->walker = walker;
149         hashcpy(newreq->sha1, sha1);
150         newreq->repo = data->alt;
151         newreq->state = WAITING;
152         newreq->req = NULL;
153
154         http_is_verbose = walker->get_verbosely;
155         list_add_tail(&newreq->node, &object_queue_head);
156
157 #ifdef USE_CURL_MULTI
158         fill_active_slots();
159         step_active_slots();
160 #endif
161 }
162
163 static void process_alternates_response(void *callback_data)
164 {
165         struct alternates_request *alt_req =
166                 (struct alternates_request *)callback_data;
167         struct walker *walker = alt_req->walker;
168         struct walker_data *cdata = walker->data;
169         struct active_request_slot *slot = alt_req->slot;
170         struct alt_base *tail = cdata->alt;
171         const char *base = alt_req->base;
172         const char null_byte = '\0';
173         char *data;
174         int i = 0;
175
176         if (alt_req->http_specific) {
177                 if (slot->curl_result != CURLE_OK ||
178                     !alt_req->buffer->len) {
179
180                         /* Try reusing the slot to get non-http alternates */
181                         alt_req->http_specific = 0;
182                         strbuf_reset(alt_req->url);
183                         strbuf_addf(alt_req->url, "%s/objects/info/alternates",
184                                     base);
185                         curl_easy_setopt(slot->curl, CURLOPT_URL,
186                                          alt_req->url->buf);
187                         active_requests++;
188                         slot->in_use = 1;
189                         if (slot->finished != NULL)
190                                 (*slot->finished) = 0;
191                         if (!start_active_slot(slot)) {
192                                 cdata->got_alternates = -1;
193                                 slot->in_use = 0;
194                                 if (slot->finished != NULL)
195                                         (*slot->finished) = 1;
196                         }
197                         return;
198                 }
199         } else if (slot->curl_result != CURLE_OK) {
200                 if (!missing_target(slot)) {
201                         cdata->got_alternates = -1;
202                         return;
203                 }
204         }
205
206         fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
207         alt_req->buffer->len--;
208         data = alt_req->buffer->buf;
209
210         while (i < alt_req->buffer->len) {
211                 int posn = i;
212                 while (posn < alt_req->buffer->len && data[posn] != '\n')
213                         posn++;
214                 if (data[posn] == '\n') {
215                         int okay = 0;
216                         int serverlen = 0;
217                         struct alt_base *newalt;
218                         if (data[i] == '/') {
219                                 /*
220                                  * This counts
221                                  * http://git.host/pub/scm/linux.git/
222                                  * -----------here^
223                                  * so memcpy(dst, base, serverlen) will
224                                  * copy up to "...git.host".
225                                  */
226                                 const char *colon_ss = strstr(base,"://");
227                                 if (colon_ss) {
228                                         serverlen = (strchr(colon_ss + 3, '/')
229                                                      - base);
230                                         okay = 1;
231                                 }
232                         } else if (!memcmp(data + i, "../", 3)) {
233                                 /*
234                                  * Relative URL; chop the corresponding
235                                  * number of subpath from base (and ../
236                                  * from data), and concatenate the result.
237                                  *
238                                  * The code first drops ../ from data, and
239                                  * then drops one ../ from data and one path
240                                  * from base.  IOW, one extra ../ is dropped
241                                  * from data than path is dropped from base.
242                                  *
243                                  * This is not wrong.  The alternate in
244                                  *     http://git.host/pub/scm/linux.git/
245                                  * to borrow from
246                                  *     http://git.host/pub/scm/linus.git/
247                                  * is ../../linus.git/objects/.  You need
248                                  * two ../../ to borrow from your direct
249                                  * neighbour.
250                                  */
251                                 i += 3;
252                                 serverlen = strlen(base);
253                                 while (i + 2 < posn &&
254                                        !memcmp(data + i, "../", 3)) {
255                                         do {
256                                                 serverlen--;
257                                         } while (serverlen &&
258                                                  base[serverlen - 1] != '/');
259                                         i += 3;
260                                 }
261                                 /* If the server got removed, give up. */
262                                 okay = strchr(base, ':') - base + 3 <
263                                        serverlen;
264                         } else if (alt_req->http_specific) {
265                                 char *colon = strchr(data + i, ':');
266                                 char *slash = strchr(data + i, '/');
267                                 if (colon && slash && colon < data + posn &&
268                                     slash < data + posn && colon < slash) {
269                                         okay = 1;
270                                 }
271                         }
272                         /* skip "objects\n" at end */
273                         if (okay) {
274                                 struct strbuf target = STRBUF_INIT;
275                                 strbuf_add(&target, base, serverlen);
276                                 strbuf_add(&target, data + i, posn - i - 7);
277                                 warning("adding alternate object store: %s",
278                                         target.buf);
279                                 newalt = xmalloc(sizeof(*newalt));
280                                 newalt->next = NULL;
281                                 newalt->base = strbuf_detach(&target, NULL);
282                                 newalt->got_indices = 0;
283                                 newalt->packs = NULL;
284
285                                 while (tail->next != NULL)
286                                         tail = tail->next;
287                                 tail->next = newalt;
288                         }
289                 }
290                 i = posn + 1;
291         }
292
293         cdata->got_alternates = 1;
294 }
295
296 static void fetch_alternates(struct walker *walker, const char *base)
297 {
298         struct strbuf buffer = STRBUF_INIT;
299         struct strbuf url = STRBUF_INIT;
300         struct active_request_slot *slot;
301         struct alternates_request alt_req;
302         struct walker_data *cdata = walker->data;
303
304         if (http_follow_config != HTTP_FOLLOW_ALWAYS)
305                 return;
306
307         /*
308          * If another request has already started fetching alternates,
309          * wait for them to arrive and return to processing this request's
310          * curl message
311          */
312 #ifdef USE_CURL_MULTI
313         while (cdata->got_alternates == 0) {
314                 step_active_slots();
315         }
316 #endif
317
318         /* Nothing to do if they've already been fetched */
319         if (cdata->got_alternates == 1)
320                 return;
321
322         /* Start the fetch */
323         cdata->got_alternates = 0;
324
325         if (walker->get_verbosely)
326                 fprintf(stderr, "Getting alternates list for %s\n", base);
327
328         strbuf_addf(&url, "%s/objects/info/http-alternates", base);
329
330         /*
331          * Use a callback to process the result, since another request
332          * may fail and need to have alternates loaded before continuing
333          */
334         slot = get_active_slot();
335         slot->callback_func = process_alternates_response;
336         alt_req.walker = walker;
337         slot->callback_data = &alt_req;
338
339         curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
340         curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
341         curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
342
343         alt_req.base = base;
344         alt_req.url = &url;
345         alt_req.buffer = &buffer;
346         alt_req.http_specific = 1;
347         alt_req.slot = slot;
348
349         if (start_active_slot(slot))
350                 run_active_slot(slot);
351         else
352                 cdata->got_alternates = -1;
353
354         strbuf_release(&buffer);
355         strbuf_release(&url);
356 }
357
358 static int fetch_indices(struct walker *walker, struct alt_base *repo)
359 {
360         int ret;
361
362         if (repo->got_indices)
363                 return 0;
364
365         if (walker->get_verbosely)
366                 fprintf(stderr, "Getting pack list for %s\n", repo->base);
367
368         switch (http_get_info_packs(repo->base, &repo->packs)) {
369         case HTTP_OK:
370         case HTTP_MISSING_TARGET:
371                 repo->got_indices = 1;
372                 ret = 0;
373                 break;
374         default:
375                 repo->got_indices = 0;
376                 ret = -1;
377         }
378
379         return ret;
380 }
381
382 static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
383 {
384         struct packed_git *target;
385         int ret;
386         struct slot_results results;
387         struct http_pack_request *preq;
388
389         if (fetch_indices(walker, repo))
390                 return -1;
391         target = find_sha1_pack(sha1, repo->packs);
392         if (!target)
393                 return -1;
394
395         if (walker->get_verbosely) {
396                 fprintf(stderr, "Getting pack %s\n",
397                         sha1_to_hex(target->sha1));
398                 fprintf(stderr, " which contains %s\n",
399                         sha1_to_hex(sha1));
400         }
401
402         preq = new_http_pack_request(target, repo->base);
403         if (preq == NULL)
404                 goto abort;
405         preq->lst = &repo->packs;
406         preq->slot->results = &results;
407
408         if (start_active_slot(preq->slot)) {
409                 run_active_slot(preq->slot);
410                 if (results.curl_result != CURLE_OK) {
411                         error("Unable to get pack file %s\n%s", preq->url,
412                               curl_errorstr);
413                         goto abort;
414                 }
415         } else {
416                 error("Unable to start request");
417                 goto abort;
418         }
419
420         ret = finish_http_pack_request(preq);
421         release_http_pack_request(preq);
422         if (ret)
423                 return ret;
424
425         return 0;
426
427 abort:
428         return -1;
429 }
430
/*
 * Drop a request that is no longer needed.  This simply forwards to
 * release_object_request(), which unlinks and frees obj_req.
 */
static void abort_object_request(struct object_request *obj_req)
{
        release_object_request(obj_req);
}
435
436 static int fetch_object(struct walker *walker, unsigned char *sha1)
437 {
438         char *hex = sha1_to_hex(sha1);
439         int ret = 0;
440         struct object_request *obj_req = NULL;
441         struct http_object_request *req;
442         struct list_head *pos, *head = &object_queue_head;
443
444         list_for_each(pos, head) {
445                 obj_req = list_entry(pos, struct object_request, node);
446                 if (!hashcmp(obj_req->sha1, sha1))
447                         break;
448         }
449         if (obj_req == NULL)
450                 return error("Couldn't find request for %s in the queue", hex);
451
452         if (has_sha1_file(obj_req->sha1)) {
453                 if (obj_req->req != NULL)
454                         abort_http_object_request(obj_req->req);
455                 abort_object_request(obj_req);
456                 return 0;
457         }
458
459 #ifdef USE_CURL_MULTI
460         while (obj_req->state == WAITING)
461                 step_active_slots();
462 #else
463         start_object_request(walker, obj_req);
464 #endif
465
466         /*
467          * obj_req->req might change when fetching alternates in the callback
468          * process_object_response; therefore, the "shortcut" variable, req,
469          * is used only after we're done with slots.
470          */
471         while (obj_req->state == ACTIVE)
472                 run_active_slot(obj_req->req->slot);
473
474         req = obj_req->req;
475
476         if (req->localfile != -1) {
477                 close(req->localfile);
478                 req->localfile = -1;
479         }
480
481         /*
482          * we turned off CURLOPT_FAILONERROR to avoid losing a
483          * persistent connection and got CURLE_OK.
484          */
485         if (req->http_code >= 300 && req->curl_result == CURLE_OK &&
486                         (starts_with(req->url, "http://") ||
487                          starts_with(req->url, "https://"))) {
488                 req->curl_result = CURLE_HTTP_RETURNED_ERROR;
489                 xsnprintf(req->errorstr, sizeof(req->errorstr),
490                           "HTTP request failed");
491         }
492
493         if (obj_req->state == ABORTED) {
494                 ret = error("Request for %s aborted", hex);
495         } else if (req->curl_result != CURLE_OK &&
496                    req->http_code != 416) {
497                 if (missing_target(req))
498                         ret = -1; /* Be silent, it is probably in a pack. */
499                 else
500                         ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
501                                     req->errorstr, req->curl_result,
502                                     req->http_code, hex);
503         } else if (req->zret != Z_STREAM_END) {
504                 walker->corrupt_object_found++;
505                 ret = error("File %s (%s) corrupt", hex, req->url);
506         } else if (hashcmp(obj_req->sha1, req->real_sha1)) {
507                 ret = error("File %s has bad hash", hex);
508         } else if (req->rename < 0) {
509                 ret = error("unable to write sha1 filename %s",
510                             sha1_file_name(req->sha1));
511         }
512
513         release_http_object_request(req);
514         release_object_request(obj_req);
515         return ret;
516 }
517
518 static int fetch(struct walker *walker, unsigned char *sha1)
519 {
520         struct walker_data *data = walker->data;
521         struct alt_base *altbase = data->alt;
522
523         if (!fetch_object(walker, sha1))
524                 return 0;
525         while (altbase) {
526                 if (!http_fetch_pack(walker, altbase, sha1))
527                         return 0;
528                 fetch_alternates(walker, data->alt->base);
529                 altbase = altbase->next;
530         }
531         return error("Unable to find %s under %s", sha1_to_hex(sha1),
532                      data->alt->base);
533 }
534
535 static int fetch_ref(struct walker *walker, struct ref *ref)
536 {
537         struct walker_data *data = walker->data;
538         return http_fetch_ref(data->alt->base, ref);
539 }
540
541 static void cleanup(struct walker *walker)
542 {
543         struct walker_data *data = walker->data;
544         struct alt_base *alt, *alt_next;
545
546         if (data) {
547                 alt = data->alt;
548                 while (alt) {
549                         alt_next = alt->next;
550
551                         free(alt->base);
552                         free(alt);
553
554                         alt = alt_next;
555                 }
556                 free(data);
557                 walker->data = NULL;
558         }
559 }
560
561 struct walker *get_http_walker(const char *url)
562 {
563         char *s;
564         struct walker_data *data = xmalloc(sizeof(struct walker_data));
565         struct walker *walker = xmalloc(sizeof(struct walker));
566
567         data->alt = xmalloc(sizeof(*data->alt));
568         data->alt->base = xstrdup(url);
569         for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
570                 *s = 0;
571
572         data->alt->got_indices = 0;
573         data->alt->packs = NULL;
574         data->alt->next = NULL;
575         data->got_alternates = -1;
576
577         walker->corrupt_object_found = 0;
578         walker->fetch = fetch;
579         walker->fetch_ref = fetch_ref;
580         walker->prefetch = prefetch;
581         walker->cleanup = cleanup;
582         walker->data = data;
583
584 #ifdef USE_CURL_MULTI
585         add_fill_function(walker, (int (*)(void *)) fill_active_slot);
586 #endif
587
588         return walker;
589 }