2 * 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
16 #include <linux/blkdev.h>
17 #include <linux/bio.h>
18 #include <linux/dst.h>
20 #include <linux/in6.h>
21 #include <linux/poll.h>
22 #include <linux/slab.h>
23 #include <linux/socket.h>
28 * Export bioset is used for server block IO requests.
/*
 * Created in dst_export_init() and released in dst_export_exit();
 * dst_bio_destructor() frees export bios back into it.
 */
30 static struct bio_set *dst_bio_set;
/*
 * Export-side initialization: creates the bio set used for server
 * block IO requests.
 * NOTE(review): only part of the body is visible in this excerpt; the
 * failure check on bioset_create() and the return value are not shown.
 */
32 int __init dst_export_init(void)
/*
 * 32 is the pool size handed to bioset_create(). The second argument is
 * sizeof(struct dst_export_priv); dst_process_io() locates its private
 * data exactly that many bytes in front of the allocated bio.
 */
36 dst_bio_set = bioset_create(32, sizeof(struct dst_export_priv));
/* Export-side teardown: releases the bio set created by dst_export_init(). */
46 void dst_export_exit(void)
48 bioset_free(dst_bio_set);
52 * When client connects and autonegotiates with the server node,
53 * its permissions are checked against the security attributes and sent
/*
 * Look up the permission mask for the peer address stored in st->ctl.addr.
 * The security list of main->node is walked under security_lock and each
 * entry is compared with the peer address: family first, then sa_data_len,
 * then the address bytes via memcmp().
 * perm starts at 0 and is assigned s->permissions after the memcmp() check.
 * NOTE(review): the assignment of s from sentry, the statements that follow
 * each comparison and the final return are not visible in this excerpt;
 * presumably perm is assigned only on a full match and is the value
 * returned - confirm against the complete source.
 */
56 static unsigned int dst_check_permissions(struct dst_state *main, struct dst_state *st)
58 struct dst_node *n = main->node;
59 struct dst_secure *sentry;
60 struct dst_secure_user *s;
61 struct saddr *sa = &st->ctl.addr;
62 unsigned int perm = 0;
/* The whole list walk is serialized by the node security_lock. */
64 mutex_lock(&n->security_lock);
65 list_for_each_entry(sentry, &n->security_list, sec_entry) {
68 if (s->addr.sa_family != sa->sa_family)
71 if (s->addr.sa_data_len != sa->sa_data_len)
75 * This '2' below is a port field. This may be very wrong to do
76 * in atalk for example though. If there will be any need to extent
77 * protocol to something else, I can create per-family helpers and
78 * use them instead of this memcmp.
/* Both buffers are compared starting 2 bytes in, i.e. past the port. */
80 if (memcmp(s->addr.sa_data + 2, sa->sa_data + 2,
84 perm = s->permissions;
86 mutex_unlock(&n->security_lock);
92 * Accept new client: allocate appropriate network state and check permissions.
/*
 * Wait on the listening state st for an incoming connection, accept it
 * with kernel_accept() and build a new dst_state for the client.
 * The peer address is read into new->ctl.addr and passed through
 * dst_check_permissions(); a zero permission mask is reported with the
 * "Client is not allowed to connect" message. Polling for the new state
 * is then set up with dst_poll_init().
 * NOTE(review): the declarations of err and wait, several branch bodies,
 * the error labels and the return statements are not visible in this
 * excerpt, so the exact error/return contract cannot be stated from here.
 */
94 static struct dst_state *dst_accept_client(struct dst_state *st)
96 unsigned int revents = 0;
97 unsigned int err_mask = POLLERR | POLLHUP | POLLRDHUP;
98 unsigned int mask = err_mask | POLLIN;
99 struct dst_node *n = st->node;
101 struct socket *sock = NULL;
102 struct dst_state *new;
/* Keep going until a socket has been accepted or err becomes non-zero. */
104 while (!err && !sock) {
105 revents = dst_state_poll(st);
/* Nothing readable and no error condition yet: sleep on thread_wait. */
107 if (!(revents & mask)) {
111 prepare_to_wait(&st->thread_wait,
112 &wait, TASK_INTERRUPTIBLE);
/* A cleared scan timeout or a set need_exit flag is checked here. */
113 if (!n->trans_scan_timeout || st->need_exit)
/* Poll again after registering on the wait queue. */
116 revents = dst_state_poll(st);
121 if (signal_pending(current))
125 * Magic HZ? Polling check above is not safe in
126 * all cases (like socket reset in BH context),
127 * so it is simpler just to postpone it to the
128 * process context instead of implementing special
131 schedule_timeout(HZ);
133 finish_wait(&st->thread_wait, &wait);
139 dprintk("%s: st: %p, revents: %x [err: %d, in: %d].\n",
140 __func__, st, revents, revents & err_mask,
143 if (revents & err_mask) {
144 dprintk("%s: revents: %x, socket: %p, err: %d.\n",
145 __func__, revents, st->socket, err);
149 if (!n->trans_scan_timeout || st->need_exit)
/* Accept only when the listening socket exists and POLLIN is reported. */
152 if (st->socket && (revents & POLLIN))
153 err = kernel_accept(st->socket, &sock, 0);
155 dst_state_unlock(st);
/* The client state is allocated against the same node as the listener. */
161 new = dst_state_alloc(st->node);
164 goto err_out_release;
/*
 * sa_data_len is preset to sizeof(struct sockaddr) and its address is
 * handed to kernel_getpeername() as the length argument.
 */
168 new->ctl.addr.sa_data_len = sizeof(struct sockaddr);
169 err = kernel_getpeername(sock, (struct sockaddr *)&new->ctl.addr,
170 (int *)&new->ctl.addr.sa_data_len);
174 new->permissions = dst_check_permissions(st, new);
175 if (new->permissions == 0) {
177 dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
178 "Client is not allowed to connect");
182 err = dst_poll_init(new);
186 dst_dump_addr(sock, (struct sockaddr *)&new->ctl.addr,
200 * Each server's block request eventually finishes.
201 * Usually this happens in hard irq context of the appropriate controller,
202 * so to play well with all cases we just put the BIO into the queue
203 * and wake up the processing thread, which gets the completed request and
204 * sends it (encrypting if needed) back to the client (if it was a read
205 * request), or sends back a reply that writing successfully completed.
/*
 * Drain st->request_list. Each entry is unlinked while holding
 * st->request_lock with interrupts saved/disabled, then handled outside
 * the lock: dst_export_crypto() when crypto is enabled for the node and
 * the bio is a READ, dst_export_send_bio() otherwise.
 * NOTE(review): how bio is obtained from p, the statement separating the
 * two calls (presumably an else) and the error handling / return value
 * are not visible in this excerpt.
 */
207 static int dst_export_process_request_queue(struct dst_state *st)
210 struct dst_export_priv *p = NULL;
214 while (!list_empty(&st->request_list)) {
215 spin_lock_irqsave(&st->request_lock, flags);
/* Re-check under the lock: the unlocked test in the loop header can race. */
216 if (!list_empty(&st->request_list)) {
217 p = list_first_entry(&st->request_list,
218 struct dst_export_priv, request_entry);
219 list_del(&p->request_entry);
221 spin_unlock_irqrestore(&st->request_lock, flags);
228 if (dst_need_crypto(st->node) && (bio_data_dir(bio) == READ))
229 err = dst_export_crypto(st->node, bio);
231 err = dst_export_send_bio(bio);
241 * Cleanup export state.
242 * It has to wait until all requests are finished,
243 * and then free them all.
/*
 * Tear down the export side of a client state.
 * Loops while st->refcnt is above 1: sleeps on st->thread_wait until the
 * count reaches 1 or request_list becomes non-empty (with a timeout), then
 * unlinks every queued dst_export_priv under st->request_lock.
 * NOTE(review): the timeout value and what is done with each unlinked
 * entry are not visible in this excerpt.
 */
245 static void dst_state_cleanup_export(struct dst_state *st)
247 struct dst_export_priv *p;
251 * This loop waits for all pending bios to be completed and freed.
253 while (atomic_read(&st->refcnt) > 1) {
254 dprintk("%s: st: %p, refcnt: %d, list_empty: %d.\n",
255 __func__, st, atomic_read(&st->refcnt),
256 list_empty(&st->request_list));
257 wait_event_timeout(st->thread_wait,
258 (atomic_read(&st->refcnt) == 1) ||
259 !list_empty(&st->request_list),
262 while (!list_empty(&st->request_list)) {
264 spin_lock_irqsave(&st->request_lock, flags);
/* Same lock-then-recheck pattern as the request queue processing. */
265 if (!list_empty(&st->request_list)) {
266 p = list_first_entry(&st->request_list,
267 struct dst_export_priv, request_entry);
268 list_del(&p->request_entry);
270 spin_unlock_irqrestore(&st->request_lock, flags);
275 dprintk("%s: st: %p, refcnt: %d, list_empty: %d, p: %p.\n",
276 __func__, st, atomic_read(&st->refcnt),
277 list_empty(&st->request_list), p);
285 * Client accepting thread.
286 * Not only accepts new connection, but also schedules receiving thread
287 * and performs request completion described above.
/*
 * Accepting thread body (scheduled from dst_node_init_listened()).
 * init_data is the dst_node, schedule_data is the listening dst_state.
 * While the node scan timeout is non-zero and the listening state is not
 * asked to exit, it accepts a client, schedules its receiver, and then
 * services that client: it sleeps on st->thread_wait and runs
 * dst_export_process_request_queue() whenever request_list is non-empty.
 * When the client loop ends it wakes st->thread_wait and calls
 * dst_state_cleanup_export(). After the outer loop the listening state is
 * shut down (poll exit and socket release under the state lock) and its
 * reference is dropped.
 * NOTE(review): error checks after dst_accept_client() and
 * dst_state_schedule_receiver(), the remaining wait condition and timeout,
 * and the return value are not visible in this excerpt.
 */
289 static int dst_accept(void *init_data, void *schedule_data)
291 struct dst_state *main_st = schedule_data;
292 struct dst_node *n = init_data;
293 struct dst_state *st;
296 while (n->trans_scan_timeout && !main_st->need_exit) {
297 dprintk("%s: main_st: %p, n: %p.\n", __func__, main_st, n);
298 st = dst_accept_client(main_st);
302 err = dst_state_schedule_receiver(st);
/* Per-client service loop. */
304 while (n->trans_scan_timeout) {
305 err = wait_event_interruptible_timeout(st->thread_wait,
306 !list_empty(&st->request_list) ||
307 !n->trans_scan_timeout ||
311 if (!n->trans_scan_timeout || st->need_exit)
314 if (list_empty(&st->request_list))
317 err = dst_export_process_request_queue(st);
323 wake_up(&st->thread_wait);
326 dst_state_cleanup_export(st);
329 dprintk("%s: freeing listening socket st: %p.\n", __func__, main_st);
/* Poll teardown and socket release happen with the state lock held. */
331 dst_state_lock(main_st);
332 dst_poll_exit(main_st);
333 dst_state_socket_release(main_st);
334 dst_state_unlock(main_st);
335 dst_state_put(main_st);
336 dprintk("%s: freed listening socket st: %p.\n", __func__, main_st);
/*
 * Start exporting node n.
 * An empty security list is reported with an error message, since no
 * client could pass the permission check. Otherwise the result of
 * dst_node_trans_init() with sizeof(struct dst_export_priv) is returned.
 * NOTE(review): the value returned on the empty-list path is not visible
 * in this excerpt.
 */
341 int dst_start_export(struct dst_node *n)
343 if (list_empty(&n->security_list)) {
344 printk(KERN_ERR "You are trying to export node '%s' without security attributes.\n"
345 "No clients will be allowed to connect. Exiting.\n", n->name);
348 return dst_node_trans_init(n, sizeof(struct dst_export_priv));
352 * Initialize listening state and schedule accepting thread.
/*
 * Set up the listening side of node n from the network control block in
 * le->ctl: the settings are copied into n->info->net and into a freshly
 * allocated state, a socket is created, bound to ctl->addr and put into
 * listening mode, polling is initialized, and dst_accept() is scheduled
 * on the node thread pool with the new state as its schedule data.
 * NOTE(review): the failure checks after each call, most error labels and
 * the return statements are not visible in this excerpt; presumably err is
 * returned on failure after unwinding - confirm.
 */
354 int dst_node_init_listened(struct dst_node *n, struct dst_export_ctl *le)
356 struct dst_state *st;
358 struct dst_network_ctl *ctl = &le->ctl;
360 memcpy(&n->info->net, ctl, sizeof(struct dst_network_ctl));
362 st = dst_state_alloc(n);
367 memcpy(&st->ctl, ctl, sizeof(struct dst_network_ctl));
369 err = dst_state_socket_create(st);
/* sk_reuse is set before binding the listening socket. */
373 st->socket->sk->sk_reuse = 1;
375 err = kernel_bind(st->socket, (struct sockaddr *)&ctl->addr,
376 ctl->addr.sa_data_len);
378 goto err_out_socket_release;
/* Listen backlog is 1024. */
380 err = kernel_listen(st->socket, 1024);
382 goto err_out_socket_release;
385 err = dst_poll_init(st);
387 goto err_out_socket_release;
391 err = thread_pool_schedule(n->pool, dst_thread_setup,
392 dst_accept, st, MAX_SCHEDULE_TIMEOUT);
394 goto err_out_poll_exit;
400 err_out_socket_release:
401 dst_state_socket_release(st);
410 * Free bio and related private data.
411 * Also drop a reference on the associated state,
412 * which waits until there are no more block IOs in flight.
/*
 * bio destructor installed by dst_process_io().
 * Iterates over the bio segments and frees their pages, drops the state
 * reference held through the private data, and returns the bio to
 * dst_bio_set.
 * NOTE(review): any condition guarding __free_page() inside the loop is
 * not visible in this excerpt.
 */
414 static void dst_bio_destructor(struct bio *bio)
417 struct dst_export_priv *priv = bio->bi_private;
420 bio_for_each_segment(bv, bio, i) {
424 __free_page(bv->bv_page);
/* Pairs with the dst_state_get() done in dst_process_io(). */
428 dst_state_put(priv->state);
429 bio_free(bio, dst_bio_set);
433 * Block IO completion. Queue request to be sent back to
434 * the client (or just confirmation).
/*
 * bi_end_io callback installed by dst_process_io().
 * Appends the request private data to the tail of st->request_list under
 * st->request_lock (IRQ-safe variant) and wakes st->thread_wait so the
 * waiting thread can pick the completed request up.
 * The err argument is not used in the lines visible here.
 */
436 static void dst_bio_end_io(struct bio *bio, int err)
438 struct dst_export_priv *p = bio->bi_private;
439 struct dst_state *st = p->state;
442 spin_lock_irqsave(&st->request_lock, flags);
443 list_add_tail(&p->request_entry, &st->request_list);
444 spin_unlock_irqrestore(&st->request_lock, flags);
446 wake_up(&st->thread_wait);
450 * Allocate read request for the server.
/*
 * Attach freshly allocated pages to bio for a read of total_size bytes.
 * Each page contributes at most PAGE_SIZE bytes (the smaller of PAGE_SIZE
 * and total_size) and is added at offset 0.
 * NOTE(review): the surrounding loop, the update of total_size, the
 * failure checks and the return value are not visible in this excerpt.
 */
452 static int dst_export_read_request(struct bio *bio, unsigned int total_size)
460 page = alloc_page(GFP_KERNEL);
464 size = min_t(unsigned int, PAGE_SIZE, total_size);
466 err = bio_add_page(bio, page, size, 0);
467 dprintk("%s: bio: %llu/%u, size: %u, err: %d.\n",
468 __func__, (u64)bio->bi_sector, bio->bi_size,
/* On this path the page just allocated is released by the error label. */
471 goto err_out_free_page;
485 * Allocate write request for the server.
486 * Should not only get pages, but also read data from the network.
/*
 * Build the payload of a write request: allocate a page, receive up to
 * PAGE_SIZE bytes from the client into it with dst_data_recv(), and add
 * the page to bio at offset 0.
 * NOTE(review): the surrounding loop, how data is obtained from page (the
 * err_out_unmap_page label suggests a mapping step), the failure checks
 * and the return value are not visible in this excerpt.
 */
488 static int dst_export_write_request(struct dst_state *st,
489 struct bio *bio, unsigned int total_size)
498 page = alloc_page(GFP_KERNEL);
504 goto err_out_free_page;
506 size = min_t(unsigned int, PAGE_SIZE, total_size);
/* Network data is received before the page is attached to the bio. */
508 err = dst_data_recv(st, data, size);
510 goto err_out_unmap_page;
512 err = bio_add_page(bio, page, size, 0);
514 goto err_out_unmap_page;
532 * Groovy, we've gotten an IO request from the client.
533 * Allocate BIO from the bioset, private data from the mempool
534 * and lots of pages for IO.
/*
 * Handle one IO command from the client; the command is read from
 * st->data.
 * A bio with enough vectors for cmd->size bytes (rounded up to whole
 * pages) is allocated, its private data is taken from the area directly
 * in front of the bio, and the bio is filled in from the command (flags,
 * rw, sector) and pointed at the node block device. The response command
 * is prepared in priv->cmd with the request id and size.
 * For a WRITE the hash is received with dst_recv_cdata() and the payload
 * with dst_export_write_request(); with crypto enabled the function then
 * returns the result of dst_export_crypto(). dst_export_read_request()
 * attaches pages for the other direction. Finally the bio is submitted
 * with generic_make_request().
 * NOTE(review): the handling of a missing block device, the remaining
 * bio_alloc_bioset() arguments, the branch separating the write and read
 * paths (presumably an else), the error paths and the return value are
 * not visible in this excerpt.
 */
536 int dst_process_io(struct dst_state *st)
538 struct dst_node *n = st->node;
539 struct dst_cmd *cmd = st->data;
541 struct dst_export_priv *priv;
544 if (unlikely(!n->bdev)) {
549 bio = bio_alloc_bioset(GFP_KERNEL,
550 PAGE_ALIGN(cmd->size) >> PAGE_SHIFT,
/*
 * The private data lives sizeof(struct dst_export_priv) bytes before the
 * bio, matching the size passed to bioset_create() in dst_export_init().
 */
555 priv = (struct dst_export_priv *)(((void *)bio) - sizeof (struct dst_export_priv));
/* Reference dropped again in dst_bio_destructor(). */
557 priv->state = dst_state_get(st);
560 bio->bi_private = priv;
561 bio->bi_end_io = dst_bio_end_io;
562 bio->bi_destructor = dst_bio_destructor;
563 bio->bi_bdev = n->bdev;
566 * Server side is only interested in two low bits:
567 * uptodate (set by itself actually) and rw block
569 bio->bi_flags |= cmd->flags & 3;
571 bio->bi_rw = cmd->rw;
573 bio->bi_sector = cmd->sector;
575 dst_bio_to_cmd(bio, &priv->cmd, DST_IO_RESPONSE, cmd->id);
578 priv->cmd.size = cmd->size;
580 if (bio_data_dir(bio) == WRITE) {
581 err = dst_recv_cdata(st, priv->cmd.hash);
585 err = dst_export_write_request(st, bio, cmd->size);
589 if (dst_need_crypto(n))
590 return dst_export_crypto(n, bio);
592 err = dst_export_read_request(bio, cmd->size);
597 dprintk("%s: bio: %llu/%u, rw: %lu, dir: %lu, flags: %lx, phys: %d.\n",
598 __func__, (u64)bio->bi_sector, bio->bi_size,
599 bio->bi_rw, bio_data_dir(bio),
600 bio->bi_flags, bio->bi_phys_segments);
602 generic_make_request(bio);
613 * Ok, block IO is ready, let's send it back to the client...
615 int dst_export_send_bio(struct bio *bio)
617 struct dst_export_priv *p = bio->bi_private;
618 struct dst_state *st = p->state;
619 struct dst_cmd *cmd = &p->cmd;
622 dprintk("%s: id: %llu, bio: %llu/%u, csize: %u, flags: %lu, rw: %lu.\n",
623 __func__, cmd->id, (u64)bio->bi_sector, bio->bi_size,
624 cmd->csize, bio->bi_flags, bio->bi_rw);
626 dst_convert_cmd(cmd);
634 if (bio_data_dir(bio) == WRITE) {
635 /* ... or just confirmation that writing has completed. */
636 cmd->size = cmd->csize = 0;
637 err = dst_data_send_header(st->socket, cmd,
638 sizeof(struct dst_cmd), 0);
642 err = dst_send_bio(st, cmd, bio);
647 dst_state_unlock(st);
653 dst_state_unlock(st);