Merge branch 'linus' into cpus4096
[linux-2.6] / drivers / scsi / iscsi_tcp.c
1 /*
2  * iSCSI Initiator over TCP/IP Data-Path
3  *
4  * Copyright (C) 2004 Dmitry Yusupov
5  * Copyright (C) 2004 Alex Aizman
6  * Copyright (C) 2005 - 2006 Mike Christie
7  * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
8  * maintained by open-iscsi@googlegroups.com
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published
12  * by the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful, but
16  * WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * General Public License for more details.
19  *
20  * See the file COPYING included with this distribution for more details.
21  *
22  * Credits:
23  *      Christoph Hellwig
24  *      FUJITA Tomonori
25  *      Arne Redlich
26  *      Zhenyu Wang
27  */
28
29 #include <linux/types.h>
30 #include <linux/inet.h>
31 #include <linux/file.h>
32 #include <linux/blkdev.h>
33 #include <linux/crypto.h>
34 #include <linux/delay.h>
35 #include <linux/kfifo.h>
36 #include <linux/scatterlist.h>
37 #include <net/tcp.h>
38 #include <scsi/scsi_cmnd.h>
39 #include <scsi/scsi_device.h>
40 #include <scsi/scsi_host.h>
41 #include <scsi/scsi.h>
42 #include <scsi/scsi_transport_iscsi.h>
43
44 #include "iscsi_tcp.h"
45
46 MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
47               "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
48               "Alex Aizman <itn780@yahoo.com>");
49 MODULE_DESCRIPTION("iSCSI/TCP data-path");
50 MODULE_LICENSE("GPL");
/*
 * Debug tracing.  DEBUG_TCP is explicitly undefined right here, so
 * debug_tcp() expands to nothing.  Define DEBUG_TCP instead to have the
 * messages emitted through printk(KERN_INFO) with a "tcp: " prefix.
 */
#undef DEBUG_TCP

#ifndef DEBUG_TCP
#define debug_tcp(fmt...)
#else
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#endif
58
59 static struct scsi_transport_template *iscsi_sw_tcp_scsi_transport;
60 static struct scsi_host_template iscsi_sw_tcp_sht;
61 static struct iscsi_transport iscsi_sw_tcp_transport;
62
63 static unsigned int iscsi_max_lun = 512;
64 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
65
66 /**
67  * iscsi_sw_tcp_recv - TCP receive in sendfile fashion
68  * @rd_desc: read descriptor
69  * @skb: socket buffer
70  * @offset: offset in skb
71  * @len: skb->len - offset
72  */
73 static int iscsi_sw_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
74                              unsigned int offset, size_t len)
75 {
76         struct iscsi_conn *conn = rd_desc->arg.data;
77         unsigned int consumed, total_consumed = 0;
78         int status;
79
80         debug_tcp("in %d bytes\n", skb->len - offset);
81
82         do {
83                 status = 0;
84                 consumed = iscsi_tcp_recv_skb(conn, skb, offset, 0, &status);
85                 offset += consumed;
86                 total_consumed += consumed;
87         } while (consumed != 0 && status != ISCSI_TCP_SKB_DONE);
88
89         debug_tcp("read %d bytes status %d\n", skb->len - offset, status);
90         return total_consumed;
91 }
92
93 static void iscsi_sw_tcp_data_ready(struct sock *sk, int flag)
94 {
95         struct iscsi_conn *conn = sk->sk_user_data;
96         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
97         read_descriptor_t rd_desc;
98
99         read_lock(&sk->sk_callback_lock);
100
101         /*
102          * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
103          * We set count to 1 because we want the network layer to
104          * hand us all the skbs that are available. iscsi_tcp_recv
105          * handled pdus that cross buffers or pdus that still need data.
106          */
107         rd_desc.arg.data = conn;
108         rd_desc.count = 1;
109         tcp_read_sock(sk, &rd_desc, iscsi_sw_tcp_recv);
110
111         read_unlock(&sk->sk_callback_lock);
112
113         /* If we had to (atomically) map a highmem page,
114          * unmap it now. */
115         iscsi_tcp_segment_unmap(&tcp_conn->in.segment);
116 }
117
118 static void iscsi_sw_tcp_state_change(struct sock *sk)
119 {
120         struct iscsi_tcp_conn *tcp_conn;
121         struct iscsi_sw_tcp_conn *tcp_sw_conn;
122         struct iscsi_conn *conn;
123         struct iscsi_session *session;
124         void (*old_state_change)(struct sock *);
125
126         read_lock(&sk->sk_callback_lock);
127
128         conn = (struct iscsi_conn*)sk->sk_user_data;
129         session = conn->session;
130
131         if ((sk->sk_state == TCP_CLOSE_WAIT ||
132              sk->sk_state == TCP_CLOSE) &&
133             !atomic_read(&sk->sk_rmem_alloc)) {
134                 debug_tcp("iscsi_tcp_state_change: TCP_CLOSE|TCP_CLOSE_WAIT\n");
135                 iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
136         }
137
138         tcp_conn = conn->dd_data;
139         tcp_sw_conn = tcp_conn->dd_data;
140         old_state_change = tcp_sw_conn->old_state_change;
141
142         read_unlock(&sk->sk_callback_lock);
143
144         old_state_change(sk);
145 }
146
147 /**
148  * iscsi_write_space - Called when more output buffer space is available
149  * @sk: socket space is available for
150  **/
151 static void iscsi_sw_tcp_write_space(struct sock *sk)
152 {
153         struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
154         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
155         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
156
157         tcp_sw_conn->old_write_space(sk);
158         debug_tcp("iscsi_write_space: cid %d\n", conn->id);
159         scsi_queue_work(conn->session->host, &conn->xmitwork);
160 }
161
162 static void iscsi_sw_tcp_conn_set_callbacks(struct iscsi_conn *conn)
163 {
164         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
165         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
166         struct sock *sk = tcp_sw_conn->sock->sk;
167
168         /* assign new callbacks */
169         write_lock_bh(&sk->sk_callback_lock);
170         sk->sk_user_data = conn;
171         tcp_sw_conn->old_data_ready = sk->sk_data_ready;
172         tcp_sw_conn->old_state_change = sk->sk_state_change;
173         tcp_sw_conn->old_write_space = sk->sk_write_space;
174         sk->sk_data_ready = iscsi_sw_tcp_data_ready;
175         sk->sk_state_change = iscsi_sw_tcp_state_change;
176         sk->sk_write_space = iscsi_sw_tcp_write_space;
177         write_unlock_bh(&sk->sk_callback_lock);
178 }
179
180 static void
181 iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_sw_tcp_conn *tcp_sw_conn)
182 {
183         struct sock *sk = tcp_sw_conn->sock->sk;
184
185         /* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
186         write_lock_bh(&sk->sk_callback_lock);
187         sk->sk_user_data    = NULL;
188         sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
189         sk->sk_state_change = tcp_sw_conn->old_state_change;
190         sk->sk_write_space  = tcp_sw_conn->old_write_space;
191         sk->sk_no_check  = 0;
192         write_unlock_bh(&sk->sk_callback_lock);
193 }
194
195 /**
196  * iscsi_sw_tcp_xmit_segment - transmit segment
197  * @tcp_conn: the iSCSI TCP connection
198  * @segment: the buffer to transmnit
199  *
200  * This function transmits as much of the buffer as
201  * the network layer will accept, and returns the number of
202  * bytes transmitted.
203  *
204  * If CRC hashing is enabled, the function will compute the
205  * hash as it goes. When the entire segment has been transmitted,
206  * it will retrieve the hash value and send it as well.
207  */
208 static int iscsi_sw_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
209                                      struct iscsi_segment *segment)
210 {
211         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
212         struct socket *sk = tcp_sw_conn->sock;
213         unsigned int copied = 0;
214         int r = 0;
215
216         while (!iscsi_tcp_segment_done(tcp_conn, segment, 0, r)) {
217                 struct scatterlist *sg;
218                 unsigned int offset, copy;
219                 int flags = 0;
220
221                 r = 0;
222                 offset = segment->copied;
223                 copy = segment->size - offset;
224
225                 if (segment->total_copied + segment->size < segment->total_size)
226                         flags |= MSG_MORE;
227
228                 /* Use sendpage if we can; else fall back to sendmsg */
229                 if (!segment->data) {
230                         sg = segment->sg;
231                         offset += segment->sg_offset + sg->offset;
232                         r = tcp_sw_conn->sendpage(sk, sg_page(sg), offset,
233                                                   copy, flags);
234                 } else {
235                         struct msghdr msg = { .msg_flags = flags };
236                         struct kvec iov = {
237                                 .iov_base = segment->data + offset,
238                                 .iov_len = copy
239                         };
240
241                         r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
242                 }
243
244                 if (r < 0) {
245                         iscsi_tcp_segment_unmap(segment);
246                         if (copied || r == -EAGAIN)
247                                 break;
248                         return r;
249                 }
250                 copied += r;
251         }
252         return copied;
253 }
254
255 /**
256  * iscsi_sw_tcp_xmit - TCP transmit
257  **/
258 static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
259 {
260         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
261         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
262         struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
263         unsigned int consumed = 0;
264         int rc = 0;
265
266         while (1) {
267                 rc = iscsi_sw_tcp_xmit_segment(tcp_conn, segment);
268                 if (rc < 0) {
269                         rc = ISCSI_ERR_XMIT_FAILED;
270                         goto error;
271                 }
272                 if (rc == 0)
273                         break;
274
275                 consumed += rc;
276
277                 if (segment->total_copied >= segment->total_size) {
278                         if (segment->done != NULL) {
279                                 rc = segment->done(tcp_conn, segment);
280                                 if (rc != 0)
281                                         goto error;
282                         }
283                 }
284         }
285
286         debug_tcp("xmit %d bytes\n", consumed);
287
288         conn->txdata_octets += consumed;
289         return consumed;
290
291 error:
292         /* Transmit error. We could initiate error recovery
293          * here. */
294         debug_tcp("Error sending PDU, errno=%d\n", rc);
295         iscsi_conn_failure(conn, rc);
296         return -EIO;
297 }
298
299 /**
300  * iscsi_tcp_xmit_qlen - return the number of bytes queued for xmit
301  */
302 static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
303 {
304         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
305         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
306         struct iscsi_segment *segment = &tcp_sw_conn->out.segment;
307
308         return segment->total_copied - segment->total_size;
309 }
310
311 static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
312 {
313         struct iscsi_conn *conn = task->conn;
314         int rc;
315
316         while (iscsi_sw_tcp_xmit_qlen(conn)) {
317                 rc = iscsi_sw_tcp_xmit(conn);
318                 if (rc == 0)
319                         return -EAGAIN;
320                 if (rc < 0)
321                         return rc;
322         }
323
324         return 0;
325 }
326
327 /*
328  * This is called when we're done sending the header.
329  * Simply copy the data_segment to the send segment, and return.
330  */
331 static int iscsi_sw_tcp_send_hdr_done(struct iscsi_tcp_conn *tcp_conn,
332                                       struct iscsi_segment *segment)
333 {
334         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
335
336         tcp_sw_conn->out.segment = tcp_sw_conn->out.data_segment;
337         debug_tcp("Header done. Next segment size %u total_size %u\n",
338                   tcp_sw_conn->out.segment.size,
339                   tcp_sw_conn->out.segment.total_size);
340         return 0;
341 }
342
343 static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr,
344                                        size_t hdrlen)
345 {
346         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
347         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
348
349         debug_tcp("%s(%p%s)\n", __func__, tcp_conn,
350                         conn->hdrdgst_en? ", digest enabled" : "");
351
352         /* Clear the data segment - needs to be filled in by the
353          * caller using iscsi_tcp_send_data_prep() */
354         memset(&tcp_sw_conn->out.data_segment, 0,
355                sizeof(struct iscsi_segment));
356
357         /* If header digest is enabled, compute the CRC and
358          * place the digest into the same buffer. We make
359          * sure that both iscsi_tcp_task and mtask have
360          * sufficient room.
361          */
362         if (conn->hdrdgst_en) {
363                 iscsi_tcp_dgst_header(&tcp_sw_conn->tx_hash, hdr, hdrlen,
364                                       hdr + hdrlen);
365                 hdrlen += ISCSI_DIGEST_SIZE;
366         }
367
368         /* Remember header pointer for later, when we need
369          * to decide whether there's a payload to go along
370          * with the header. */
371         tcp_sw_conn->out.hdr = hdr;
372
373         iscsi_segment_init_linear(&tcp_sw_conn->out.segment, hdr, hdrlen,
374                                   iscsi_sw_tcp_send_hdr_done, NULL);
375 }
376
377 /*
378  * Prepare the send buffer for the payload data.
379  * Padding and checksumming will all be taken care
380  * of by the iscsi_segment routines.
381  */
382 static int
383 iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg,
384                             unsigned int count, unsigned int offset,
385                             unsigned int len)
386 {
387         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
388         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
389         struct hash_desc *tx_hash = NULL;
390         unsigned int hdr_spec_len;
391
392         debug_tcp("%s(%p, offset=%d, datalen=%d%s)\n", __func__,
393                         tcp_conn, offset, len,
394                         conn->datadgst_en? ", digest enabled" : "");
395
396         /* Make sure the datalen matches what the caller
397            said he would send. */
398         hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
399         WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));
400
401         if (conn->datadgst_en)
402                 tx_hash = &tcp_sw_conn->tx_hash;
403
404         return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment,
405                                      sg, count, offset, len,
406                                      NULL, tx_hash);
407 }
408
409 static void
410 iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data,
411                                    size_t len)
412 {
413         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
414         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
415         struct hash_desc *tx_hash = NULL;
416         unsigned int hdr_spec_len;
417
418         debug_tcp("%s(%p, datalen=%d%s)\n", __func__, tcp_conn, len,
419                   conn->datadgst_en? ", digest enabled" : "");
420
421         /* Make sure the datalen matches what the caller
422            said he would send. */
423         hdr_spec_len = ntoh24(tcp_sw_conn->out.hdr->dlength);
424         WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len));
425
426         if (conn->datadgst_en)
427                 tx_hash = &tcp_sw_conn->tx_hash;
428
429         iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment,
430                                 data, len, NULL, tx_hash);
431 }
432
433 static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
434                                  unsigned int offset, unsigned int count)
435 {
436         struct iscsi_conn *conn = task->conn;
437         int err = 0;
438
439         iscsi_sw_tcp_send_hdr_prep(conn, task->hdr, task->hdr_len);
440
441         if (!count)
442                 return 0;
443
444         if (!task->sc)
445                 iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
446         else {
447                 struct scsi_data_buffer *sdb = scsi_out(task->sc);
448
449                 err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
450                                                   sdb->table.nents, offset,
451                                                   count);
452         }
453
454         if (err) {
455                 iscsi_conn_failure(conn, err);
456                 return -EIO;
457         }
458         return 0;
459 }
460
461 static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
462 {
463         struct iscsi_tcp_task *tcp_task = task->dd_data;
464
465         task->hdr = task->dd_data + sizeof(*tcp_task);
466         task->hdr_max = sizeof(struct iscsi_sw_tcp_hdrbuf) - ISCSI_DIGEST_SIZE;
467         return 0;
468 }
469
470 static struct iscsi_cls_conn *
471 iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
472                          uint32_t conn_idx)
473 {
474         struct iscsi_conn *conn;
475         struct iscsi_cls_conn *cls_conn;
476         struct iscsi_tcp_conn *tcp_conn;
477         struct iscsi_sw_tcp_conn *tcp_sw_conn;
478
479         cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn),
480                                         conn_idx);
481         if (!cls_conn)
482                 return NULL;
483         conn = cls_conn->dd_data;
484         tcp_conn = conn->dd_data;
485         tcp_sw_conn = tcp_conn->dd_data;
486
487         tcp_sw_conn->tx_hash.tfm = crypto_alloc_hash("crc32c", 0,
488                                                      CRYPTO_ALG_ASYNC);
489         tcp_sw_conn->tx_hash.flags = 0;
490         if (IS_ERR(tcp_sw_conn->tx_hash.tfm))
491                 goto free_conn;
492
493         tcp_sw_conn->rx_hash.tfm = crypto_alloc_hash("crc32c", 0,
494                                                      CRYPTO_ALG_ASYNC);
495         tcp_sw_conn->rx_hash.flags = 0;
496         if (IS_ERR(tcp_sw_conn->rx_hash.tfm))
497                 goto free_tx_tfm;
498         tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
499
500         return cls_conn;
501
502 free_tx_tfm:
503         crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
504 free_conn:
505         iscsi_conn_printk(KERN_ERR, conn,
506                           "Could not create connection due to crc32c "
507                           "loading error. Make sure the crc32c "
508                           "module is built as a module or into the "
509                           "kernel\n");
510         iscsi_tcp_conn_teardown(cls_conn);
511         return NULL;
512 }
513
514 static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
515 {
516         struct iscsi_session *session = conn->session;
517         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
518         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
519         struct socket *sock = tcp_sw_conn->sock;
520
521         if (!sock)
522                 return;
523
524         sock_hold(sock->sk);
525         iscsi_sw_tcp_conn_restore_callbacks(tcp_sw_conn);
526         sock_put(sock->sk);
527
528         spin_lock_bh(&session->lock);
529         tcp_sw_conn->sock = NULL;
530         spin_unlock_bh(&session->lock);
531         sockfd_put(sock);
532 }
533
534 static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
535 {
536         struct iscsi_conn *conn = cls_conn->dd_data;
537         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
538         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
539
540         iscsi_sw_tcp_release_conn(conn);
541
542         if (tcp_sw_conn->tx_hash.tfm)
543                 crypto_free_hash(tcp_sw_conn->tx_hash.tfm);
544         if (tcp_sw_conn->rx_hash.tfm)
545                 crypto_free_hash(tcp_sw_conn->rx_hash.tfm);
546
547         iscsi_tcp_conn_teardown(cls_conn);
548 }
549
550 static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
551 {
552         struct iscsi_conn *conn = cls_conn->dd_data;
553         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
554         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
555
556         /* userspace may have goofed up and not bound us */
557         if (!tcp_sw_conn->sock)
558                 return;
559         /*
560          * Make sure our recv side is stopped.
561          * Older tools called conn stop before ep_disconnect
562          * so IO could still be coming in.
563          */
564         write_lock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
565         set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
566         write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
567
568         iscsi_conn_stop(cls_conn, flag);
569         iscsi_sw_tcp_release_conn(conn);
570 }
571
572 static int iscsi_sw_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
573                                  char *buf, int *port,
574                                  int (*getname)(struct socket *,
575                                                 struct sockaddr *,
576                                                 int *addrlen))
577 {
578         struct sockaddr_storage *addr;
579         struct sockaddr_in6 *sin6;
580         struct sockaddr_in *sin;
581         int rc = 0, len;
582
583         addr = kmalloc(sizeof(*addr), GFP_KERNEL);
584         if (!addr)
585                 return -ENOMEM;
586
587         if (getname(sock, (struct sockaddr *) addr, &len)) {
588                 rc = -ENODEV;
589                 goto free_addr;
590         }
591
592         switch (addr->ss_family) {
593         case AF_INET:
594                 sin = (struct sockaddr_in *)addr;
595                 spin_lock_bh(&conn->session->lock);
596                 sprintf(buf, "%pI4", &sin->sin_addr.s_addr);
597                 *port = be16_to_cpu(sin->sin_port);
598                 spin_unlock_bh(&conn->session->lock);
599                 break;
600         case AF_INET6:
601                 sin6 = (struct sockaddr_in6 *)addr;
602                 spin_lock_bh(&conn->session->lock);
603                 sprintf(buf, "%pI6", &sin6->sin6_addr);
604                 *port = be16_to_cpu(sin6->sin6_port);
605                 spin_unlock_bh(&conn->session->lock);
606                 break;
607         }
608 free_addr:
609         kfree(addr);
610         return rc;
611 }
612
613 static int
614 iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
615                        struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
616                        int is_leading)
617 {
618         struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
619         struct iscsi_host *ihost = shost_priv(shost);
620         struct iscsi_conn *conn = cls_conn->dd_data;
621         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
622         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
623         struct sock *sk;
624         struct socket *sock;
625         int err;
626
627         /* lookup for existing socket */
628         sock = sockfd_lookup((int)transport_eph, &err);
629         if (!sock) {
630                 iscsi_conn_printk(KERN_ERR, conn,
631                                   "sockfd_lookup failed %d\n", err);
632                 return -EEXIST;
633         }
634         /*
635          * copy these values now because if we drop the session
636          * userspace may still want to query the values since we will
637          * be using them for the reconnect
638          */
639         err = iscsi_sw_tcp_get_addr(conn, sock, conn->portal_address,
640                                     &conn->portal_port, kernel_getpeername);
641         if (err)
642                 goto free_socket;
643
644         err = iscsi_sw_tcp_get_addr(conn, sock, ihost->local_address,
645                                     &ihost->local_port, kernel_getsockname);
646         if (err)
647                 goto free_socket;
648
649         err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
650         if (err)
651                 goto free_socket;
652
653         /* bind iSCSI connection and socket */
654         tcp_sw_conn->sock = sock;
655
656         /* setup Socket parameters */
657         sk = sock->sk;
658         sk->sk_reuse = 1;
659         sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
660         sk->sk_allocation = GFP_ATOMIC;
661
662         iscsi_sw_tcp_conn_set_callbacks(conn);
663         tcp_sw_conn->sendpage = tcp_sw_conn->sock->ops->sendpage;
664         /*
665          * set receive state machine into initial state
666          */
667         iscsi_tcp_hdr_recv_prep(tcp_conn);
668         return 0;
669
670 free_socket:
671         sockfd_put(sock);
672         return err;
673 }
674
675 static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn,
676                                        enum iscsi_param param, char *buf,
677                                        int buflen)
678 {
679         struct iscsi_conn *conn = cls_conn->dd_data;
680         struct iscsi_session *session = conn->session;
681         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
682         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
683         int value;
684
685         switch(param) {
686         case ISCSI_PARAM_HDRDGST_EN:
687                 iscsi_set_param(cls_conn, param, buf, buflen);
688                 break;
689         case ISCSI_PARAM_DATADGST_EN:
690                 iscsi_set_param(cls_conn, param, buf, buflen);
691                 tcp_sw_conn->sendpage = conn->datadgst_en ?
692                         sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage;
693                 break;
694         case ISCSI_PARAM_MAX_R2T:
695                 sscanf(buf, "%d", &value);
696                 if (value <= 0 || !is_power_of_2(value))
697                         return -EINVAL;
698                 if (session->max_r2t == value)
699                         break;
700                 iscsi_tcp_r2tpool_free(session);
701                 iscsi_set_param(cls_conn, param, buf, buflen);
702                 if (iscsi_tcp_r2tpool_alloc(session))
703                         return -ENOMEM;
704                 break;
705         default:
706                 return iscsi_set_param(cls_conn, param, buf, buflen);
707         }
708
709         return 0;
710 }
711
712 static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
713                                        enum iscsi_param param, char *buf)
714 {
715         struct iscsi_conn *conn = cls_conn->dd_data;
716         int len;
717
718         switch(param) {
719         case ISCSI_PARAM_CONN_PORT:
720                 spin_lock_bh(&conn->session->lock);
721                 len = sprintf(buf, "%hu\n", conn->portal_port);
722                 spin_unlock_bh(&conn->session->lock);
723                 break;
724         case ISCSI_PARAM_CONN_ADDRESS:
725                 spin_lock_bh(&conn->session->lock);
726                 len = sprintf(buf, "%s\n", conn->portal_address);
727                 spin_unlock_bh(&conn->session->lock);
728                 break;
729         default:
730                 return iscsi_conn_get_param(cls_conn, param, buf);
731         }
732
733         return len;
734 }
735
736 static void
737 iscsi_sw_tcp_conn_get_stats(struct iscsi_cls_conn *cls_conn,
738                             struct iscsi_stats *stats)
739 {
740         struct iscsi_conn *conn = cls_conn->dd_data;
741         struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
742         struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
743
744         stats->custom_length = 3;
745         strcpy(stats->custom[0].desc, "tx_sendpage_failures");
746         stats->custom[0].value = tcp_sw_conn->sendpage_failures_cnt;
747         strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
748         stats->custom[1].value = tcp_sw_conn->discontiguous_hdr_cnt;
749         strcpy(stats->custom[2].desc, "eh_abort_cnt");
750         stats->custom[2].value = conn->eh_abort_cnt;
751
752         iscsi_tcp_conn_get_stats(cls_conn, stats);
753 }
754
755 static struct iscsi_cls_session *
756 iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
757                             uint16_t qdepth, uint32_t initial_cmdsn,
758                             uint32_t *hostno)
759 {
760         struct iscsi_cls_session *cls_session;
761         struct iscsi_session *session;
762         struct Scsi_Host *shost;
763
764         if (ep) {
765                 printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
766                 return NULL;
767         }
768
769         shost = iscsi_host_alloc(&iscsi_sw_tcp_sht, 0, qdepth);
770         if (!shost)
771                 return NULL;
772         shost->transportt = iscsi_sw_tcp_scsi_transport;
773         shost->max_lun = iscsi_max_lun;
774         shost->max_id = 0;
775         shost->max_channel = 0;
776         shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
777
778         if (iscsi_host_add(shost, NULL))
779                 goto free_host;
780         *hostno = shost->host_no;
781
782         cls_session = iscsi_session_setup(&iscsi_sw_tcp_transport, shost,
783                                           cmds_max,
784                                           sizeof(struct iscsi_tcp_task) +
785                                           sizeof(struct iscsi_sw_tcp_hdrbuf),
786                                           initial_cmdsn, 0);
787         if (!cls_session)
788                 goto remove_host;
789         session = cls_session->dd_data;
790
791         shost->can_queue = session->scsi_cmds_max;
792         if (iscsi_tcp_r2tpool_alloc(session))
793                 goto remove_session;
794         return cls_session;
795
796 remove_session:
797         iscsi_session_teardown(cls_session);
798 remove_host:
799         iscsi_host_remove(shost);
800 free_host:
801         iscsi_host_free(shost);
802         return NULL;
803 }
804
805 static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
806 {
807         struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
808
809         iscsi_tcp_r2tpool_free(cls_session->dd_data);
810         iscsi_session_teardown(cls_session);
811
812         iscsi_host_remove(shost);
813         iscsi_host_free(shost);
814 }
815
816 static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
817 {
818         blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_ANY);
819         blk_queue_dma_alignment(sdev->request_queue, 0);
820         return 0;
821 }
822
823 static struct scsi_host_template iscsi_sw_tcp_sht = {
824         .module                 = THIS_MODULE,
825         .name                   = "iSCSI Initiator over TCP/IP",
826         .queuecommand           = iscsi_queuecommand,
827         .change_queue_depth     = iscsi_change_queue_depth,
828         .can_queue              = ISCSI_DEF_XMIT_CMDS_MAX - 1,
829         .sg_tablesize           = 4096,
830         .max_sectors            = 0xFFFF,
831         .cmd_per_lun            = ISCSI_DEF_CMD_PER_LUN,
832         .eh_abort_handler       = iscsi_eh_abort,
833         .eh_device_reset_handler= iscsi_eh_device_reset,
834         .eh_target_reset_handler= iscsi_eh_target_reset,
835         .use_clustering         = DISABLE_CLUSTERING,
836         .slave_configure        = iscsi_sw_tcp_slave_configure,
837         .proc_name              = "iscsi_tcp",
838         .this_id                = -1,
839 };
840
841 static struct iscsi_transport iscsi_sw_tcp_transport = {
842         .owner                  = THIS_MODULE,
843         .name                   = "tcp",
844         .caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
845                                   | CAP_DATADGST,
846         .param_mask             = ISCSI_MAX_RECV_DLENGTH |
847                                   ISCSI_MAX_XMIT_DLENGTH |
848                                   ISCSI_HDRDGST_EN |
849                                   ISCSI_DATADGST_EN |
850                                   ISCSI_INITIAL_R2T_EN |
851                                   ISCSI_MAX_R2T |
852                                   ISCSI_IMM_DATA_EN |
853                                   ISCSI_FIRST_BURST |
854                                   ISCSI_MAX_BURST |
855                                   ISCSI_PDU_INORDER_EN |
856                                   ISCSI_DATASEQ_INORDER_EN |
857                                   ISCSI_ERL |
858                                   ISCSI_CONN_PORT |
859                                   ISCSI_CONN_ADDRESS |
860                                   ISCSI_EXP_STATSN |
861                                   ISCSI_PERSISTENT_PORT |
862                                   ISCSI_PERSISTENT_ADDRESS |
863                                   ISCSI_TARGET_NAME | ISCSI_TPGT |
864                                   ISCSI_USERNAME | ISCSI_PASSWORD |
865                                   ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
866                                   ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
867                                   ISCSI_LU_RESET_TMO |
868                                   ISCSI_PING_TMO | ISCSI_RECV_TMO |
869                                   ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
870         .host_param_mask        = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
871                                   ISCSI_HOST_INITIATOR_NAME |
872                                   ISCSI_HOST_NETDEV_NAME,
873         /* session management */
874         .create_session         = iscsi_sw_tcp_session_create,
875         .destroy_session        = iscsi_sw_tcp_session_destroy,
876         /* connection management */
877         .create_conn            = iscsi_sw_tcp_conn_create,
878         .bind_conn              = iscsi_sw_tcp_conn_bind,
879         .destroy_conn           = iscsi_sw_tcp_conn_destroy,
880         .set_param              = iscsi_sw_tcp_conn_set_param,
881         .get_conn_param         = iscsi_sw_tcp_conn_get_param,
882         .get_session_param      = iscsi_session_get_param,
883         .start_conn             = iscsi_conn_start,
884         .stop_conn              = iscsi_sw_tcp_conn_stop,
885         /* iscsi host params */
886         .get_host_param         = iscsi_host_get_param,
887         .set_host_param         = iscsi_host_set_param,
888         /* IO */
889         .send_pdu               = iscsi_conn_send_pdu,
890         .get_stats              = iscsi_sw_tcp_conn_get_stats,
891         /* iscsi task/cmd helpers */
892         .init_task              = iscsi_tcp_task_init,
893         .xmit_task              = iscsi_tcp_task_xmit,
894         .cleanup_task           = iscsi_tcp_cleanup_task,
895         /* low level pdu helpers */
896         .xmit_pdu               = iscsi_sw_tcp_pdu_xmit,
897         .init_pdu               = iscsi_sw_tcp_pdu_init,
898         .alloc_pdu              = iscsi_sw_tcp_pdu_alloc,
899         /* recovery */
900         .session_recovery_timedout = iscsi_session_recovery_timedout,
901 };
902
903 static int __init iscsi_sw_tcp_init(void)
904 {
905         if (iscsi_max_lun < 1) {
906                 printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
907                        iscsi_max_lun);
908                 return -EINVAL;
909         }
910
911         iscsi_sw_tcp_scsi_transport = iscsi_register_transport(
912                                                 &iscsi_sw_tcp_transport);
913         if (!iscsi_sw_tcp_scsi_transport)
914                 return -ENODEV;
915
916         return 0;
917 }
918
919 static void __exit iscsi_sw_tcp_exit(void)
920 {
921         iscsi_unregister_transport(&iscsi_sw_tcp_transport);
922 }
923
/* Hook the init and exit routines above into module load and unload. */
module_init(iscsi_sw_tcp_init);
module_exit(iscsi_sw_tcp_exit);