/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"
/* Constant PDU lengths calculations */
/* Combined size of struct iser_hdr and struct iscsi_hdr. */
#define ISER_TOTAL_HEADERS_LEN  (sizeof(struct iser_hdr) + \
				 sizeof(struct iscsi_hdr))
48 /* iser_dto_add_regd_buff - increments the reference count for *
49 * the registered buffer & adds it to the DTO object */
50 static void iser_dto_add_regd_buff(struct iser_dto *dto,
51 struct iser_regd_buf *regd_buf,
52 unsigned long use_offset,
53 unsigned long use_size)
57 atomic_inc(®d_buf->ref_count);
59 add_idx = dto->regd_vector_len;
60 dto->regd[add_idx] = regd_buf;
61 dto->used_sz[add_idx] = use_size;
62 dto->offset[add_idx] = use_offset;
64 dto->regd_vector_len++;
67 /* Register user buffer memory and initialize passive rdma
68 * dto descriptor. Total data size is stored in
69 * iser_ctask->data[ISER_DIR_IN].data_len
71 static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask,
75 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
76 struct iser_regd_buf *regd_buf;
78 struct iser_hdr *hdr = &iser_ctask->desc.iser_header;
79 struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN];
81 err = iser_dma_map_task_data(iser_ctask,
88 if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) {
89 iser_err("Total data length: %ld, less than EDTL: "
90 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
91 iser_ctask->data[ISER_DIR_IN].data_len, edtl,
92 ctask->itt, iser_ctask->iser_conn);
96 err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN);
98 iser_err("Failed to set up Data-IN RDMA\n");
101 regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN];
103 hdr->flags |= ISER_RSV;
104 hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
105 hdr->read_va = cpu_to_be64(regd_buf->reg.va);
107 iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
108 ctask->itt, regd_buf->reg.rkey,
109 (unsigned long long)regd_buf->reg.va);
114 /* Register user buffer memory and initialize passive rdma
115 * dto descriptor. Total data size is stored in
116 * ctask->data[ISER_DIR_OUT].data_len
119 iser_prepare_write_cmd(struct iscsi_cmd_task *ctask,
121 unsigned int unsol_sz,
124 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
125 struct iser_regd_buf *regd_buf;
127 struct iser_dto *send_dto = &iser_ctask->desc.dto;
128 struct iser_hdr *hdr = &iser_ctask->desc.iser_header;
129 struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT];
131 err = iser_dma_map_task_data(iser_ctask,
138 if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) {
139 iser_err("Total data length: %ld, less than EDTL: %d, "
140 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
141 iser_ctask->data[ISER_DIR_OUT].data_len,
142 edtl, ctask->itt, ctask->conn);
146 err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT);
148 iser_err("Failed to register write cmd RDMA mem\n");
152 regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT];
154 if (unsol_sz < edtl) {
155 hdr->flags |= ISER_WSV;
156 hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
157 hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz);
159 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
160 "VA:%#llX + unsol:%d\n",
161 ctask->itt, regd_buf->reg.rkey,
162 (unsigned long long)regd_buf->reg.va, unsol_sz);
166 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
168 iser_dto_add_regd_buff(send_dto,
178 * iser_post_receive_control - allocates, initializes and posts receive DTO.
180 static int iser_post_receive_control(struct iscsi_conn *conn)
182 struct iscsi_iser_conn *iser_conn = conn->dd_data;
183 struct iser_desc *rx_desc;
184 struct iser_regd_buf *regd_hdr;
185 struct iser_regd_buf *regd_data;
186 struct iser_dto *recv_dto = NULL;
187 struct iser_device *device = iser_conn->ib_conn->device;
188 int rx_data_size, err = 0;
190 rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
191 if (rx_desc == NULL) {
192 iser_err("Failed to alloc desc for post recv\n");
195 rx_desc->type = ISCSI_RX;
197 /* for the login sequence we must support rx of upto 8K; login is done
198 * after conn create/bind (connect) and conn stop/bind (reconnect),
199 * what's common for both schemes is that the connection is not started
201 if (conn->c_stage != ISCSI_CONN_STARTED)
202 rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
203 else /* FIXME till user space sets conn->max_recv_dlength correctly */
206 rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
207 if (rx_desc->data == NULL) {
208 iser_err("Failed to alloc data buf for post recv\n");
210 goto post_rx_kmalloc_failure;
213 recv_dto = &rx_desc->dto;
214 recv_dto->ib_conn = iser_conn->ib_conn;
215 recv_dto->regd_vector_len = 0;
217 regd_hdr = &rx_desc->hdr_regd_buf;
218 memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
219 regd_hdr->device = device;
220 regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
221 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
223 iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
225 iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
227 regd_data = &rx_desc->data_regd_buf;
228 memset(regd_data, 0, sizeof(struct iser_regd_buf));
229 regd_data->device = device;
230 regd_data->virt_addr = rx_desc->data;
231 regd_data->data_size = rx_data_size;
233 iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
235 iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
237 err = iser_post_recv(rx_desc);
241 /* iser_post_recv failed */
242 iser_dto_buffs_release(recv_dto);
243 kfree(rx_desc->data);
244 post_rx_kmalloc_failure:
245 kmem_cache_free(ig.desc_cache, rx_desc);
249 /* creates a new tx descriptor and adds header regd buffer */
250 static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
251 struct iser_desc *tx_desc)
253 struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf;
254 struct iser_dto *send_dto = &tx_desc->dto;
256 memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
257 regd_hdr->device = iser_conn->ib_conn->device;
258 regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
259 regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
261 send_dto->ib_conn = iser_conn->ib_conn;
262 send_dto->notify_enable = 1;
263 send_dto->regd_vector_len = 0;
265 memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
266 tx_desc->iser_header.flags = ISER_VER;
268 iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0);
272 * iser_conn_set_full_featured_mode - (iSER API)
274 int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
276 struct iscsi_iser_conn *iser_conn = conn->dd_data;
279 /* no need to keep it in a var, we are after login so if this should
280 * be negotiated, by now the result should be available here */
281 int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
283 iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
285 /* Check that there is no posted recv or send buffers left - */
286 /* they must be consumed during the login phase */
287 BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0);
288 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
290 /* Initial post receive buffers */
291 for (i = 0; i < initial_post_recv_bufs_num; i++) {
292 if (iser_post_receive_control(conn) != 0) {
293 iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
298 iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
303 iser_check_xmit(struct iscsi_conn *conn, void *task)
305 struct iscsi_iser_conn *iser_conn = conn->dd_data;
307 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
308 ISER_QP_MAX_REQ_DTOS) {
309 iser_dbg("%ld can't xmit task %p\n",jiffies,task);
317 * iser_send_command - send command PDU
319 int iser_send_command(struct iscsi_conn *conn,
320 struct iscsi_cmd_task *ctask)
322 struct iscsi_iser_conn *iser_conn = conn->dd_data;
323 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
324 struct iser_dto *send_dto = NULL;
327 struct iser_data_buf *data_buf;
329 struct iscsi_cmd *hdr = ctask->hdr;
330 struct scsi_cmnd *sc = ctask->sc;
332 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
333 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
336 if (iser_check_xmit(conn, ctask))
339 edtl = ntohl(hdr->data_length);
341 /* build the tx desc regd header and add it to the tx desc dto */
342 iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND;
343 send_dto = &iser_ctask->desc.dto;
344 send_dto->ctask = iser_ctask;
345 iser_create_send_desc(iser_conn, &iser_ctask->desc);
347 if (hdr->flags & ISCSI_FLAG_CMD_READ)
348 data_buf = &iser_ctask->data[ISER_DIR_IN];
350 data_buf = &iser_ctask->data[ISER_DIR_OUT];
352 if (scsi_sg_count(sc)) { /* using a scatter list */
353 data_buf->buf = scsi_sglist(sc);
354 data_buf->size = scsi_sg_count(sc);
357 data_buf->data_len = scsi_bufflen(sc);
359 if (hdr->flags & ISCSI_FLAG_CMD_READ) {
360 err = iser_prepare_read_cmd(ctask, edtl);
362 goto send_command_error;
364 if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
365 err = iser_prepare_write_cmd(ctask,
371 goto send_command_error;
374 iser_reg_single(iser_conn->ib_conn->device,
375 send_dto->regd[0], DMA_TO_DEVICE);
377 if (iser_post_receive_control(conn) != 0) {
378 iser_err("post_recv failed!\n");
380 goto send_command_error;
383 iser_ctask->status = ISER_TASK_STATUS_STARTED;
385 err = iser_post_send(&iser_ctask->desc);
390 iser_dto_buffs_release(send_dto);
391 iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err);
396 * iser_send_data_out - send data out PDU
398 int iser_send_data_out(struct iscsi_conn *conn,
399 struct iscsi_cmd_task *ctask,
400 struct iscsi_data *hdr)
402 struct iscsi_iser_conn *iser_conn = conn->dd_data;
403 struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
404 struct iser_desc *tx_desc = NULL;
405 struct iser_dto *send_dto = NULL;
406 unsigned long buf_offset;
407 unsigned long data_seg_len;
411 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
412 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
416 if (iser_check_xmit(conn, ctask))
419 itt = (__force uint32_t)hdr->itt;
420 data_seg_len = ntoh24(hdr->dlength);
421 buf_offset = ntohl(hdr->offset);
423 iser_dbg("%s itt %d dseg_len %d offset %d\n",
424 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
426 tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
427 if (tx_desc == NULL) {
428 iser_err("Failed to alloc desc for post dataout\n");
432 tx_desc->type = ISCSI_TX_DATAOUT;
433 memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
435 /* build the tx desc regd header and add it to the tx desc dto */
436 send_dto = &tx_desc->dto;
437 send_dto->ctask = iser_ctask;
438 iser_create_send_desc(iser_conn, tx_desc);
440 iser_reg_single(iser_conn->ib_conn->device,
441 send_dto->regd[0], DMA_TO_DEVICE);
443 /* all data was registered for RDMA, we can use the lkey */
444 iser_dto_add_regd_buff(send_dto,
445 &iser_ctask->rdma_regd[ISER_DIR_OUT],
449 if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) {
450 iser_err("Offset:%ld & DSL:%ld in Data-Out "
451 "inconsistent with total len:%ld, itt:%d\n",
452 buf_offset, data_seg_len,
453 iser_ctask->data[ISER_DIR_OUT].data_len, itt);
455 goto send_data_out_error;
457 iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
458 itt, buf_offset, data_seg_len);
461 err = iser_post_send(tx_desc);
466 iser_dto_buffs_release(send_dto);
467 kmem_cache_free(ig.desc_cache, tx_desc);
468 iser_err("conn %p failed err %d\n",conn, err);
472 int iser_send_control(struct iscsi_conn *conn,
473 struct iscsi_mgmt_task *mtask)
475 struct iscsi_iser_conn *iser_conn = conn->dd_data;
476 struct iser_desc *mdesc = mtask->dd_data;
477 struct iser_dto *send_dto = NULL;
478 unsigned long data_seg_len;
480 struct iser_regd_buf *regd_buf;
481 struct iser_device *device;
483 if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
484 iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
488 if (iser_check_xmit(conn,mtask))
491 /* build the tx desc regd header and add it to the tx desc dto */
492 mdesc->type = ISCSI_TX_CONTROL;
493 send_dto = &mdesc->dto;
494 send_dto->ctask = NULL;
495 iser_create_send_desc(iser_conn, mdesc);
497 device = iser_conn->ib_conn->device;
499 iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
501 data_seg_len = ntoh24(mtask->hdr->dlength);
503 if (data_seg_len > 0) {
504 regd_buf = &mdesc->data_regd_buf;
505 memset(regd_buf, 0, sizeof(struct iser_regd_buf));
506 regd_buf->device = device;
507 regd_buf->virt_addr = mtask->data;
508 regd_buf->data_size = mtask->data_count;
509 iser_reg_single(device, regd_buf,
511 iser_dto_add_regd_buff(send_dto, regd_buf,
516 if (iser_post_receive_control(conn) != 0) {
517 iser_err("post_rcv_buff failed!\n");
519 goto send_control_error;
522 err = iser_post_send(mdesc);
527 iser_dto_buffs_release(send_dto);
528 iser_err("conn %p failed err %d\n",conn, err);
533 * iser_rcv_dto_completion - recv DTO completion
535 void iser_rcv_completion(struct iser_desc *rx_desc,
536 unsigned long dto_xfer_len)
538 struct iser_dto *dto = &rx_desc->dto;
539 struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
540 struct iscsi_session *session = conn->iscsi_conn->session;
541 struct iscsi_cmd_task *ctask;
542 struct iscsi_iser_cmd_task *iser_ctask;
543 struct iscsi_hdr *hdr;
544 char *rx_data = NULL;
547 unsigned char opcode;
549 hdr = &rx_desc->iscsi_header;
551 iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt);
553 if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
554 rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
555 rx_data = dto->regd[1]->virt_addr;
556 rx_data += dto->offset[1];
559 opcode = hdr->opcode & ISCSI_OPCODE_MASK;
561 if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
562 itt = get_itt(hdr->itt); /* mask out cid and age bits */
563 if (!(itt < session->cmds_max))
564 iser_err("itt can't be matched to task!!! "
565 "conn %p opcode %d cmds_max %d itt %d\n",
566 conn->iscsi_conn,opcode,session->cmds_max,itt);
567 /* use the mapping given with the cmds array indexed by itt */
568 ctask = (struct iscsi_cmd_task *)session->cmds[itt];
569 iser_ctask = ctask->dd_data;
570 iser_dbg("itt %d ctask %p\n",itt,ctask);
571 iser_ctask->status = ISER_TASK_STATUS_COMPLETED;
572 iser_ctask_rdma_finalize(iser_ctask);
575 iser_dto_buffs_release(dto);
577 iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);
579 kfree(rx_desc->data);
580 kmem_cache_free(ig.desc_cache, rx_desc);
582 /* decrementing conn->post_recv_buf_count only --after-- freeing the *
583 * task eliminates the need to worry on tasks which are completed in *
584 * parallel to the execution of iser_conn_term. So the code that waits *
585 * for the posted rx bufs refcount to become zero handles everything */
586 atomic_dec(&conn->ib_conn->post_recv_buf_count);
589 void iser_snd_completion(struct iser_desc *tx_desc)
591 struct iser_dto *dto = &tx_desc->dto;
592 struct iser_conn *ib_conn = dto->ib_conn;
593 struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
594 struct iscsi_conn *conn = iser_conn->iscsi_conn;
595 struct iscsi_mgmt_task *mtask;
598 iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
600 iser_dto_buffs_release(dto);
602 if (tx_desc->type == ISCSI_TX_DATAOUT)
603 kmem_cache_free(ig.desc_cache, tx_desc);
605 if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
606 ISER_QP_MAX_REQ_DTOS)
609 atomic_dec(&ib_conn->post_send_buf_count);
612 iser_dbg("%ld resuming tx\n",jiffies);
613 scsi_queue_work(conn->session->host, &conn->xmitwork);
616 if (tx_desc->type == ISCSI_TX_CONTROL) {
617 /* this arithmetic is legal by libiscsi dd_data allocation */
618 mtask = (void *) ((long)(void *)tx_desc -
619 sizeof(struct iscsi_mgmt_task));
620 if (mtask->hdr->itt == RESERVED_ITT) {
621 struct iscsi_session *session = conn->session;
623 spin_lock(&conn->session->lock);
624 iscsi_free_mgmt_task(conn, mtask);
625 spin_unlock(&session->lock);
630 void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask)
633 iser_ctask->status = ISER_TASK_STATUS_INIT;
635 iser_ctask->dir[ISER_DIR_IN] = 0;
636 iser_ctask->dir[ISER_DIR_OUT] = 0;
638 iser_ctask->data[ISER_DIR_IN].data_len = 0;
639 iser_ctask->data[ISER_DIR_OUT].data_len = 0;
641 memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0,
642 sizeof(struct iser_regd_buf));
643 memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0,
644 sizeof(struct iser_regd_buf));
647 void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
650 int is_rdma_aligned = 1;
651 struct iser_regd_buf *regd;
653 /* if we were reading, copy back to unaligned sglist,
654 * anyway dma_unmap and free the copy
656 if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) {
658 iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN);
660 if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
662 iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT);
665 if (iser_ctask->dir[ISER_DIR_IN]) {
666 regd = &iser_ctask->rdma_regd[ISER_DIR_IN];
667 deferred = iser_regd_buff_release(regd);
669 iser_err("%d references remain for BUF-IN rdma reg\n",
670 atomic_read(®d->ref_count));
674 if (iser_ctask->dir[ISER_DIR_OUT]) {
675 regd = &iser_ctask->rdma_regd[ISER_DIR_OUT];
676 deferred = iser_regd_buff_release(regd);
678 iser_err("%d references remain for BUF-OUT rdma reg\n",
679 atomic_read(®d->ref_count));
683 /* if the data was unaligned, it was already unmapped and then copied */
685 iser_dma_unmap_task_data(iser_ctask);
688 void iser_dto_buffs_release(struct iser_dto *dto)
692 for (i = 0; i < dto->regd_vector_len; i++)
693 iser_regd_buff_release(dto->regd[i]);