/*
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "ipath_verbs.h"
#include "ips_common.h"
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
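/*
 * For example, OP(SEND_FIRST) pastes to IB_OPCODE_UC_SEND_FIRST,
 * which keeps the opcode switches below readable.
 */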
static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
			       struct ib_wc *wc)
{
	if (++qp->s_last == qp->s_size)
		qp->s_last = 0;
	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_SUCCESS;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = wqe->length;
		wc->qp_num = qp->ibqp.qp_num;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
	}
}
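/*
 * Note: the completion above is suppressed when the QP was created
 * with "signal only requested WRs" (IPATH_S_SIGNAL_REQ_WR, presumably
 * mirroring sq_sig_type == IB_SIGNAL_REQ_WR) and the WR did not set
 * IB_SEND_SIGNALED; otherwise every completed send queues a CQ entry.
 */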
/**
 * ipath_do_uc_send - do a send on a UC queue
 * @data: contains a pointer to the QP to send on
 *
 * Process entries in the send work queue until the queue is exhausted.
 * Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, after we drop the QP lock, two threads could send
 * packets out of order.
 * This is similar to ipath_do_rc_send() except we don't have
 * timeouts or resends.
 */
void ipath_do_uc_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_swqe *wqe;
	unsigned long flags;
	u16 lrh0;
	u32 hwords;
	u32 nwords;
	u32 extra_bytes;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	u32 len;
	struct ipath_other_headers *ohdr;
	struct ib_wc wc;

	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
		goto bail;

	if (unlikely(qp->remote_ah_attr.dlid ==
		     ipath_layer_get_lid(dev->dd))) {
		/* Pass in an uninitialized ib_wc to save stack space. */
		ipath_ruc_loopback(qp, &wc);
		clear_bit(IPATH_S_BUSY, &qp->s_flags);
		goto bail;
	}

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;
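	/*
	 * With a GRH the BTH starts 40 bytes further into the header
	 * (LRH 8 bytes + GRH 40 bytes), hence the other union member.
	 */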
again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/*
		 * If no PIO bufs are available, return.
		 * An interrupt will call ipath_ib_piobufavail()
		 * when one is available.
		 */
		if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
				     (u32 *) &qp->s_hdr,
				     qp->s_cur_size,
				     qp->s_cur_sge)) {
			ipath_no_bufs_available(qp, dev);
			goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	lrh0 = IPS_LRH_BTH;
	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
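	/*
	 * hwords counts 32-bit header words built so far; it grows
	 * below when immediate data (+1 word), a RETH (+4 words) or
	 * a GRH (+10 words) is added to the packet.
	 */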
	/*
	 * The lock is needed to synchronize between
	 * setting qp->s_ack_state and post_send().
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
		goto done;

	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_last);
	switch (qp->s_state) {
	default:
		/*
		 * Signal the completion of the last send (if there is
		 * one).
		 */
		if (qp->s_last != qp->s_tail) {
			complete_last_send(qp, wqe, &wc);
			/* s_last was advanced; re-fetch the current WQE. */
			wqe = get_swqe_ptr(qp, qp->s_last);
		}

		/* Check if send work queue is empty. */
		if (qp->s_tail == qp->s_head)
			goto done;
		/*
		 * Start a new request.
		 */
		qp->s_psn = wqe->psn = qp->s_next_psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = len = wqe->length;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			break;
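		/*
		 * RDMA writes carry a RETH (64-bit remote VA, rkey and
		 * byte length) immediately after the BTH; it is 16
		 * bytes, i.e. the four header words added below.
		 */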
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			break;

		default:
			goto done;
		}
		if (++qp->s_tail >= qp->s_size)
			qp->s_tail = 0;
		break;

	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		break;
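		/*
		 * 1 << 23 is the Solicited Event bit of the first BTH
		 * word; the receiver uses it to decide whether to
		 * generate a completion event (see ipath_uc_rcv()).
		 */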
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		break;
	}
	bth2 = qp->s_next_psn++ & IPS_PSN_MASK;
	qp->s_len -= len;
	bth0 |= qp->s_state << 24;

	spin_unlock_irqrestore(&qp->s_lock, flags);
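	/*
	 * PSNs are 24 bits wide, hence IPS_PSN_MASK above; unlike RC,
	 * UC never resends, so s_next_psn simply increments.
	 */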
	/* Construct the header. */
	extra_bytes = (4 - len) & 3;
	nwords = (len + extra_bytes) >> 2;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		/* Header size in 32-bit words. */
		hwords += 10;
		lrh0 = IPS_LRH_GRH;
		qp->s_hdr.u.l.grh.version_tclass_flow =
			cpu_to_be32((6 << 28) |
				    (qp->remote_ah_attr.grh.traffic_class
				     << 20) |
				    qp->remote_ah_attr.grh.flow_label);
		qp->s_hdr.u.l.grh.paylen =
			cpu_to_be16(((hwords - 12) + nwords +
				     SIZE_OF_CRC) << 2);
		/* next_hdr is defined by C8-7 in ch. 8.4.1 */
		qp->s_hdr.u.l.grh.next_hdr = 0x1B;
		qp->s_hdr.u.l.grh.hop_limit =
			qp->remote_ah_attr.grh.hop_limit;
		/* The SGID is 32-bit aligned. */
		qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
			dev->gid_prefix;
		qp->s_hdr.u.l.grh.sgid.global.interface_id =
			ipath_layer_get_guid(dev->dd);
		qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid;
	}
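	/*
	 * The GRH mirrors the IPv6 header layout (version 6, traffic
	 * class, flow label), which is why version_tclass_flow starts
	 * with 6 << 28 above.
	 */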
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = &qp->s_sge;
	qp->s_cur_size = len;
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);
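	/*
	 * First BTH word layout for reference: opcode in bits 31:24,
	 * Solicited Event bit 23, pad count in bits 21:20 and the
	 * P_Key in the low 16 bits, matching how bth0 is built above.
	 */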
	/* Check for more work to do. */
	goto again;

done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	clear_bit(IPATH_S_BUSY, &qp->s_flags);

bail:
	return;
}
/**
 * ipath_uc_rcv - handle an incoming UC packet
 * @dev: the device the packet came in on
 * @hdr: the header of the packet
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the length of the packet
 * @qp: the QP for this packet.
 *
 * This is called from ipath_qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	int opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	unsigned long flags;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ib_reth *reth;
	int header_in_data;
	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the
		 * core driver sets the eager header buffer
		 * size to 56 bytes so the last 4 bytes of
		 * the BTH header (PSN) are in the data buffer.
		 */
		header_in_data =
			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}
	/*
	 * The opcode is in the low byte when it's in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	wc.imm_data = 0;
	wc.wc_flags = 0;

	spin_lock_irqsave(&qp->r_rq.lock, flags);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
	inv:
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			dev->n_pkt_drops++;
			goto done;
		}
	}
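	/*
	 * Unlike RC, UC has no ACK/NAK protocol, so a PSN mismatch
	 * cannot trigger a retransmit request; the current message is
	 * simply dropped and reception resyncs on the next FIRST or
	 * ONLY opcode above.
	 */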
	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}
	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
	send_first:
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
			qp->r_sge = qp->s_rdma_sge;
		} else if (!ipath_get_rwqe(qp, 0)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Save the WQE so we can reuse it in case of an error. */
		qp->s_rdma_sge = qp->r_sge;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;
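		/*
		 * The "+ 4" in the length checks here and below
		 * accounts for the 4-byte ICRC trailing every packet;
		 * MIDDLE packets must carry exactly one PMTU of
		 * payload.
		 */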
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
			hdrsize += 4;
		}
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* XXX Need to free SGEs */
	last_imm:
		ipath_copy_sge(&qp->r_sge, data, tlen);
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp_num = qp->ibqp.qp_num;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;
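		/*
		 * The third argument to ipath_cq_enter() above is
		 * nonzero when the sender set the BTH Solicited Event
		 * bit, so a CQ armed for solicited-only notification
		 * still generates an event for this packet.
		 */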
	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
	rdma_first:
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);

			/* Check rkey */
			if (unlikely(!ipath_rkey_ok(
					     dev, &qp->r_sge, qp->r_len,
					     vaddr, rkey,
					     IB_ACCESS_REMOTE_WRITE))) {
				dev->n_pkt_drops++;
				goto done;
			}
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (opcode == OP(RDMA_WRITE_ONLY))
			goto rdma_last;
		else if (opcode ==
			 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			goto rdma_last_imm;
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;
	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
	rdma_last_imm:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
		} else if (!ipath_get_rwqe(qp, 1)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
			hdrsize += 4;
		}
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.byte_len = 0;
		goto last_imm;
	case OP(RDMA_WRITE_LAST):
	rdma_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, tlen);
		break;
	default:
		/* Drop packet for unknown opcodes. */
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		dev->n_pkt_drops++;
		goto bail;
	}
	qp->r_psn++;
	qp->r_state = opcode;
done:
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);

bail:
	return;
}