[PATCH] IB/ipath: fix shared receive queues for RC
[linux-2.6] / drivers / infiniband / hw / ipath / ipath_uc.c
1 /*
2  * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33
34 #include "ipath_verbs.h"
35 #include "ips_common.h"
36
37 /* cut down ridiculously long IB macro names */
38 #define OP(x) IB_OPCODE_UC_##x
39
40 static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
41                                struct ib_wc *wc)
42 {
43         if (++qp->s_last == qp->s_size)
44                 qp->s_last = 0;
45         if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
46             (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
47                 wc->wr_id = wqe->wr.wr_id;
48                 wc->status = IB_WC_SUCCESS;
49                 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
50                 wc->vendor_err = 0;
51                 wc->byte_len = wqe->length;
52                 wc->qp_num = qp->ibqp.qp_num;
53                 wc->src_qp = qp->remote_qpn;
54                 wc->pkey_index = 0;
55                 wc->slid = qp->remote_ah_attr.dlid;
56                 wc->sl = qp->remote_ah_attr.sl;
57                 wc->dlid_path_bits = 0;
58                 wc->port_num = 0;
59                 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
60         }
61         wqe = get_swqe_ptr(qp, qp->s_last);
62 }
63
64 /**
65  * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
66  * @qp: a pointer to the QP
67  * @ohdr: a pointer to the IB header being constructed
68  * @pmtu: the path MTU
69  * @bth0p: pointer to the BTH opcode word
70  * @bth2p: pointer to the BTH PSN word
71  *
72  * Return 1 if constructed; otherwise, return 0.
73  * Note the QP s_lock must be held and interrupts disabled.
74  */
75 int ipath_make_uc_req(struct ipath_qp *qp,
76                       struct ipath_other_headers *ohdr,
77                       u32 pmtu, u32 *bth0p, u32 *bth2p)
78 {
79         struct ipath_swqe *wqe;
80         u32 hwords;
81         u32 bth0;
82         u32 len;
83         struct ib_wc wc;
84
85         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
86                 goto done;
87
88         /* header size in 32-bit words LRH+BTH = (8+12)/4. */
89         hwords = 5;
90         bth0 = 0;
91
92         /* Get the next send request. */
93         wqe = get_swqe_ptr(qp, qp->s_last);
94         switch (qp->s_state) {
95         default:
96                 /*
97                  * Signal the completion of the last send
98                  * (if there is one).
99                  */
100                 if (qp->s_last != qp->s_tail)
101                         complete_last_send(qp, wqe, &wc);
102
103                 /* Check if send work queue is empty. */
104                 if (qp->s_tail == qp->s_head)
105                         goto done;
106                 /*
107                  * Start a new request.
108                  */
109                 qp->s_psn = wqe->psn = qp->s_next_psn;
110                 qp->s_sge.sge = wqe->sg_list[0];
111                 qp->s_sge.sg_list = wqe->sg_list + 1;
112                 qp->s_sge.num_sge = wqe->wr.num_sge;
113                 qp->s_len = len = wqe->length;
114                 switch (wqe->wr.opcode) {
115                 case IB_WR_SEND:
116                 case IB_WR_SEND_WITH_IMM:
117                         if (len > pmtu) {
118                                 qp->s_state = OP(SEND_FIRST);
119                                 len = pmtu;
120                                 break;
121                         }
122                         if (wqe->wr.opcode == IB_WR_SEND)
123                                 qp->s_state = OP(SEND_ONLY);
124                         else {
125                                 qp->s_state =
126                                         OP(SEND_ONLY_WITH_IMMEDIATE);
127                                 /* Immediate data comes after the BTH */
128                                 ohdr->u.imm_data = wqe->wr.imm_data;
129                                 hwords += 1;
130                         }
131                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
132                                 bth0 |= 1 << 23;
133                         break;
134
135                 case IB_WR_RDMA_WRITE:
136                 case IB_WR_RDMA_WRITE_WITH_IMM:
137                         ohdr->u.rc.reth.vaddr =
138                                 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
139                         ohdr->u.rc.reth.rkey =
140                                 cpu_to_be32(wqe->wr.wr.rdma.rkey);
141                         ohdr->u.rc.reth.length = cpu_to_be32(len);
142                         hwords += sizeof(struct ib_reth) / 4;
143                         if (len > pmtu) {
144                                 qp->s_state = OP(RDMA_WRITE_FIRST);
145                                 len = pmtu;
146                                 break;
147                         }
148                         if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
149                                 qp->s_state = OP(RDMA_WRITE_ONLY);
150                         else {
151                                 qp->s_state =
152                                         OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
153                                 /* Immediate data comes after the RETH */
154                                 ohdr->u.rc.imm_data = wqe->wr.imm_data;
155                                 hwords += 1;
156                                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
157                                         bth0 |= 1 << 23;
158                         }
159                         break;
160
161                 default:
162                         goto done;
163                 }
164                 if (++qp->s_tail >= qp->s_size)
165                         qp->s_tail = 0;
166                 break;
167
168         case OP(SEND_FIRST):
169                 qp->s_state = OP(SEND_MIDDLE);
170                 /* FALLTHROUGH */
171         case OP(SEND_MIDDLE):
172                 len = qp->s_len;
173                 if (len > pmtu) {
174                         len = pmtu;
175                         break;
176                 }
177                 if (wqe->wr.opcode == IB_WR_SEND)
178                         qp->s_state = OP(SEND_LAST);
179                 else {
180                         qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
181                         /* Immediate data comes after the BTH */
182                         ohdr->u.imm_data = wqe->wr.imm_data;
183                         hwords += 1;
184                 }
185                 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
186                         bth0 |= 1 << 23;
187                 break;
188
189         case OP(RDMA_WRITE_FIRST):
190                 qp->s_state = OP(RDMA_WRITE_MIDDLE);
191                 /* FALLTHROUGH */
192         case OP(RDMA_WRITE_MIDDLE):
193                 len = qp->s_len;
194                 if (len > pmtu) {
195                         len = pmtu;
196                         break;
197                 }
198                 if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
199                         qp->s_state = OP(RDMA_WRITE_LAST);
200                 else {
201                         qp->s_state =
202                                 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
203                         /* Immediate data comes after the BTH */
204                         ohdr->u.imm_data = wqe->wr.imm_data;
205                         hwords += 1;
206                         if (wqe->wr.send_flags & IB_SEND_SOLICITED)
207                                 bth0 |= 1 << 23;
208                 }
209                 break;
210         }
211         qp->s_len -= len;
212         qp->s_hdrwords = hwords;
213         qp->s_cur_sge = &qp->s_sge;
214         qp->s_cur_size = len;
215         *bth0p = bth0 | (qp->s_state << 24);
216         *bth2p = qp->s_next_psn++ & IPS_PSN_MASK;
217         return 1;
218
219 done:
220         return 0;
221 }
222
/**
 * ipath_uc_rcv - handle an incoming UC packet
 * @dev: the device the packet came in on
 * @hdr: the header of the packet
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the length of the packet
 * @qp: the QP for this packet.
 *
 * This is called from ipath_qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 *
 * UC provides no acknowledgements, so any PSN or opcode-sequence error
 * simply drops the current message and resynchronizes on the next
 * message boundary (see the "inv" label below).
 */
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	int opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	unsigned long flags;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ib_reth *reth;
	int header_in_data;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the
		 * core driver sets the eager header buffer
		 * size to 56 bytes so the last 4 bytes of
		 * the BTH header (PSN) is in the data buffer.
		 */
		header_in_data =
			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}
	/*
	 * The opcode is in the low byte when its in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* All receive-side QP state below is protected by r_rq.lock. */
	spin_lock_irqsave(&qp->r_rq.lock, flags);

	/* Compare the PSN verses the expected PSN. */
	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
	inv:
		/*
		 * Resynchronize: reset r_state so the sequence checks
		 * below treat the stream as being between messages, then
		 * accept only a packet that starts a new message.
		 */
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			dev->n_pkt_drops++;
			goto done;
		}
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		/* A SEND in progress may only continue or finish. */
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		/* An RDMA write in progress may only continue or finish. */
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		/* Between messages: only a message-starting opcode is ok. */
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
	send_first:
		/*
		 * If the RWQE consumed by a previous, aborted message was
		 * never completed, reuse it instead of taking a new one.
		 */
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
			qp->r_sge = qp->s_rdma_sge;
		} else if (!ipath_get_rwqe(qp, 0)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Save the WQE so we can reuse it in case of an error. */
		qp->s_rdma_sge = qp->r_sge;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/* Non-last packets must carry exactly one PMTU of payload. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* XXX Need to free SGEs */
	last_imm:
		/* Copy the final payload and post a receive completion. */
		ipath_copy_sge(&qp->r_sge, data, tlen);
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp_num = qp->ibqp.qp_num;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
	rdma_first:
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
					   vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok)) {
				dev->n_pkt_drops++;
				goto done;
			}
		} else {
			/* Zero-length write: clear the SGE state. */
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		/* The QP itself must permit remote writes. */
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (opcode == OP(RDMA_WRITE_ONLY))
			goto rdma_last;
		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			goto rdma_last_imm;
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
	rdma_last_imm:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/*
		 * RDMA write with immediate consumes an RWQE (for the
		 * immediate-data completion); reuse a saved one if present.
		 */
		if (qp->r_reuse_sge)
			qp->r_reuse_sge = 0;
		else if (!ipath_get_rwqe(qp, 1)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.byte_len = 0;
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
	rdma_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, tlen);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		/* Unlock here since we jump past the "done" unlock below. */
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		dev->n_pkt_drops++;
		goto bail;
	}
	/* Packet accepted: advance the expected PSN and opcode state. */
	qp->r_psn++;
	qp->r_state = opcode;
done:
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);

bail:
	return;
}