Btrfs: update space balancing code
[linux-2.6] / drivers / infiniband / hw / amso1100 / c2_qp.c
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Cisco Systems. All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
6  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37
38 #include <linux/delay.h>
39
40 #include "c2.h"
41 #include "c2_vq.h"
42 #include "c2_status.h"
43
/* ORD/IRD values requested from the adapter when a QP is created. */
#define C2_MAX_ORD_PER_QP 128
#define C2_MAX_IRD_PER_QP 128

/*
 * Adapter "hint" word helpers.
 *
 * As encoded by the masks below, bits 16..30 of a hint carry the MQ index
 * and bits 0..15 carry the count.  Every macro argument is parenthesized
 * so that callers may pass arbitrary expressions (for instance a
 * conditional) without operator-precedence surprises.
 */
#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
50
51 #define NO_SUPPORT -1
52 static const u8 c2_opcode[] = {
53         [IB_WR_SEND] = C2_WR_TYPE_SEND,
54         [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
55         [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
56         [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
57         [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
58         [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
59         [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
60 };
61
62 static int to_c2_state(enum ib_qp_state ib_state)
63 {
64         switch (ib_state) {
65         case IB_QPS_RESET:
66                 return C2_QP_STATE_IDLE;
67         case IB_QPS_RTS:
68                 return C2_QP_STATE_RTS;
69         case IB_QPS_SQD:
70                 return C2_QP_STATE_CLOSING;
71         case IB_QPS_SQE:
72                 return C2_QP_STATE_CLOSING;
73         case IB_QPS_ERR:
74                 return C2_QP_STATE_ERROR;
75         default:
76                 return -1;
77         }
78 }
79
80 static int to_ib_state(enum c2_qp_state c2_state)
81 {
82         switch (c2_state) {
83         case C2_QP_STATE_IDLE:
84                 return IB_QPS_RESET;
85         case C2_QP_STATE_CONNECTING:
86                 return IB_QPS_RTR;
87         case C2_QP_STATE_RTS:
88                 return IB_QPS_RTS;
89         case C2_QP_STATE_CLOSING:
90                 return IB_QPS_SQD;
91         case C2_QP_STATE_ERROR:
92                 return IB_QPS_ERR;
93         case C2_QP_STATE_TERMINATE:
94                 return IB_QPS_SQE;
95         default:
96                 return -1;
97         }
98 }
99
100 static const char *to_ib_state_str(int ib_state)
101 {
102         static const char *state_str[] = {
103                 "IB_QPS_RESET",
104                 "IB_QPS_INIT",
105                 "IB_QPS_RTR",
106                 "IB_QPS_RTS",
107                 "IB_QPS_SQD",
108                 "IB_QPS_SQE",
109                 "IB_QPS_ERR"
110         };
111         if (ib_state < IB_QPS_RESET ||
112             ib_state > IB_QPS_ERR)
113                 return "<invalid IB QP state>";
114
115         ib_state -= IB_QPS_RESET;
116         return state_str[ib_state];
117 }
118
119 void c2_set_qp_state(struct c2_qp *qp, int c2_state)
120 {
121         int new_state = to_ib_state(c2_state);
122
123         pr_debug("%s: qp[%p] state modify %s --> %s\n",
124                __func__,
125                 qp,
126                 to_ib_state_str(qp->state),
127                 to_ib_state_str(new_state));
128         qp->state = new_state;
129 }
130
131 #define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
132
133 int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
134                  struct ib_qp_attr *attr, int attr_mask)
135 {
136         struct c2wr_qp_modify_req wr;
137         struct c2wr_qp_modify_rep *reply;
138         struct c2_vq_req *vq_req;
139         unsigned long flags;
140         u8 next_state;
141         int err;
142
143         pr_debug("%s:%d qp=%p, %s --> %s\n",
144                 __func__, __LINE__,
145                 qp,
146                 to_ib_state_str(qp->state),
147                 to_ib_state_str(attr->qp_state));
148
149         vq_req = vq_req_alloc(c2dev);
150         if (!vq_req)
151                 return -ENOMEM;
152
153         c2_wr_set_id(&wr, CCWR_QP_MODIFY);
154         wr.hdr.context = (unsigned long) vq_req;
155         wr.rnic_handle = c2dev->adapter_handle;
156         wr.qp_handle = qp->adapter_handle;
157         wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
158         wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
159         wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
160         wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
161
162         if (attr_mask & IB_QP_STATE) {
163                 /* Ensure the state is valid */
164                 if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR) {
165                         err = -EINVAL;
166                         goto bail0;
167                 }
168
169                 wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
170
171                 if (attr->qp_state == IB_QPS_ERR) {
172                         spin_lock_irqsave(&qp->lock, flags);
173                         if (qp->cm_id && qp->state == IB_QPS_RTS) {
174                                 pr_debug("Generating CLOSE event for QP-->ERR, "
175                                         "qp=%p, cm_id=%p\n",qp,qp->cm_id);
176                                 /* Generate an CLOSE event */
177                                 vq_req->cm_id = qp->cm_id;
178                                 vq_req->event = IW_CM_EVENT_CLOSE;
179                         }
180                         spin_unlock_irqrestore(&qp->lock, flags);
181                 }
182                 next_state =  attr->qp_state;
183
184         } else if (attr_mask & IB_QP_CUR_STATE) {
185
186                 if (attr->cur_qp_state != IB_QPS_RTR &&
187                     attr->cur_qp_state != IB_QPS_RTS &&
188                     attr->cur_qp_state != IB_QPS_SQD &&
189                     attr->cur_qp_state != IB_QPS_SQE) {
190                         err = -EINVAL;
191                         goto bail0;
192                 } else
193                         wr.next_qp_state =
194                             cpu_to_be32(to_c2_state(attr->cur_qp_state));
195
196                 next_state = attr->cur_qp_state;
197
198         } else {
199                 err = 0;
200                 goto bail0;
201         }
202
203         /* reference the request struct */
204         vq_req_get(c2dev, vq_req);
205
206         err = vq_send_wr(c2dev, (union c2wr *) & wr);
207         if (err) {
208                 vq_req_put(c2dev, vq_req);
209                 goto bail0;
210         }
211
212         err = vq_wait_for_reply(c2dev, vq_req);
213         if (err)
214                 goto bail0;
215
216         reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
217         if (!reply) {
218                 err = -ENOMEM;
219                 goto bail0;
220         }
221
222         err = c2_errno(reply);
223         if (!err)
224                 qp->state = next_state;
225 #ifdef DEBUG
226         else
227                 pr_debug("%s: c2_errno=%d\n", __func__, err);
228 #endif
229         /*
230          * If we're going to error and generating the event here, then
231          * we need to remove the reference because there will be no
232          * close event generated by the adapter
233         */
234         spin_lock_irqsave(&qp->lock, flags);
235         if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
236                 qp->cm_id->rem_ref(qp->cm_id);
237                 qp->cm_id = NULL;
238         }
239         spin_unlock_irqrestore(&qp->lock, flags);
240
241         vq_repbuf_free(c2dev, reply);
242       bail0:
243         vq_req_free(c2dev, vq_req);
244
245         pr_debug("%s:%d qp=%p, cur_state=%s\n",
246                 __func__, __LINE__,
247                 qp,
248                 to_ib_state_str(qp->state));
249         return err;
250 }
251
252 int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
253                           int ord, int ird)
254 {
255         struct c2wr_qp_modify_req wr;
256         struct c2wr_qp_modify_rep *reply;
257         struct c2_vq_req *vq_req;
258         int err;
259
260         vq_req = vq_req_alloc(c2dev);
261         if (!vq_req)
262                 return -ENOMEM;
263
264         c2_wr_set_id(&wr, CCWR_QP_MODIFY);
265         wr.hdr.context = (unsigned long) vq_req;
266         wr.rnic_handle = c2dev->adapter_handle;
267         wr.qp_handle = qp->adapter_handle;
268         wr.ord = cpu_to_be32(ord);
269         wr.ird = cpu_to_be32(ird);
270         wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
271         wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
272         wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
273
274         /* reference the request struct */
275         vq_req_get(c2dev, vq_req);
276
277         err = vq_send_wr(c2dev, (union c2wr *) & wr);
278         if (err) {
279                 vq_req_put(c2dev, vq_req);
280                 goto bail0;
281         }
282
283         err = vq_wait_for_reply(c2dev, vq_req);
284         if (err)
285                 goto bail0;
286
287         reply = (struct c2wr_qp_modify_rep *) (unsigned long)
288                 vq_req->reply_msg;
289         if (!reply) {
290                 err = -ENOMEM;
291                 goto bail0;
292         }
293
294         err = c2_errno(reply);
295         vq_repbuf_free(c2dev, reply);
296       bail0:
297         vq_req_free(c2dev, vq_req);
298         return err;
299 }
300
301 static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
302 {
303         struct c2_vq_req *vq_req;
304         struct c2wr_qp_destroy_req wr;
305         struct c2wr_qp_destroy_rep *reply;
306         unsigned long flags;
307         int err;
308
309         /*
310          * Allocate a verb request message
311          */
312         vq_req = vq_req_alloc(c2dev);
313         if (!vq_req) {
314                 return -ENOMEM;
315         }
316
317         /*
318          * Initialize the WR
319          */
320         c2_wr_set_id(&wr, CCWR_QP_DESTROY);
321         wr.hdr.context = (unsigned long) vq_req;
322         wr.rnic_handle = c2dev->adapter_handle;
323         wr.qp_handle = qp->adapter_handle;
324
325         /*
326          * reference the request struct.  dereferenced in the int handler.
327          */
328         vq_req_get(c2dev, vq_req);
329
330         spin_lock_irqsave(&qp->lock, flags);
331         if (qp->cm_id && qp->state == IB_QPS_RTS) {
332                 pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
333                         "qp=%p, cm_id=%p\n",qp,qp->cm_id);
334                 /* Generate an CLOSE event */
335                 vq_req->qp = qp;
336                 vq_req->cm_id = qp->cm_id;
337                 vq_req->event = IW_CM_EVENT_CLOSE;
338         }
339         spin_unlock_irqrestore(&qp->lock, flags);
340
341         /*
342          * Send WR to adapter
343          */
344         err = vq_send_wr(c2dev, (union c2wr *) & wr);
345         if (err) {
346                 vq_req_put(c2dev, vq_req);
347                 goto bail0;
348         }
349
350         /*
351          * Wait for reply from adapter
352          */
353         err = vq_wait_for_reply(c2dev, vq_req);
354         if (err) {
355                 goto bail0;
356         }
357
358         /*
359          * Process reply
360          */
361         reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
362         if (!reply) {
363                 err = -ENOMEM;
364                 goto bail0;
365         }
366
367         spin_lock_irqsave(&qp->lock, flags);
368         if (qp->cm_id) {
369                 qp->cm_id->rem_ref(qp->cm_id);
370                 qp->cm_id = NULL;
371         }
372         spin_unlock_irqrestore(&qp->lock, flags);
373
374         vq_repbuf_free(c2dev, reply);
375       bail0:
376         vq_req_free(c2dev, vq_req);
377         return err;
378 }
379
380 static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
381 {
382         int ret;
383
384         do {
385                 spin_lock_irq(&c2dev->qp_table.lock);
386                 ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
387                                         c2dev->qp_table.last++, &qp->qpn);
388                 spin_unlock_irq(&c2dev->qp_table.lock);
389         } while ((ret == -EAGAIN) &&
390                  idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
391         return ret;
392 }
393
394 static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
395 {
396         spin_lock_irq(&c2dev->qp_table.lock);
397         idr_remove(&c2dev->qp_table.idr, qpn);
398         spin_unlock_irq(&c2dev->qp_table.lock);
399 }
400
401 struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
402 {
403         unsigned long flags;
404         struct c2_qp *qp;
405
406         spin_lock_irqsave(&c2dev->qp_table.lock, flags);
407         qp = idr_find(&c2dev->qp_table.idr, qpn);
408         spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
409         return qp;
410 }
411
412 int c2_alloc_qp(struct c2_dev *c2dev,
413                 struct c2_pd *pd,
414                 struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
415 {
416         struct c2wr_qp_create_req wr;
417         struct c2wr_qp_create_rep *reply;
418         struct c2_vq_req *vq_req;
419         struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
420         struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
421         unsigned long peer_pa;
422         u32 q_size, msg_size, mmap_size;
423         void __iomem *mmap;
424         int err;
425
426         err = c2_alloc_qpn(c2dev, qp);
427         if (err)
428                 return err;
429         qp->ibqp.qp_num = qp->qpn;
430         qp->ibqp.qp_type = IB_QPT_RC;
431
432         /* Allocate the SQ and RQ shared pointers */
433         qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
434                                          &qp->sq_mq.shared_dma, GFP_KERNEL);
435         if (!qp->sq_mq.shared) {
436                 err = -ENOMEM;
437                 goto bail0;
438         }
439
440         qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
441                                          &qp->rq_mq.shared_dma, GFP_KERNEL);
442         if (!qp->rq_mq.shared) {
443                 err = -ENOMEM;
444                 goto bail1;
445         }
446
447         /* Allocate the verbs request */
448         vq_req = vq_req_alloc(c2dev);
449         if (vq_req == NULL) {
450                 err = -ENOMEM;
451                 goto bail2;
452         }
453
454         /* Initialize the work request */
455         memset(&wr, 0, sizeof(wr));
456         c2_wr_set_id(&wr, CCWR_QP_CREATE);
457         wr.hdr.context = (unsigned long) vq_req;
458         wr.rnic_handle = c2dev->adapter_handle;
459         wr.sq_cq_handle = send_cq->adapter_handle;
460         wr.rq_cq_handle = recv_cq->adapter_handle;
461         wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
462         wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
463         wr.srq_handle = 0;
464         wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
465                                QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
466         wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
467         wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
468         wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
469         wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
470         wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
471         wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
472         wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
473         wr.pd_id = pd->pd_id;
474         wr.user_context = (unsigned long) qp;
475
476         vq_req_get(c2dev, vq_req);
477
478         /* Send the WR to the adapter */
479         err = vq_send_wr(c2dev, (union c2wr *) & wr);
480         if (err) {
481                 vq_req_put(c2dev, vq_req);
482                 goto bail3;
483         }
484
485         /* Wait for the verb reply  */
486         err = vq_wait_for_reply(c2dev, vq_req);
487         if (err) {
488                 goto bail3;
489         }
490
491         /* Process the reply */
492         reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
493         if (!reply) {
494                 err = -ENOMEM;
495                 goto bail3;
496         }
497
498         if ((err = c2_wr_get_result(reply)) != 0) {
499                 goto bail4;
500         }
501
502         /* Fill in the kernel QP struct */
503         atomic_set(&qp->refcount, 1);
504         qp->adapter_handle = reply->qp_handle;
505         qp->state = IB_QPS_RESET;
506         qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
507         qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
508         qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
509         init_waitqueue_head(&qp->wait);
510
511         /* Initialize the SQ MQ */
512         q_size = be32_to_cpu(reply->sq_depth);
513         msg_size = be32_to_cpu(reply->sq_msg_size);
514         peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
515         mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
516         mmap = ioremap_nocache(peer_pa, mmap_size);
517         if (!mmap) {
518                 err = -ENOMEM;
519                 goto bail5;
520         }
521
522         c2_mq_req_init(&qp->sq_mq,
523                        be32_to_cpu(reply->sq_mq_index),
524                        q_size,
525                        msg_size,
526                        mmap + sizeof(struct c2_mq_shared),      /* pool start */
527                        mmap,                            /* peer */
528                        C2_MQ_ADAPTER_TARGET);
529
530         /* Initialize the RQ mq */
531         q_size = be32_to_cpu(reply->rq_depth);
532         msg_size = be32_to_cpu(reply->rq_msg_size);
533         peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
534         mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
535         mmap = ioremap_nocache(peer_pa, mmap_size);
536         if (!mmap) {
537                 err = -ENOMEM;
538                 goto bail6;
539         }
540
541         c2_mq_req_init(&qp->rq_mq,
542                        be32_to_cpu(reply->rq_mq_index),
543                        q_size,
544                        msg_size,
545                        mmap + sizeof(struct c2_mq_shared),      /* pool start */
546                        mmap,                            /* peer */
547                        C2_MQ_ADAPTER_TARGET);
548
549         vq_repbuf_free(c2dev, reply);
550         vq_req_free(c2dev, vq_req);
551
552         return 0;
553
554       bail6:
555         iounmap(qp->sq_mq.peer);
556       bail5:
557         destroy_qp(c2dev, qp);
558       bail4:
559         vq_repbuf_free(c2dev, reply);
560       bail3:
561         vq_req_free(c2dev, vq_req);
562       bail2:
563         c2_free_mqsp(qp->rq_mq.shared);
564       bail1:
565         c2_free_mqsp(qp->sq_mq.shared);
566       bail0:
567         c2_free_qpn(c2dev, qp->qpn);
568         return err;
569 }
570
571 static inline void c2_lock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
572 {
573         if (send_cq == recv_cq)
574                 spin_lock_irq(&send_cq->lock);
575         else if (send_cq > recv_cq) {
576                 spin_lock_irq(&send_cq->lock);
577                 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
578         } else {
579                 spin_lock_irq(&recv_cq->lock);
580                 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
581         }
582 }
583
584 static inline void c2_unlock_cqs(struct c2_cq *send_cq, struct c2_cq *recv_cq)
585 {
586         if (send_cq == recv_cq)
587                 spin_unlock_irq(&send_cq->lock);
588         else if (send_cq > recv_cq) {
589                 spin_unlock(&recv_cq->lock);
590                 spin_unlock_irq(&send_cq->lock);
591         } else {
592                 spin_unlock(&send_cq->lock);
593                 spin_unlock_irq(&recv_cq->lock);
594         }
595 }
596
597 void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
598 {
599         struct c2_cq *send_cq;
600         struct c2_cq *recv_cq;
601
602         send_cq = to_c2cq(qp->ibqp.send_cq);
603         recv_cq = to_c2cq(qp->ibqp.recv_cq);
604
605         /*
606          * Lock CQs here, so that CQ polling code can do QP lookup
607          * without taking a lock.
608          */
609         c2_lock_cqs(send_cq, recv_cq);
610         c2_free_qpn(c2dev, qp->qpn);
611         c2_unlock_cqs(send_cq, recv_cq);
612
613         /*
614          * Destory qp in the rnic...
615          */
616         destroy_qp(c2dev, qp);
617
618         /*
619          * Mark any unreaped CQEs as null and void.
620          */
621         c2_cq_clean(c2dev, qp, send_cq->cqn);
622         if (send_cq != recv_cq)
623                 c2_cq_clean(c2dev, qp, recv_cq->cqn);
624         /*
625          * Unmap the MQs and return the shared pointers
626          * to the message pool.
627          */
628         iounmap(qp->sq_mq.peer);
629         iounmap(qp->rq_mq.peer);
630         c2_free_mqsp(qp->sq_mq.shared);
631         c2_free_mqsp(qp->rq_mq.shared);
632
633         atomic_dec(&qp->refcount);
634         wait_event(qp->wait, !atomic_read(&qp->refcount));
635 }
636
637 /*
638  * Function: move_sgl
639  *
640  * Description:
641  * Move an SGL from the user's work request struct into a CCIL Work Request
642  * message, swapping to WR byte order and ensure the total length doesn't
643  * overflow.
644  *
645  * IN:
646  * dst          - ptr to CCIL Work Request message SGL memory.
647  * src          - ptr to the consumers SGL memory.
648  *
649  * OUT: none
650  *
651  * Return:
652  * CCIL status codes.
653  */
654 static int
655 move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
656          u8 * actual_count)
657 {
658         u32 tot = 0;            /* running total */
659         u8 acount = 0;          /* running total non-0 len sge's */
660
661         while (count > 0) {
662                 /*
663                  * If the addition of this SGE causes the
664                  * total SGL length to exceed 2^32-1, then
665                  * fail-n-bail.
666                  *
667                  * If the current total plus the next element length
668                  * wraps, then it will go negative and be less than the
669                  * current total...
670                  */
671                 if ((tot + src->length) < tot) {
672                         return -EINVAL;
673                 }
674                 /*
675                  * Bug: 1456 (as well as 1498 & 1643)
676                  * Skip over any sge's supplied with len=0
677                  */
678                 if (src->length) {
679                         tot += src->length;
680                         dst->stag = cpu_to_be32(src->lkey);
681                         dst->to = cpu_to_be64(src->addr);
682                         dst->length = cpu_to_be32(src->length);
683                         dst++;
684                         acount++;
685                 }
686                 src++;
687                 count--;
688         }
689
690         if (acount == 0) {
691                 /*
692                  * Bug: 1476 (as well as 1498, 1456 and 1643)
693                  * Setup the SGL in the WR to make it easier for the RNIC.
694                  * This way, the FW doesn't have to deal with special cases.
695                  * Setting length=0 should be sufficient.
696                  */
697                 dst->stag = 0;
698                 dst->to = 0;
699                 dst->length = 0;
700         }
701
702         *p_len = tot;
703         *actual_count = acount;
704         return 0;
705 }
706
707 /*
708  * Function: c2_activity (private function)
709  *
710  * Description:
711  * Post an mq index to the host->adapter activity fifo.
712  *
713  * IN:
714  * c2dev        - ptr to c2dev structure
715  * mq_index     - mq index to post
716  * shared       - value most recently written to shared
717  *
718  * OUT:
719  *
720  * Return:
721  * none
722  */
723 static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
724 {
725         /*
726          * First read the register to see if the FIFO is full, and if so,
727          * spin until it's not.  This isn't perfect -- there is no
728          * synchronization among the clients of the register, but in
729          * practice it prevents multiple CPU from hammering the bus
730          * with PCI RETRY. Note that when this does happen, the card
731          * cannot get on the bus and the card and system hang in a
732          * deadlock -- thus the need for this code. [TOT]
733          */
734         while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000)
735                 udelay(10);
736
737         __raw_writel(C2_HINT_MAKE(mq_index, shared),
738                      c2dev->regs + PCI_BAR0_ADAPTER_HINT);
739 }
740
741 /*
742  * Function: qp_wr_post
743  *
744  * Description:
745  * This in-line function allocates a MQ msg, then moves the host-copy of
746  * the completed WR into msg.  Then it posts the message.
747  *
748  * IN:
749  * q            - ptr to user MQ.
750  * wr           - ptr to host-copy of the WR.
751  * qp           - ptr to user qp
752  * size         - Number of bytes to post.  Assumed to be divisible by 4.
753  *
754  * OUT: none
755  *
756  * Return:
757  * CCIL status codes.
758  */
759 static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
760 {
761         union c2wr *msg;
762
763         msg = c2_mq_alloc(q);
764         if (msg == NULL) {
765                 return -EINVAL;
766         }
767 #ifdef CCMSGMAGIC
768         ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
769 #endif
770
771         /*
772          * Since all header fields in the WR are the same as the
773          * CQE, set the following so the adapter need not.
774          */
775         c2_wr_set_result(wr, CCERR_PENDING);
776
777         /*
778          * Copy the wr down to the adapter
779          */
780         memcpy((void *) msg, (void *) wr, size);
781
782         c2_mq_produce(q);
783         return 0;
784 }
785
786
787 int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
788                  struct ib_send_wr **bad_wr)
789 {
790         struct c2_dev *c2dev = to_c2dev(ibqp->device);
791         struct c2_qp *qp = to_c2qp(ibqp);
792         union c2wr wr;
793         unsigned long lock_flags;
794         int err = 0;
795
796         u32 flags;
797         u32 tot_len;
798         u8 actual_sge_count;
799         u32 msg_size;
800
801         if (qp->state > IB_QPS_RTS)
802                 return -EINVAL;
803
804         while (ib_wr) {
805
806                 flags = 0;
807                 wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
808                 if (ib_wr->send_flags & IB_SEND_SIGNALED) {
809                         flags |= SQ_SIGNALED;
810                 }
811
812                 switch (ib_wr->opcode) {
813                 case IB_WR_SEND:
814                 case IB_WR_SEND_WITH_INV:
815                         if (ib_wr->opcode == IB_WR_SEND) {
816                                 if (ib_wr->send_flags & IB_SEND_SOLICITED)
817                                         c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
818                                 else
819                                         c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
820                                 wr.sqwr.send.remote_stag = 0;
821                         } else {
822                                 if (ib_wr->send_flags & IB_SEND_SOLICITED)
823                                         c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE_INV);
824                                 else
825                                         c2_wr_set_id(&wr, C2_WR_TYPE_SEND_INV);
826                                 wr.sqwr.send.remote_stag =
827                                         cpu_to_be32(ib_wr->ex.invalidate_rkey);
828                         }
829
830                         msg_size = sizeof(struct c2wr_send_req) +
831                                 sizeof(struct c2_data_addr) * ib_wr->num_sge;
832                         if (ib_wr->num_sge > qp->send_sgl_depth) {
833                                 err = -EINVAL;
834                                 break;
835                         }
836                         if (ib_wr->send_flags & IB_SEND_FENCE) {
837                                 flags |= SQ_READ_FENCE;
838                         }
839                         err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
840                                        ib_wr->sg_list,
841                                        ib_wr->num_sge,
842                                        &tot_len, &actual_sge_count);
843                         wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
844                         c2_wr_set_sge_count(&wr, actual_sge_count);
845                         break;
846                 case IB_WR_RDMA_WRITE:
847                         c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
848                         msg_size = sizeof(struct c2wr_rdma_write_req) +
849                             (sizeof(struct c2_data_addr) * ib_wr->num_sge);
850                         if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
851                                 err = -EINVAL;
852                                 break;
853                         }
854                         if (ib_wr->send_flags & IB_SEND_FENCE) {
855                                 flags |= SQ_READ_FENCE;
856                         }
857                         wr.sqwr.rdma_write.remote_stag =
858                             cpu_to_be32(ib_wr->wr.rdma.rkey);
859                         wr.sqwr.rdma_write.remote_to =
860                             cpu_to_be64(ib_wr->wr.rdma.remote_addr);
861                         err = move_sgl((struct c2_data_addr *)
862                                        & (wr.sqwr.rdma_write.data),
863                                        ib_wr->sg_list,
864                                        ib_wr->num_sge,
865                                        &tot_len, &actual_sge_count);
866                         wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
867                         c2_wr_set_sge_count(&wr, actual_sge_count);
868                         break;
869                 case IB_WR_RDMA_READ:
870                         c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
871                         msg_size = sizeof(struct c2wr_rdma_read_req);
872
873                         /* IWarp only suppots 1 sge for RDMA reads */
874                         if (ib_wr->num_sge > 1) {
875                                 err = -EINVAL;
876                                 break;
877                         }
878
879                         /*
880                          * Move the local and remote stag/to/len into the WR.
881                          */
882                         wr.sqwr.rdma_read.local_stag =
883                             cpu_to_be32(ib_wr->sg_list->lkey);
884                         wr.sqwr.rdma_read.local_to =
885                             cpu_to_be64(ib_wr->sg_list->addr);
886                         wr.sqwr.rdma_read.remote_stag =
887                             cpu_to_be32(ib_wr->wr.rdma.rkey);
888                         wr.sqwr.rdma_read.remote_to =
889                             cpu_to_be64(ib_wr->wr.rdma.remote_addr);
890                         wr.sqwr.rdma_read.length =
891                             cpu_to_be32(ib_wr->sg_list->length);
892                         break;
893                 default:
894                         /* error */
895                         msg_size = 0;
896                         err = -EINVAL;
897                         break;
898                 }
899
900                 /*
901                  * If we had an error on the last wr build, then
902                  * break out.  Possible errors include bogus WR
903                  * type, and a bogus SGL length...
904                  */
905                 if (err) {
906                         break;
907                 }
908
909                 /*
910                  * Store flags
911                  */
912                 c2_wr_set_flags(&wr, flags);
913
914                 /*
915                  * Post the puppy!
916                  */
917                 spin_lock_irqsave(&qp->lock, lock_flags);
918                 err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
919                 if (err) {
920                         spin_unlock_irqrestore(&qp->lock, lock_flags);
921                         break;
922                 }
923
924                 /*
925                  * Enqueue mq index to activity FIFO.
926                  */
927                 c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
928                 spin_unlock_irqrestore(&qp->lock, lock_flags);
929
930                 ib_wr = ib_wr->next;
931         }
932
933         if (err)
934                 *bad_wr = ib_wr;
935         return err;
936 }
937
938 int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
939                     struct ib_recv_wr **bad_wr)
940 {
941         struct c2_dev *c2dev = to_c2dev(ibqp->device);
942         struct c2_qp *qp = to_c2qp(ibqp);
943         union c2wr wr;
944         unsigned long lock_flags;
945         int err = 0;
946
947         if (qp->state > IB_QPS_RTS)
948                 return -EINVAL;
949
950         /*
951          * Try and post each work request
952          */
953         while (ib_wr) {
954                 u32 tot_len;
955                 u8 actual_sge_count;
956
957                 if (ib_wr->num_sge > qp->recv_sgl_depth) {
958                         err = -EINVAL;
959                         break;
960                 }
961
962                 /*
963                  * Create local host-copy of the WR
964                  */
965                 wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
966                 c2_wr_set_id(&wr, CCWR_RECV);
967                 c2_wr_set_flags(&wr, 0);
968
969                 /* sge_count is limited to eight bits. */
970                 BUG_ON(ib_wr->num_sge >= 256);
971                 err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
972                                ib_wr->sg_list,
973                                ib_wr->num_sge, &tot_len, &actual_sge_count);
974                 c2_wr_set_sge_count(&wr, actual_sge_count);
975
976                 /*
977                  * If we had an error on the last wr build, then
978                  * break out.  Possible errors include bogus WR
979                  * type, and a bogus SGL length...
980                  */
981                 if (err) {
982                         break;
983                 }
984
985                 spin_lock_irqsave(&qp->lock, lock_flags);
986                 err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
987                 if (err) {
988                         spin_unlock_irqrestore(&qp->lock, lock_flags);
989                         break;
990                 }
991
992                 /*
993                  * Enqueue mq index to activity FIFO
994                  */
995                 c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
996                 spin_unlock_irqrestore(&qp->lock, lock_flags);
997
998                 ib_wr = ib_wr->next;
999         }
1000
1001         if (err)
1002                 *bad_wr = ib_wr;
1003         return err;
1004 }
1005
1006 void __devinit c2_init_qp_table(struct c2_dev *c2dev)
1007 {
1008         spin_lock_init(&c2dev->qp_table.lock);
1009         idr_init(&c2dev->qp_table.idr);
1010 }
1011
1012 void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
1013 {
1014         idr_destroy(&c2dev->qp_table.idr);
1015 }