Merge branch 'master'
[linux-2.6] / drivers / infiniband / hw / ipath / ipath_verbs.c
1 /*
2  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <rdma/ib_mad.h>
34 #include <rdma/ib_user_verbs.h>
35 #include <linux/utsname.h>
36
37 #include "ipath_kernel.h"
38 #include "ipath_verbs.h"
39 #include "ips_common.h"
40
41 /* Not static, because we don't want the compiler removing it */
42 const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
43
44 static unsigned int ib_ipath_qp_table_size = 251;
45 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
46 MODULE_PARM_DESC(qp_table_size, "QP table size");
47
48 unsigned int ib_ipath_lkey_table_size = 12;
49 module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
50                    S_IRUGO);
51 MODULE_PARM_DESC(lkey_table_size,
52                  "LKEY table size in bits (2^n, 1 <= n <= 23)");
53
54 unsigned int ib_ipath_debug;    /* debug mask */
55 module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
56 MODULE_PARM_DESC(debug, "Verbs debug mask");
57
58 MODULE_LICENSE("GPL");
59 MODULE_AUTHOR("PathScale <support@pathscale.com>");
60 MODULE_DESCRIPTION("Pathscale InfiniPath driver");
61
62 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
63         [IB_QPS_RESET] = 0,
64         [IB_QPS_INIT] = IPATH_POST_RECV_OK,
65         [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
66         [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
67             IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
68         [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
69             IPATH_POST_SEND_OK,
70         [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
71         [IB_QPS_ERR] = 0,
72 };
73
74 /*
75  * Translate ib_wr_opcode into ib_wc_opcode.
76  */
77 const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
78         [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
79         [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
80         [IB_WR_SEND] = IB_WC_SEND,
81         [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
82         [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
83         [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
84         [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
85 };
86
87 /*
88  * System image GUID.
89  */
90 static __be64 sys_image_guid;
91
92 /**
93  * ipath_copy_sge - copy data to SGE memory
94  * @ss: the SGE state
95  * @data: the data to copy
96  * @length: the length of the data
97  */
98 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
99 {
100         struct ipath_sge *sge = &ss->sge;
101
102         while (length) {
103                 u32 len = sge->length;
104
105                 BUG_ON(len == 0);
106                 if (len > length)
107                         len = length;
108                 memcpy(sge->vaddr, data, len);
109                 sge->vaddr += len;
110                 sge->length -= len;
111                 sge->sge_length -= len;
112                 if (sge->sge_length == 0) {
113                         if (--ss->num_sge)
114                                 *sge = *ss->sg_list++;
115                 } else if (sge->length == 0 && sge->mr != NULL) {
116                         if (++sge->n >= IPATH_SEGSZ) {
117                                 if (++sge->m >= sge->mr->mapsz)
118                                         break;
119                                 sge->n = 0;
120                         }
121                         sge->vaddr =
122                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
123                         sge->length =
124                                 sge->mr->map[sge->m]->segs[sge->n].length;
125                 }
126                 data += len;
127                 length -= len;
128         }
129 }
130
131 /**
132  * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
133  * @ss: the SGE state
134  * @length: the number of bytes to skip
135  */
136 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
137 {
138         struct ipath_sge *sge = &ss->sge;
139
140         while (length > sge->sge_length) {
141                 length -= sge->sge_length;
142                 ss->sge = *ss->sg_list++;
143         }
144         while (length) {
145                 u32 len = sge->length;
146
147                 BUG_ON(len == 0);
148                 if (len > length)
149                         len = length;
150                 sge->vaddr += len;
151                 sge->length -= len;
152                 sge->sge_length -= len;
153                 if (sge->sge_length == 0) {
154                         if (--ss->num_sge)
155                                 *sge = *ss->sg_list++;
156                 } else if (sge->length == 0 && sge->mr != NULL) {
157                         if (++sge->n >= IPATH_SEGSZ) {
158                                 if (++sge->m >= sge->mr->mapsz)
159                                         break;
160                                 sge->n = 0;
161                         }
162                         sge->vaddr =
163                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
164                         sge->length =
165                                 sge->mr->map[sge->m]->segs[sge->n].length;
166                 }
167                 length -= len;
168         }
169 }
170
171 /**
172  * ipath_post_send - post a send on a QP
173  * @ibqp: the QP to post the send on
174  * @wr: the list of work requests to post
175  * @bad_wr: the first bad WR is put here
176  *
177  * This may be called from interrupt context.
178  */
179 static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
180                            struct ib_send_wr **bad_wr)
181 {
182         struct ipath_qp *qp = to_iqp(ibqp);
183         int err = 0;
184
185         /* Check that state is OK to post send. */
186         if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
187                 *bad_wr = wr;
188                 err = -EINVAL;
189                 goto bail;
190         }
191
192         for (; wr; wr = wr->next) {
193                 switch (qp->ibqp.qp_type) {
194                 case IB_QPT_UC:
195                 case IB_QPT_RC:
196                         err = ipath_post_rc_send(qp, wr);
197                         break;
198
199                 case IB_QPT_SMI:
200                 case IB_QPT_GSI:
201                 case IB_QPT_UD:
202                         err = ipath_post_ud_send(qp, wr);
203                         break;
204
205                 default:
206                         err = -EINVAL;
207                 }
208                 if (err) {
209                         *bad_wr = wr;
210                         break;
211                 }
212         }
213
214 bail:
215         return err;
216 }
217
218 /**
219  * ipath_post_receive - post a receive on a QP
220  * @ibqp: the QP to post the receive on
221  * @wr: the WR to post
222  * @bad_wr: the first bad WR is put here
223  *
224  * This may be called from interrupt context.
225  */
226 static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
227                               struct ib_recv_wr **bad_wr)
228 {
229         struct ipath_qp *qp = to_iqp(ibqp);
230         unsigned long flags;
231         int ret;
232
233         /* Check that state is OK to post receive. */
234         if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
235                 *bad_wr = wr;
236                 ret = -EINVAL;
237                 goto bail;
238         }
239
240         for (; wr; wr = wr->next) {
241                 struct ipath_rwqe *wqe;
242                 u32 next;
243                 int i, j;
244
245                 if (wr->num_sge > qp->r_rq.max_sge) {
246                         *bad_wr = wr;
247                         ret = -ENOMEM;
248                         goto bail;
249                 }
250
251                 spin_lock_irqsave(&qp->r_rq.lock, flags);
252                 next = qp->r_rq.head + 1;
253                 if (next >= qp->r_rq.size)
254                         next = 0;
255                 if (next == qp->r_rq.tail) {
256                         spin_unlock_irqrestore(&qp->r_rq.lock, flags);
257                         *bad_wr = wr;
258                         ret = -ENOMEM;
259                         goto bail;
260                 }
261
262                 wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
263                 wqe->wr_id = wr->wr_id;
264                 wqe->sg_list[0].mr = NULL;
265                 wqe->sg_list[0].vaddr = NULL;
266                 wqe->sg_list[0].length = 0;
267                 wqe->sg_list[0].sge_length = 0;
268                 wqe->length = 0;
269                 for (i = 0, j = 0; i < wr->num_sge; i++) {
270                         /* Check LKEY */
271                         if (to_ipd(qp->ibqp.pd)->user &&
272                             wr->sg_list[i].lkey == 0) {
273                                 spin_unlock_irqrestore(&qp->r_rq.lock,
274                                                        flags);
275                                 *bad_wr = wr;
276                                 ret = -EINVAL;
277                                 goto bail;
278                         }
279                         if (wr->sg_list[i].length == 0)
280                                 continue;
281                         if (!ipath_lkey_ok(
282                                     &to_idev(qp->ibqp.device)->lk_table,
283                                     &wqe->sg_list[j], &wr->sg_list[i],
284                                     IB_ACCESS_LOCAL_WRITE)) {
285                                 spin_unlock_irqrestore(&qp->r_rq.lock,
286                                                        flags);
287                                 *bad_wr = wr;
288                                 ret = -EINVAL;
289                                 goto bail;
290                         }
291                         wqe->length += wr->sg_list[i].length;
292                         j++;
293                 }
294                 wqe->num_sge = j;
295                 qp->r_rq.head = next;
296                 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
297         }
298         ret = 0;
299
300 bail:
301         return ret;
302 }
303
304 /**
305  * ipath_qp_rcv - processing an incoming packet on a QP
306  * @dev: the device the packet came on
307  * @hdr: the packet header
308  * @has_grh: true if the packet has a GRH
309  * @data: the packet data
310  * @tlen: the packet length
311  * @qp: the QP the packet came on
312  *
313  * This is called from ipath_ib_rcv() to process an incoming packet
314  * for the given QP.
315  * Called at interrupt level.
316  */
317 static void ipath_qp_rcv(struct ipath_ibdev *dev,
318                          struct ipath_ib_header *hdr, int has_grh,
319                          void *data, u32 tlen, struct ipath_qp *qp)
320 {
321         /* Check for valid receive state. */
322         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
323                 dev->n_pkt_drops++;
324                 return;
325         }
326
327         switch (qp->ibqp.qp_type) {
328         case IB_QPT_SMI:
329         case IB_QPT_GSI:
330         case IB_QPT_UD:
331                 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
332                 break;
333
334         case IB_QPT_RC:
335                 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
336                 break;
337
338         case IB_QPT_UC:
339                 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
340                 break;
341
342         default:
343                 break;
344         }
345 }
346
347 /**
348  * ipath_ib_rcv - process and incoming packet
349  * @arg: the device pointer
350  * @rhdr: the header of the packet
351  * @data: the packet data
352  * @tlen: the packet length
353  *
354  * This is called from ipath_kreceive() to process an incoming packet at
355  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
356  */
357 static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
358 {
359         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
360         struct ipath_ib_header *hdr = rhdr;
361         struct ipath_other_headers *ohdr;
362         struct ipath_qp *qp;
363         u32 qp_num;
364         int lnh;
365         u8 opcode;
366         u16 lid;
367
368         if (unlikely(dev == NULL))
369                 goto bail;
370
371         if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
372                 dev->rcv_errors++;
373                 goto bail;
374         }
375
376         /* Check for a valid destination LID (see ch. 7.11.1). */
377         lid = be16_to_cpu(hdr->lrh[1]);
378         if (lid < IPS_MULTICAST_LID_BASE) {
379                 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
380                 if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
381                         dev->rcv_errors++;
382                         goto bail;
383                 }
384         }
385
386         /* Check for GRH */
387         lnh = be16_to_cpu(hdr->lrh[0]) & 3;
388         if (lnh == IPS_LRH_BTH)
389                 ohdr = &hdr->u.oth;
390         else if (lnh == IPS_LRH_GRH)
391                 ohdr = &hdr->u.l.oth;
392         else {
393                 dev->rcv_errors++;
394                 goto bail;
395         }
396
397         opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
398         dev->opstats[opcode].n_bytes += tlen;
399         dev->opstats[opcode].n_packets++;
400
401         /* Get the destination QP number. */
402         qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK;
403         if (qp_num == IPS_MULTICAST_QPN) {
404                 struct ipath_mcast *mcast;
405                 struct ipath_mcast_qp *p;
406
407                 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
408                 if (mcast == NULL) {
409                         dev->n_pkt_drops++;
410                         goto bail;
411                 }
412                 dev->n_multicast_rcv++;
413                 list_for_each_entry_rcu(p, &mcast->qp_list, list)
414                         ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
415                                      tlen, p->qp);
416                 /*
417                  * Notify ipath_multicast_detach() if it is waiting for us
418                  * to finish.
419                  */
420                 if (atomic_dec_return(&mcast->refcount) <= 1)
421                         wake_up(&mcast->wait);
422         } else {
423                 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
424                 if (qp) {
425                         dev->n_unicast_rcv++;
426                         ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
427                                      tlen, qp);
428                         /*
429                          * Notify ipath_destroy_qp() if it is waiting
430                          * for us to finish.
431                          */
432                         if (atomic_dec_and_test(&qp->refcount))
433                                 wake_up(&qp->wait);
434                 } else
435                         dev->n_pkt_drops++;
436         }
437
438 bail:;
439 }
440
441 /**
442  * ipath_ib_timer - verbs timer
443  * @arg: the device pointer
444  *
445  * This is called from ipath_do_rcv_timer() at interrupt level to check for
446  * QPs which need retransmits and to collect performance numbers.
447  */
448 static void ipath_ib_timer(void *arg)
449 {
450         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
451         struct ipath_qp *resend = NULL;
452         struct list_head *last;
453         struct ipath_qp *qp;
454         unsigned long flags;
455
456         if (dev == NULL)
457                 return;
458
459         spin_lock_irqsave(&dev->pending_lock, flags);
460         /* Start filling the next pending queue. */
461         if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
462                 dev->pending_index = 0;
463         /* Save any requests still in the new queue, they have timed out. */
464         last = &dev->pending[dev->pending_index];
465         while (!list_empty(last)) {
466                 qp = list_entry(last->next, struct ipath_qp, timerwait);
467                 list_del_init(&qp->timerwait);
468                 qp->timer_next = resend;
469                 resend = qp;
470                 atomic_inc(&qp->refcount);
471         }
472         last = &dev->rnrwait;
473         if (!list_empty(last)) {
474                 qp = list_entry(last->next, struct ipath_qp, timerwait);
475                 if (--qp->s_rnr_timeout == 0) {
476                         do {
477                                 list_del_init(&qp->timerwait);
478                                 tasklet_hi_schedule(&qp->s_task);
479                                 if (list_empty(last))
480                                         break;
481                                 qp = list_entry(last->next, struct ipath_qp,
482                                                 timerwait);
483                         } while (qp->s_rnr_timeout == 0);
484                 }
485         }
486         /*
487          * We should only be in the started state if pma_sample_start != 0
488          */
489         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
490             --dev->pma_sample_start == 0) {
491                 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
492                 ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
493                                               &dev->ipath_rword,
494                                               &dev->ipath_spkts,
495                                               &dev->ipath_rpkts,
496                                               &dev->ipath_xmit_wait);
497         }
498         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
499                 if (dev->pma_sample_interval == 0) {
500                         u64 ta, tb, tc, td, te;
501
502                         dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
503                         ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
504                                                       &tc, &td, &te);
505
506                         dev->ipath_sword = ta - dev->ipath_sword;
507                         dev->ipath_rword = tb - dev->ipath_rword;
508                         dev->ipath_spkts = tc - dev->ipath_spkts;
509                         dev->ipath_rpkts = td - dev->ipath_rpkts;
510                         dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
511                 }
512                 else
513                         dev->pma_sample_interval--;
514         }
515         spin_unlock_irqrestore(&dev->pending_lock, flags);
516
517         /* XXX What if timer fires again while this is running? */
518         for (qp = resend; qp != NULL; qp = qp->timer_next) {
519                 struct ib_wc wc;
520
521                 spin_lock_irqsave(&qp->s_lock, flags);
522                 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
523                         dev->n_timeouts++;
524                         ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
525                 }
526                 spin_unlock_irqrestore(&qp->s_lock, flags);
527
528                 /* Notify ipath_destroy_qp() if it is waiting. */
529                 if (atomic_dec_and_test(&qp->refcount))
530                         wake_up(&qp->wait);
531         }
532 }
533
534 /**
535  * ipath_ib_piobufavail - callback when a PIO buffer is available
536  * @arg: the device pointer
537  *
538  * This is called from ipath_intr() at interrupt level when a PIO buffer is
539  * available after ipath_verbs_send() returned an error that no buffers were
540  * available.  Return 1 if we consumed all the PIO buffers and we still have
541  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
542  * return zero).
543  */
544 static int ipath_ib_piobufavail(void *arg)
545 {
546         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
547         struct ipath_qp *qp;
548         unsigned long flags;
549
550         if (dev == NULL)
551                 goto bail;
552
553         spin_lock_irqsave(&dev->pending_lock, flags);
554         while (!list_empty(&dev->piowait)) {
555                 qp = list_entry(dev->piowait.next, struct ipath_qp,
556                                 piowait);
557                 list_del_init(&qp->piowait);
558                 tasklet_hi_schedule(&qp->s_task);
559         }
560         spin_unlock_irqrestore(&dev->pending_lock, flags);
561
562 bail:
563         return 0;
564 }
565
566 static int ipath_query_device(struct ib_device *ibdev,
567                               struct ib_device_attr *props)
568 {
569         struct ipath_ibdev *dev = to_idev(ibdev);
570         u32 vendor, boardrev, majrev, minrev;
571
572         memset(props, 0, sizeof(*props));
573
574         props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
575                 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
576                 IB_DEVICE_SYS_IMAGE_GUID;
577         ipath_layer_query_device(dev->dd, &vendor, &boardrev,
578                                  &majrev, &minrev);
579         props->vendor_id = vendor;
580         props->vendor_part_id = boardrev;
581         props->hw_ver = boardrev << 16 | majrev << 8 | minrev;
582
583         props->sys_image_guid = dev->sys_image_guid;
584
585         props->max_mr_size = ~0ull;
586         props->max_qp = 0xffff;
587         props->max_qp_wr = 0xffff;
588         props->max_sge = 255;
589         props->max_cq = 0xffff;
590         props->max_cqe = 0xffff;
591         props->max_mr = 0xffff;
592         props->max_pd = 0xffff;
593         props->max_qp_rd_atom = 1;
594         props->max_qp_init_rd_atom = 1;
595         /* props->max_res_rd_atom */
596         props->max_srq = 0xffff;
597         props->max_srq_wr = 0xffff;
598         props->max_srq_sge = 255;
599         /* props->local_ca_ack_delay */
600         props->atomic_cap = IB_ATOMIC_HCA;
601         props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
602         props->max_mcast_grp = 0xffff;
603         props->max_mcast_qp_attach = 0xffff;
604         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
605                 props->max_mcast_grp;
606
607         return 0;
608 }
609
610 const u8 ipath_cvt_physportstate[16] = {
611         [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
612         [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
613         [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
614         [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
615         [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
616         [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
617         [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
618         [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
619         [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
620         [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
621         [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
622         [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
623         [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
624 };
625
626 static int ipath_query_port(struct ib_device *ibdev,
627                             u8 port, struct ib_port_attr *props)
628 {
629         struct ipath_ibdev *dev = to_idev(ibdev);
630         enum ib_mtu mtu;
631         u16 lid = ipath_layer_get_lid(dev->dd);
632         u64 ibcstat;
633
634         memset(props, 0, sizeof(*props));
635         props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
636         props->lmc = dev->mkeyprot_resv_lmc & 7;
637         props->sm_lid = dev->sm_lid;
638         props->sm_sl = dev->sm_sl;
639         ibcstat = ipath_layer_get_lastibcstat(dev->dd);
640         props->state = ((ibcstat >> 4) & 0x3) + 1;
641         /* See phys_state_show() */
642         props->phys_state = ipath_cvt_physportstate[
643                 ipath_layer_get_lastibcstat(dev->dd) & 0xf];
644         props->port_cap_flags = dev->port_cap_flags;
645         props->gid_tbl_len = 1;
646         props->max_msg_sz = 4096;
647         props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
648         props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
649                 dev->n_pkey_violations;
650         props->qkey_viol_cntr = dev->qkey_violations;
651         props->active_width = IB_WIDTH_4X;
652         /* See rate_show() */
653         props->active_speed = 1;        /* Regular 10Mbs speed. */
654         props->max_vl_num = 1;          /* VLCap = VL0 */
655         props->init_type_reply = 0;
656
657         props->max_mtu = IB_MTU_4096;
658         switch (ipath_layer_get_ibmtu(dev->dd)) {
659         case 4096:
660                 mtu = IB_MTU_4096;
661                 break;
662         case 2048:
663                 mtu = IB_MTU_2048;
664                 break;
665         case 1024:
666                 mtu = IB_MTU_1024;
667                 break;
668         case 512:
669                 mtu = IB_MTU_512;
670                 break;
671         case 256:
672                 mtu = IB_MTU_256;
673                 break;
674         default:
675                 mtu = IB_MTU_2048;
676         }
677         props->active_mtu = mtu;
678         props->subnet_timeout = dev->subnet_timeout;
679
680         return 0;
681 }
682
683 static int ipath_modify_device(struct ib_device *device,
684                                int device_modify_mask,
685                                struct ib_device_modify *device_modify)
686 {
687         int ret;
688
689         if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
690                                    IB_DEVICE_MODIFY_NODE_DESC)) {
691                 ret = -EOPNOTSUPP;
692                 goto bail;
693         }
694
695         if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
696                 memcpy(device->node_desc, device_modify->node_desc, 64);
697
698         if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
699                 to_idev(device)->sys_image_guid =
700                         cpu_to_be64(device_modify->sys_image_guid);
701
702         ret = 0;
703
704 bail:
705         return ret;
706 }
707
708 static int ipath_modify_port(struct ib_device *ibdev,
709                              u8 port, int port_modify_mask,
710                              struct ib_port_modify *props)
711 {
712         struct ipath_ibdev *dev = to_idev(ibdev);
713
714         dev->port_cap_flags |= props->set_port_cap_mask;
715         dev->port_cap_flags &= ~props->clr_port_cap_mask;
716         if (port_modify_mask & IB_PORT_SHUTDOWN)
717                 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
718         if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
719                 dev->qkey_violations = 0;
720         return 0;
721 }
722
723 static int ipath_query_gid(struct ib_device *ibdev, u8 port,
724                            int index, union ib_gid *gid)
725 {
726         struct ipath_ibdev *dev = to_idev(ibdev);
727         int ret;
728
729         if (index >= 1) {
730                 ret = -EINVAL;
731                 goto bail;
732         }
733         gid->global.subnet_prefix = dev->gid_prefix;
734         gid->global.interface_id = ipath_layer_get_guid(dev->dd);
735
736         ret = 0;
737
738 bail:
739         return ret;
740 }
741
742 static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
743                                     struct ib_ucontext *context,
744                                     struct ib_udata *udata)
745 {
746         struct ipath_pd *pd;
747         struct ib_pd *ret;
748
749         pd = kmalloc(sizeof *pd, GFP_KERNEL);
750         if (!pd) {
751                 ret = ERR_PTR(-ENOMEM);
752                 goto bail;
753         }
754
755         /* ib_alloc_pd() will initialize pd->ibpd. */
756         pd->user = udata != NULL;
757
758         ret = &pd->ibpd;
759
760 bail:
761         return ret;
762 }
763
/**
 * ipath_dealloc_pd - free a protection domain
 * @ibpd: the PD to free
 *
 * Frees the ipath_pd that contains @ibpd.  Always returns 0.
 */
static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	kfree(to_ipd(ibpd));
	return 0;
}
772
773 /**
774  * ipath_create_ah - create an address handle
775  * @pd: the protection domain
776  * @ah_attr: the attributes of the AH
777  *
778  * This may be called from interrupt context.
779  */
780 static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
781                                      struct ib_ah_attr *ah_attr)
782 {
783         struct ipath_ah *ah;
784         struct ib_ah *ret;
785
786         /* A multicast address requires a GRH (see ch. 8.4.1). */
787         if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
788             ah_attr->dlid != IPS_PERMISSIVE_LID &&
789             !(ah_attr->ah_flags & IB_AH_GRH)) {
790                 ret = ERR_PTR(-EINVAL);
791                 goto bail;
792         }
793
794         ah = kmalloc(sizeof *ah, GFP_ATOMIC);
795         if (!ah) {
796                 ret = ERR_PTR(-ENOMEM);
797                 goto bail;
798         }
799
800         /* ib_create_ah() will initialize ah->ibah. */
801         ah->attr = *ah_attr;
802
803         ret = &ah->ibah;
804
805 bail:
806         return ret;
807 }
808
/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * Frees the ipath_ah that contains @ibah.  Always returns 0.
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	kfree(to_iah(ibah));
	return 0;
}
823
824 static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
825 {
826         struct ipath_ah *ah = to_iah(ibah);
827
828         *ah_attr = ah->attr;
829
830         return 0;
831 }
832
833 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
834                             u16 *pkey)
835 {
836         struct ipath_ibdev *dev = to_idev(ibdev);
837         int ret;
838
839         if (index >= ipath_layer_get_npkeys(dev->dd)) {
840                 ret = -EINVAL;
841                 goto bail;
842         }
843
844         *pkey = ipath_layer_get_pkey(dev->dd, index);
845         ret = 0;
846
847 bail:
848         return ret;
849 }
850
851
852 /**
853  * ipath_alloc_ucontext - allocate a ucontest
854  * @ibdev: the infiniband device
855  * @udata: not used by the InfiniPath driver
856  */
857
858 static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
859                                                 struct ib_udata *udata)
860 {
861         struct ipath_ucontext *context;
862         struct ib_ucontext *ret;
863
864         context = kmalloc(sizeof *context, GFP_KERNEL);
865         if (!context) {
866                 ret = ERR_PTR(-ENOMEM);
867                 goto bail;
868         }
869
870         ret = &context->ibucontext;
871
872 bail:
873         return ret;
874 }
875
/**
 * ipath_dealloc_ucontext - free a ucontext
 * @context: the ib_ucontext whose enclosing object is released
 *
 * Always returns zero.
 */
static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
        struct ipath_ucontext *uctx = to_iucontext(context);

        kfree(uctx);

        return 0;
}
881
882 static int ipath_verbs_register_sysfs(struct ib_device *dev);
883
884 /**
885  * ipath_register_ib_device - register our device with the infiniband core
886  * @unit: the device number to register
887  * @dd: the device data structure
888  * Return the allocated ipath_ibdev pointer or NULL on error.
889  */
890 static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
891 {
892         struct ipath_ibdev *idev;
893         struct ib_device *dev;
894         int ret;
895
896         idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
897         if (idev == NULL)
898                 goto bail;
899
900         dev = &idev->ibdev;
901
902         /* Only need to initialize non-zero fields. */
903         spin_lock_init(&idev->qp_table.lock);
904         spin_lock_init(&idev->lk_table.lock);
905         idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
906         /* Set the prefix to the default value (see ch. 4.1.1) */
907         idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
908
909         ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
910         if (ret)
911                 goto err_qp;
912
913         /*
914          * The top ib_ipath_lkey_table_size bits are used to index the
915          * table.  The lower 8 bits can be owned by the user (copied from
916          * the LKEY).  The remaining bits act as a generation number or tag.
917          */
918         idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
919         idev->lk_table.table = kzalloc(idev->lk_table.max *
920                                        sizeof(*idev->lk_table.table),
921                                        GFP_KERNEL);
922         if (idev->lk_table.table == NULL) {
923                 ret = -ENOMEM;
924                 goto err_lk;
925         }
926         spin_lock_init(&idev->pending_lock);
927         INIT_LIST_HEAD(&idev->pending[0]);
928         INIT_LIST_HEAD(&idev->pending[1]);
929         INIT_LIST_HEAD(&idev->pending[2]);
930         INIT_LIST_HEAD(&idev->piowait);
931         INIT_LIST_HEAD(&idev->rnrwait);
932         idev->pending_index = 0;
933         idev->port_cap_flags =
934                 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
935         idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
936         idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
937         idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
938         idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
939         idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
940         idev->link_width_enabled = 3;   /* 1x or 4x */
941
942         /*
943          * The system image GUID is supposed to be the same for all
944          * IB HCAs in a single system but since there can be other
945          * device types in the system, we can't be sure this is unique.
946          */
947         if (!sys_image_guid)
948                 sys_image_guid = ipath_layer_get_guid(dd);
949         idev->sys_image_guid = sys_image_guid;
950         idev->ib_unit = unit;
951         idev->dd = dd;
952
953         strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
954         dev->owner = THIS_MODULE;
955         dev->node_guid = ipath_layer_get_guid(dd);
956         dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
957         dev->uverbs_cmd_mask =
958                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
959                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
960                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
961                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
962                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
963                 (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
964                 (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
965                 (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
966                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
967                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
968                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
969                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
970                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
971                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
972                 (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
973                 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
974                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
975                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
976                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
977                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
978                 (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
979                 (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
980                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
981                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
982                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
983                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
984                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
985                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
986                 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
987         dev->node_type = IB_NODE_CA;
988         dev->phys_port_cnt = 1;
989         dev->dma_device = ipath_layer_get_device(dd);
990         dev->class_dev.dev = dev->dma_device;
991         dev->query_device = ipath_query_device;
992         dev->modify_device = ipath_modify_device;
993         dev->query_port = ipath_query_port;
994         dev->modify_port = ipath_modify_port;
995         dev->query_pkey = ipath_query_pkey;
996         dev->query_gid = ipath_query_gid;
997         dev->alloc_ucontext = ipath_alloc_ucontext;
998         dev->dealloc_ucontext = ipath_dealloc_ucontext;
999         dev->alloc_pd = ipath_alloc_pd;
1000         dev->dealloc_pd = ipath_dealloc_pd;
1001         dev->create_ah = ipath_create_ah;
1002         dev->destroy_ah = ipath_destroy_ah;
1003         dev->query_ah = ipath_query_ah;
1004         dev->create_srq = ipath_create_srq;
1005         dev->modify_srq = ipath_modify_srq;
1006         dev->query_srq = ipath_query_srq;
1007         dev->destroy_srq = ipath_destroy_srq;
1008         dev->create_qp = ipath_create_qp;
1009         dev->modify_qp = ipath_modify_qp;
1010         dev->query_qp = ipath_query_qp;
1011         dev->destroy_qp = ipath_destroy_qp;
1012         dev->post_send = ipath_post_send;
1013         dev->post_recv = ipath_post_receive;
1014         dev->post_srq_recv = ipath_post_srq_receive;
1015         dev->create_cq = ipath_create_cq;
1016         dev->destroy_cq = ipath_destroy_cq;
1017         dev->resize_cq = ipath_resize_cq;
1018         dev->poll_cq = ipath_poll_cq;
1019         dev->req_notify_cq = ipath_req_notify_cq;
1020         dev->get_dma_mr = ipath_get_dma_mr;
1021         dev->reg_phys_mr = ipath_reg_phys_mr;
1022         dev->reg_user_mr = ipath_reg_user_mr;
1023         dev->dereg_mr = ipath_dereg_mr;
1024         dev->alloc_fmr = ipath_alloc_fmr;
1025         dev->map_phys_fmr = ipath_map_phys_fmr;
1026         dev->unmap_fmr = ipath_unmap_fmr;
1027         dev->dealloc_fmr = ipath_dealloc_fmr;
1028         dev->attach_mcast = ipath_multicast_attach;
1029         dev->detach_mcast = ipath_multicast_detach;
1030         dev->process_mad = ipath_process_mad;
1031
1032         snprintf(dev->node_desc, sizeof(dev->node_desc),
1033                  IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
1034
1035         ret = ib_register_device(dev);
1036         if (ret)
1037                 goto err_reg;
1038
1039         if (ipath_verbs_register_sysfs(dev))
1040                 goto err_class;
1041
1042         ipath_layer_enable_timer(dd);
1043
1044         goto bail;
1045
1046 err_class:
1047         ib_unregister_device(dev);
1048 err_reg:
1049         kfree(idev->lk_table.table);
1050 err_lk:
1051         kfree(idev->qp_table.table);
1052 err_qp:
1053         ib_dealloc_device(dev);
1054         _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
1055                      unit, -ret);
1056         idev = NULL;
1057
1058 bail:
1059         return idev;
1060 }
1061
1062 static void ipath_unregister_ib_device(void *arg)
1063 {
1064         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
1065         struct ib_device *ibdev = &dev->ibdev;
1066
1067         ipath_layer_disable_timer(dev->dd);
1068
1069         ib_unregister_device(ibdev);
1070
1071         if (!list_empty(&dev->pending[0]) ||
1072             !list_empty(&dev->pending[1]) ||
1073             !list_empty(&dev->pending[2]))
1074                 _VERBS_ERROR("ipath%d pending list not empty!\n",
1075                              dev->ib_unit);
1076         if (!list_empty(&dev->piowait))
1077                 _VERBS_ERROR("ipath%d piowait list not empty!\n",
1078                              dev->ib_unit);
1079         if (!list_empty(&dev->rnrwait))
1080                 _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
1081                              dev->ib_unit);
1082         if (!ipath_mcast_tree_empty())
1083                 _VERBS_ERROR("ipath%d multicast table memory leak!\n",
1084                              dev->ib_unit);
1085         /*
1086          * Note that ipath_unregister_ib_device() can be called before all
1087          * the QPs are destroyed!
1088          */
1089         ipath_free_all_qps(&dev->qp_table);
1090         kfree(dev->qp_table.table);
1091         kfree(dev->lk_table.table);
1092         ib_dealloc_device(ibdev);
1093 }
1094
1095 static int __init ipath_verbs_init(void)
1096 {
1097         return ipath_verbs_register(ipath_register_ib_device,
1098                                     ipath_unregister_ib_device,
1099                                     ipath_ib_piobufavail, ipath_ib_rcv,
1100                                     ipath_ib_timer);
1101 }
1102
1103 static void __exit ipath_verbs_cleanup(void)
1104 {
1105         ipath_verbs_unregister();
1106 }
1107
1108 static ssize_t show_rev(struct class_device *cdev, char *buf)
1109 {
1110         struct ipath_ibdev *dev =
1111                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1112         int vendor, boardrev, majrev, minrev;
1113
1114         ipath_layer_query_device(dev->dd, &vendor, &boardrev,
1115                                  &majrev, &minrev);
1116         return sprintf(buf, "%d.%d\n", majrev, minrev);
1117 }
1118
1119 static ssize_t show_hca(struct class_device *cdev, char *buf)
1120 {
1121         struct ipath_ibdev *dev =
1122                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1123         int ret;
1124
1125         ret = ipath_layer_get_boardname(dev->dd, buf, 128);
1126         if (ret < 0)
1127                 goto bail;
1128         strcat(buf, "\n");
1129         ret = strlen(buf);
1130
1131 bail:
1132         return ret;
1133 }
1134
1135 static ssize_t show_stats(struct class_device *cdev, char *buf)
1136 {
1137         struct ipath_ibdev *dev =
1138                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1139         int i;
1140         int len;
1141
1142         len = sprintf(buf,
1143                       "RC resends  %d\n"
1144                       "RC no QACK  %d\n"
1145                       "RC ACKs     %d\n"
1146                       "RC SEQ NAKs %d\n"
1147                       "RC RDMA seq %d\n"
1148                       "RC RNR NAKs %d\n"
1149                       "RC OTH NAKs %d\n"
1150                       "RC timeouts %d\n"
1151                       "RC RDMA dup %d\n"
1152                       "piobuf wait %d\n"
1153                       "no piobuf   %d\n"
1154                       "PKT drops   %d\n"
1155                       "WQE errs    %d\n",
1156                       dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
1157                       dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
1158                       dev->n_other_naks, dev->n_timeouts,
1159                       dev->n_rdma_dup_busy, dev->n_piowait,
1160                       dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
1161         for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
1162                 const struct ipath_opcode_stats *si = &dev->opstats[i];
1163
1164                 if (!si->n_packets && !si->n_bytes)
1165                         continue;
1166                 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
1167                                (unsigned long long) si->n_packets,
1168                                (unsigned long long) si->n_bytes);
1169         }
1170         return len;
1171 }
1172
1173 static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1174 static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1175 static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
1176 static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
1177
1178 static struct class_device_attribute *ipath_class_attributes[] = {
1179         &class_device_attr_hw_rev,
1180         &class_device_attr_hca_type,
1181         &class_device_attr_board_id,
1182         &class_device_attr_stats
1183 };
1184
1185 static int ipath_verbs_register_sysfs(struct ib_device *dev)
1186 {
1187         int i;
1188         int ret;
1189
1190         for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
1191                 if (class_device_create_file(&dev->class_dev,
1192                                              ipath_class_attributes[i])) {
1193                         ret = 1;
1194                         goto bail;
1195                 }
1196
1197         ret = 0;
1198
1199 bail:
1200         return ret;
1201 }
1202
1203 module_init(ipath_verbs_init);
1204 module_exit(ipath_verbs_cleanup);