2  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 
   4  * This software is available to you under a choice of one of two
 
   5  * licenses.  You may choose to be licensed under the terms of the GNU
 
   6  * General Public License (GPL) Version 2, available from the file
 
   7  * COPYING in the main directory of this source tree, or the
 
   8  * OpenIB.org BSD license below:
 
  10  *     Redistribution and use in source and binary forms, with or
 
  11  *     without modification, are permitted provided that the following
 
  14  *      - Redistributions of source code must retain the above
 
  15  *        copyright notice, this list of conditions and the following
 
  18  *      - Redistributions in binary form must reproduce the above
 
  19  *        copyright notice, this list of conditions and the following
 
  20  *        disclaimer in the documentation and/or other materials
 
  21  *        provided with the distribution.
 
  23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 
  26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 
  27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 
  28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 
  29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
  33 #include <rdma/ib_mad.h>
 
  34 #include <rdma/ib_user_verbs.h>
 
  35 #include <linux/utsname.h>
 
  37 #include "ipath_kernel.h"
 
  38 #include "ipath_verbs.h"
 
  39 #include "ips_common.h"
 
  41 /* Not static, because we don't want the compiler removing it */
 
  42 const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
 
  44 static unsigned int ib_ipath_qp_table_size = 251;
 
  45 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
 
  46 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
  48 unsigned int ib_ipath_lkey_table_size = 12;
 
  49 module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
 
  51 MODULE_PARM_DESC(lkey_table_size,
 
  52                  "LKEY table size in bits (2^n, 1 <= n <= 23)");
 
  54 unsigned int ib_ipath_debug;    /* debug mask */
 
  55 module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
 
  56 MODULE_PARM_DESC(debug, "Verbs debug mask");
 
  58 MODULE_LICENSE("GPL");
 
  59 MODULE_AUTHOR("PathScale <support@pathscale.com>");
 
  60 MODULE_DESCRIPTION("Pathscale InfiniPath driver");
 
  62 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 
  64         [IB_QPS_INIT] = IPATH_POST_RECV_OK,
 
  65         [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 
  66         [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 
  67             IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
 
  68         [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 
  70         [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 
  75  * Translate ib_wr_opcode into ib_wc_opcode.
 
  77 const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
 
  78         [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 
  79         [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 
  80         [IB_WR_SEND] = IB_WC_SEND,
 
  81         [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 
  82         [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 
  83         [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 
  84         [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 
  90 static __be64 sys_image_guid;
 
  93  * ipath_copy_sge - copy data to SGE memory
 
  95  * @data: the data to copy
 
  96  * @length: the length of the data
 
  98 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
 
 100         struct ipath_sge *sge = &ss->sge;
 
 103                 u32 len = sge->length;
 
 108                 memcpy(sge->vaddr, data, len);
 
 111                 sge->sge_length -= len;
 
 112                 if (sge->sge_length == 0) {
 
 114                                 *sge = *ss->sg_list++;
 
 115                 } else if (sge->length == 0 && sge->mr != NULL) {
 
 116                         if (++sge->n >= IPATH_SEGSZ) {
 
 117                                 if (++sge->m >= sge->mr->mapsz)
 
 122                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
 
 124                                 sge->mr->map[sge->m]->segs[sge->n].length;
 
 132  * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
 
 134  * @length: the number of bytes to skip
 
 136 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 
 138         struct ipath_sge *sge = &ss->sge;
 
 140         while (length > sge->sge_length) {
 
 141                 length -= sge->sge_length;
 
 142                 ss->sge = *ss->sg_list++;
 
 145                 u32 len = sge->length;
 
 152                 sge->sge_length -= len;
 
 153                 if (sge->sge_length == 0) {
 
 155                                 *sge = *ss->sg_list++;
 
 156                 } else if (sge->length == 0 && sge->mr != NULL) {
 
 157                         if (++sge->n >= IPATH_SEGSZ) {
 
 158                                 if (++sge->m >= sge->mr->mapsz)
 
 163                                 sge->mr->map[sge->m]->segs[sge->n].vaddr;
 
 165                                 sge->mr->map[sge->m]->segs[sge->n].length;
 
 172  * ipath_post_send - post a send on a QP
 
 173  * @ibqp: the QP to post the send on
 
 174  * @wr: the list of work requests to post
 
 175  * @bad_wr: the first bad WR is put here
 
 177  * This may be called from interrupt context.
 
 179 static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 180                            struct ib_send_wr **bad_wr)
 
 182         struct ipath_qp *qp = to_iqp(ibqp);
 
 185         /* Check that state is OK to post send. */
 
 186         if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
 
 192         for (; wr; wr = wr->next) {
 
 193                 switch (qp->ibqp.qp_type) {
 
 196                         err = ipath_post_rc_send(qp, wr);
 
 202                         err = ipath_post_ud_send(qp, wr);
 
 219  * ipath_post_receive - post a receive on a QP
 
 220  * @ibqp: the QP to post the receive on
 
 221  * @wr: the WR to post
 
 222  * @bad_wr: the first bad WR is put here
 
 224  * This may be called from interrupt context.
 
 226 static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 227                               struct ib_recv_wr **bad_wr)
 
 229         struct ipath_qp *qp = to_iqp(ibqp);
 
 233         /* Check that state is OK to post receive. */
 
 234         if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) {
 
 240         for (; wr; wr = wr->next) {
 
 241                 struct ipath_rwqe *wqe;
 
 245                 if (wr->num_sge > qp->r_rq.max_sge) {
 
 251                 spin_lock_irqsave(&qp->r_rq.lock, flags);
 
 252                 next = qp->r_rq.head + 1;
 
 253                 if (next >= qp->r_rq.size)
 
 255                 if (next == qp->r_rq.tail) {
 
 256                         spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 
 262                 wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head);
 
 263                 wqe->wr_id = wr->wr_id;
 
 264                 wqe->sg_list[0].mr = NULL;
 
 265                 wqe->sg_list[0].vaddr = NULL;
 
 266                 wqe->sg_list[0].length = 0;
 
 267                 wqe->sg_list[0].sge_length = 0;
 
 269                 for (i = 0, j = 0; i < wr->num_sge; i++) {
 
 271                         if (to_ipd(qp->ibqp.pd)->user &&
 
 272                             wr->sg_list[i].lkey == 0) {
 
 273                                 spin_unlock_irqrestore(&qp->r_rq.lock,
 
 279                         if (wr->sg_list[i].length == 0)
 
 282                                     &to_idev(qp->ibqp.device)->lk_table,
 
 283                                     &wqe->sg_list[j], &wr->sg_list[i],
 
 284                                     IB_ACCESS_LOCAL_WRITE)) {
 
 285                                 spin_unlock_irqrestore(&qp->r_rq.lock,
 
 291                         wqe->length += wr->sg_list[i].length;
 
 295                 qp->r_rq.head = next;
 
 296                 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 
 305  * ipath_qp_rcv - processing an incoming packet on a QP
 
 306  * @dev: the device the packet came on
 
 307  * @hdr: the packet header
 
 308  * @has_grh: true if the packet has a GRH
 
 309  * @data: the packet data
 
 310  * @tlen: the packet length
 
 311  * @qp: the QP the packet came on
 
 313  * This is called from ipath_ib_rcv() to process an incoming packet
 
 315  * Called at interrupt level.
 
 317 static void ipath_qp_rcv(struct ipath_ibdev *dev,
 
 318                          struct ipath_ib_header *hdr, int has_grh,
 
 319                          void *data, u32 tlen, struct ipath_qp *qp)
 
 321         /* Check for valid receive state. */
 
 322         if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
 
 327         switch (qp->ibqp.qp_type) {
 
 331                 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 
 335                 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
 
 339                 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
 
 348  * ipath_ib_rcv - process an incoming packet
 
 349  * @arg: the device pointer
 
 350  * @rhdr: the header of the packet
 
 351  * @data: the packet data
 
 352  * @tlen: the packet length
 
 354  * This is called from ipath_kreceive() to process an incoming packet at
 
 355  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 
 357 static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
 
 359         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 
 360         struct ipath_ib_header *hdr = rhdr;
 
 361         struct ipath_other_headers *ohdr;
 
 368         if (unlikely(dev == NULL))
 
 371         if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
 
 376         /* Check for a valid destination LID (see ch. 7.11.1). */
 
 377         lid = be16_to_cpu(hdr->lrh[1]);
 
 378         if (lid < IPS_MULTICAST_LID_BASE) {
 
 379                 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
 
 380                 if (unlikely(lid != ipath_layer_get_lid(dev->dd))) {
 
 387         lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
 388         if (lnh == IPS_LRH_BTH)
 
 390         else if (lnh == IPS_LRH_GRH)
 
 391                 ohdr = &hdr->u.l.oth;
 
 397         opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 
 398         dev->opstats[opcode].n_bytes += tlen;
 
 399         dev->opstats[opcode].n_packets++;
 
 401         /* Get the destination QP number. */
 
 402         qp_num = be32_to_cpu(ohdr->bth[1]) & IPS_QPN_MASK;
 
 403         if (qp_num == IPS_MULTICAST_QPN) {
 
 404                 struct ipath_mcast *mcast;
 
 405                 struct ipath_mcast_qp *p;
 
 407                 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 
 412                 dev->n_multicast_rcv++;
 
 413                 list_for_each_entry_rcu(p, &mcast->qp_list, list)
 
 414                         ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
 
 417                  * Notify ipath_multicast_detach() if it is waiting for us
 
 420                 if (atomic_dec_return(&mcast->refcount) <= 1)
 
 421                         wake_up(&mcast->wait);
 
 423                 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
 
 425                         dev->n_unicast_rcv++;
 
 426                         ipath_qp_rcv(dev, hdr, lnh == IPS_LRH_GRH, data,
 
 429                          * Notify ipath_destroy_qp() if it is waiting
 
 432                         if (atomic_dec_and_test(&qp->refcount))
 
 442  * ipath_ib_timer - verbs timer
 
 443  * @arg: the device pointer
 
 445  * This is called from ipath_do_rcv_timer() at interrupt level to check for
 
 446  * QPs which need retransmits and to collect performance numbers.
 
 448 static void ipath_ib_timer(void *arg)
 
 450         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 
 451         struct ipath_qp *resend = NULL;
 
 452         struct list_head *last;
 
 459         spin_lock_irqsave(&dev->pending_lock, flags);
 
 460         /* Start filling the next pending queue. */
 
 461         if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
 
 462                 dev->pending_index = 0;
 
 463         /* Save any requests still in the new queue, they have timed out. */
 
 464         last = &dev->pending[dev->pending_index];
 
 465         while (!list_empty(last)) {
 
 466                 qp = list_entry(last->next, struct ipath_qp, timerwait);
 
 467                 list_del_init(&qp->timerwait);
 
 468                 qp->timer_next = resend;
 
 470                 atomic_inc(&qp->refcount);
 
 472         last = &dev->rnrwait;
 
 473         if (!list_empty(last)) {
 
 474                 qp = list_entry(last->next, struct ipath_qp, timerwait);
 
 475                 if (--qp->s_rnr_timeout == 0) {
 
 477                                 list_del_init(&qp->timerwait);
 
 478                                 tasklet_hi_schedule(&qp->s_task);
 
 479                                 if (list_empty(last))
 
 481                                 qp = list_entry(last->next, struct ipath_qp,
 
 483                         } while (qp->s_rnr_timeout == 0);
 
 487          * We should only be in the started state if pma_sample_start != 0
 
 489         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 
 490             --dev->pma_sample_start == 0) {
 
 491                 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 
 492                 ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword,
 
 496                                               &dev->ipath_xmit_wait);
 
 498         if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 
 499                 if (dev->pma_sample_interval == 0) {
 
 500                         u64 ta, tb, tc, td, te;
 
 502                         dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 
 503                         ipath_layer_snapshot_counters(dev->dd, &ta, &tb,
 
 506                         dev->ipath_sword = ta - dev->ipath_sword;
 
 507                         dev->ipath_rword = tb - dev->ipath_rword;
 
 508                         dev->ipath_spkts = tc - dev->ipath_spkts;
 
 509                         dev->ipath_rpkts = td - dev->ipath_rpkts;
 
 510                         dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
 
 513                         dev->pma_sample_interval--;
 
 515         spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 517         /* XXX What if timer fires again while this is running? */
 
 518         for (qp = resend; qp != NULL; qp = qp->timer_next) {
 
 521                 spin_lock_irqsave(&qp->s_lock, flags);
 
 522                 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) {
 
 524                         ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
 
 526                 spin_unlock_irqrestore(&qp->s_lock, flags);
 
 528                 /* Notify ipath_destroy_qp() if it is waiting. */
 
 529                 if (atomic_dec_and_test(&qp->refcount))
 
 535  * ipath_ib_piobufavail - callback when a PIO buffer is available
 
 536  * @arg: the device pointer
 
 538  * This is called from ipath_intr() at interrupt level when a PIO buffer is
 
 539  * available after ipath_verbs_send() returned an error that no buffers were
 
 540  * available.  Return 1 if we consumed all the PIO buffers and we still have
 
 541  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
 
 544 static int ipath_ib_piobufavail(void *arg)
 
 546         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 
 553         spin_lock_irqsave(&dev->pending_lock, flags);
 
 554         while (!list_empty(&dev->piowait)) {
 
 555                 qp = list_entry(dev->piowait.next, struct ipath_qp,
 
 557                 list_del_init(&qp->piowait);
 
 558                 tasklet_hi_schedule(&qp->s_task);
 
 560         spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 566 static int ipath_query_device(struct ib_device *ibdev,
 
 567                               struct ib_device_attr *props)
 
 569         struct ipath_ibdev *dev = to_idev(ibdev);
 
 570         u32 vendor, boardrev, majrev, minrev;
 
 572         memset(props, 0, sizeof(*props));
 
 574         props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
 
 575                 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
 
 576                 IB_DEVICE_SYS_IMAGE_GUID;
 
 577         ipath_layer_query_device(dev->dd, &vendor, &boardrev,
 
 579         props->vendor_id = vendor;
 
 580         props->vendor_part_id = boardrev;
 
 581         props->hw_ver = boardrev << 16 | majrev << 8 | minrev;
 
 583         props->sys_image_guid = dev->sys_image_guid;
 
 585         props->max_mr_size = ~0ull;
 
 586         props->max_qp = 0xffff;
 
 587         props->max_qp_wr = 0xffff;
 
 588         props->max_sge = 255;
 
 589         props->max_cq = 0xffff;
 
 590         props->max_cqe = 0xffff;
 
 591         props->max_mr = 0xffff;
 
 592         props->max_pd = 0xffff;
 
 593         props->max_qp_rd_atom = 1;
 
 594         props->max_qp_init_rd_atom = 1;
 
 595         /* props->max_res_rd_atom */
 
 596         props->max_srq = 0xffff;
 
 597         props->max_srq_wr = 0xffff;
 
 598         props->max_srq_sge = 255;
 
 599         /* props->local_ca_ack_delay */
 
 600         props->atomic_cap = IB_ATOMIC_HCA;
 
 601         props->max_pkeys = ipath_layer_get_npkeys(dev->dd);
 
 602         props->max_mcast_grp = 0xffff;
 
 603         props->max_mcast_qp_attach = 0xffff;
 
 604         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 
 605                 props->max_mcast_grp;
 
 610 const u8 ipath_cvt_physportstate[16] = {
 
 611         [INFINIPATH_IBCS_LT_STATE_DISABLED] = 3,
 
 612         [INFINIPATH_IBCS_LT_STATE_LINKUP] = 5,
 
 613         [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = 2,
 
 614         [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = 2,
 
 615         [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = 1,
 
 616         [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = 1,
 
 617         [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 4,
 
 618         [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 4,
 
 619         [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 4,
 
 620         [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = 4,
 
 621         [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 6,
 
 622         [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 6,
 
 623         [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
 
 626 static int ipath_query_port(struct ib_device *ibdev,
 
 627                             u8 port, struct ib_port_attr *props)
 
 629         struct ipath_ibdev *dev = to_idev(ibdev);
 
 631         u16 lid = ipath_layer_get_lid(dev->dd);
 
 634         memset(props, 0, sizeof(*props));
 
 635         props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
 
 636         props->lmc = dev->mkeyprot_resv_lmc & 7;
 
 637         props->sm_lid = dev->sm_lid;
 
 638         props->sm_sl = dev->sm_sl;
 
 639         ibcstat = ipath_layer_get_lastibcstat(dev->dd);
 
 640         props->state = ((ibcstat >> 4) & 0x3) + 1;
 
 641         /* See phys_state_show() */
 
 642         props->phys_state = ipath_cvt_physportstate[
 
 643                 ipath_layer_get_lastibcstat(dev->dd) & 0xf];
 
 644         props->port_cap_flags = dev->port_cap_flags;
 
 645         props->gid_tbl_len = 1;
 
 646         props->max_msg_sz = 4096;
 
 647         props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd);
 
 648         props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) -
 
 649                 dev->n_pkey_violations;
 
 650         props->qkey_viol_cntr = dev->qkey_violations;
 
 651         props->active_width = IB_WIDTH_4X;
 
 652         /* See rate_show() */
 
 653         props->active_speed = 1;        /* Regular 10 Gb/s speed. */
 
 654         props->max_vl_num = 1;          /* VLCap = VL0 */
 
 655         props->init_type_reply = 0;
 
 657         props->max_mtu = IB_MTU_4096;
 
 658         switch (ipath_layer_get_ibmtu(dev->dd)) {
 
 677         props->active_mtu = mtu;
 
 678         props->subnet_timeout = dev->subnet_timeout;
 
 683 static int ipath_modify_device(struct ib_device *device,
 
 684                                int device_modify_mask,
 
 685                                struct ib_device_modify *device_modify)
 
 689         if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
 
 690                                    IB_DEVICE_MODIFY_NODE_DESC)) {
 
 695         if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
 
 696                 memcpy(device->node_desc, device_modify->node_desc, 64);
 
 698         if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
 
 699                 to_idev(device)->sys_image_guid =
 
 700                         cpu_to_be64(device_modify->sys_image_guid);
 
 708 static int ipath_modify_port(struct ib_device *ibdev,
 
 709                              u8 port, int port_modify_mask,
 
 710                              struct ib_port_modify *props)
 
 712         struct ipath_ibdev *dev = to_idev(ibdev);
 
 714         dev->port_cap_flags |= props->set_port_cap_mask;
 
 715         dev->port_cap_flags &= ~props->clr_port_cap_mask;
 
 716         if (port_modify_mask & IB_PORT_SHUTDOWN)
 
 717                 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
 
 718         if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
 
 719                 dev->qkey_violations = 0;
 
 723 static int ipath_query_gid(struct ib_device *ibdev, u8 port,
 
 724                            int index, union ib_gid *gid)
 
 726         struct ipath_ibdev *dev = to_idev(ibdev);
 
 733         gid->global.subnet_prefix = dev->gid_prefix;
 
 734         gid->global.interface_id = ipath_layer_get_guid(dev->dd);
 
 742 static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
 
 743                                     struct ib_ucontext *context,
 
 744                                     struct ib_udata *udata)
 
 749         pd = kmalloc(sizeof *pd, GFP_KERNEL);
 
 751                 ret = ERR_PTR(-ENOMEM);
 
 755         /* ib_alloc_pd() will initialize pd->ibpd. */
 
 756         pd->user = udata != NULL;
 
 764 static int ipath_dealloc_pd(struct ib_pd *ibpd)
 
 766         struct ipath_pd *pd = to_ipd(ibpd);
 
 774  * ipath_create_ah - create an address handle
 
 775  * @pd: the protection domain
 
 776  * @ah_attr: the attributes of the AH
 
 778  * This may be called from interrupt context.
 
 780 static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 
 781                                      struct ib_ah_attr *ah_attr)
 
 786         /* A multicast address requires a GRH (see ch. 8.4.1). */
 
 787         if (ah_attr->dlid >= IPS_MULTICAST_LID_BASE &&
 
 788             ah_attr->dlid != IPS_PERMISSIVE_LID &&
 
 789             !(ah_attr->ah_flags & IB_AH_GRH)) {
 
 790                 ret = ERR_PTR(-EINVAL);
 
 794         ah = kmalloc(sizeof *ah, GFP_ATOMIC);
 
 796                 ret = ERR_PTR(-ENOMEM);
 
 800         /* ib_create_ah() will initialize ah->ibah. */
 
 810  * ipath_destroy_ah - destroy an address handle
 
 811  * @ibah: the AH to destroy
 
 813  * This may be called from interrupt context.
 
 815 static int ipath_destroy_ah(struct ib_ah *ibah)
 
 817         struct ipath_ah *ah = to_iah(ibah);
 
 824 static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 
 826         struct ipath_ah *ah = to_iah(ibah);
 
 833 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 
 836         struct ipath_ibdev *dev = to_idev(ibdev);
 
 839         if (index >= ipath_layer_get_npkeys(dev->dd)) {
 
 844         *pkey = ipath_layer_get_pkey(dev->dd, index);
 
 853  * ipath_alloc_ucontext - allocate a ucontext
 
 854  * @ibdev: the infiniband device
 
 855  * @udata: not used by the InfiniPath driver
 
 858 static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
 
 859                                                 struct ib_udata *udata)
 
 861         struct ipath_ucontext *context;
 
 862         struct ib_ucontext *ret;
 
 864         context = kmalloc(sizeof *context, GFP_KERNEL);
 
 866                 ret = ERR_PTR(-ENOMEM);
 
 870         ret = &context->ibucontext;
 
 876 static int ipath_dealloc_ucontext(struct ib_ucontext *context)
 
 878         kfree(to_iucontext(context));
 
 882 static int ipath_verbs_register_sysfs(struct ib_device *dev);
 
 885  * ipath_register_ib_device - register our device with the infiniband core
 
 886  * @unit: the device number to register
 
 887  * @dd: the device data structure
 
 888  * Return the allocated ipath_ibdev pointer or NULL on error.
 
 890 static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
 
 892         struct ipath_ibdev *idev;
 
 893         struct ib_device *dev;
 
 896         idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
 
 902         /* Only need to initialize non-zero fields. */
 
 903         spin_lock_init(&idev->qp_table.lock);
 
 904         spin_lock_init(&idev->lk_table.lock);
 
 905         idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
 
 906         /* Set the prefix to the default value (see ch. 4.1.1) */
 
 907         idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
 
 909         ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
 
 914          * The top ib_ipath_lkey_table_size bits are used to index the
 
 915          * table.  The lower 8 bits can be owned by the user (copied from
 
 916          * the LKEY).  The remaining bits act as a generation number or tag.
 
 918         idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
 
 919         idev->lk_table.table = kzalloc(idev->lk_table.max *
 
 920                                        sizeof(*idev->lk_table.table),
 
 922         if (idev->lk_table.table == NULL) {
 
 926         spin_lock_init(&idev->pending_lock);
 
 927         INIT_LIST_HEAD(&idev->pending[0]);
 
 928         INIT_LIST_HEAD(&idev->pending[1]);
 
 929         INIT_LIST_HEAD(&idev->pending[2]);
 
 930         INIT_LIST_HEAD(&idev->piowait);
 
 931         INIT_LIST_HEAD(&idev->rnrwait);
 
 932         idev->pending_index = 0;
 
 933         idev->port_cap_flags =
 
 934                 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
 
 935         idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
 
 936         idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
 
 937         idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
 
 938         idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
 
 939         idev->pma_counter_select[5] = IB_PMA_PORT_XMIT_WAIT;
 
 940         idev->link_width_enabled = 3;   /* 1x or 4x */
 
 943          * The system image GUID is supposed to be the same for all
 
 944          * IB HCAs in a single system but since there can be other
 
 945          * device types in the system, we can't be sure this is unique.
 
 948                 sys_image_guid = ipath_layer_get_guid(dd);
 
 949         idev->sys_image_guid = sys_image_guid;
 
 950         idev->ib_unit = unit;
 
 953         strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
 
 954         dev->owner = THIS_MODULE;
 
 955         dev->node_guid = ipath_layer_get_guid(dd);
 
 956         dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
 
 957         dev->uverbs_cmd_mask =
 
 958                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 
 959                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 
 960                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 
 961                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 
 962                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 
 963                 (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
 
 964                 (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
 
 965                 (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
 
 966                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 
 967                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 
 968                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 
 969                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 
 970                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 
 971                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 
 972                 (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
 
 973                 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
 
 974                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 
 975                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 
 976                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 
 977                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 
 978                 (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
 
 979                 (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
 
 980                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
 
 981                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
 
 982                 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 
 983                 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 
 984                 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 
 985                 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
 
 986                 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
 
 987         dev->node_type = IB_NODE_CA;
 
 988         dev->phys_port_cnt = 1;
 
 989         dev->dma_device = ipath_layer_get_device(dd);
 
 990         dev->class_dev.dev = dev->dma_device;
 
 991         dev->query_device = ipath_query_device;
 
 992         dev->modify_device = ipath_modify_device;
 
 993         dev->query_port = ipath_query_port;
 
 994         dev->modify_port = ipath_modify_port;
 
 995         dev->query_pkey = ipath_query_pkey;
 
 996         dev->query_gid = ipath_query_gid;
 
 997         dev->alloc_ucontext = ipath_alloc_ucontext;
 
 998         dev->dealloc_ucontext = ipath_dealloc_ucontext;
 
 999         dev->alloc_pd = ipath_alloc_pd;
 
1000         dev->dealloc_pd = ipath_dealloc_pd;
 
1001         dev->create_ah = ipath_create_ah;
 
1002         dev->destroy_ah = ipath_destroy_ah;
 
1003         dev->query_ah = ipath_query_ah;
 
1004         dev->create_srq = ipath_create_srq;
 
1005         dev->modify_srq = ipath_modify_srq;
 
1006         dev->query_srq = ipath_query_srq;
 
1007         dev->destroy_srq = ipath_destroy_srq;
 
1008         dev->create_qp = ipath_create_qp;
 
1009         dev->modify_qp = ipath_modify_qp;
 
1010         dev->query_qp = ipath_query_qp;
 
1011         dev->destroy_qp = ipath_destroy_qp;
 
1012         dev->post_send = ipath_post_send;
 
1013         dev->post_recv = ipath_post_receive;
 
1014         dev->post_srq_recv = ipath_post_srq_receive;
 
1015         dev->create_cq = ipath_create_cq;
 
1016         dev->destroy_cq = ipath_destroy_cq;
 
1017         dev->resize_cq = ipath_resize_cq;
 
1018         dev->poll_cq = ipath_poll_cq;
 
1019         dev->req_notify_cq = ipath_req_notify_cq;
 
1020         dev->get_dma_mr = ipath_get_dma_mr;
 
1021         dev->reg_phys_mr = ipath_reg_phys_mr;
 
1022         dev->reg_user_mr = ipath_reg_user_mr;
 
1023         dev->dereg_mr = ipath_dereg_mr;
 
1024         dev->alloc_fmr = ipath_alloc_fmr;
 
1025         dev->map_phys_fmr = ipath_map_phys_fmr;
 
1026         dev->unmap_fmr = ipath_unmap_fmr;
 
1027         dev->dealloc_fmr = ipath_dealloc_fmr;
 
1028         dev->attach_mcast = ipath_multicast_attach;
 
1029         dev->detach_mcast = ipath_multicast_detach;
 
1030         dev->process_mad = ipath_process_mad;
 
1032         snprintf(dev->node_desc, sizeof(dev->node_desc),
 
1033                  IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename);
 
1035         ret = ib_register_device(dev);
 
1039         if (ipath_verbs_register_sysfs(dev))
 
1042         ipath_layer_enable_timer(dd);
 
1047         ib_unregister_device(dev);
 
1049         kfree(idev->lk_table.table);
 
1051         kfree(idev->qp_table.table);
 
1053         ib_dealloc_device(dev);
 
1054         _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
 
/*
 * ipath_unregister_ib_device - tear down one registered verbs device
 * @arg: opaque pointer, cast below to the struct ipath_ibdev being removed
 *
 * Disables the layer timer for dev->dd and unregisters the embedded
 * ib_device, then reports (via _VERBS_ERROR) any wait list that is still
 * populated and any leftover multicast state, and finally releases the
 * remaining QPs, the qp_table and lk_table arrays, and the ib_device.
 *
 * NOTE(review): the continuation lines carrying the _VERBS_ERROR arguments
 * are not visible in this listing.
 */
1062 static void ipath_unregister_ib_device(void *arg)
 
1064         struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 
1065         struct ib_device *ibdev = &dev->ibdev;
 
1067         ipath_layer_disable_timer(dev->dd);
 
1069         ib_unregister_device(ibdev);
 
             /* Diagnostics only: each non-empty list is logged, nothing is drained here. */
1071         if (!list_empty(&dev->pending[0]) ||
 
1072             !list_empty(&dev->pending[1]) ||
 
1073             !list_empty(&dev->pending[2]))
 
1074                 _VERBS_ERROR("ipath%d pending list not empty!\n",
 
1076         if (!list_empty(&dev->piowait))
 
1077                 _VERBS_ERROR("ipath%d piowait list not empty!\n",
 
1079         if (!list_empty(&dev->rnrwait))
 
1080                 _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
 
1082         if (!ipath_mcast_tree_empty())
 
1083                 _VERBS_ERROR("ipath%d multicast table memory leak!\n",
 
1086          * Note that ipath_unregister_ib_device() can be called before all
 
1087          * the QPs are destroyed!
 
1089         ipath_free_all_qps(&dev->qp_table);
 
1090         kfree(dev->qp_table.table);
 
1091         kfree(dev->lk_table.table);
 
1092         ib_dealloc_device(ibdev);
 
/*
 * ipath_verbs_init - module load entry point
 *
 * Returns whatever ipath_verbs_register() returns. The call hands over the
 * per-device register/unregister routines and the ipath_ib_piobufavail and
 * ipath_ib_rcv handlers.
 *
 * NOTE(review): the argument list continues on lines that are not visible
 * in this listing.
 */
1095 static int __init ipath_verbs_init(void)
 
1097         return ipath_verbs_register(ipath_register_ib_device,
 
1098                                     ipath_unregister_ib_device,
 
1099                                     ipath_ib_piobufavail, ipath_ib_rcv,
 
/*
 * ipath_verbs_cleanup - module unload entry point
 *
 * Calls ipath_verbs_unregister(); takes no arguments and returns nothing.
 */
1103 static void __exit ipath_verbs_cleanup(void)
 
1105         ipath_verbs_unregister();
 
/*
 * show_rev - read handler for the "hw_rev" class device attribute
 * @cdev: class device embedded in a struct ipath_ibdev (ibdev.class_dev)
 * @buf: output buffer supplied by the caller
 *
 * Recovers the owning ipath_ibdev from @cdev, queries the layer for device
 * information and writes "<majrev>.<minrev>\n" into @buf.
 *
 * Returns the value returned by sprintf().
 *
 * NOTE(review): the trailing arguments of ipath_layer_query_device() are
 * not visible in this listing; majrev and minrev are presumably filled in
 * by that call -- confirm.
 */
1108 static ssize_t show_rev(struct class_device *cdev, char *buf)
 
1110         struct ipath_ibdev *dev =
 
1111                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
1112         int vendor, boardrev, majrev, minrev;
 
1114         ipath_layer_query_device(dev->dd, &vendor, &boardrev,
 
1116         return sprintf(buf, "%d.%d\n", majrev, minrev);
 
/*
 * show_hca - read handler shared by the "hca_type" and "board_id" attributes
 * @cdev: class device embedded in a struct ipath_ibdev (ibdev.class_dev)
 * @buf: output buffer supplied by the caller
 *
 * Recovers the owning ipath_ibdev from @cdev and asks the layer to place
 * the board name in @buf.
 *
 * NOTE(review): the declaration of ret, the handling of its value and the
 * return statement are not visible in this listing. The literal 128 is
 * presumably the maximum length passed to the layer -- confirm against
 * ipath_layer_get_boardname().
 */
1119 static ssize_t show_hca(struct class_device *cdev, char *buf)
 
1121         struct ipath_ibdev *dev =
 
1122                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
1125         ret = ipath_layer_get_boardname(dev->dd, buf, 128);
 
/*
 * show_stats - read handler for the "stats" class device attribute
 * @cdev: class device embedded in a struct ipath_ibdev (ibdev.class_dev)
 * @buf: output buffer supplied by the caller
 *
 * Recovers the owning ipath_ibdev from @cdev, formats the device's n_*
 * counters, then appends one "<index> <packets>/<bytes>" line per
 * dev->opstats entry.
 *
 * NOTE(review): the local declarations, the start of the first format call
 * and the return statement are not visible in this listing.
 */
1135 static ssize_t show_stats(struct class_device *cdev, char *buf)
 
1137         struct ipath_ibdev *dev =
 
1138                 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
 
1156                       dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
 
1157                       dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
 
1158                       dev->n_other_naks, dev->n_timeouts,
 
1159                       dev->n_rdma_dup_busy, dev->n_piowait,
 
1160                       dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
 
             /*
              * Per-entry lines: the index is printed as two hex digits, the
              * counters as unsigned long long.
              * NOTE(review): the statement guarded by the zero-count test is
              * not visible here; presumably it skips entries that have seen
              * no traffic -- confirm.
              */
1161         for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
 
1162                 const struct ipath_opcode_stats *si = &dev->opstats[i];
 
1164                 if (!si->n_packets && !si->n_bytes)
 
1166                 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
 
1167                                (unsigned long long) si->n_packets,
 
1168                                (unsigned long long) si->n_bytes);
 
/*
 * Class device attributes exported by this driver. All four are created
 * with S_IRUGO permissions and a NULL store handler, so only the show
 * handler is wired up. hca_type and board_id share show_hca.
 */
1173 static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
 
1174 static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
 
1175 static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
 
1176 static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
 
/*
 * Table of the attributes above; ipath_verbs_register_sysfs() walks it with
 * ARRAY_SIZE() and creates one file per entry.
 */
1178 static struct class_device_attribute *ipath_class_attributes[] = {
 
1179         &class_device_attr_hw_rev,
 
1180         &class_device_attr_hca_type,
 
1181         &class_device_attr_board_id,
 
1182         &class_device_attr_stats
 
/*
 * ipath_verbs_register_sysfs - create the driver's class device attribute files
 * @dev: ib_device whose class_dev receives the files
 *
 * Iterates over ipath_class_attributes and calls class_device_create_file()
 * for each entry; a non-zero result from that call enters the error branch.
 *
 * NOTE(review): the local declarations, the body of the error branch and
 * the return statement are not visible in this listing.
 */
1185 static int ipath_verbs_register_sysfs(struct ib_device *dev)
 
1190         for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
 
1191                 if (class_device_create_file(&dev->class_dev,
 
1192                                              ipath_class_attributes[i])) {
 
/* Hook the init and cleanup routines defined above into module load/unload. */
1203 module_init(ipath_verbs_init);
 
1204 module_exit(ipath_verbs_cleanup);