2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 
   3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 
   4  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 
   5  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 
   6  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 
   8  * This software is available to you under a choice of one of two
 
   9  * licenses.  You may choose to be licensed under the terms of the GNU
 
  10  * General Public License (GPL) Version 2, available from the file
 
  11  * COPYING in the main directory of this source tree, or the
 
  12  * OpenIB.org BSD license below:
 
  14  *     Redistribution and use in source and binary forms, with or
 
  15  *     without modification, are permitted provided that the following
 
  18  *      - Redistributions of source code must retain the above
 
  19  *        copyright notice, this list of conditions and the following
 
  22  *      - Redistributions in binary form must reproduce the above
 
  23  *        copyright notice, this list of conditions and the following
 
  24  *        disclaimer in the documentation and/or other materials
 
  25  *        provided with the distribution.
 
  27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 
  30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 
  31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 
  32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 
  33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
  36  * $Id: mthca_provider.c 4859 2006-01-09 21:55:10Z roland $
 
  39 #include <rdma/ib_smi.h>
 
  40 #include <rdma/ib_user_verbs.h>
 
  43 #include "mthca_dev.h"
 
  44 #include "mthca_cmd.h"
 
  45 #include "mthca_user.h"
 
  46 #include "mthca_memfree.h"
 
  48 static void init_query_mad(struct ib_smp *mad)
     /*
      * Fill in the header fields common to every query SMP built in this
      * file: base version 1, the LID-routed subnet-management class, class
      * version 1 and the GET method.  Callers set attr_id (and attr_mod
      * where needed) after calling this.
      */
 
  50         mad->base_version  = 1;
 
  51         mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 
  52         mad->class_version = 1;
 
  53         mad->method        = IB_MGMT_METHOD_GET;
 
  56 static int mthca_query_device(struct ib_device *ibdev,
 
  57                               struct ib_device_attr *props)
     /*
      * Fill *props with the device attributes.
      *
      * A NodeInfo SMP is issued through mthca_MAD_IFC(); vendor id, vendor
      * part id, hardware version and the system image GUID are decoded from
      * fixed offsets of the reply.  All other fields are copied or derived
      * from values cached in the mthca_dev (mdev->limits and friends).
      */
 
  59         struct ib_smp *in_mad  = NULL;
 
  60         struct ib_smp *out_mad = NULL;
 
  62         struct mthca_dev* mdev = to_mdev(ibdev);
 
     /* The request MAD is zero-filled; the reply buffer is not. */
  66         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 
  67         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 
  68         if (!in_mad || !out_mad)
 
     /* Start from all zeroes so any field not assigned below reads as 0. */
  71         memset(props, 0, sizeof *props);
 
  73         props->fw_ver              = mdev->fw_ver;
 
  75         init_query_mad(in_mad);
 
  76         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 
  78         err = mthca_MAD_IFC(mdev, 1, 1,
 
  79                             1, NULL, NULL, in_mad, out_mad,
 
  88         props->device_cap_flags    = mdev->device_cap_flags;
 
  89         props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 
  91         props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
 
  92         props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
 
  93         memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
 
     /* Resource counts are reported net of the entries reserved by the device. */
  95         props->max_mr_size         = ~0ull;
 
  96         props->page_size_cap       = mdev->limits.page_size_cap;
 
  97         props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
 
  98         props->max_qp_wr           = mdev->limits.max_wqes;
 
  99         props->max_sge             = mdev->limits.max_sg;
 
 100         props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
 
 101         props->max_cqe             = mdev->limits.max_cqes;
 
 102         props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
 
 103         props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
 
     /* 2^rdb_shift per QP; the device-wide figure below scales it by max_qp. */
 104         props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
 
 105         props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
 
 106         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
 
 107         props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
 
 108         props->max_srq_wr          = mdev->limits.max_srq_wqes;
 
 109         props->max_srq_sge         = mdev->limits.max_srq_sge;
 
 110         props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
 
 111         props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
 
 112                                         IB_ATOMIC_HCA : IB_ATOMIC_NONE;
 
 113         props->max_pkeys           = mdev->limits.pkey_table_len;
 
 114         props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
 
 115         props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
 
 116         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 
 117                                            props->max_mcast_grp;
 
 119          * If Sinai memory key optimization is being used, then only
 
 120          * the 8-bit key portion will change.  For other HCAs, the
 
 121          * unused index bits will also be used for FMR remapping.
 
 123         if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
 
 124                 props->max_map_per_fmr = 255;
 
 126                 props->max_map_per_fmr =
 
 127                         (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1;
 
 136 static int mthca_query_port(struct ib_device *ibdev,
 
 137                             u8 port, struct ib_port_attr *props)
     /*
      * Fill *props with the attributes of `port`.
      *
      * A PortInfo SMP (attr_mod = port number) is issued through
      * mthca_MAD_IFC() and the individual fields are picked out of the raw
      * reply bytes.  GID and P_Key table lengths come from the cached device
      * limits, and max_msg_sz is the constant 0x80000000.
      */
 
 139         struct ib_smp *in_mad  = NULL;
 
 140         struct ib_smp *out_mad = NULL;
 
 144         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 
 145         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 
 146         if (!in_mad || !out_mad)
 
     /* Zero first so fields not decoded below are reported as 0. */
 149         memset(props, 0, sizeof *props);
 
 151         init_query_mad(in_mad);
 
 152         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 
 153         in_mad->attr_mod = cpu_to_be32(port);
 
 155         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
 
 156                             port, NULL, NULL, in_mad, out_mad,
 
     /* Multi-byte reply fields are big-endian; several bytes hold two 4-bit fields. */
 165         props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
 
 166         props->lmc               = out_mad->data[34] & 0x7;
 
 167         props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
 
 168         props->sm_sl             = out_mad->data[36] & 0xf;
 
 169         props->state             = out_mad->data[32] & 0xf;
 
 170         props->phys_state        = out_mad->data[33] >> 4;
 
 171         props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
 
 172         props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
 
 173         props->max_msg_sz        = 0x80000000;
 
 174         props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
 
 175         props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
 
 176         props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
 
 177         props->active_width      = out_mad->data[31] & 0xf;
 
 178         props->active_speed      = out_mad->data[35] >> 4;
 
 179         props->max_mtu           = out_mad->data[41] & 0xf;
 
 180         props->active_mtu        = out_mad->data[36] >> 4;
 
 181         props->subnet_timeout    = out_mad->data[51] & 0x1f;
 
 182         props->max_vl_num        = out_mad->data[37] >> 4;
 
 183         props->init_type_reply   = out_mad->data[41] >> 4;
 
 191 static int mthca_modify_device(struct ib_device *ibdev,
 
 193                                struct ib_device_modify *props)
     /*
      * Modify device-level attributes.  IB_DEVICE_MODIFY_NODE_DESC is the
      * only mask bit handled; any other bit set in mask is caught by the
      * first check.  The 64-byte node description is copied into
      * ibdev->node_desc while holding cap_mask_mutex, which is taken
      * interruptibly.
      */
 
 195         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
 
 198         if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
 
 199                 if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
 
 201                 memcpy(ibdev->node_desc, props->node_desc, 64);
 
 202                 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
 
 208 static int mthca_modify_port(struct ib_device *ibdev,
 
 209                              u8 port, int port_modify_mask,
 
 210                              struct ib_port_modify *props)
     /*
      * Update the capability mask of `port` and optionally reset its Q_Key
      * violation counter.
      *
      * The current capability flags are read with mthca_query_port(), the
      * bits in props->set_port_cap_mask are added, those in
      * props->clr_port_cap_mask are removed, and the result is passed to
      * mthca_SET_IB().  The read-modify-write runs under cap_mask_mutex.
      */
 
 212         struct mthca_set_ib_param set_ib;
 
 213         struct ib_port_attr attr;
 
 217         if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
 
 220         err = mthca_query_port(ibdev, port, &attr);
 
 224         set_ib.set_si_guid     = 0;
 
     /* !! collapses the mask test to exactly 0 or 1. */
 225         set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
 
 227         set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
 
 228                 ~props->clr_port_cap_mask;
 
 230         err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
 
 239         mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
 
 243 static int mthca_query_pkey(struct ib_device *ibdev,
 
 244                             u8 port, u16 index, u16 *pkey)
     /*
      * Read the P_Key at `index` in the P_Key table of `port` into *pkey.
      *
      * The table is fetched in blocks of 32 entries: attr_mod selects block
      * index / 32, and entry index % 32 of the reply is converted from
      * big-endian.
      */
 
 246         struct ib_smp *in_mad  = NULL;
 
 247         struct ib_smp *out_mad = NULL;
 
 251         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 
 252         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 
 253         if (!in_mad || !out_mad)
 
 256         init_query_mad(in_mad);
 
 257         in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
 
 258         in_mad->attr_mod = cpu_to_be32(index / 32);
 
 260         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
 
 261                             port, NULL, NULL, in_mad, out_mad,
 
 270         *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
 
 278 static int mthca_query_gid(struct ib_device *ibdev, u8 port,
 
 279                            int index, union ib_gid *gid)
     /*
      * Assemble the GID at `index` for `port` from two SMP queries:
      *   - bytes 0..7  are copied from offset 8 of a PortInfo reply;
      *   - bytes 8..15 are copied from a GuidInfo reply, block index / 8,
      *     entry index % 8 (8 bytes per entry).
      * The same in_mad/out_mad buffers are reused for both queries.
      */
 
 281         struct ib_smp *in_mad  = NULL;
 
 282         struct ib_smp *out_mad = NULL;
 
 286         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 
 287         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 
 288         if (!in_mad || !out_mad)
 
 291         init_query_mad(in_mad);
 
 292         in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 
 293         in_mad->attr_mod = cpu_to_be32(port);
 
 295         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
 
 296                             port, NULL, NULL, in_mad, out_mad,
 
 305         memcpy(gid->raw, out_mad->data + 8, 8);
 
     /* Second query: re-initialise the request header before reuse. */
 307         init_query_mad(in_mad);
 
 308         in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
 
 309         in_mad->attr_mod = cpu_to_be32(index / 8);
 
 311         err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
 
 312                             port, NULL, NULL, in_mad, out_mad,
 
 321         memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
 
 329 static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
 
 330                                                 struct ib_udata *udata)
     /*
      * Create a userspace verbs context.
      *
      * Builds a response carrying the QP table size (and, on mem-free HCAs,
      * the UAR context size), allocates the context structure, a UAR and a
      * user doorbell table, then copies the response to userspace.  The
      * failure paths visible here undo the steps that already succeeded and
      * return an ERR_PTR().
      */
 
 332         struct mthca_alloc_ucontext_resp uresp;
 
 333         struct mthca_ucontext           *context;
 
     /* Cleared before use since the whole struct is later copied to userspace. */
 336         memset(&uresp, 0, sizeof uresp);
 
 338         uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
 
 339         if (mthca_is_memfree(to_mdev(ibdev)))
 
 340                 uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
 
 344         context = kmalloc(sizeof *context, GFP_KERNEL);
 
 346                 return ERR_PTR(-ENOMEM);
 
 348         err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
 
 354         context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
 
 355         if (IS_ERR(context->db_tab)) {
 
 356                 err = PTR_ERR(context->db_tab);
 
 357                 mthca_uar_free(to_mdev(ibdev), &context->uar);
 
 362         if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
 
 363                 mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
 
 364                 mthca_uar_free(to_mdev(ibdev), &context->uar);
 
 366                 return ERR_PTR(-EFAULT);
 
 369         return &context->ibucontext;
 
 372 static int mthca_dealloc_ucontext(struct ib_ucontext *context)
     /*
      * Destroy a userspace context: clean up its user doorbell table,
      * release its UAR, then free the containing mthca_ucontext.
      */
 
 374         mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
 
 375                                   to_mucontext(context)->db_tab);
 
 376         mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
 
 377         kfree(to_mucontext(context));
 
 382 static int mthca_mmap_uar(struct ib_ucontext *context,
 
 383                           struct vm_area_struct *vma)
     /*
      * Map the context's UAR page into the calling process.  The size check
      * rejects any request that is not exactly one page; the mapping is
      * marked non-cached before the page frame is remapped.
      */
 
 385         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 
 388         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 390         if (io_remap_pfn_range(vma, vma->vm_start,
 
 391                                to_mucontext(context)->uar.pfn,
 
 392                                PAGE_SIZE, vma->vm_page_prot))
 
 398 static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
 
 399                                     struct ib_ucontext *context,
 
 400                                     struct ib_udata *udata)
     /*
      * Allocate a protection domain.  mthca_pd_alloc() is passed !context,
      * i.e. 1 when there is no user context and 0 otherwise.  The PD number
      * is copied to udata as a __u32; if that copy fails the PD is released
      * again and ERR_PTR(-EFAULT) is returned.
      */
 
 405         pd = kmalloc(sizeof *pd, GFP_KERNEL);
 
 407                 return ERR_PTR(-ENOMEM);
 
 409         err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
 
 416                 if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
 
 417                         mthca_pd_free(to_mdev(ibdev), pd);
 
 419                         return ERR_PTR(-EFAULT);
 
 426 static int mthca_dealloc_pd(struct ib_pd *pd)
     /* Release the hardware PD behind `pd` through mthca_pd_free(). */
 
 428         mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
 
 434 static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
 
 435                                      struct ib_ah_attr *ah_attr)
     /*
      * Allocate an address handle and initialise it from ah_attr with
      * mthca_create_ah().
      * NOTE(review): the allocation uses GFP_ATOMIC rather than GFP_KERNEL,
      * presumably because this may be called from atomic context - confirm.
      */
 
 440         ah = kmalloc(sizeof *ah, GFP_ATOMIC);
 
 442                 return ERR_PTR(-ENOMEM);
 
 444         err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
 
 453 static int mthca_ah_destroy(struct ib_ah *ah)
     /* Tear down the address handle through mthca_destroy_ah(). */
 
 455         mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
 
 461 static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
 
 462                                        struct ib_srq_init_attr *init_attr,
 
 463                                        struct ib_udata *udata)
     /*
      * Create a shared receive queue on `pd`.
      *
      * On the userspace path the create command is copied from udata, the
      * user doorbell record it names is mapped, and the lkey and doorbell
      * index are taken from the command.  If mthca_alloc_srq() then fails
      * for a user-owned PD, the doorbell mapping is undone.  For a user
      * context the SRQ number is copied back to udata as a __u32; if that
      * copy fails the SRQ is freed again.
      */
 
 465         struct mthca_create_srq ucmd;
 
 466         struct mthca_ucontext *context = NULL;
 
 467         struct mthca_srq *srq;
 
 470         srq = kmalloc(sizeof *srq, GFP_KERNEL);
 
 472                 return ERR_PTR(-ENOMEM);
 
 475                 context = to_mucontext(pd->uobject->context);
 
 477                 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 
 482                 err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
 
 483                                         context->db_tab, ucmd.db_index,
 
 489                 srq->mr.ibmr.lkey = ucmd.lkey;
 
 490                 srq->db_index     = ucmd.db_index;
 
 493         err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
 
 494                               &init_attr->attr, srq);
 
 496         if (err && pd->uobject)
 
 497                 mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
 
 498                                     context->db_tab, ucmd.db_index);
 
 503         if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
 
 504                 mthca_free_srq(to_mdev(pd->device), srq);
 
 517 static int mthca_destroy_srq(struct ib_srq *srq)
     /*
      * Destroy an SRQ.  On the userspace path the doorbell record mapped at
      * creation time (db_index) is unmapped first; the SRQ itself is then
      * released with mthca_free_srq().
      */
 
 519         struct mthca_ucontext *context;
 
 522                 context = to_mucontext(srq->uobject->context);
 
 524                 mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
 
 525                                     context->db_tab, to_msrq(srq)->db_index);
 
 528         mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
 
 534 static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 
 535                                      struct ib_qp_init_attr *init_attr,
 
 536                                      struct ib_udata *udata)
     /*
      * Create a queue pair, dispatching on init_attr->qp_type.
      *
      * Ordinary QPs: on the userspace path the create command is copied
      *   from udata and the send and receive doorbell records are mapped;
      *   lkey and both doorbell indices come from the command.  If
      *   mthca_alloc_qp() fails for a user-owned PD the doorbell mappings
      *   are undone.  The verbs-level QP number is the hardware QPN.
      * Special QPs: not available to userspace (-EINVAL).  A struct
      *   mthca_sqp is allocated and the QP number is 0 for IB_QPT_SMI and
      *   1 otherwise.
      * Raw QPs: unsupported, -ENOSYS.
      *
      * On success the capabilities actually granted are written back into
      * init_attr->cap.
      */
 
 538         struct mthca_create_qp ucmd;
 
 542         switch (init_attr->qp_type) {
 
 547                 struct mthca_ucontext *context;
 
 549                 qp = kmalloc(sizeof *qp, GFP_KERNEL);
 
 551                         return ERR_PTR(-ENOMEM);
 
 554                         context = to_mucontext(pd->uobject->context);
 
 556                         if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 
 558                                 return ERR_PTR(-EFAULT);
 
 561                         err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
 
 563                                                 ucmd.sq_db_index, ucmd.sq_db_page);
 
 569                         err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
 
 571                                                 ucmd.rq_db_index, ucmd.rq_db_page);
 
 573                                 mthca_unmap_user_db(to_mdev(pd->device),
 
 581                         qp->mr.ibmr.lkey = ucmd.lkey;
 
 582                         qp->sq.db_index  = ucmd.sq_db_index;
 
 583                         qp->rq.db_index  = ucmd.rq_db_index;
 
 586                 err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
 
 587                                      to_mcq(init_attr->send_cq),
 
 588                                      to_mcq(init_attr->recv_cq),
 
 589                                      init_attr->qp_type, init_attr->sq_sig_type,
 
 590                                      &init_attr->cap, qp);
 
 592                 if (err && pd->uobject) {
 
 593                         context = to_mucontext(pd->uobject->context);
 
 595                         mthca_unmap_user_db(to_mdev(pd->device),
 
 599                         mthca_unmap_user_db(to_mdev(pd->device),
 
 605                 qp->ibqp.qp_num = qp->qpn;
 
 611                 /* Don't allow userspace to create special QPs */
 
 613                         return ERR_PTR(-EINVAL);
 
 615                 qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
 
 617                         return ERR_PTR(-ENOMEM);
 
 619                 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
 
 621                 err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
 
 622                                       to_mcq(init_attr->send_cq),
 
 623                                       to_mcq(init_attr->recv_cq),
 
 624                                       init_attr->sq_sig_type, &init_attr->cap,
 
 625                                       qp->ibqp.qp_num, init_attr->port_num,
 
 630                 /* Don't support raw QPs */
 
 631                 return ERR_PTR(-ENOSYS);
 
     /* Report the limits the QP was really created with back to the caller. */
 639         init_attr->cap.max_send_wr     = qp->sq.max;
 
 640         init_attr->cap.max_recv_wr     = qp->rq.max;
 
 641         init_attr->cap.max_send_sge    = qp->sq.max_gs;
 
 642         init_attr->cap.max_recv_sge    = qp->rq.max_gs;
 
 643         init_attr->cap.max_inline_data = qp->max_inline_data;
 
 648 static int mthca_destroy_qp(struct ib_qp *qp)
     /*
      * Destroy a QP.  On the userspace path the send and receive doorbell
      * records mapped at creation time are unmapped first; the QP is then
      * released with mthca_free_qp().
      */
 
 651                 mthca_unmap_user_db(to_mdev(qp->device),
 
 652                                     &to_mucontext(qp->uobject->context)->uar,
 
 653                                     to_mucontext(qp->uobject->context)->db_tab,
 
 654                                     to_mqp(qp)->sq.db_index);
 
 655                 mthca_unmap_user_db(to_mdev(qp->device),
 
 656                                     &to_mucontext(qp->uobject->context)->uar,
 
 657                                     to_mucontext(qp->uobject->context)->db_tab,
 
 658                                     to_mqp(qp)->rq.db_index);
 
 660         mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
 
 665 static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
 
 666                                      struct ib_ucontext *context,
 
 667                                      struct ib_udata *udata)
     /*
      * Create a completion queue with room for at least `entries` CQEs.
      *
      * `entries` must lie in 1..limits.max_cqes, otherwise -EINVAL.  On the
      * userspace path the create command is copied from udata and the
      * set-CI and arm doorbell records are mapped; lkey and both doorbell
      * indices then come from the command.  The hardware size nent is
      * found by doubling from 1 until it exceeds `entries`.  For a user
      * context the CQ number is copied back to udata as a __u32.  The
      * trailing unmap calls belong to the error unwinding and release the
      * arm and then the set-CI doorbell.
      */
 
 669         struct mthca_create_cq ucmd;
 
 674         if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
 
 675                 return ERR_PTR(-EINVAL);
 
 678                 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
 
 679                         return ERR_PTR(-EFAULT);
 
 681                 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
 
 682                                         to_mucontext(context)->db_tab,
 
 683                                         ucmd.set_db_index, ucmd.set_db_page);
 
 687                 err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
 
 688                                         to_mucontext(context)->db_tab,
 
 689                                         ucmd.arm_db_index, ucmd.arm_db_page);
 
 694         cq = kmalloc(sizeof *cq, GFP_KERNEL);
 
 701                 cq->buf.mr.ibmr.lkey = ucmd.lkey;
 
 702                 cq->set_ci_db_index  = ucmd.set_db_index;
 
 703                 cq->arm_db_index     = ucmd.arm_db_index;
 
 706         for (nent = 1; nent <= entries; nent <<= 1)
 
 709         err = mthca_init_cq(to_mdev(ibdev), nent,
 
 710                             context ? to_mucontext(context) : NULL,
 
 711                             context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
 
 716         if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
 
 717                 mthca_free_cq(to_mdev(ibdev), cq);
 
 721         cq->resize_buf = NULL;
 
 730                 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
 
 731                                     to_mucontext(context)->db_tab, ucmd.arm_db_index);
 
 735                 mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
 
 736                                     to_mucontext(context)->db_tab, ucmd.set_db_index);
 
 741 static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
     /*
      * Attach a resize buffer to `cq`.
      *
      * Under cq->lock: check whether a resize buffer already exists, then
      * allocate the bookkeeping struct with GFP_ATOMIC (the spinlock is
      * held) and mark it CQ_RESIZE_ALLOC.  The CQ buffer itself is
      * allocated after the lock is dropped.  If that fails the bookkeeping
      * struct is freed and cq->resize_buf cleared, again under the lock;
      * otherwise cqe is set to entries - 1 and the state becomes
      * CQ_RESIZE_READY.
      */
 
 746         spin_lock_irq(&cq->lock);
 
 747         if (cq->resize_buf) {
 
 752         cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
 
 753         if (!cq->resize_buf) {
 
 758         cq->resize_buf->state = CQ_RESIZE_ALLOC;
 
 763         spin_unlock_irq(&cq->lock);
 
 768         ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
 
 770                 spin_lock_irq(&cq->lock);
 
 771                 kfree(cq->resize_buf);
 
 772                 cq->resize_buf = NULL;
 
 773                 spin_unlock_irq(&cq->lock);
 
 777         cq->resize_buf->cqe = entries - 1;
 
 779         spin_lock_irq(&cq->lock);
 
 780         cq->resize_buf->state = CQ_RESIZE_READY;
 
 781         spin_unlock_irq(&cq->lock);
 
 786 static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
     /*
      * Resize a CQ to hold at least `entries` CQEs.  The whole operation is
      * serialised by cq->mutex.
      *
      * The request is range-checked against limits.max_cqes and rounded up
      * to a power of two of entries + 1; the result is compared against the
      * current size (ibcq->cqe + 1).  One path gets the new buffer's lkey
      * from mthca_alloc_resize_buf(), the other reads a command from udata.
      * After mthca_RESIZE_CQ():
      *   - the failure path frees any resize buffer and clears
      *     cq->resize_buf under cq->lock;
      *   - the success path, under cq->lock, copies CQEs and switches
      *     cq->buf / cq->ibcq.cqe to the new buffer when the resize buffer
      *     is in state CQ_RESIZE_READY, remembers in tbuf/tcqe the buffer
      *     that is no longer needed, and frees that buffer after the lock
      *     is released.
      */
 
 788         struct mthca_dev *dev = to_mdev(ibcq->device);
 
 789         struct mthca_cq *cq = to_mcq(ibcq);
 
 790         struct mthca_resize_cq ucmd;
 
 795         if (entries < 1 || entries > dev->limits.max_cqes)
 
 798         mutex_lock(&cq->mutex);
 
 800         entries = roundup_pow_of_two(entries + 1);
 
 801         if (entries == ibcq->cqe + 1) {
 
 807                 ret = mthca_alloc_resize_buf(dev, cq, entries);
 
 810                 lkey = cq->resize_buf->buf.mr.ibmr.lkey;
 
 812                 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 
 819         ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
 
 824                 if (cq->resize_buf) {
 
 825                         mthca_free_cq_buf(dev, &cq->resize_buf->buf,
 
 826                                           cq->resize_buf->cqe);
 
 827                         kfree(cq->resize_buf);
 
 828                         spin_lock_irq(&cq->lock);
 
 829                         cq->resize_buf = NULL;
 
 830                         spin_unlock_irq(&cq->lock);
 
 836                 struct mthca_cq_buf tbuf;
 
 839                 spin_lock_irq(&cq->lock);
 
 840                 if (cq->resize_buf->state == CQ_RESIZE_READY) {
 
 841                         mthca_cq_resize_copy_cqes(cq);
 
 844                         cq->buf      = cq->resize_buf->buf;
 
 845                         cq->ibcq.cqe = cq->resize_buf->cqe;
 
 847                         tbuf = cq->resize_buf->buf;
 
 848                         tcqe = cq->resize_buf->cqe;
 
 851                 kfree(cq->resize_buf);
 
 852                 cq->resize_buf = NULL;
 
 853                 spin_unlock_irq(&cq->lock);
 
 855                 mthca_free_cq_buf(dev, &tbuf, tcqe);
 
 857                 ibcq->cqe = entries - 1;
 
 860         mutex_unlock(&cq->mutex);
 
 865 static int mthca_destroy_cq(struct ib_cq *cq)
     /*
      * Destroy a CQ.  On the userspace path the arm and set-CI doorbell
      * records are unmapped first (arm before set-CI); the CQ is then
      * released with mthca_free_cq().
      */
 
 868                 mthca_unmap_user_db(to_mdev(cq->device),
 
 869                                     &to_mucontext(cq->uobject->context)->uar,
 
 870                                     to_mucontext(cq->uobject->context)->db_tab,
 
 871                                     to_mcq(cq)->arm_db_index);
 
 872                 mthca_unmap_user_db(to_mdev(cq->device),
 
 873                                     &to_mucontext(cq->uobject->context)->uar,
 
 874                                     to_mucontext(cq->uobject->context)->db_tab,
 
 875                                     to_mcq(cq)->set_ci_db_index);
 
 877         mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
 
 883 static inline u32 convert_access(int acc)
     /*
      * Translate a set of IB_ACCESS_* flags into the matching
      * MTHCA_MPT_FLAG_* bits.  Remote atomic, remote write, remote read and
      * local write map one-to-one; MTHCA_MPT_FLAG_LOCAL_READ is always set,
      * whatever `acc` contains.
      */
 
 885         return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
 
 886                (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
 
 887                (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
 
 888                (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
 
 889                MTHCA_MPT_FLAG_LOCAL_READ;
 
 892 static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
     /*
      * Allocate a memory region that needs no address translation
      * (mthca_mr_alloc_notrans()), with access rights derived from `acc`
      * through convert_access().
      */
 
 897         mr = kmalloc(sizeof *mr, GFP_KERNEL);
 
 899                 return ERR_PTR(-ENOMEM);
 
 901         err = mthca_mr_alloc_notrans(to_mdev(pd->device),
 
 903                                      convert_access(acc), mr);
 
 913 static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
 
 914                                        struct ib_phys_buf *buffer_list,
     /*
      * Register a memory region described by a list of physical buffers.
      *
      * Steps visible in this listing:
      *   1. *iova_start and the first buffer must share the same offset
      *      within a page, else -EINVAL.
      *   2. Buffer start and end addresses are OR-ed into `mask` (the last
      *      buffer's end is excluded); any bit below PAGE_SIZE in the mask
      *      means a misaligned buffer and -EINVAL.
      *   3. Starting at PAGE_SHIFT, search for the largest page shift that
      *      the buffers allow.
      *   4. Grow the first buffer downwards to a page boundary of that size.
      *      Note that this modifies the caller's buffer_list[0].
      *   5. Count the pages, build the flat page_list and hand it to
      *      mthca_mr_alloc_phys().
      */
 
 928         /* First check that we have enough alignment */
 
 929         if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
 
 930                 return ERR_PTR(-EINVAL);
 
 934         for (i = 0; i < num_phys_buf; ++i) {
 
 936                         mask |= buffer_list[i].addr;
 
 937                 if (i != num_phys_buf - 1)
 
 938                         mask |= buffer_list[i].addr + buffer_list[i].size;
 
 940                 total_size += buffer_list[i].size;
 
 943         if (mask & ~PAGE_MASK)
 
 944                 return ERR_PTR(-EINVAL);
 
 946         /* Find largest page shift we can use to cover buffers */
 
 947         for (shift = PAGE_SHIFT; shift < 31; ++shift)
 
 948                 if (num_phys_buf > 1) {
 
 949                         if ((1ULL << shift) & mask)
 
 953                             buffer_list[0].size +
 
 954                             (buffer_list[0].addr & ((1ULL << shift) - 1)))
 
 958         buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
 
 959         buffer_list[0].addr &= ~0ull << shift;
 
 961         mr = kmalloc(sizeof *mr, GFP_KERNEL);
 
 963                 return ERR_PTR(-ENOMEM);
 
     /* Each buffer contributes its size rounded up to whole pages of 2^shift. */
 966         for (i = 0; i < num_phys_buf; ++i)
 
 967                 npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
 
 972         page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
 
 975                 return ERR_PTR(-ENOMEM);
 
 979         for (i = 0; i < num_phys_buf; ++i)
 
 981                      j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
 
 983                         page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
 
 985         mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
 
 986                   "in PD %x; shift %d, npages %d.\n",
 
 987                   (unsigned long long) buffer_list[0].addr,
 
 988                   (unsigned long long) *iova_start,
 
 992         err = mthca_mr_alloc_phys(to_mdev(pd->device),
 
 994                                   page_list, shift, npages,
 
 995                                   *iova_start, total_size,
 
 996                                   convert_access(acc), mr);
 
1001                 return ERR_PTR(err);
 
1008 static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
 
1009                                        int acc, struct ib_udata *udata)
     /*
      * Register a userspace memory region described by `region`.
      *
      * The page shift is derived from region->page_size (ffs() - 1).  The
      * chunk list is walked once to size the MTT and a second time to
      * collect the DMA address of every page into a scratch page, which is
      * flushed to the MTT with mthca_write_mtt() whenever it holds
      * PAGE_SIZE / sizeof (u64) - 2 entries; a final call writes whatever
      * is left.  The scratch page is then freed and the MR is created with
      * mthca_mr_alloc().  The MTT is released on the error path.
      *
      * Both chunk-list walks take the address of region->chunk_list; the
      * listing had that "&region" text mangled into a (R) sign, which is
      * restored here.
      */
 
1011         struct mthca_dev *dev = to_mdev(pd->device);
 
1012         struct ib_umem_chunk *chunk;
 
1013         struct mthca_mr *mr;
 
1019         shift = ffs(region->page_size) - 1;
 
1021         mr = kmalloc(sizeof *mr, GFP_KERNEL);
 
1023                 return ERR_PTR(-ENOMEM);
 
1026         list_for_each_entry(chunk, &region->chunk_list, list)
 
1029         mr->mtt = mthca_alloc_mtt(dev, n);
 
1030         if (IS_ERR(mr->mtt)) {
 
1031                 err = PTR_ERR(mr->mtt);
 
1035         pages = (u64 *) __get_free_page(GFP_KERNEL);
 
1043         list_for_each_entry(chunk, &region->chunk_list, list)
 
1044                 for (j = 0; j < chunk->nmap; ++j) {
 
1045                         len = sg_dma_len(&chunk->page_list[j]) >> shift;
 
1046                         for (k = 0; k < len; ++k) {
 
1047                                 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
 
1048                                         region->page_size * k;
 
1050                                  * Be friendly to WRITE_MTT command
 
1051                                  * and leave two empty slots for the
 
1052                                  * index and reserved fields of the
 
1055                                 if (i == PAGE_SIZE / sizeof (u64) - 2) {
 
1056                                         err = mthca_write_mtt(dev, mr->mtt,
 
1067                 err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
 
1069         free_page((unsigned long) pages);
 
1073         err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
 
1074                              region->length, convert_access(acc), mr);
 
1082         mthca_free_mtt(dev, mr->mtt);
 
1086         return ERR_PTR(err);
 
1089 static int mthca_dereg_mr(struct ib_mr *mr)
     /* Deregister a memory region: release it through mthca_free_mr(). */
 
1091         struct mthca_mr *mmr = to_mmr(mr);
 
1092         mthca_free_mr(to_mdev(mr->device), mmr);
 
1097 static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
 
1098                                       struct ib_fmr_attr *fmr_attr)
 
1100         struct mthca_fmr *fmr;
 
1103         fmr = kmemdup(fmr_attr, sizeof *fmr, GFP_KERNEL);
 
1105                 return ERR_PTR(-ENOMEM);
 
1107         err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
 
1108                              convert_access(mr_access_flags), fmr);
 
1112                 return ERR_PTR(err);
 
1118 static int mthca_dealloc_fmr(struct ib_fmr *fmr)
     /*
      * Release a fast memory region through mthca_free_fmr(), whose status
      * is captured in err.
      */
 
1120         struct mthca_fmr *mfmr = to_mfmr(fmr);
 
1123         err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
 
1131 static int mthca_unmap_fmr(struct list_head *fmr_list)
     /*
      * Unmap every FMR on fmr_list.
      *
      * A first pass checks that all entries belong to the same device and
      * remembers that device in mdev.  A second pass unmaps each FMR with
      * the Arbel (mem-free) or Tavor variant, chosen by
      * mthca_is_memfree().  mthca_SYNC_TPT() is issued afterwards.
      * NOTE(review): mdev stays NULL if fmr_list is empty - check how the
      * lines missing from this listing handle that before it is used.
      */
 
1136         struct mthca_dev *mdev = NULL;
 
1138         list_for_each_entry(fmr, fmr_list, list) {
 
1139                 if (mdev && to_mdev(fmr->device) != mdev)
 
1141                 mdev = to_mdev(fmr->device);
 
1147         if (mthca_is_memfree(mdev)) {
 
1148                 list_for_each_entry(fmr, fmr_list, list)
 
1149                         mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
 
1153                 list_for_each_entry(fmr, fmr_list, list)
 
1154                         mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
 
1156         err = mthca_SYNC_TPT(mdev, &status);
 
1164 static ssize_t show_rev(struct class_device *cdev, char *buf)
 
1166         struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
 
1167         return sprintf(buf, "%x\n", dev->rev_id);
 
1170 static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
 
1172         struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
 
1173         return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
 
1174                        (int) (dev->fw_ver >> 16) & 0xffff,
 
1175                        (int) dev->fw_ver & 0xffff);
 
1178 static ssize_t show_hca(struct class_device *cdev, char *buf)
 
1180         struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
 
1181         switch (dev->pdev->device) {
 
1182         case PCI_DEVICE_ID_MELLANOX_TAVOR:
 
1183                 return sprintf(buf, "MT23108\n");
 
1184         case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
 
1185                 return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
 
1186         case PCI_DEVICE_ID_MELLANOX_ARBEL:
 
1187                 return sprintf(buf, "MT25208\n");
 
1188         case PCI_DEVICE_ID_MELLANOX_SINAI:
 
1189         case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
 
1190                 return sprintf(buf, "MT25204\n");
 
1192                 return sprintf(buf, "unknown\n");
 
1196 static ssize_t show_board(struct class_device *cdev, char *buf)
 
1198         struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
 
1199         return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
 
/*
 * Class device attributes exported by the driver.  Each one is declared
 * with mode S_IRUGO, a show handler, and NULL in the store slot.
 */
1202 static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
 
1203 static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
 
1204 static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
 
1205 static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
 
/*
 * Table of the attributes declared above; mthca_register_device() walks it
 * and passes each entry to class_device_create_file().
 */
1207 static struct class_device_attribute *mthca_class_attributes[] = {
 
1208         &class_device_attr_hw_rev,
 
1209         &class_device_attr_fw_ver,
 
1210         &class_device_attr_hca_type,
 
1211         &class_device_attr_board_id
 
/*
 * Fill in dev->ib_dev.node_desc and dev->ib_dev.node_guid by issuing two
 * MAD_IFC queries: one for the NODE_DESC attribute and one for NODE_INFO.
 *
 * NOTE(review): the error handling after the allocation check and after
 * each mthca_MAD_IFC() call, the tail of each call's argument list, and the
 * cleanup/return path are elided from this listing -- confirm against the
 * full file.
 */
1214 static int mthca_init_node_data(struct mthca_dev *dev)
 
1216         struct ib_smp *in_mad  = NULL;
 
1217         struct ib_smp *out_mad = NULL;
 
             /* The request MAD is zeroed on allocation; the response is not. */
1221         in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
 
1222         out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 
1223         if (!in_mad || !out_mad)
 
             /* First query: node description. */
1226         init_query_mad(in_mad);
 
1227         in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
 
1229         err = mthca_MAD_IFC(dev, 1, 1,
 
1230                             1, NULL, NULL, in_mad, out_mad,
 
             /* Copy 64 bytes of response data into node_desc. */
1239         memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
 
             /* Second query: reuse the same request MAD with a new attribute. */
1241         in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
 
1243         err = mthca_MAD_IFC(dev, 1, 1,
 
1244                             1, NULL, NULL, in_mad, out_mad,
 
             /* The node GUID is the 8 bytes at offset 12 of the response data. */
1253         memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
 
/*
 * Register the mthca device with the IB core.
 *
 * Steps visible here: query node data, fill in dev->ib_dev (name template,
 * userspace verbs ABI version and command mask, node type, port count,
 * and the verb entry points), call ib_register_device(), create the class
 * device attribute files, and start catastrophic-error polling.
 *
 * NOTE(review): the local declarations, the checks on ret after
 * mthca_init_node_data() and ib_register_device(), the "else" lines of the
 * memfree/non-memfree selections, closing braces and the final return are
 * elided from this listing -- confirm against the full file.
 */
1261 int mthca_register_device(struct mthca_dev *dev)
 
1266         ret = mthca_init_node_data(dev);
 
             /* "%d" is left in the name as a template; nothing here fills it in. */
1270         strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
 
1271         dev->ib_dev.owner                = THIS_MODULE;
 
             /* Userspace verbs: ABI version and the bitmask of enabled commands. */
1273         dev->ib_dev.uverbs_abi_ver       = MTHCA_UVERBS_ABI_VERSION;
 
1274         dev->ib_dev.uverbs_cmd_mask      =
 
1275                 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
 
1276                 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
 
1277                 (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
 
1278                 (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
 
1279                 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
 
1280                 (1ull << IB_USER_VERBS_CMD_REG_MR)              |
 
1281                 (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
 
1282                 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
 
1283                 (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
 
1284                 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
 
1285                 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
 
1286                 (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
 
1287                 (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
 
1288                 (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
 
1289                 (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
 
1290                 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
 
1291                 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
 
1292         dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
 
1293         dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 
             /* Both the DMA device and the class device parent are the PCI device. */
1294         dev->ib_dev.dma_device           = &dev->pdev->dev;
 
1295         dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
 
             /* Device, port, user-context, PD and address-handle verbs. */
1296         dev->ib_dev.query_device         = mthca_query_device;
 
1297         dev->ib_dev.query_port           = mthca_query_port;
 
1298         dev->ib_dev.modify_device        = mthca_modify_device;
 
1299         dev->ib_dev.modify_port          = mthca_modify_port;
 
1300         dev->ib_dev.query_pkey           = mthca_query_pkey;
 
1301         dev->ib_dev.query_gid            = mthca_query_gid;
 
1302         dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
 
1303         dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
 
1304         dev->ib_dev.mmap                 = mthca_mmap_uar;
 
1305         dev->ib_dev.alloc_pd             = mthca_alloc_pd;
 
1306         dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
 
1307         dev->ib_dev.create_ah            = mthca_ah_create;
 
1308         dev->ib_dev.query_ah             = mthca_ah_query;
 
1309         dev->ib_dev.destroy_ah           = mthca_ah_destroy;
 
             /*
              * SRQ verbs and their userspace commands are installed only when
              * the device has MTHCA_FLAG_SRQ set.
              */
1311         if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
 
1312                 dev->ib_dev.create_srq           = mthca_create_srq;
 
1313                 dev->ib_dev.modify_srq           = mthca_modify_srq;
 
1314                 dev->ib_dev.query_srq            = mthca_query_srq;
 
1315                 dev->ib_dev.destroy_srq          = mthca_destroy_srq;
 
1316                 dev->ib_dev.uverbs_cmd_mask     |=
 
1317                         (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
 
1318                         (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
 
1319                         (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
 
1320                         (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 
                     /*
                      * Arbel routine for memfree devices; the tavor assignment
                      * below is presumably the else branch (that line is elided).
                      */
1322                 if (mthca_is_memfree(dev))
 
1323                         dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
 
1325                         dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
 
             /* QP, CQ and memory-region verbs. */
1328         dev->ib_dev.create_qp            = mthca_create_qp;
 
1329         dev->ib_dev.modify_qp            = mthca_modify_qp;
 
1330         dev->ib_dev.query_qp             = mthca_query_qp;
 
1331         dev->ib_dev.destroy_qp           = mthca_destroy_qp;
 
1332         dev->ib_dev.create_cq            = mthca_create_cq;
 
1333         dev->ib_dev.resize_cq            = mthca_resize_cq;
 
1334         dev->ib_dev.destroy_cq           = mthca_destroy_cq;
 
1335         dev->ib_dev.poll_cq              = mthca_poll_cq;
 
1336         dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
 
1337         dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
 
1338         dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
 
1339         dev->ib_dev.dereg_mr             = mthca_dereg_mr;
 
             /* FMR verbs are installed only when MTHCA_FLAG_FMR is set. */
1341         if (dev->mthca_flags & MTHCA_FLAG_FMR) {
 
1342                 dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
 
1343                 dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
 
1344                 dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
 
                     /* Same arbel/tavor selection as above (else line elided). */
1345                 if (mthca_is_memfree(dev))
 
1346                         dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
 
1348                         dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
 
1351         dev->ib_dev.attach_mcast         = mthca_multicast_attach;
 
1352         dev->ib_dev.detach_mcast         = mthca_multicast_detach;
 
1353         dev->ib_dev.process_mad          = mthca_process_mad;
 
             /*
              * CQ arming and work-request posting: arbel routines for memfree
              * devices, tavor routines otherwise (the else line is elided).
              */
1355         if (mthca_is_memfree(dev)) {
 
1356                 dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
 
1357                 dev->ib_dev.post_send     = mthca_arbel_post_send;
 
1358                 dev->ib_dev.post_recv     = mthca_arbel_post_receive;
 
1360                 dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
 
1361                 dev->ib_dev.post_send     = mthca_tavor_post_send;
 
1362                 dev->ib_dev.post_recv     = mthca_tavor_post_receive;
 
             /* The mutex is initialised before the device is registered. */
1365         mutex_init(&dev->cap_mask_mutex);
 
1367         ret = ib_register_device(&dev->ib_dev);
 
             /*
              * Create one attribute file per table entry.  The
              * ib_unregister_device() call below sits on the path taken when
              * creating a file fails (its guarding "if" line is elided).
              */
1371         for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
 
1372                 ret = class_device_create_file(&dev->ib_dev.class_dev,
 
1373                                                mthca_class_attributes[i]);
 
1375                         ib_unregister_device(&dev->ib_dev);
 
             /* Polling is started last, after registration and attribute setup. */
1380         mthca_start_catas_poll(dev);
 
/*
 * Undo mthca_register_device(): stop catastrophic-error polling first,
 * then unregister the device from the IB core.
 *
 * NOTE(review): the end of this function lies outside the visible listing;
 * confirm nothing else follows the two calls below.
 */
1385 void mthca_unregister_device(struct mthca_dev *dev)
 
1387         mthca_stop_catas_poll(dev);
 
1388         ib_unregister_device(&dev->ib_dev);