/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
 
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>

#include <linux/sched.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_user.h"
#include "mthca_memfree.h"
 
static void init_query_mad(struct ib_smp *mad)
{
        mad->base_version  = 1;
        mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
        mad->class_version = 1;
        mad->method        = IB_MGMT_METHOD_GET;
}
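
/*
 * Note: the query methods below all follow the same pattern: build a
 * SubnGet() SMP with init_query_mad(), execute it against the HCA's own
 * SMA via mthca_MAD_IFC(), and decode fields from the response data at
 * the byte offsets defined for the attribute by the IB spec.  A minimal
 * sketch of the pattern (illustrative only, error handling elided):
 *
 *      init_query_mad(in_mad);
 *      in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
 *      in_mad->attr_mod = cpu_to_be32(port);
 *      err = mthca_MAD_IFC(mdev, 1, 1, port, NULL, NULL,
 *                          in_mad, out_mad, &status);
 */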
 
static int mthca_query_device(struct ib_device *ibdev,
                              struct ib_device_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        struct mthca_dev *mdev = to_mdev(ibdev);
        u8 status;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        props->fw_ver              = mdev->fw_ver;

        init_query_mad(in_mad);
        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(mdev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        props->device_cap_flags    = mdev->device_cap_flags;
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
        props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&props->sys_image_guid, out_mad->data +  4, 8);

        props->max_mr_size         = ~0ull;
        props->page_size_cap       = mdev->limits.page_size_cap;
        props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
        props->max_qp_wr           = mdev->limits.max_wqes;
        props->max_sge             = mdev->limits.max_sg;
        props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
        props->max_cqe             = mdev->limits.max_cqes;
        props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
        props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
        props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
        props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
        props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
        props->max_srq_wr          = mdev->limits.max_srq_wqes;
        props->max_srq_sge         = mdev->limits.max_srq_sge;
        props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
        props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
                                        IB_ATOMIC_HCA : IB_ATOMIC_NONE;
        props->max_pkeys           = mdev->limits.pkey_table_len;
        props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
        props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                                           props->max_mcast_grp;
        /*
         * If Sinai memory key optimization is being used, then only
         * the 8-bit key portion will change.  For other HCAs, the
         * unused index bits will also be used for FMR remapping.
         */
        if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
                props->max_map_per_fmr = 255;
        else
                props->max_map_per_fmr =
                        (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;

        err = 0;
 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
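
/*
 * Worked example for max_map_per_fmr above (illustrative numbers): with
 * 2^17 MPTs, a non-Sinai HCA has 32 - ilog2(2^17) = 15 unused key/index
 * bits, so an FMR can be remapped (1 << 15) - 1 = 32767 times before it
 * must be freed and reallocated.  With the Sinai optimization only the
 * 8-bit key byte cycles, giving 255 remaps.
 */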
 
static int mthca_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        memset(props, 0, sizeof *props);

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
        props->lmc               = out_mad->data[34] & 0x7;
        props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
        props->sm_sl             = out_mad->data[36] & 0xf;
        props->state             = out_mad->data[32] & 0xf;
        props->phys_state        = out_mad->data[33] >> 4;
        props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
        props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
        props->max_msg_sz        = 0x80000000;
        props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
        props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
        props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
        props->active_width      = out_mad->data[31] & 0xf;
        props->active_speed      = out_mad->data[35] >> 4;
        props->max_mtu           = out_mad->data[41] & 0xf;
        props->active_mtu        = out_mad->data[36] >> 4;
        props->subnet_timeout    = out_mad->data[51] & 0x1f;
        props->max_vl_num        = out_mad->data[37] >> 4;
        props->init_type_reply   = out_mad->data[41] >> 4;

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
 
static int mthca_modify_device(struct ib_device *ibdev,
                               int mask,
                               struct ib_device_modify *props)
{
        if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
                return -EOPNOTSUPP;

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                        return -ERESTARTSYS;
                memcpy(ibdev->node_desc, props->node_desc, 64);
                mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
        }

        return 0;
}
 
static int mthca_modify_port(struct ib_device *ibdev,
                             u8 port, int port_modify_mask,
                             struct ib_port_modify *props)
{
        struct mthca_set_ib_param set_ib;
        struct ib_port_attr attr;
        int err;
        u8 status;

        if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;

        err = mthca_query_port(ibdev, port, &attr);
        if (err)
                goto out;

        set_ib.set_si_guid     = 0;
        set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);

        set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
                ~props->clr_port_cap_mask;

        err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

out:
        mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
        return err;
}
 
static int mthca_query_pkey(struct ib_device *ibdev,
                            u8 port, u16 index, u16 *pkey)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
        in_mad->attr_mod = cpu_to_be32(index / 32);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
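
/*
 * A PKeyTable SMP returns one 32-entry block of the P_Key table per
 * query, selected by the attribute modifier.  So for, e.g., index 40
 * (an illustrative value), attr_mod = 40 / 32 = 1 fetches the second
 * block, and entry 40 % 32 = 8 within it is the requested P_Key.
 */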
 
static int mthca_query_gid(struct ib_device *ibdev, u8 port,
                           int index, union ib_gid *gid)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
        in_mad->attr_mod = cpu_to_be32(port);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(gid->raw, out_mad->data + 8, 8);

        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
        in_mad->attr_mod = cpu_to_be32(index / 8);

        err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                            port, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

 out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
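
/*
 * A port GID is assembled from two queries: bytes 0-7 (the subnet
 * prefix) come from the PortInfo attribute, and bytes 8-15 (the port
 * GUID) come from GuidInfo, which returns eight GUIDs per block;
 * attr_mod = index / 8 picks the block and index % 8 the GUID in it.
 */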
 
static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
                                                struct ib_udata *udata)
{
        struct mthca_alloc_ucontext_resp uresp;
        struct mthca_ucontext           *context;
        int                              err;

        memset(&uresp, 0, sizeof uresp);

        uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
        if (mthca_is_memfree(to_mdev(ibdev)))
                uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
        else
                uresp.uarc_size = 0;

        context = kmalloc(sizeof *context, GFP_KERNEL);
        if (!context)
                return ERR_PTR(-ENOMEM);

        err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
        if (err) {
                kfree(context);
                return ERR_PTR(err);
        }

        context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
        if (IS_ERR(context->db_tab)) {
                err = PTR_ERR(context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(err);
        }

        if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
                mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
                mthca_uar_free(to_mdev(ibdev), &context->uar);
                kfree(context);
                return ERR_PTR(-EFAULT);
        }

        context->reg_mr_warned = 0;

        return &context->ibucontext;
}
 
static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
        mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
                                  to_mucontext(context)->db_tab);
        mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
        kfree(to_mucontext(context));

        return 0;
}
 
static int mthca_mmap_uar(struct ib_ucontext *context,
                          struct vm_area_struct *vma)
{
        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        if (io_remap_pfn_range(vma, vma->vm_start,
                               to_mucontext(context)->uar.pfn,
                               PAGE_SIZE, vma->vm_page_prot))
                return -EAGAIN;

        return 0;
}
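
/*
 * Each userspace context gets exactly one UAR page, so the driver
 * rejects any mapping that is not exactly PAGE_SIZE long and maps the
 * page uncached: UAR writes are MMIO doorbells that must reach the HCA
 * in order rather than sitting in the CPU cache.
 */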
 
static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
                                    struct ib_ucontext *context,
                                    struct ib_udata *udata)
{
        struct mthca_pd *pd;
        int err;

        pd = kmalloc(sizeof *pd, GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
        if (err) {
                kfree(pd);
                return ERR_PTR(err);
        }

        if (context) {
                if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
                        mthca_pd_free(to_mdev(ibdev), pd);
                        kfree(pd);
                        return ERR_PTR(-EFAULT);
                }
        }

        return &pd->ibpd;
}
 
static int mthca_dealloc_pd(struct ib_pd *pd)
{
        mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
        kfree(pd);

        return 0;
}
 
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
                                     struct ib_ah_attr *ah_attr)
{
        int err;
        struct mthca_ah *ah;

        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
        if (err) {
                kfree(ah);
                return ERR_PTR(err);
        }

        return &ah->ibah;
}

static int mthca_ah_destroy(struct ib_ah *ah)
{
        mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
        kfree(ah);

        return 0;
}
 
static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
                                       struct ib_srq_init_attr *init_attr,
                                       struct ib_udata *udata)
{
        struct mthca_create_srq ucmd;
        struct mthca_ucontext *context = NULL;
        struct mthca_srq *srq;
        int err;

        srq = kmalloc(sizeof *srq, GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);

        if (pd->uobject) {
                context = to_mucontext(pd->uobject->context);

                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        err = -EFAULT;
                        goto err_free;
                }

                err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                        context->db_tab, ucmd.db_index,
                                        ucmd.db_page);
                if (err)
                        goto err_free;

                srq->mr.ibmr.lkey = ucmd.lkey;
                srq->db_index     = ucmd.db_index;
        }

        err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
                              &init_attr->attr, srq);

        if (err && pd->uobject)
                mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
                                    context->db_tab, ucmd.db_index);

        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
                mthca_free_srq(to_mdev(pd->device), srq);
                err = -EFAULT;
                goto err_free;
        }

        return &srq->ibsrq;

err_free:
        kfree(srq);

        return ERR_PTR(err);
}

static int mthca_destroy_srq(struct ib_srq *srq)
{
        struct mthca_ucontext *context;

        if (srq->uobject) {
                context = to_mucontext(srq->uobject->context);

                mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
                                    context->db_tab, to_msrq(srq)->db_index);
        }

        mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
        kfree(srq);

        return 0;
}
 
static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
                                     struct ib_qp_init_attr *init_attr,
                                     struct ib_udata *udata)
{
        struct mthca_create_qp ucmd;
        struct mthca_qp *qp;
        int err;

        if (init_attr->create_flags)
                return ERR_PTR(-EINVAL);

        switch (init_attr->qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
        case IB_QPT_UD:
        {
                struct mthca_ucontext *context;

                qp = kmalloc(sizeof *qp, GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                if (pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                                kfree(qp);
                                return ERR_PTR(-EFAULT);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.sq_db_index, ucmd.sq_db_page);
                        if (err) {
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
                                                context->db_tab,
                                                ucmd.rq_db_index, ucmd.rq_db_page);
                        if (err) {
                                mthca_unmap_user_db(to_mdev(pd->device),
                                                    &context->uar,
                                                    context->db_tab,
                                                    ucmd.sq_db_index);
                                kfree(qp);
                                return ERR_PTR(err);
                        }

                        qp->mr.ibmr.lkey = ucmd.lkey;
                        qp->sq.db_index  = ucmd.sq_db_index;
                        qp->rq.db_index  = ucmd.rq_db_index;
                }

                err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
                                     to_mcq(init_attr->send_cq),
                                     to_mcq(init_attr->recv_cq),
                                     init_attr->qp_type, init_attr->sq_sig_type,
                                     &init_attr->cap, qp);

                if (err && pd->uobject) {
                        context = to_mucontext(pd->uobject->context);

                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.sq_db_index);
                        mthca_unmap_user_db(to_mdev(pd->device),
                                            &context->uar,
                                            context->db_tab,
                                            ucmd.rq_db_index);
                }

                qp->ibqp.qp_num = qp->qpn;
                break;
        }
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        {
                /* Don't allow userspace to create special QPs */
                if (pd->uobject)
                        return ERR_PTR(-EINVAL);

                qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
                if (!qp)
                        return ERR_PTR(-ENOMEM);

                qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

                err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
                                      to_mcq(init_attr->send_cq),
                                      to_mcq(init_attr->recv_cq),
                                      init_attr->sq_sig_type, &init_attr->cap,
                                      qp->ibqp.qp_num, init_attr->port_num,
                                      to_msqp(qp));
                break;
        }
        default:
                /* Don't support raw QPs */
                return ERR_PTR(-ENOSYS);
        }

        if (err) {
                kfree(qp);
                return ERR_PTR(err);
        }

        init_attr->cap.max_send_wr     = qp->sq.max;
        init_attr->cap.max_recv_wr     = qp->rq.max;
        init_attr->cap.max_send_sge    = qp->sq.max_gs;
        init_attr->cap.max_recv_sge    = qp->rq.max_gs;
        init_attr->cap.max_inline_data = qp->max_inline_data;

        return &qp->ibqp;
}

static int mthca_destroy_qp(struct ib_qp *qp)
{
        if (qp->uobject) {
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->sq.db_index);
                mthca_unmap_user_db(to_mdev(qp->device),
                                    &to_mucontext(qp->uobject->context)->uar,
                                    to_mucontext(qp->uobject->context)->db_tab,
                                    to_mqp(qp)->rq.db_index);
        }
        mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
        kfree(qp);
        return 0;
}
 
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
                                     int comp_vector,
                                     struct ib_ucontext *context,
                                     struct ib_udata *udata)
{
        struct mthca_create_cq ucmd;
        struct mthca_cq *cq;
        int nent;
        int err;

        if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
                return ERR_PTR(-EINVAL);

        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                        return ERR_PTR(-EFAULT);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.set_db_index, ucmd.set_db_page);
                if (err)
                        return ERR_PTR(err);

                err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                        to_mucontext(context)->db_tab,
                                        ucmd.arm_db_index, ucmd.arm_db_page);
                if (err)
                        goto err_unmap_set;
        }

        cq = kmalloc(sizeof *cq, GFP_KERNEL);
        if (!cq) {
                err = -ENOMEM;
                goto err_unmap_arm;
        }

        if (context) {
                cq->buf.mr.ibmr.lkey = ucmd.lkey;
                cq->set_ci_db_index  = ucmd.set_db_index;
                cq->arm_db_index     = ucmd.arm_db_index;
        }

        for (nent = 1; nent <= entries; nent <<= 1)
                ; /* nothing */

        err = mthca_init_cq(to_mdev(ibdev), nent,
                            context ? to_mucontext(context) : NULL,
                            context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
                            cq);
        if (err)
                goto err_free;

        if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
                mthca_free_cq(to_mdev(ibdev), cq);
                err = -EFAULT;
                goto err_free;
        }

        cq->resize_buf = NULL;

        return &cq->ibcq;

err_free:
        kfree(cq);

err_unmap_arm:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.arm_db_index);

err_unmap_set:
        if (context)
                mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
                                    to_mucontext(context)->db_tab, ucmd.set_db_index);

        return ERR_PTR(err);
}
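
/*
 * mthca CQ sizes must be powers of two: the loop above rounds the
 * requested entry count up to the next power of two strictly greater
 * than it before calling mthca_init_cq().  E.g. a request for 100
 * entries allocates a 128-entry CQ; one CQE is always unusable, so the
 * usable size reported back through ibcq->cqe is nent - 1.
 */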
 
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
                                  int entries)
{
        int ret;

        spin_lock_irq(&cq->lock);
        if (cq->resize_buf) {
                ret = -EBUSY;
                goto unlock;
        }

        cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
        if (!cq->resize_buf) {
                ret = -ENOMEM;
                goto unlock;
        }

        cq->resize_buf->state = CQ_RESIZE_ALLOC;

        ret = 0;

unlock:
        spin_unlock_irq(&cq->lock);

        if (ret)
                return ret;

        ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
        if (ret) {
                spin_lock_irq(&cq->lock);
                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                spin_unlock_irq(&cq->lock);
                return ret;
        }

        cq->resize_buf->cqe = entries - 1;

        spin_lock_irq(&cq->lock);
        cq->resize_buf->state = CQ_RESIZE_READY;
        spin_unlock_irq(&cq->lock);

        return 0;
}
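
/*
 * CQ resize handshake: mthca_alloc_resize_buf() publishes the new
 * buffer under cq->lock in state CQ_RESIZE_ALLOC, fills it, then marks
 * it CQ_RESIZE_READY.  By the time the RESIZE_CQ firmware command
 * completes, the poll path may already have drained the old buffer and
 * switched to the new one, which is why mthca_resize_cq() below checks
 * the state under the lock before copying CQEs across.
 */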
 
static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(ibcq->device);
        struct mthca_cq *cq = to_mcq(ibcq);
        struct mthca_resize_cq ucmd;
        u32 lkey;
        u8 status;
        int ret;

        if (entries < 1 || entries > dev->limits.max_cqes)
                return -EINVAL;

        mutex_lock(&cq->mutex);

        entries = roundup_pow_of_two(entries + 1);
        if (entries == ibcq->cqe + 1) {
                ret = 0;
                goto out;
        }

        if (cq->is_kernel) {
                ret = mthca_alloc_resize_buf(dev, cq, entries);
                if (ret)
                        goto out;
                lkey = cq->resize_buf->buf.mr.ibmr.lkey;
        } else {
                if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
                        ret = -EFAULT;
                        goto out;
                }
                lkey = ucmd.lkey;
        }

        ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status);
        if (status)
                ret = -EINVAL;

        if (ret) {
                if (cq->resize_buf) {
                        mthca_free_cq_buf(dev, &cq->resize_buf->buf,
                                          cq->resize_buf->cqe);
                        kfree(cq->resize_buf);
                        spin_lock_irq(&cq->lock);
                        cq->resize_buf = NULL;
                        spin_unlock_irq(&cq->lock);
                }
                goto out;
        }

        if (cq->is_kernel) {
                struct mthca_cq_buf tbuf;
                int tcqe;

                spin_lock_irq(&cq->lock);
                if (cq->resize_buf->state == CQ_RESIZE_READY) {
                        mthca_cq_resize_copy_cqes(cq);
                        tbuf         = cq->buf;
                        tcqe         = cq->ibcq.cqe;
                        cq->buf      = cq->resize_buf->buf;
                        cq->ibcq.cqe = cq->resize_buf->cqe;
                } else {
                        tbuf = cq->resize_buf->buf;
                        tcqe = cq->resize_buf->cqe;
                }

                kfree(cq->resize_buf);
                cq->resize_buf = NULL;
                spin_unlock_irq(&cq->lock);

                mthca_free_cq_buf(dev, &tbuf, tcqe);
        } else
                ibcq->cqe = entries - 1;

out:
        mutex_unlock(&cq->mutex);

        return ret;
}
 
static int mthca_destroy_cq(struct ib_cq *cq)
{
        if (cq->uobject) {
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->arm_db_index);
                mthca_unmap_user_db(to_mdev(cq->device),
                                    &to_mucontext(cq->uobject->context)->uar,
                                    to_mucontext(cq->uobject->context)->db_tab,
                                    to_mcq(cq)->set_ci_db_index);
        }
        mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
        kfree(cq);

        return 0;
}
 
static inline u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
               MTHCA_MPT_FLAG_LOCAL_READ;
}
 
static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mthca_mr *mr;
        int err;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mthca_mr_alloc_notrans(to_mdev(pd->device),
                                     to_mpd(pd)->pd_num,
                                     convert_access(acc), mr);
        if (err) {
                kfree(mr);
                return ERR_PTR(err);
        }

        mr->umem = NULL;

        return &mr->ibmr;
}
 
static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
                                       struct ib_phys_buf *buffer_list,
                                       int                 num_phys_buf,
                                       int                 acc,
                                       u64                *iova_start)
{
        struct mthca_mr *mr;
        u64 *page_list;
        u64 total_size;
        unsigned long mask;
        int shift;
        int npages;
        int err;
        int i, j, n;

        mask = buffer_list[0].addr ^ *iova_start;
        total_size = 0;
        for (i = 0; i < num_phys_buf; ++i) {
                if (i != 0)
                        mask |= buffer_list[i].addr;
                if (i != num_phys_buf - 1)
                        mask |= buffer_list[i].addr + buffer_list[i].size;

                total_size += buffer_list[i].size;
        }

        if (mask & ~PAGE_MASK)
                return ERR_PTR(-EINVAL);

        shift = __ffs(mask | 1 << 31);

        buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
        buffer_list[0].addr &= ~0ull << shift;

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        npages = 0;
        for (i = 0; i < num_phys_buf; ++i)
                npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

        page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
        if (!page_list) {
                kfree(mr);
                return ERR_PTR(-ENOMEM);
        }

        n = 0;
        for (i = 0; i < num_phys_buf; ++i)
                for (j = 0;
                     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
                     ++j)
                        page_list[n++] = buffer_list[i].addr + ((u64) j << shift);

        mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
                  "in PD %x; shift %d, npages %d.\n",
                  (unsigned long long) buffer_list[0].addr,
                  (unsigned long long) *iova_start,
                  to_mpd(pd)->pd_num,
                  shift, npages);

        err = mthca_mr_alloc_phys(to_mdev(pd->device),
                                  to_mpd(pd)->pd_num,
                                  page_list, shift, npages,
                                  *iova_start, total_size,
                                  convert_access(acc), mr);

        if (err) {
                kfree(page_list);
                kfree(mr);
                return ERR_PTR(err);
        }

        kfree(page_list);
        mr->umem = NULL;

        return &mr->ibmr;
}
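
/*
 * The mask/__ffs() computation above derives the largest page size that
 * aligns every buffer: any misaligned buffer start or interior boundary
 * sets low bits in 'mask', and __ffs() of the accumulated mask yields
 * the usable shift.  Illustrative example: two 32 KB buffers at
 * physical 0x10000 and 0x18000 (iova 0x10000) leave bit 15 as the
 * lowest set bit, so the region can be described with 32 KB
 * (shift = 15) pages.
 */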
 
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                       u64 virt, int acc, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(pd->device);
        struct ib_umem_chunk *chunk;
        struct mthca_mr *mr;
        struct mthca_reg_mr ucmd;
        u64 *pages;
        int shift, n, len;
        int i, j, k;
        int err = 0;
        int write_mtt_size;

        if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
                if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
                        mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
                                   current->comm);
                        mthca_warn(dev, "  Update libmthca to fix this.\n");
                }
                ++to_mucontext(pd->uobject->context)->reg_mr_warned;
                ucmd.mr_attrs = 0;
        } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
                return ERR_PTR(-EFAULT);

        mr = kmalloc(sizeof *mr, GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
                               ucmd.mr_attrs & MTHCA_MR_DMASYNC);

        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err;
        }

        shift = ffs(mr->umem->page_size) - 1;

        n = 0;
        list_for_each_entry(chunk, &mr->umem->chunk_list, list)
                n += chunk->nents;

        mr->mtt = mthca_alloc_mtt(dev, n);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err_umem;
        }

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages) {
                err = -ENOMEM;
                goto err_mtt;
        }

        i = n = 0;

        write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));

        list_for_each_entry(chunk, &mr->umem->chunk_list, list)
                for (j = 0; j < chunk->nmap; ++j) {
                        len = sg_dma_len(&chunk->page_list[j]) >> shift;
                        for (k = 0; k < len; ++k) {
                                pages[i++] = sg_dma_address(&chunk->page_list[j]) +
                                        mr->umem->page_size * k;
                                /*
                                 * Be friendly to write_mtt and pass it chunks
                                 * of appropriate size.
                                 */
                                if (i == write_mtt_size) {
                                        err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
                                        if (err)
                                                goto mtt_done;
                                        n += i;
                                        i = 0;
                                }
                        }
                }

        if (i)
                err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
        free_page((unsigned long) pages);
        if (err)
                goto err_mtt;

        err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
                             convert_access(acc), mr);

        if (err)
                goto err_mtt;

        return &mr->ibmr;

err_mtt:
        mthca_free_mtt(dev, mr->mtt);

err_umem:
        ib_umem_release(mr->umem);

err:
        kfree(mr);
        return ERR_PTR(err);
}
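
/*
 * MTT writes above are batched: translation entries are staged in a
 * single kernel page and flushed at most write_mtt_size entries at a
 * time, so one mthca_write_mtt() call covers many pages instead of
 * issuing one operation per page.
 */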
 
static int mthca_dereg_mr(struct ib_mr *mr)
{
        struct mthca_mr *mmr = to_mmr(mr);

        mthca_free_mr(to_mdev(mr->device), mmr);
        if (mmr->umem)
                ib_umem_release(mmr->umem);
        kfree(mmr);

        return 0;
}
 
static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                                      struct ib_fmr_attr *fmr_attr)
{
        struct mthca_fmr *fmr;
        int err;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
        err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
                             convert_access(mr_access_flags), fmr);

        if (err) {
                kfree(fmr);
                return ERR_PTR(err);
        }

        return &fmr->ibmr;
}

static int mthca_dealloc_fmr(struct ib_fmr *fmr)
{
        struct mthca_fmr *mfmr = to_mfmr(fmr);
        int err;

        err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
        if (err)
                return err;

        kfree(mfmr);
        return 0;
}
 
static int mthca_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *fmr;
        int err;
        u8 status;
        struct mthca_dev *mdev = NULL;

        list_for_each_entry(fmr, fmr_list, list) {
                if (mdev && to_mdev(fmr->device) != mdev)
                        return -EINVAL;
                mdev = to_mdev(fmr->device);
        }

        if (!mdev)
                return 0;

        if (mthca_is_memfree(mdev)) {
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));

                wmb();
        } else
                list_for_each_entry(fmr, fmr_list, list)
                        mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));

        err = mthca_SYNC_TPT(mdev, &status);
        if (err)
                return err;
        if (status)
                return -EINVAL;
        return 0;
}
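
/*
 * FMR unmap differs by HCA family: on mem-free (Arbel-style) HCAs the
 * driver can invalidate the MPT entries directly in host-resident ICM,
 * while Tavor-style HCAs are updated through the device BAR into
 * HCA-attached memory.  Either way, a single SYNC_TPT firmware command
 * then flushes the translation caches for the whole batch.
 */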
 
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%x\n", dev->rev_id);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
                           char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32),
                       (int) (dev->fw_ver >> 16) & 0xffff,
                       (int) dev->fw_ver & 0xffff);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
                        char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        switch (dev->pdev->device) {
        case PCI_DEVICE_ID_MELLANOX_TAVOR:
                return sprintf(buf, "MT23108\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
                return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
        case PCI_DEVICE_ID_MELLANOX_ARBEL:
                return sprintf(buf, "MT25208\n");
        case PCI_DEVICE_ID_MELLANOX_SINAI:
        case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
                return sprintf(buf, "MT25204\n");
        default:
                return sprintf(buf, "unknown\n");
        }
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
                          char *buf)
{
        struct mthca_dev *dev =
                container_of(device, struct mthca_dev, ib_dev.dev);
        return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}

static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
 
static struct device_attribute *mthca_dev_attributes[] = {
        &dev_attr_hw_rev,
        &dev_attr_fw_ver,
        &dev_attr_hca_type,
        &dev_attr_board_id
};
 
static int mthca_init_node_data(struct mthca_dev *dev)
{
        struct ib_smp *in_mad  = NULL;
        struct ib_smp *out_mad = NULL;
        int err = -ENOMEM;
        u8 status;

        in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
        out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
        if (!in_mad || !out_mad)
                goto out;

        init_query_mad(in_mad);
        in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;

        err = mthca_MAD_IFC(dev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        memcpy(dev->ib_dev.node_desc, out_mad->data, 64);

        in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

        err = mthca_MAD_IFC(dev, 1, 1,
                            1, NULL, NULL, in_mad, out_mad,
                            &status);
        if (err)
                goto out;
        if (status) {
                err = -EINVAL;
                goto out;
        }

        if (mthca_is_memfree(dev))
                dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
        memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);

out:
        kfree(in_mad);
        kfree(out_mad);
        return err;
}
 
int mthca_register_device(struct mthca_dev *dev)
{
        int ret;
        int i;

        ret = mthca_init_node_data(dev);
        if (ret)
                return ret;

        strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner                = THIS_MODULE;

        dev->ib_dev.uverbs_abi_ver       = MTHCA_UVERBS_ABI_VERSION;
        dev->ib_dev.uverbs_cmd_mask      =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
        dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
        dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
        dev->ib_dev.num_comp_vectors     = 1;
        dev->ib_dev.dma_device           = &dev->pdev->dev;
        dev->ib_dev.query_device         = mthca_query_device;
        dev->ib_dev.query_port           = mthca_query_port;
        dev->ib_dev.modify_device        = mthca_modify_device;
        dev->ib_dev.modify_port          = mthca_modify_port;
        dev->ib_dev.query_pkey           = mthca_query_pkey;
        dev->ib_dev.query_gid            = mthca_query_gid;
        dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
        dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
        dev->ib_dev.mmap                 = mthca_mmap_uar;
        dev->ib_dev.alloc_pd             = mthca_alloc_pd;
        dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
        dev->ib_dev.create_ah            = mthca_ah_create;
        dev->ib_dev.query_ah             = mthca_ah_query;
        dev->ib_dev.destroy_ah           = mthca_ah_destroy;

        if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
                dev->ib_dev.create_srq           = mthca_create_srq;
                dev->ib_dev.modify_srq           = mthca_modify_srq;
                dev->ib_dev.query_srq            = mthca_query_srq;
                dev->ib_dev.destroy_srq          = mthca_destroy_srq;
                dev->ib_dev.uverbs_cmd_mask     |=
                        (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
                        (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
                        (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
                        (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);

                if (mthca_is_memfree(dev))
                        dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
                else
                        dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
        }

        dev->ib_dev.create_qp            = mthca_create_qp;
        dev->ib_dev.modify_qp            = mthca_modify_qp;
        dev->ib_dev.query_qp             = mthca_query_qp;
        dev->ib_dev.destroy_qp           = mthca_destroy_qp;
        dev->ib_dev.create_cq            = mthca_create_cq;
        dev->ib_dev.resize_cq            = mthca_resize_cq;
        dev->ib_dev.destroy_cq           = mthca_destroy_cq;
        dev->ib_dev.poll_cq              = mthca_poll_cq;
        dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
        dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
        dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
        dev->ib_dev.dereg_mr             = mthca_dereg_mr;

        if (dev->mthca_flags & MTHCA_FLAG_FMR) {
                dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
                dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
                dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
                if (mthca_is_memfree(dev))
                        dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
                else
                        dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
        }

        dev->ib_dev.attach_mcast         = mthca_multicast_attach;
        dev->ib_dev.detach_mcast         = mthca_multicast_detach;
        dev->ib_dev.process_mad          = mthca_process_mad;

        if (mthca_is_memfree(dev)) {
                dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
                dev->ib_dev.post_send     = mthca_arbel_post_send;
                dev->ib_dev.post_recv     = mthca_arbel_post_receive;
        } else {
                dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
                dev->ib_dev.post_send     = mthca_tavor_post_send;
                dev->ib_dev.post_recv     = mthca_tavor_post_receive;
        }

        mutex_init(&dev->cap_mask_mutex);

        ret = ib_register_device(&dev->ib_dev);
        if (ret)
                return ret;

        for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
                ret = device_create_file(&dev->ib_dev.dev,
                                         mthca_dev_attributes[i]);
                if (ret) {
                        ib_unregister_device(&dev->ib_dev);
                        return ret;
                }
        }

        mthca_start_catas_poll(dev);

        return 0;
}

void mthca_unregister_device(struct mthca_dev *dev)
{
        mthca_stop_catas_poll(dev);
        ib_unregister_device(&dev->ib_dev);
}