 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/sched.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
#include "mthca_wqe.h"
 
        MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
        MTHCA_ACK_REQ_FREQ       = 10,
        MTHCA_FLIGHT_LIMIT       = 9,
        MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
        MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
        MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */

        MTHCA_QP_STATE_RST  = 0,
        MTHCA_QP_STATE_INIT = 1,
        MTHCA_QP_STATE_RTR  = 2,
        MTHCA_QP_STATE_RTS  = 3,
        MTHCA_QP_STATE_SQE  = 4,
        MTHCA_QP_STATE_SQD  = 5,
        MTHCA_QP_STATE_ERR  = 6,
        MTHCA_QP_STATE_DRAINING = 7

        MTHCA_QP_PM_MIGRATED = 0x3,
        MTHCA_QP_PM_ARMED    = 0x0,
        MTHCA_QP_PM_REARM    = 0x1

        /* qp_context flags */
        MTHCA_QP_BIT_DE  = 1 <<  8,

        MTHCA_QP_BIT_SRE = 1 << 15,
        MTHCA_QP_BIT_SWE = 1 << 14,
        MTHCA_QP_BIT_SAE = 1 << 13,
        MTHCA_QP_BIT_SIC = 1 <<  4,
        MTHCA_QP_BIT_SSC = 1 <<  3,

        MTHCA_QP_BIT_RRE = 1 << 15,
        MTHCA_QP_BIT_RWE = 1 << 14,
        MTHCA_QP_BIT_RAE = 1 << 13,
        MTHCA_QP_BIT_RIC = 1 <<  4,
        MTHCA_QP_BIT_RSC = 1 <<  3

        MTHCA_SEND_DOORBELL_FENCE = 1 << 5
 
struct mthca_qp_path {
        __be32 sl_tclass_flowlabel;
} __attribute__((packed));

struct mthca_qp_context {
        __be32 tavor_sched_queue; /* Reserved on Arbel */
        u8     rq_size_stride;  /* Reserved on Tavor */
        u8     sq_size_stride;  /* Reserved on Tavor */
        u8     rlkey_arbel_sched_queue; /* Reserved on Tavor */
        struct mthca_qp_path pri_path;
        struct mthca_qp_path alt_path;
        __be32 next_send_psn;
        __be32 snd_wqe_base_l;  /* Next send WQE on Tavor */
        __be32 snd_db_index;    /* (debugging only entries) */
        __be32 last_acked_psn;
        __be32 rnr_nextrecvpsn;
        __be32 rcv_wqe_base_l;  /* Next recv WQE on Tavor */
        __be32 rcv_db_index;    /* (debugging only entries) */
        __be16 rq_wqe_counter;  /* reserved on Tavor */
        __be16 sq_wqe_counter;  /* reserved on Tavor */
} __attribute__((packed));

struct mthca_qp_param {
        __be32 opt_param_mask;
        struct mthca_qp_context context;
} __attribute__((packed));
 
        MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
        MTHCA_QP_OPTPAR_RRE               = 1 << 1,
        MTHCA_QP_OPTPAR_RAE               = 1 << 2,
        MTHCA_QP_OPTPAR_RWE               = 1 << 3,
        MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
        MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
        MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
        MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
        MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
        MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
        MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
        MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
        MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
        MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
        MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
        MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
        MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
 
static const u8 mthca_opcode[] = {
        [IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
        [IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
        [IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
        [IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
        [IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
        [IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
        [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
 
static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
        return qp->qpn >= dev->qp_table.sqp_start &&
                qp->qpn <= dev->qp_table.sqp_start + 3;

static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
        return qp->qpn >= dev->qp_table.sqp_start &&
                qp->qpn <= dev->qp_table.sqp_start + 1;
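
/*
 * WQEs live either in one contiguous "direct" buffer or in a list of
 * pages; the receive queue starts at offset 0 and the send queue at
 * send_wqe_offset, so a WQE's address is derived from its index
 * shifted by the queue's wqe_shift.
 */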
 
static void *get_recv_wqe(struct mthca_qp *qp, int n)
                return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
                return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
                        ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));

static void *get_send_wqe(struct mthca_qp *qp, int n)
                return qp->queue.direct.buf + qp->send_wqe_offset +
                        (n << qp->sq.wqe_shift);
                return qp->queue.page_list[(qp->send_wqe_offset +
                                            (n << qp->sq.wqe_shift)) >>
                        ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &

static void mthca_wq_reset(struct mthca_wq *wq)
        wq->last_comp = wq->max - 1;
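
/*
 * Async event handler: look up the QP in the table under
 * qp_table.lock, update the active port on a path migration event,
 * and pass the event on to the consumer's event_handler if one is
 * registered.
 */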
 
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
                    enum ib_event_type event_type)
        struct ib_event event;

        spin_lock(&dev->qp_table.lock);
        qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
        spin_unlock(&dev->qp_table.lock);

                mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);

        if (event_type == IB_EVENT_PATH_MIG)
                qp->port = qp->alt_port;

        event.device      = &dev->ib_dev;
        event.event       = event_type;
        event.element.qp  = &qp->ibqp;
        if (qp->ibqp.event_handler)
                qp->ibqp.event_handler(&event, qp->ibqp.qp_context);

        spin_lock(&dev->qp_table.lock);
        spin_unlock(&dev->qp_table.lock);
 
static int to_mthca_state(enum ib_qp_state ib_state)
        case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
        case IB_QPS_INIT:  return MTHCA_QP_STATE_INIT;
        case IB_QPS_RTR:   return MTHCA_QP_STATE_RTR;
        case IB_QPS_RTS:   return MTHCA_QP_STATE_RTS;
        case IB_QPS_SQD:   return MTHCA_QP_STATE_SQD;
        case IB_QPS_SQE:   return MTHCA_QP_STATE_SQE;
        case IB_QPS_ERR:   return MTHCA_QP_STATE_ERR;

enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };

static int to_mthca_st(int transport)
        case RC:  return MTHCA_QP_ST_RC;
        case UC:  return MTHCA_QP_ST_UC;
        case UD:  return MTHCA_QP_ST_UD;
        case RD:  return MTHCA_QP_ST_RD;
        case MLX: return MTHCA_QP_ST_MLX;

static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
        if (attr_mask & IB_QP_PKEY_INDEX)
                sqp->pkey_index = attr->pkey_index;
        if (attr_mask & IB_QP_QKEY)
                sqp->qkey = attr->qkey;
        if (attr_mask & IB_QP_SQ_PSN)
                sqp->send_psn = attr->sq_psn;

static void init_port(struct mthca_dev *dev, int port)
        struct mthca_init_ib_param param;

        memset(&param, 0, sizeof param);

        param.port_width = dev->limits.port_width_cap;
        param.vl_cap     = dev->limits.vl_cap;
        param.mtu_cap    = dev->limits.mtu_cap;
        param.gid_cap    = dev->limits.gid_table_len;
        param.pkey_cap   = dev->limits.pkey_table_len;

        err = mthca_INIT_IB(dev, &param, port, &status);
                mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
                mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
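
/*
 * Compute the RRE/RAE/RWE bits for the QP context: take the access
 * flags and responder depth from the attribute mask when they are
 * being modified, otherwise fall back to the values cached in the QP.
 */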
 
static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
        u32 hw_access_flags = 0;

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                dest_rd_atomic = attr->max_dest_rd_atomic;
                dest_rd_atomic = qp->resp_depth;

        if (attr_mask & IB_QP_ACCESS_FLAGS)
                access_flags = attr->qp_access_flags;
                access_flags = qp->atomic_rd_en;

                access_flags &= IB_ACCESS_REMOTE_WRITE;

        if (access_flags & IB_ACCESS_REMOTE_READ)
                hw_access_flags |= MTHCA_QP_BIT_RRE;
        if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
                hw_access_flags |= MTHCA_QP_BIT_RAE;
        if (access_flags & IB_ACCESS_REMOTE_WRITE)
                hw_access_flags |= MTHCA_QP_BIT_RWE;

        return cpu_to_be32(hw_access_flags);
 
static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
        switch (mthca_state) {
        case MTHCA_QP_STATE_RST:      return IB_QPS_RESET;
        case MTHCA_QP_STATE_INIT:     return IB_QPS_INIT;
        case MTHCA_QP_STATE_RTR:      return IB_QPS_RTR;
        case MTHCA_QP_STATE_RTS:      return IB_QPS_RTS;
        case MTHCA_QP_STATE_DRAINING:
        case MTHCA_QP_STATE_SQD:      return IB_QPS_SQD;
        case MTHCA_QP_STATE_SQE:      return IB_QPS_SQE;
        case MTHCA_QP_STATE_ERR:      return IB_QPS_ERR;

static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
        switch (mthca_mig_state) {
        case 0:  return IB_MIG_ARMED;
        case 1:  return IB_MIG_REARM;
        case 3:  return IB_MIG_MIGRATED;

static int to_ib_qp_access_flags(int mthca_flags)
        if (mthca_flags & MTHCA_QP_BIT_RRE)
                ib_flags |= IB_ACCESS_REMOTE_READ;
        if (mthca_flags & MTHCA_QP_BIT_RWE)
                ib_flags |= IB_ACCESS_REMOTE_WRITE;
        if (mthca_flags & MTHCA_QP_BIT_RAE)
                ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
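
/*
 * Decode a hardware address path into an ib_ah_attr: port, DLID, SL,
 * source path bits, static rate and, when the GRH bit is set in
 * g_mylmc, the GRH fields packed into sl_tclass_flowlabel and rgid.
 */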
 
static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
                          struct mthca_qp_path *path)
        memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
        ib_ah_attr->port_num      = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;

        if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)

        ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
        ib_ah_attr->sl            = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
        ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
        ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
                                                     path->static_rate & 0xf,
                                                     ib_ah_attr->port_num);
        ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
        if (ib_ah_attr->ah_flags) {
                ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
                ib_ah_attr->grh.hop_limit  = path->hop_limit;
                ib_ah_attr->grh.traffic_class =
                        (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
                ib_ah_attr->grh.flow_label =
                        be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
                memcpy(ib_ah_attr->grh.dgid.raw,
                        path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
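
/*
 * Query a QP: for anything other than a QP still in RESET, issue the
 * QUERY_QP firmware command through a mailbox and translate the
 * returned mthca_qp_context fields back into ib_qp_attr values.
 */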
 
int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
                   struct ib_qp_init_attr *qp_init_attr)
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
        struct mthca_mailbox *mailbox = NULL;
        struct mthca_qp_param *qp_param;
        struct mthca_qp_context *context;

        mutex_lock(&qp->mutex);

        if (qp->state == IB_QPS_RESET) {
                qp_attr->qp_state = IB_QPS_RESET;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);

        err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
                mthca_warn(dev, "QUERY_QP returned status %02x\n", status);

        qp_param    = mailbox->buf;
        context     = &qp_param->context;
        mthca_state = be32_to_cpu(context->flags) >> 28;

        qp->state                    = to_ib_qp_state(mthca_state);
        qp_attr->qp_state            = qp->state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
        qp_attr->qkey                = be32_to_cpu(context->qkey);
        qp_attr->rq_psn              = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
        qp_attr->sq_psn              = be32_to_cpu(context->next_send_psn) & 0xffffff;
        qp_attr->dest_qp_num         = be32_to_cpu(context->remote_qpn) & 0xffffff;
        qp_attr->qp_access_flags     =
                to_ib_qp_access_flags(be32_to_cpu(context->params2));

        if (qp->transport == RC || qp->transport == UC) {
                to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
                to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
                qp_attr->alt_pkey_index =
                        be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
                qp_attr->alt_port_num   = qp_attr->alt_ah_attr.port_num;

        qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
                (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;

        /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
        qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;

        qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);

        qp_attr->max_dest_rd_atomic =
                1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
        qp_attr->min_rnr_timer      =
                (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
        qp_attr->timeout            = context->pri_path.ackto >> 3;
        qp_attr->retry_cnt          = (be32_to_cpu(context->params1) >> 16) & 0x7;
        qp_attr->rnr_retry          = context->pri_path.rnr_retry >> 5;
        qp_attr->alt_timeout        = context->alt_path.ackto >> 3;

        qp_attr->cur_qp_state        = qp_attr->qp_state;
        qp_attr->cap.max_send_wr     = qp->sq.max;
        qp_attr->cap.max_recv_wr     = qp->rq.max;
        qp_attr->cap.max_send_sge    = qp->sq.max_gs;
        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
        qp_attr->cap.max_inline_data = qp->max_inline_data;

        qp_init_attr->cap            = qp_attr->cap;

        mthca_free_mailbox(dev, mailbox);

        mutex_unlock(&qp->mutex);
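
/*
 * Encode an ib_ah_attr into a hardware address path, validating the
 * SGID index before setting the GRH bit and the packed
 * SL/traffic-class/flow-label word.
 */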
 
static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
                          struct mthca_qp_path *path, u8 port)
        path->g_mylmc     = ah->src_path_bits & 0x7f;
        path->rlid        = cpu_to_be16(ah->dlid);
        path->static_rate = mthca_get_rate(dev, ah->static_rate, port);

        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
                        mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
                                  ah->grh.sgid_index, dev->limits.gid_table_len-1);

                path->g_mylmc   |= 1 << 7;
                path->mgid_index = ah->grh.sgid_index;
                path->hop_limit  = ah->grh.hop_limit;
                path->sl_tclass_flowlabel =
                        cpu_to_be32((ah->sl << 28)                |
                                    (ah->grh.traffic_class << 20) |
                                    (ah->grh.flow_label));
                memcpy(path->rgid, ah->grh.dgid.raw, 16);
                path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
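
/*
 * Core of modify QP: build the complete mthca_qp_context plus
 * opt_param_mask in a mailbox from the attribute mask, issue the
 * MODIFY_QP firmware command for the requested transition, then
 * update the driver's cached QP state (and, for QP0, bring the IB
 * link up or down).
 */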
 
static int __mthca_modify_qp(struct ib_qp *ibqp,
                             const struct ib_qp_attr *attr, int attr_mask,
                             enum ib_qp_state cur_state, enum ib_qp_state new_state)
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
        struct mthca_mailbox *mailbox;
        struct mthca_qp_param *qp_param;
        struct mthca_qp_context *qp_context;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);

        qp_param = mailbox->buf;
        qp_context = &qp_param->context;
        memset(qp_param, 0, sizeof *qp_param);

        qp_context->flags      = cpu_to_be32((to_mthca_state(new_state) << 28) |
                                             (to_mthca_st(qp->transport) << 16));
        qp_context->flags     |= cpu_to_be32(MTHCA_QP_BIT_DE);
        if (!(attr_mask & IB_QP_PATH_MIG_STATE))
                qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
                switch (attr->path_mig_state) {
                case IB_MIG_MIGRATED:
                        qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
                        qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
                        qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);

        /* leave tavor_sched_queue as 0 */

        if (qp->transport == MLX || qp->transport == UD)
                qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
        else if (attr_mask & IB_QP_PATH_MTU) {
                if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
                        mthca_dbg(dev, "path MTU (%u) is invalid\n",
                qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;

        if (mthca_is_memfree(dev)) {
                        qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
                qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
                        qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
                qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;

        /* leave arbel_sched_queue as 0 */

        if (qp->ibqp.uobject)
                qp_context->usr_page =
                        cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
                qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
        qp_context->local_qpn  = cpu_to_be32(qp->qpn);
        if (attr_mask & IB_QP_DEST_QPN) {
                qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);

        if (qp->transport == MLX)
                qp_context->pri_path.port_pkey |=
                        cpu_to_be32(qp->port << 24);
                if (attr_mask & IB_QP_PORT) {
                        qp_context->pri_path.port_pkey |=
                                cpu_to_be32(attr->port_num << 24);
                        qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);

        if (attr_mask & IB_QP_PKEY_INDEX) {
                qp_context->pri_path.port_pkey |=
                        cpu_to_be32(attr->pkey_index);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);

        if (attr_mask & IB_QP_RNR_RETRY) {
                qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
                        attr->rnr_retry << 5;
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
                                                        MTHCA_QP_OPTPAR_ALT_RNR_RETRY);

        if (attr_mask & IB_QP_AV) {
                if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
                                   attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);

        if (ibqp->qp_type == IB_QPT_RC &&
            cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
                u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;

                if (mthca_is_memfree(dev))
                        qp_context->rlkey_arbel_sched_queue |= sched_queue;
                        qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);

                qp_param->opt_param_mask |=
                        cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);

        if (attr_mask & IB_QP_TIMEOUT) {
                qp_context->pri_path.ackto = attr->timeout << 3;
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);

        if (attr_mask & IB_QP_ALT_PATH) {
                if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
                        mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
                                  attr->alt_pkey_index, dev->limits.pkey_table_len-1);

                if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
                        mthca_dbg(dev, "Alternate port number (%u) is invalid\n",

                if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
                                   attr->alt_ah_attr.port_num))

                qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
                                                              attr->alt_port_num << 24);
                qp_context->alt_path.ackto = attr->alt_timeout << 3;
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);

        qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
        /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
        qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
        qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
                                             (MTHCA_FLIGHT_LIMIT << 24) |
        if (qp->sq_policy == IB_SIGNAL_ALL_WR)
                qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
        if (attr_mask & IB_QP_RETRY_CNT) {
                qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
                if (attr->max_rd_atomic) {
                        qp_context->params1 |=
                                cpu_to_be32(MTHCA_QP_BIT_SRE |
                        qp_context->params1 |=
                                cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);

        if (attr_mask & IB_QP_SQ_PSN)
                qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
        qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);

        if (mthca_is_memfree(dev)) {
                qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
                qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
                if (attr->max_dest_rd_atomic)
                        qp_context->params2 |=
                                cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);

                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);

        if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
                qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
                                                        MTHCA_QP_OPTPAR_RRE |
                                                        MTHCA_QP_OPTPAR_RAE);

        qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);

                qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);

        if (attr_mask & IB_QP_MIN_RNR_TIMER) {
                qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);

        if (attr_mask & IB_QP_RQ_PSN)
                qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

        qp_context->ra_buff_indx =
                cpu_to_be32(dev->qp_table.rdb_base +
                            ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
                             dev->qp_table.rdb_shift));

        qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);

        if (mthca_is_memfree(dev))
                qp_context->rcv_db_index   = cpu_to_be32(qp->rq.db_index);

        if (attr_mask & IB_QP_QKEY) {
                qp_context->qkey = cpu_to_be32(attr->qkey);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);

                qp_context->srqn = cpu_to_be32(1 << 24 |
                                               to_msrq(ibqp->srq)->srqn);

        if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
            attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY               &&
            attr->en_sqd_async_notify)

        err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
                              mailbox, sqd_event, &status);
                mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
                           cur_state, new_state, status);

        qp->state = new_state;
        if (attr_mask & IB_QP_ACCESS_FLAGS)
                qp->atomic_rd_en = attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                qp->resp_depth = attr->max_dest_rd_atomic;
        if (attr_mask & IB_QP_PORT)
                qp->port = attr->port_num;
        if (attr_mask & IB_QP_ALT_PATH)
                qp->alt_port = attr->alt_port_num;

                store_attrs(to_msqp(qp), attr, attr_mask);

         * If we moved QP0 to RTR, bring the IB link up; if we moved
         * QP0 to RESET or ERROR, bring the link back down.
        if (is_qp0(dev, qp)) {
                if (cur_state != IB_QPS_RTR &&
                    new_state == IB_QPS_RTR)
                        init_port(dev, qp->port);

                if (cur_state != IB_QPS_RESET &&
                    cur_state != IB_QPS_ERR &&
                    (new_state == IB_QPS_RESET ||
                     new_state == IB_QPS_ERR))
                        mthca_CLOSE_IB(dev, qp->port, &status);

         * If we moved a kernel QP to RESET, clean up all old CQ
         * entries and reinitialize the QP.
        if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
                mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
                               qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
                        mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);

                mthca_wq_reset(&qp->sq);
                qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);

                mthca_wq_reset(&qp->rq);
                qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

                if (mthca_is_memfree(dev)) {

        mthca_free_mailbox(dev, mailbox);
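
/*
 * ib_qp modify entry point: determine the current and requested
 * states, validate the transition and the attribute limits against
 * the device caps, and hand the real work to __mthca_modify_qp()
 * under qp->mutex.
 */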
 
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
                    struct ib_udata *udata)
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
        enum ib_qp_state cur_state, new_state;

        mutex_lock(&qp->mutex);
        if (attr_mask & IB_QP_CUR_STATE) {
                cur_state = attr->cur_qp_state;
                spin_lock_irq(&qp->sq.lock);
                spin_lock(&qp->rq.lock);
                cur_state = qp->state;
                spin_unlock(&qp->rq.lock);
                spin_unlock_irq(&qp->sq.lock);

        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
                mthca_dbg(dev, "Bad QP transition (transport %d) "
                          "%d->%d with attr 0x%08x\n",
                          qp->transport, cur_state, new_state,

        if ((attr_mask & IB_QP_PKEY_INDEX) &&
             attr->pkey_index >= dev->limits.pkey_table_len) {
                mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
                          attr->pkey_index, dev->limits.pkey_table_len-1);

        if ((attr_mask & IB_QP_PORT) &&
            (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
                mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
            attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
                mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
                          attr->max_rd_atomic, dev->limits.max_qp_init_rdma);

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
            attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
                mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
                          attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);

        if (cur_state == new_state && cur_state == IB_QPS_RESET) {

        err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);

        mutex_unlock(&qp->mutex);
 
static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
         * Calculate the maximum size of WQE s/g segments, excluding
         * the next segment and other non-data segments.
        int max_data_size = desc_sz - sizeof (struct mthca_next_seg);

        switch (qp->transport) {
                max_data_size -= 2 * sizeof (struct mthca_data_seg);

                if (mthca_is_memfree(dev))
                        max_data_size -= sizeof (struct mthca_arbel_ud_seg);
                        max_data_size -= sizeof (struct mthca_tavor_ud_seg);

                max_data_size -= sizeof (struct mthca_raddr_seg);

        return max_data_size;

static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
        /* We don't support inline data for kernel QPs (yet). */
        return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
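
/*
 * Recompute the advertised QP capabilities (max inline data and
 * scatter/gather entries) from the WQE sizes that were actually
 * allocated.
 */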
 
static void mthca_adjust_qp_caps(struct mthca_dev *dev,
        int max_data_size = mthca_max_data_size(dev, qp,
                                                min(dev->limits.max_desc_sz,
                                                    1 << qp->sq.wqe_shift));

        qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);

        qp->sq.max_gs = min_t(int, dev->limits.max_sg,
                              max_data_size / sizeof (struct mthca_data_seg));
        qp->rq.max_gs = min_t(int, dev->limits.max_sg,
                               (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
                                sizeof (struct mthca_next_seg)) /
                               sizeof (struct mthca_data_seg));
 
 * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
 * rq.max_gs and sq.max_gs must all be assigned.
 * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
 * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
        size = sizeof (struct mthca_next_seg) +
                qp->rq.max_gs * sizeof (struct mthca_data_seg);

        if (size > dev->limits.max_desc_sz)

        for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;

        size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
        switch (qp->transport) {
                size += 2 * sizeof (struct mthca_data_seg);

                size += mthca_is_memfree(dev) ?
                        sizeof (struct mthca_arbel_ud_seg) :
                        sizeof (struct mthca_tavor_ud_seg);

                size += sizeof (struct mthca_raddr_seg);

                size += sizeof (struct mthca_raddr_seg);
                 * An atomic op will require an atomic segment, a
                 * remote address segment and one scatter entry.
                size = max_t(int, size,
                             sizeof (struct mthca_atomic_seg) +
                             sizeof (struct mthca_raddr_seg) +
                             sizeof (struct mthca_data_seg));

        /* Make sure that we have enough space for a bind request */
        size = max_t(int, size, sizeof (struct mthca_bind_seg));

        size += sizeof (struct mthca_next_seg);

        if (size > dev->limits.max_desc_sz)

        for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;

        qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
                                    1 << qp->sq.wqe_shift);

         * If this is a userspace QP, we don't actually have to
         * allocate anything.  All we need is to calculate the WQE
         * sizes and the send_wqe_offset, so we're done now.
        if (pd->ibpd.uobject)

        size = PAGE_ALIGN(qp->send_wqe_offset +
                          (qp->sq.max << qp->sq.wqe_shift));

        qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),

        err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
                              &qp->queue, &qp->is_direct, pd, 0, &qp->mr);

static void mthca_free_wqe_buf(struct mthca_dev *dev,
                               struct mthca_qp *qp)
        mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
                                       (qp->sq.max << qp->sq.wqe_shift)),
                       &qp->queue, qp->is_direct, &qp->mr);
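
/*
 * On mem-free (Arbel) HCAs the QP, EQP and RDB context entries live
 * in ICM tables that must be mapped before the QPN can be used and
 * released again when the QP is destroyed.
 */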
 
static int mthca_map_memfree(struct mthca_dev *dev,
                             struct mthca_qp *qp)
        if (mthca_is_memfree(dev)) {
                ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);

                ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);

                ret = mthca_table_get(dev, dev->qp_table.rdb_table,
                                      qp->qpn << dev->qp_table.rdb_shift);

        mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);

        mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);

static void mthca_unmap_memfree(struct mthca_dev *dev,
                                struct mthca_qp *qp)
        mthca_table_put(dev, dev->qp_table.rdb_table,
                        qp->qpn << dev->qp_table.rdb_shift);
        mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
        mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);

static int mthca_alloc_memfree(struct mthca_dev *dev,
                               struct mthca_qp *qp)
        if (mthca_is_memfree(dev)) {
                qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
                                                 qp->qpn, &qp->rq.db);
                if (qp->rq.db_index < 0)

                qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
                                                 qp->qpn, &qp->sq.db);
                if (qp->sq.db_index < 0) {
                        mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);

static void mthca_free_memfree(struct mthca_dev *dev,
                               struct mthca_qp *qp)
        if (mthca_is_memfree(dev)) {
                mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
                mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
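
/*
 * Common QP initialization: set up locks and state, map ICM entries,
 * allocate the WQE buffer and (for kernel QPs) the doorbell records,
 * then pre-link the WQEs by chaining their nda_op pointers.
 * Userspace QPs stop once the buffer geometry has been computed.
 */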
 
static int mthca_alloc_qp_common(struct mthca_dev *dev,
                                 struct mthca_pd *pd,
                                 struct mthca_cq *send_cq,
                                 struct mthca_cq *recv_cq,
                                 enum ib_sig_type send_policy,
                                 struct mthca_qp *qp)
        struct mthca_next_seg *next;

        init_waitqueue_head(&qp->wait);
        mutex_init(&qp->mutex);
        qp->state        = IB_QPS_RESET;
        qp->atomic_rd_en = 0;
        qp->sq_policy    = send_policy;
        mthca_wq_reset(&qp->sq);
        mthca_wq_reset(&qp->rq);

        spin_lock_init(&qp->sq.lock);
        spin_lock_init(&qp->rq.lock);

        ret = mthca_map_memfree(dev, qp);

        ret = mthca_alloc_wqe_buf(dev, pd, qp);
                mthca_unmap_memfree(dev, qp);

        mthca_adjust_qp_caps(dev, pd, qp);

         * If this is a userspace QP, we're done now.  The doorbells
         * will be allocated and buffers will be initialized in
        if (pd->ibpd.uobject)

        ret = mthca_alloc_memfree(dev, qp);
                mthca_free_wqe_buf(dev, qp);
                mthca_unmap_memfree(dev, qp);

        if (mthca_is_memfree(dev)) {
                struct mthca_data_seg *scatter;
                int size = (sizeof (struct mthca_next_seg) +
                            qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;

                for (i = 0; i < qp->rq.max; ++i) {
                        next = get_recv_wqe(qp, i);
                        next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
                        next->ee_nds = cpu_to_be32(size);

                        for (scatter = (void *) (next + 1);
                             (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
                                scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);

                for (i = 0; i < qp->sq.max; ++i) {
                        next = get_send_wqe(qp, i);
                        next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
                                                   qp->send_wqe_offset);

                for (i = 0; i < qp->rq.max; ++i) {
                        next = get_recv_wqe(qp, i);
                        next->nda_op = htonl((((i + 1) % qp->rq.max) <<
                                              qp->rq.wqe_shift) | 1);

        qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
        qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
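
/*
 * Validate the requested queue sizes against the device limits and
 * store them in the QP; mem-free HCAs need the number of entries
 * rounded up to a power of two.
 */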
 
static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
                             struct mthca_pd *pd, struct mthca_qp *qp)
        int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);

        /* Sanity check QP size before proceeding */
        if (cap->max_send_wr     > dev->limits.max_wqes ||
            cap->max_recv_wr     > dev->limits.max_wqes ||
            cap->max_send_sge    > dev->limits.max_sg   ||
            cap->max_recv_sge    > dev->limits.max_sg   ||
            cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))

         * For MLX transport we need 2 extra send gather entries:
         * one for the header and one for the checksum at the end
        if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)

        if (mthca_is_memfree(dev)) {
                qp->rq.max = cap->max_recv_wr ?
                        roundup_pow_of_two(cap->max_recv_wr) : 0;
                qp->sq.max = cap->max_send_wr ?
                        roundup_pow_of_two(cap->max_send_wr) : 0;
                qp->rq.max = cap->max_recv_wr;
                qp->sq.max = cap->max_send_wr;

        qp->rq.max_gs = cap->max_recv_sge;
        qp->sq.max_gs = max_t(int, cap->max_send_sge,
                              ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
                                    MTHCA_INLINE_CHUNK_SIZE) /
                              sizeof (struct mthca_data_seg));

int mthca_alloc_qp(struct mthca_dev *dev,
                   struct mthca_pd *pd,
                   struct mthca_cq *send_cq,
                   struct mthca_cq *recv_cq,
                   enum ib_qp_type type,
                   enum ib_sig_type send_policy,
                   struct ib_qp_cap *cap,
                   struct mthca_qp *qp)
        case IB_QPT_RC: qp->transport = RC; break;
        case IB_QPT_UC: qp->transport = UC; break;
        case IB_QPT_UD: qp->transport = UD; break;
        default: return -EINVAL;

        err = mthca_set_qp_size(dev, cap, pd, qp);

        qp->qpn = mthca_alloc(&dev->qp_table.alloc);

        /* initialize port to zero for error-catching. */

        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
                mthca_free(&dev->qp_table.alloc, qp->qpn);

        spin_lock_irq(&dev->qp_table.lock);
        mthca_array_set(&dev->qp_table.qp,
                        qp->qpn & (dev->limits.num_qps - 1), qp);
        spin_unlock_irq(&dev->qp_table.lock);
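
/*
 * Take both CQ locks, always acquiring the lower-numbered CQ first
 * so that two QPs sharing a pair of CQs cannot deadlock; the
 * matching unlock releases them in the opposite order.
 */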
 
static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
        if (send_cq == recv_cq)
                spin_lock_irq(&send_cq->lock);
        else if (send_cq->cqn < recv_cq->cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
                spin_lock_irq(&recv_cq->lock);
                spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);

static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
        if (send_cq == recv_cq)
                spin_unlock_irq(&send_cq->lock);
        else if (send_cq->cqn < recv_cq->cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
                spin_unlock(&send_cq->lock);
                spin_unlock_irq(&recv_cq->lock);
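
/*
 * Allocate a special QP (QP0/QP1 proxy): claim the MLX QPN for the
 * given port, allocate a DMA-coherent buffer used to build UD
 * headers for each send WQE, and then fall through to the common QP
 * setup.
 */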
 
int mthca_alloc_sqp(struct mthca_dev *dev,
                    struct mthca_pd *pd,
                    struct mthca_cq *send_cq,
                    struct mthca_cq *recv_cq,
                    enum ib_sig_type send_policy,
                    struct ib_qp_cap *cap,
                    struct mthca_sqp *sqp)
        u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;

        sqp->qp.transport = MLX;
        err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);

        sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
        sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
                                             &sqp->header_dma, GFP_KERNEL);
        if (!sqp->header_buf)

        spin_lock_irq(&dev->qp_table.lock);
        if (mthca_array_get(&dev->qp_table.qp, mqpn))
                mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
        spin_unlock_irq(&dev->qp_table.lock);

        sqp->qp.port      = port;
        sqp->qp.transport = MLX;

        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
                                    send_policy, &sqp->qp);

        atomic_inc(&pd->sqp_count);

         * Lock CQs here, so that CQ polling code can do QP lookup
         * without taking a lock.
        mthca_lock_cqs(send_cq, recv_cq);

        spin_lock(&dev->qp_table.lock);
        mthca_array_clear(&dev->qp_table.qp, mqpn);
        spin_unlock(&dev->qp_table.lock);

        mthca_unlock_cqs(send_cq, recv_cq);

        dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
                          sqp->header_buf, sqp->header_dma);

static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
        spin_lock_irq(&dev->qp_table.lock);
        spin_unlock_irq(&dev->qp_table.lock);
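
/*
 * Destroy a QP: remove it from the lookup table under the CQ locks,
 * wait for all references to drop, move the QP back to RESET in
 * firmware if needed, flush its CQ entries for kernel QPs, and then
 * release buffers, doorbells, ICM entries and (for special QPs) the
 * UD header buffer.
 */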
 
void mthca_free_qp(struct mthca_dev *dev,
                   struct mthca_qp *qp)
        struct mthca_cq *send_cq;
        struct mthca_cq *recv_cq;

        send_cq = to_mcq(qp->ibqp.send_cq);
        recv_cq = to_mcq(qp->ibqp.recv_cq);

         * Lock CQs here, so that CQ polling code can do QP lookup
         * without taking a lock.
        mthca_lock_cqs(send_cq, recv_cq);

        spin_lock(&dev->qp_table.lock);
        mthca_array_clear(&dev->qp_table.qp,
                          qp->qpn & (dev->limits.num_qps - 1));
        spin_unlock(&dev->qp_table.lock);

        mthca_unlock_cqs(send_cq, recv_cq);

        wait_event(qp->wait, !get_qp_refcount(dev, qp));

        if (qp->state != IB_QPS_RESET)
                mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,

         * If this is a userspace QP, the buffers, MR, CQs and so on
         * will be cleaned up in userspace, so all we have to do is
         * unref the mem-free tables and free the QPN in our table.
        if (!qp->ibqp.uobject) {
                mthca_cq_clean(dev, recv_cq, qp->qpn,
                               qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
                if (send_cq != recv_cq)
                        mthca_cq_clean(dev, send_cq, qp->qpn, NULL);

                mthca_free_memfree(dev, qp);
                mthca_free_wqe_buf(dev, qp);

        mthca_unmap_memfree(dev, qp);

        if (is_sqp(dev, qp)) {
                atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
                dma_free_coherent(&dev->pdev->dev,
                                  to_msqp(qp)->header_buf_size,
                                  to_msqp(qp)->header_buf,
                                  to_msqp(qp)->header_dma);
                mthca_free(&dev->qp_table.alloc, qp->qpn);
 
/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
                            int ind, struct ib_send_wr *wr,
                            struct mthca_mlx_seg *mlx,
                            struct mthca_data_seg *data)
        ib_ud_header_init(256, /* assume a MAD */
                          mthca_ah_grh_present(to_mah(wr->wr.ud.ah)),
        err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);

        mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
        mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
                                  (sqp->ud_header.lrh.destination_lid ==
                                   IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
                                  (sqp->ud_header.lrh.service_level << 8));
        mlx->rlid = sqp->ud_header.lrh.destination_lid;

        switch (wr->opcode) {
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
                sqp->ud_header.immediate_present = 0;
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
                sqp->ud_header.immediate_data = wr->ex.imm_data;

        sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
        if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
                ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
                                   sqp->pkey_index, &pkey);
                ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
                                   wr->wr.ud.pkey_index, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
        sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
                                               sqp->qkey : wr->wr.ud.remote_qkey);
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

        header_size = ib_ud_header_pack(&sqp->ud_header,
                                        ind * MTHCA_UD_HEADER_SIZE);

        data->byte_count = cpu_to_be32(header_size);
        data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
        data->addr       = cpu_to_be64(sqp->header_dma +
                                       ind * MTHCA_UD_HEADER_SIZE);
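
/*
 * Work queue overflow check: the fast path compares head and tail
 * without any lock, and only when that looks full do we take the CQ
 * lock to get a consistent tail before deciding.
 */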
 
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
                                    struct ib_cq *ib_cq)
        struct mthca_cq *cq;

        cur = wq->head - wq->tail;
        if (likely(cur + nreq < wq->max))

        spin_lock(&cq->lock);
        cur = wq->head - wq->tail;
        spin_unlock(&cq->lock);

        return cur + nreq >= wq->max;

static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
                                          u64 remote_addr, u32 rkey)
        rseg->raddr    = cpu_to_be64(remote_addr);
        rseg->rkey     = cpu_to_be32(rkey);

static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
                                           struct ib_send_wr *wr)
        if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
                aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
                aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);

static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
                             struct ib_send_wr *wr)
        useg->lkey    = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
        useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
        useg->dqpn    = cpu_to_be32(wr->wr.ud.remote_qpn);
        useg->qkey    = cpu_to_be32(wr->wr.ud.remote_qkey);

static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
                             struct ib_send_wr *wr)
        memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
        useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
        useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
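
/*
 * Post sends on a Tavor HCA: walk the work request list under
 * sq.lock, check for overflow against the send CQ, and build each
 * WQE in place (next segment first, then the transport-specific
 * remote-address/atomic/UD segments and the data segments named in
 * the request).
 */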
 
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                          struct ib_send_wr **bad_wr)
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
        unsigned long flags;

         * f0 and size0 are only used if nreq != 0, and they will
         * always be initialized the first time through the main loop
         * before nreq is incremented.  So nreq cannot become non-zero
         * without initializing f0 and size0, and they are in fact
         * never used uninitialized.
        int uninitialized_var(size0);
        u32 uninitialized_var(f0);

        spin_lock_irqsave(&qp->sq.lock, flags);

        /* XXX check that state is OK to post send */

        ind = qp->sq.next_ind;

        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        mthca_err(dev, "SQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", qp->qpn,
                                        qp->sq.head, qp->sq.tail,

                wqe = get_send_wqe(qp, ind);
                prev_wqe = qp->sq.last;

                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                ((struct mthca_next_seg *) wqe)->flags =
                        ((wr->send_flags & IB_SEND_SIGNALED) ?
                         cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
                        ((wr->send_flags & IB_SEND_SOLICITED) ?
                         cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                        ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;

                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;

                switch (qp->transport) {
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                                set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                                              wr->wr.atomic.rkey);
                                wqe += sizeof (struct mthca_raddr_seg);

                                set_atomic_seg(wqe, wr);
                                wqe += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;

                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                        case IB_WR_RDMA_READ:
                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;

                                /* No extra segments required for sends */

                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;

                                /* No extra segments required for sends */

                        set_tavor_ud_seg(wqe, wr);
                        wqe  += sizeof (struct mthca_tavor_ud_seg);
                        size += sizeof (struct mthca_tavor_ud_seg) / 16;

                        err = build_mlx_header(dev, to_msqp(qp), ind, wr,
                                               wqe - sizeof (struct mthca_next_seg),

                        wqe += sizeof (struct mthca_data_seg);
                        size += sizeof (struct mthca_data_seg) / 16;
 
1734                 if (wr->num_sge > qp->sq.max_gs) {
 
1735                         mthca_err(dev, "too many gathers\n");
 
1741                 for (i = 0; i < wr->num_sge; ++i) {
 
1742                         mthca_set_data_seg(wqe, wr->sg_list + i);
 
1743                         wqe  += sizeof (struct mthca_data_seg);
 
1744                         size += sizeof (struct mthca_data_seg) / 16;
 
1747                 /* Add one more inline data segment for ICRC */
 
1748                 if (qp->transport == MLX) {
 
1749                         ((struct mthca_data_seg *) wqe)->byte_count =
 
1750                                 cpu_to_be32((1 << 31) | 4);
 
1751                         ((u32 *) wqe)[1] = 0;
 
1752                         wqe += sizeof (struct mthca_data_seg);
 
1753                         size += sizeof (struct mthca_data_seg) / 16;
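                        /*
                         * Bit 31 of byte_count marks this as an inline
                         * segment; the count of 4 reserves room at the
                         * end of the MLX packet for the ICRC, and the
                         * word cleared just above is that 4-byte
                         * placeholder.
                         */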
 
1756                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
 
1758                 if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
 
1759                         mthca_err(dev, "opcode invalid\n");
 
1765                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
 
1766                         cpu_to_be32(((ind << qp->sq.wqe_shift) +
 
1767                                      qp->send_wqe_offset) |
 
1768                                     mthca_opcode[wr->opcode]);
 
1770                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
 
1771                         cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
 
1772                                     ((wr->send_flags & IB_SEND_FENCE) ?
 
1773                                     MTHCA_NEXT_FENCE : 0));
 
1777                         op0   = mthca_opcode[wr->opcode];
 
1778                         f0    = wr->send_flags & IB_SEND_FENCE ?
 
1779                                 MTHCA_SEND_DOORBELL_FENCE : 0;
 
1783                 if (unlikely(ind >= qp->sq.max))
 
1791                 mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
 
1792                                qp->send_wqe_offset) | f0 | op0,
 
1793                               (qp->qpn << 8) | size0,
 
1794                               dev->kar + MTHCA_SEND_DOORBELL,
 
1795                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
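                /*
                 * Tavor send doorbell: the first word combines the byte
                 * offset of the first new WQE with the fence bit (f0)
                 * and first opcode (op0); the second word is the QPN
                 * shifted by 8 or'ed with size0, the first WQE's length
                 * in 16-byte units.
                 */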
 
1797                  * Make sure doorbells don't leak out of SQ spinlock
 
1798                  * and reach the HCA out of order:
 
1803         qp->sq.next_ind = ind;
 
1804         qp->sq.head    += nreq;
 
1806         spin_unlock_irqrestore(&qp->sq.lock, flags);
 
1810 int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
1811                              struct ib_recv_wr **bad_wr)
 
1813         struct mthca_dev *dev = to_mdev(ibqp->device);
 
1814         struct mthca_qp *qp = to_mqp(ibqp);
 
1815         unsigned long flags;
 
1821          * size0 is only used if nreq != 0, and it will always be
 
1822          * initialized the first time through the main loop before
 
1823          * nreq is incremented.  So nreq cannot become non-zero
 
1824                  * without initializing size0, and it is in fact never used
1825                  * uninitialized.
 
1827         int uninitialized_var(size0);
 
1832         spin_lock_irqsave(&qp->rq.lock, flags);
 
1834         /* XXX check that state is OK to post receive */
 
1836         ind = qp->rq.next_ind;
 
1838         for (nreq = 0; wr; wr = wr->next) {
 
1839                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
 
1840                         mthca_err(dev, "RQ %06x full (%u head, %u tail,"
 
1841                                         " %d max, %d nreq)\n", qp->qpn,
 
1842                                         qp->rq.head, qp->rq.tail,
 
1849                 wqe = get_recv_wqe(qp, ind);
 
1850                 prev_wqe = qp->rq.last;
 
1853                 ((struct mthca_next_seg *) wqe)->ee_nds =
 
1854                         cpu_to_be32(MTHCA_NEXT_DBD);
 
1855                 ((struct mthca_next_seg *) wqe)->flags = 0;
 
1857                 wqe += sizeof (struct mthca_next_seg);
 
1858                 size = sizeof (struct mthca_next_seg) / 16;
 
1860                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
 
1866                 for (i = 0; i < wr->num_sge; ++i) {
 
1867                         mthca_set_data_seg(wqe, wr->sg_list + i);
 
1868                         wqe  += sizeof (struct mthca_data_seg);
 
1869                         size += sizeof (struct mthca_data_seg) / 16;
 
1872                 qp->wrid[ind] = wr->wr_id;
 
1874                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
 
1875                         cpu_to_be32(MTHCA_NEXT_DBD | size);
 
1881                 if (unlikely(ind >= qp->rq.max))
 
1885                 if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
 
1890                         mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
 
1891                                       qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
 
1892                                       MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 
1894                         qp->rq.next_ind = ind;
 
1895                         qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
 
1903                 mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
 
1904                               qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
 
1905                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
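        /*
         * The count passed in the receive doorbell ((qpn << 8) | nreq)
         * only has a byte's worth of room, which is presumably why
         * postings are broken up into chunks of
         * MTHCA_TAVOR_MAX_WQES_PER_RECV_DB inside the loop; this final
         * doorbell covers whatever remainder is left in nreq.
         */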
 
1908         qp->rq.next_ind = ind;
 
1909         qp->rq.head    += nreq;
 
1912          * Make sure doorbells don't leak out of RQ spinlock and reach
 
1913          * the HCA out of order:
 
1917         spin_unlock_irqrestore(&qp->rq.lock, flags);
 
1921 int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
1922                           struct ib_send_wr **bad_wr)
 
1924         struct mthca_dev *dev = to_mdev(ibqp->device);
 
1925         struct mthca_qp *qp = to_mqp(ibqp);
 
1929         unsigned long flags;
 
1935          * f0 and size0 are only used if nreq != 0, and they will
 
1936          * always be initialized the first time through the main loop
 
1937          * before nreq is incremented.  So nreq cannot become non-zero
 
1938          * without initializing f0 and size0, and they are in fact
 
1939          * never used uninitialized.
 
1941         int uninitialized_var(size0);
 
1942         u32 uninitialized_var(f0);
 
1946         spin_lock_irqsave(&qp->sq.lock, flags);
 
1948         /* XXX check that state is OK to post send */
 
1950         ind = qp->sq.head & (qp->sq.max - 1);
 
1952         for (nreq = 0; wr; ++nreq, wr = wr->next) {
 
1953                 if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
 
1956                         dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
 
1957                                 ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
1959                         qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
 
1962                          * Make sure that descriptors are written before
1963                          * doorbell record.
 
1966                         *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
 
1969                          * Make sure doorbell record is written before we
 
1970                          * write MMIO send doorbell.
 
1974                         mthca_write64(dbhi, (qp->qpn << 8) | size0,
 
1975                                       dev->kar + MTHCA_SEND_DOORBELL,
 
1976                                       MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
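                        /*
                         * Mem-free (Arbel) send path: dbhi packs the WQE
                         * count in bits 31:24, the low 16 bits of
                         * sq.head (as sampled when dbhi was built) in
                         * bits 23:8 and f0 | op0 in the low byte.  The
                         * doorbell record (*qp->sq.db) must be updated
                         * before the MMIO doorbell is written; the write
                         * barriers flagged by the comments above enforce
                         * that ordering.
                         */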
 
1979                 if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 
1980                         mthca_err(dev, "SQ %06x full (%u head, %u tail,"
 
1981                                         " %d max, %d nreq)\n", qp->qpn,
 
1982                                         qp->sq.head, qp->sq.tail,
 
1989                 wqe = get_send_wqe(qp, ind);
 
1990                 prev_wqe = qp->sq.last;
 
1993                 ((struct mthca_next_seg *) wqe)->flags =
 
1994                         ((wr->send_flags & IB_SEND_SIGNALED) ?
 
1995                          cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
 
1996                         ((wr->send_flags & IB_SEND_SOLICITED) ?
 
1997                          cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
 
1998                         ((wr->send_flags & IB_SEND_IP_CSUM) ?
 
1999                          cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
 
2001                 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
 
2002                     wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
 
2003                         ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
2005                 wqe += sizeof (struct mthca_next_seg);
 
2006                 size = sizeof (struct mthca_next_seg) / 16;
 
2008                 switch (qp->transport) {
 
2010                         switch (wr->opcode) {
 
2011                         case IB_WR_ATOMIC_CMP_AND_SWP:
 
2012                         case IB_WR_ATOMIC_FETCH_AND_ADD:
 
2013                                 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
 
2014                                               wr->wr.atomic.rkey);
 
2015                                 wqe += sizeof (struct mthca_raddr_seg);
 
2017                                 set_atomic_seg(wqe, wr);
 
2018                                 wqe  += sizeof (struct mthca_atomic_seg);
 
2019                                 size += (sizeof (struct mthca_raddr_seg) +
 
2020                                          sizeof (struct mthca_atomic_seg)) / 16;
 
2023                         case IB_WR_RDMA_READ:
 
2024                         case IB_WR_RDMA_WRITE:
 
2025                         case IB_WR_RDMA_WRITE_WITH_IMM:
 
2026                                 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
 
2028                                 wqe  += sizeof (struct mthca_raddr_seg);
 
2029                                 size += sizeof (struct mthca_raddr_seg) / 16;
 
2033                                 /* No extra segments required for sends */
 
2040                         switch (wr->opcode) {
 
2041                         case IB_WR_RDMA_WRITE:
 
2042                         case IB_WR_RDMA_WRITE_WITH_IMM:
 
2043                                 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
 
2045                                 wqe  += sizeof (struct mthca_raddr_seg);
 
2046                                 size += sizeof (struct mthca_raddr_seg) / 16;
 
2050                                 /* No extra segments required for sends */
 
2057                         set_arbel_ud_seg(wqe, wr);
 
2058                         wqe  += sizeof (struct mthca_arbel_ud_seg);
 
2059                         size += sizeof (struct mthca_arbel_ud_seg) / 16;
 
2063                         err = build_mlx_header(dev, to_msqp(qp), ind, wr,
 
2064                                                wqe - sizeof (struct mthca_next_seg),
 
2070                         wqe += sizeof (struct mthca_data_seg);
 
2071                         size += sizeof (struct mthca_data_seg) / 16;
 
2075                 if (wr->num_sge > qp->sq.max_gs) {
 
2076                         mthca_err(dev, "too many gathers\n");
 
2082                 for (i = 0; i < wr->num_sge; ++i) {
 
2083                         mthca_set_data_seg(wqe, wr->sg_list + i);
 
2084                         wqe  += sizeof (struct mthca_data_seg);
 
2085                         size += sizeof (struct mthca_data_seg) / 16;
 
2088                 /* Add one more inline data segment for ICRC */
 
2089                 if (qp->transport == MLX) {
 
2090                         ((struct mthca_data_seg *) wqe)->byte_count =
 
2091                                 cpu_to_be32((1 << 31) | 4);
 
2092                         ((u32 *) wqe)[1] = 0;
 
2093                         wqe += sizeof (struct mthca_data_seg);
 
2094                         size += sizeof (struct mthca_data_seg) / 16;
 
2097                 qp->wrid[ind + qp->rq.max] = wr->wr_id;
 
2099                 if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
 
2100                         mthca_err(dev, "opcode invalid\n");
 
2106                 ((struct mthca_next_seg *) prev_wqe)->nda_op =
 
2107                         cpu_to_be32(((ind << qp->sq.wqe_shift) +
 
2108                                      qp->send_wqe_offset) |
 
2109                                     mthca_opcode[wr->opcode]);
 
2111                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
 
2112                         cpu_to_be32(MTHCA_NEXT_DBD | size |
 
2113                                     ((wr->send_flags & IB_SEND_FENCE) ?
 
2114                                      MTHCA_NEXT_FENCE : 0));
 
2118                         op0   = mthca_opcode[wr->opcode];
 
2119                         f0    = wr->send_flags & IB_SEND_FENCE ?
 
2120                                 MTHCA_SEND_DOORBELL_FENCE : 0;
 
2124                 if (unlikely(ind >= qp->sq.max))
 
2130                 dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
2132                 qp->sq.head += nreq;
 
2135                  * Make sure that descriptors are written before
2136                  * doorbell record.
 
2139                 *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
 
2142                  * Make sure doorbell record is written before we
 
2143                  * write MMIO send doorbell.
 
2147                 mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
 
2148                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 
2152          * Make sure doorbells don't leak out of SQ spinlock and reach
 
2153          * the HCA out of order:
 
2157         spin_unlock_irqrestore(&qp->sq.lock, flags);
 
2161 int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
2162                              struct ib_recv_wr **bad_wr)
 
2164         struct mthca_dev *dev = to_mdev(ibqp->device);
 
2165         struct mthca_qp *qp = to_mqp(ibqp);
 
2166         unsigned long flags;
 
2173         spin_lock_irqsave(&qp->rq.lock, flags);
 
2175         /* XXX check that state is OK to post receive */
 
2177         ind = qp->rq.head & (qp->rq.max - 1);
 
2179         for (nreq = 0; wr; ++nreq, wr = wr->next) {
 
2180                 if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
 
2181                         mthca_err(dev, "RQ %06x full (%u head, %u tail,"
 
2182                                         " %d max, %d nreq)\n", qp->qpn,
 
2183                                         qp->rq.head, qp->rq.tail,
 
2190                 wqe = get_recv_wqe(qp, ind);
 
2192                 ((struct mthca_next_seg *) wqe)->flags = 0;
 
2194                 wqe += sizeof (struct mthca_next_seg);
 
2196                 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
 
2202                 for (i = 0; i < wr->num_sge; ++i) {
 
2203                         mthca_set_data_seg(wqe, wr->sg_list + i);
 
2204                         wqe += sizeof (struct mthca_data_seg);
 
2207                 if (i < qp->rq.max_gs)
 
2208                         mthca_set_data_seg_inval(wqe);
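                /*
                 * When fewer than max_gs scatter entries are used, the
                 * list is terminated with a sentinel entry carrying the
                 * invalid lkey (what mthca_set_data_seg_inval() writes),
                 * since nothing else in an Arbel receive WQE tells the
                 * HCA how many entries are valid.
                 */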
 
2210                 qp->wrid[ind] = wr->wr_id;
 
2213                 if (unlikely(ind >= qp->rq.max))
 
2218                 qp->rq.head += nreq;
 
2221                  * Make sure that descriptors are written before
2222                  * doorbell record.
 
2225                 *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
 
2228         spin_unlock_irqrestore(&qp->rq.lock, flags);
 
2232 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
 
2233                         int index, int *dbd, __be32 *new_wqe)
 
2235         struct mthca_next_seg *next;
 
2238          * For SRQs, all receive WQEs generate a CQE, so we're always
 
2239          * at the end of the doorbell chain.
 
2241         if (qp->ibqp.srq && !is_send) {
 
2247                 next = get_send_wqe(qp, index);
 
2249                 next = get_recv_wqe(qp, index);
 
2251         *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
 
2252         if (next->ee_nds & cpu_to_be32(0x3f))
 
2253                 *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
 
2254                         (next->ee_nds & cpu_to_be32(0x3f));
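        /*
         * These outputs are used by the completion-queue error handling
         * (mthca_cq.c) when flushing a WQE that completed in error:
         * *dbd says whether this WQE was counted against a doorbell (the
         * MTHCA_NEXT_DBD bit), and *new_wqe gives the next-WQE word the
         * HCA should continue from, or 0 if this WQE was the last in its
         * chain.
         */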
 
2259 int mthca_init_qp_table(struct mthca_dev *dev)
 
2265         spin_lock_init(&dev->qp_table.lock);
 
2268          * We reserve 2 extra QPs per port for the special QPs.  The
 
2269          * special QP for port 1 has to be even, so round up.
 
2271         dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
 
2272         err = mthca_alloc_init(&dev->qp_table.alloc,
 
2273                                dev->limits.num_qps,
 
2275                                dev->qp_table.sqp_start +
 
2276                                MTHCA_MAX_PORTS * 2);
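        /*
         * Worked example (with a made-up reserved_qps of 17): sqp_start
         * becomes (17 + 1) & ~1UL = 18, so QPNs 18..21 hold QP0/QP1 for
         * the two ports, and mthca_alloc_init() is told to keep
         * everything below 18 + MTHCA_MAX_PORTS * 2 = 22 out of the
         * general allocation pool.
         */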
 
2280         err = mthca_array_init(&dev->qp_table.qp,
 
2281                                dev->limits.num_qps);
 
2283                 mthca_alloc_cleanup(&dev->qp_table.alloc);
 
2287         for (i = 0; i < 2; ++i) {
 
2288                 err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
 
2289                                             dev->qp_table.sqp_start + i * 2,
 
2294                         mthca_warn(dev, "CONF_SPECIAL_QP returned "
 
2295                                    "status %02x, aborting.\n",
 
2304         for (i = 0; i < 2; ++i)
 
2305                 mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
 
2307         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
 
2308         mthca_alloc_cleanup(&dev->qp_table.alloc);
 
2313 void mthca_cleanup_qp_table(struct mthca_dev *dev)
 
2318         for (i = 0; i < 2; ++i)
 
2319                 mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
 
2321         mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
 
2322         mthca_alloc_cleanup(&dev->qp_table.alloc);