2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #include <linux/completion.h>
38 #include <linux/in6.h>
39 #include <linux/mutex.h>
40 #include <linux/random.h>
41 #include <linux/idr.h>
42 #include <linux/inetdevice.h>
46 #include <rdma/rdma_cm.h>
47 #include <rdma/rdma_cm_ib.h>
48 #include <rdma/ib_cache.h>
49 #include <rdma/ib_cm.h>
50 #include <rdma/ib_sa.h>
51 #include <rdma/iw_cm.h>
/* Module metadata. */
53 MODULE_AUTHOR("Sean Hefty");
54 MODULE_DESCRIPTION("Generic RDMA CM Agent");
55 MODULE_LICENSE("Dual BSD/GPL");
/* IB CM tuning constants (their users are not visible in this view). */
57 #define CMA_CM_RESPONSE_TIMEOUT 20
58 #define CMA_MAX_CM_RETRIES 15
/* Argument passed to ib_send_cm_mra() by cma_req_handler(). */
59 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
/* Forward declarations of the per-device add/remove callbacks. */
61 static void cma_add_one(struct ib_device *device);
62 static void cma_remove_one(struct ib_device *device);
/*
 * IB client descriptor; its .remove hook is cma_remove_one.
 * NOTE(review): the remaining initializers are not visible in this view.
 */
64 static struct ib_client cma_client = {
67 .remove = cma_remove_one
/* Client handles for the SA query and address-resolution services. */
70 static struct ib_sa_client sa_client;
71 static struct rdma_addr_client addr_client;
/* dev_list is walked as a list of struct cma_device (linked through ->list). */
72 static LIST_HEAD(dev_list);
/* IDs listening on all devices are queued here by cma_listen_on_all(). */
73 static LIST_HEAD(listen_any_list);
/* Global mutex; presumably guards dev_list and listen_any_list - confirm. */
74 static DEFINE_MUTEX(lock);
75 static struct workqueue_struct *cma_wq;
/* Presumably one port-number IDR per RDMA port space - confirm against users. */
76 static DEFINE_IDR(sdp_ps);
77 static DEFINE_IDR(tcp_ps);
78 static DEFINE_IDR(udp_ps);
79 static DEFINE_IDR(ipoib_ps);
/*
 * Fields of the per-device record (presumably struct cma_device; the struct
 * header and some members are not visible in this view).
 */
/* Link used when iterating dev_list. */
83 struct list_head list;
84 struct ib_device *device;
/* Completed by cma_deref_dev() once the device refcount reaches zero. */
85 struct completion comp;
/* IDs attached to this device by cma_attach_to_dev(). */
87 struct list_head id_list;
/*
 * A bound port within a port space.
 * NOTE(review): the ps/port members used by cma_release_port() are not
 * visible in this view.
 */
104 struct rdma_bind_list {
/* IDs bound to this port; each is linked through rdma_id_private.node. */
106 struct hlist_head owners;
111 * Device removal can occur at any time, so we need extra handling to
112 * serialize notifying the user of device removal with other callbacks.
113 * We do this by disabling removal notification while a callback is in progress,
114 * and reporting it after the callback completes.
/*
 * Private state wrapped around the public rdma_cm_id.
 * NOTE(review): several members referenced elsewhere in this file (lock,
 * refcount, cm_id, seq_num, ...) are not visible in this view.
 */
116 struct rdma_id_private {
/* Public handle; container_of() on it recovers this structure. */
117 struct rdma_cm_id id;
119 struct rdma_bind_list *bind_list;
/* Link on bind_list->owners. */
120 struct hlist_node node;
121 struct list_head list; /* listen_any_list or cma_device.list */
122 struct list_head listen_list; /* per device listens */
/* Device this ID is attached to, or NULL when detached. */
123 struct cma_device *cma_dev;
/* Multicast groups joined through this ID (struct cma_multicast entries). */
124 struct list_head mc_list;
/* Current state; read and changed under ->lock by cma_comp*()/cma_exch(). */
127 enum cma_state state;
/* Serialises access to id.qp in the QP modify/destroy helpers. */
129 struct mutex qp_mutex;
/* Completed by cma_deref_id() when the last reference is dropped. */
131 struct completion comp;
/* Held while an event handler callback runs (see cma_disable_callback()). */
133 struct mutex handler_mutex;
/* Outstanding SA query, cancelled by cma_cancel_route(). */
137 struct ib_sa_query *query;
/*
 * One multicast group membership owned by an ID.
 * NOTE(review): the enclosing union/other members are not visible in this view.
 */
151 struct cma_multicast {
152 struct rdma_id_private *id_priv;
/* SA multicast handle, released with ib_sa_free_multicast(). */
154 struct ib_sa_multicast *ib;
/* Link on id_priv->mc_list. */
156 struct list_head list;
158 struct sockaddr_storage addr;
/*
 * Fields of a deferred-work item (struct header not visible in this view):
 * a work_struct plus the target ID, an old/new state pair and the event.
 * NOTE(review): presumably the worker reports the event if the state
 * transition succeeds - confirm against the handler, which is not shown.
 */
162 struct work_struct work;
163 struct rdma_id_private *id;
164 enum cma_state old_state;
165 enum cma_state new_state;
166 struct rdma_cm_event event;
/* Deferred-work item carrying an ID and the event to deliver for it. */
169 struct cma_ndev_work {
170 struct work_struct work;
171 struct rdma_id_private *id;
172 struct rdma_cm_event event;
/*
 * Fields of the CMA private-data header (struct header, version and port
 * members are not visible in this view; see cma_get_net_info()).
 */
185 u8 ip_version; /* IP version: 7:4 */
187 union cma_ip_addr src_addr;
188 union cma_ip_addr dst_addr;
/*
 * Fields of the SDP hello header (struct header and the port member are not
 * visible in this view; see cma_get_net_info()).
 */
193 u8 sdp_version; /* Major version: 7:4 */
194 u8 ip_version; /* IP version: 7:4 */
195 u8 sdp_specific1[10];
197 __be16 sdp_specific2;
198 union cma_ip_addr src_addr;
199 union cma_ip_addr dst_addr;
/* Expected cma_hdr.cma_version value, checked in cma_get_net_info(). */
207 #define CMA_VERSION 0x00
/* SDP major version (compared against sdp_get_majv() results). */
208 #define SDP_MAJ_VERSION 0x2
/*
 * cma_comp - test whether the ID is currently in state @comp.
 * The comparison is done under id_priv->lock (irqsave variant).
 * NOTE(review): the local declarations and the return statement are not
 * visible in this view.
 */
210 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
215 spin_lock_irqsave(&id_priv->lock, flags);
216 ret = (id_priv->state == comp);
217 spin_unlock_irqrestore(&id_priv->lock, flags);
/*
 * cma_comp_exch - move the ID to state @exch only if it is currently in @comp.
 * The test and the assignment are done together under id_priv->lock; ret
 * records whether the state matched.
 * NOTE(review): declarations and the return statement are not visible.
 */
221 static int cma_comp_exch(struct rdma_id_private *id_priv,
222 enum cma_state comp, enum cma_state exch)
227 spin_lock_irqsave(&id_priv->lock, flags);
228 if ((ret = (id_priv->state == comp)))
229 id_priv->state = exch;
230 spin_unlock_irqrestore(&id_priv->lock, flags);
/*
 * cma_exch - unconditionally set the ID's state to exch under id_priv->lock,
 * capturing the previous state in 'old'.
 * NOTE(review): the rest of the parameter list, the declarations and the
 * return statement are not visible in this view.
 */
234 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
240 spin_lock_irqsave(&id_priv->lock, flags);
241 old = id_priv->state;
242 id_priv->state = exch;
243 spin_unlock_irqrestore(&id_priv->lock, flags);
247 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
249 return hdr->ip_version >> 4;
252 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
254 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
257 static inline u8 sdp_get_majv(u8 sdp_version)
259 return sdp_version >> 4;
262 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
264 return hh->ip_version >> 4;
267 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
269 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
272 static inline int cma_is_ud_ps(enum rdma_port_space ps)
274 return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
277 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
278 struct cma_device *cma_dev)
280 atomic_inc(&cma_dev->refcount);
281 id_priv->cma_dev = cma_dev;
282 id_priv->id.device = cma_dev->device;
283 list_add_tail(&id_priv->list, &cma_dev->id_list);
286 static inline void cma_deref_dev(struct cma_device *cma_dev)
288 if (atomic_dec_and_test(&cma_dev->refcount))
289 complete(&cma_dev->comp);
292 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
294 list_del(&id_priv->list);
295 cma_deref_dev(id_priv->cma_dev);
296 id_priv->cma_dev = NULL;
/*
 * cma_set_qkey - choose the Q_Key for an ID and store it in *qkey.
 * Two sources are visible: the constant RDMA_UDP_QKEY, and the qkey of the
 * multicast member record looked up by the MGID derived from @dev_addr.
 * NOTE(review): the port-space branching that selects between them, the
 * error check after the SA lookup and the return are not visible here.
 */
299 static int cma_set_qkey(struct ib_device *device, u8 port_num,
300 enum rdma_port_space ps,
301 struct rdma_dev_addr *dev_addr, u32 *qkey)
303 struct ib_sa_mcmember_rec rec;
308 *qkey = RDMA_UDP_QKEY;
311 ib_addr_get_mgid(dev_addr, &rec.mgid);
312 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
/* The record carries the qkey big-endian; convert to CPU order. */
313 *qkey = be32_to_cpu(rec.qkey);
/*
 * cma_acquire_dev - attach the ID to the device that owns its source GID.
 * The source GID is read from dev_addr according to the transport type, then
 * each device on dev_list is probed with ib_find_cached_gid(), which also
 * fills in id.port_num. cma_set_qkey() and cma_attach_to_dev() follow for
 * the matching device.
 * Callers in this file release the global 'lock' mutex right after the call.
 * NOTE(review): the error checks between the steps and the return are not
 * visible in this view.
 */
321 static int cma_acquire_dev(struct rdma_id_private *id_priv)
323 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
324 struct cma_device *cma_dev;
328 switch (rdma_node_get_transport(dev_addr->dev_type)) {
329 case RDMA_TRANSPORT_IB:
330 ib_addr_get_sgid(dev_addr, &gid);
332 case RDMA_TRANSPORT_IWARP:
333 iw_addr_get_sgid(dev_addr, &gid);
339 list_for_each_entry(cma_dev, &dev_list, list) {
340 ret = ib_find_cached_gid(cma_dev->device, &gid,
341 &id_priv->id.port_num, NULL);
343 ret = cma_set_qkey(cma_dev->device,
344 id_priv->id.port_num,
345 id_priv->id.ps, dev_addr,
348 cma_attach_to_dev(id_priv, cma_dev);
355 static void cma_deref_id(struct rdma_id_private *id_priv)
357 if (atomic_dec_and_test(&id_priv->refcount))
358 complete(&id_priv->comp);
/*
 * cma_disable_callback - enter a callback section for the ID.
 * Takes handler_mutex and checks that the ID is in @state; on a mismatch
 * the mutex is released again. On a match the mutex stays held, and callers
 * in this file unlock handler_mutex when their callback is finished.
 * NOTE(review): the return statements are not visible; callers treat a
 * non-zero result as "do not run the callback".
 */
361 static int cma_disable_callback(struct rdma_id_private *id_priv,
362 enum cma_state state)
364 mutex_lock(&id_priv->handler_mutex);
365 if (id_priv->state != state) {
366 mutex_unlock(&id_priv->handler_mutex);
372 static int cma_has_cm_dev(struct rdma_id_private *id_priv)
374 return (id_priv->id.device && id_priv->cm_id.ib);
/*
 * rdma_create_id - allocate a new RDMA CM ID.
 * @event_handler: callback stored in id.event_handler
 * @context: opaque pointer stored in id.context
 * @ps: port space for the ID
 *
 * The private structure is zero-allocated and starts in CMA_IDLE with a
 * reference count of 1 and a random seq_num. ERR_PTR(-ENOMEM) is returned
 * when the allocation fails.
 * NOTE(review): the NULL check, the remaining id field assignments and the
 * success return are not visible in this view.
 */
377 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
378 void *context, enum rdma_port_space ps)
380 struct rdma_id_private *id_priv;
382 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
384 return ERR_PTR(-ENOMEM);
386 id_priv->state = CMA_IDLE;
387 id_priv->id.context = context;
388 id_priv->id.event_handler = event_handler;
390 spin_lock_init(&id_priv->lock);
391 mutex_init(&id_priv->qp_mutex);
392 init_completion(&id_priv->comp);
/* Initial reference; rdma_destroy_id() drops one reference and then waits on comp. */
393 atomic_set(&id_priv->refcount, 1);
394 mutex_init(&id_priv->handler_mutex);
395 INIT_LIST_HEAD(&id_priv->listen_list);
396 INIT_LIST_HEAD(&id_priv->mc_list);
397 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
401 EXPORT_SYMBOL(rdma_create_id);
/*
 * cma_init_ud_qp - take a datagram QP through INIT, RTR and RTS.
 * The INIT attributes come from rdma_init_qp_attr(); the RTR step changes
 * only the state, and the RTS step changes the state and the send PSN.
 * NOTE(review): the error checks between steps, the sq_psn assignment and
 * the return are not visible in this view.
 */
403 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
405 struct ib_qp_attr qp_attr;
406 int qp_attr_mask, ret;
408 qp_attr.qp_state = IB_QPS_INIT;
409 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
413 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
417 qp_attr.qp_state = IB_QPS_RTR;
418 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
422 qp_attr.qp_state = IB_QPS_RTS;
424 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
/*
 * cma_init_conn_qp - move a connected-mode QP to INIT using the attributes
 * computed by rdma_init_qp_attr(). Returns the ib_modify_qp() result.
 * NOTE(review): the error check after rdma_init_qp_attr() is not visible.
 */
429 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
431 struct ib_qp_attr qp_attr;
432 int qp_attr_mask, ret;
434 qp_attr.qp_state = IB_QPS_INIT;
435 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
439 return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
/*
 * rdma_create_qp - create a QP on @pd for @id and initialise it.
 * The ID's device is compared with the PD's device first. The new QP is
 * initialised with the UD helper for datagram port spaces and with the
 * connected-mode helper otherwise; the QP number and whether the QP uses an
 * SRQ are then recorded on the ID.
 * NOTE(review): error returns, the id->qp assignment and the cleanup path
 * are not visible in this view.
 */
442 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
443 struct ib_qp_init_attr *qp_init_attr)
445 struct rdma_id_private *id_priv;
449 id_priv = container_of(id, struct rdma_id_private, id);
450 if (id->device != pd->device)
453 qp = ib_create_qp(pd, qp_init_attr);
457 if (cma_is_ud_ps(id_priv->id.ps))
458 ret = cma_init_ud_qp(id_priv, qp);
460 ret = cma_init_conn_qp(id_priv, qp);
465 id_priv->qp_num = qp->qp_num;
466 id_priv->srq = (qp->srq != NULL);
472 EXPORT_SYMBOL(rdma_create_qp);
474 void rdma_destroy_qp(struct rdma_cm_id *id)
476 struct rdma_id_private *id_priv;
478 id_priv = container_of(id, struct rdma_id_private, id);
479 mutex_lock(&id_priv->qp_mutex);
480 ib_destroy_qp(id_priv->id.qp);
481 id_priv->id.qp = NULL;
482 mutex_unlock(&id_priv->qp_mutex);
484 EXPORT_SYMBOL(rdma_destroy_qp);
/*
 * cma_modify_qp_rtr - move the ID's QP through INIT to RTR, under qp_mutex.
 * The attributes for each step come from rdma_init_qp_attr(); for RTR,
 * max_dest_rd_atomic is taken from @conn_param->responder_resources.
 * NOTE(review): cma_rep_recv() passes conn_param == NULL; a NULL guard
 * presumably precedes the conn_param dereference but is not visible in this
 * view - confirm. Error checks and the return are likewise not visible.
 */
486 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
487 struct rdma_conn_param *conn_param)
489 struct ib_qp_attr qp_attr;
490 int qp_attr_mask, ret;
492 mutex_lock(&id_priv->qp_mutex);
493 if (!id_priv->id.qp) {
498 /* Need to update QP attributes from default values. */
499 qp_attr.qp_state = IB_QPS_INIT;
500 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
504 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
508 qp_attr.qp_state = IB_QPS_RTR;
509 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
514 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
515 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
517 mutex_unlock(&id_priv->qp_mutex);
/*
 * cma_modify_qp_rts - move the ID's QP to RTS, under qp_mutex.
 * Attributes come from rdma_init_qp_attr(); max_rd_atomic is taken from
 * @conn_param->initiator_depth.
 * NOTE(review): cma_rep_recv() passes conn_param == NULL; a NULL guard
 * presumably precedes the dereference but is not visible - confirm. Error
 * checks and the return are likewise not visible.
 */
521 static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
522 struct rdma_conn_param *conn_param)
524 struct ib_qp_attr qp_attr;
525 int qp_attr_mask, ret;
527 mutex_lock(&id_priv->qp_mutex);
528 if (!id_priv->id.qp) {
533 qp_attr.qp_state = IB_QPS_RTS;
534 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
539 qp_attr.max_rd_atomic = conn_param->initiator_depth;
540 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
542 mutex_unlock(&id_priv->qp_mutex);
/*
 * cma_modify_qp_err - move the ID's QP to the error state, under qp_mutex.
 * Only the QP state attribute is modified.
 * NOTE(review): the body of the "no QP" branch and the return are not
 * visible in this view.
 */
546 static int cma_modify_qp_err(struct rdma_id_private *id_priv)
548 struct ib_qp_attr qp_attr;
551 mutex_lock(&id_priv->qp_mutex);
552 if (!id_priv->id.qp) {
557 qp_attr.qp_state = IB_QPS_ERR;
558 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
560 mutex_unlock(&id_priv->qp_mutex);
/*
 * cma_ib_init_qp_attr - fill in QP attributes without consulting the IB CM.
 * Looks up the P_Key index for the address's P_Key on the ID's port, sets
 * the port number, and builds a mask of STATE | PKEY_INDEX | PORT. Datagram
 * port spaces additionally get the ID's Q_Key; the access-flags assignment
 * presumably belongs to the else branch (the 'else' line is not visible).
 * NOTE(review): the error check and the return are not visible either.
 */
564 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
565 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
567 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
570 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
571 ib_addr_get_pkey(dev_addr),
572 &qp_attr->pkey_index);
576 qp_attr->port_num = id_priv->id.port_num;
577 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
579 if (cma_is_ud_ps(id_priv->id.ps)) {
580 qp_attr->qkey = id_priv->qkey;
581 *qp_attr_mask |= IB_QP_QKEY;
583 qp_attr->qp_access_flags = 0;
584 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
/*
 * rdma_init_qp_attr - compute QP attributes and mask for the state already
 * set in qp_attr->qp_state, dispatching on the device's transport type.
 *
 * IB: cma_ib_init_qp_attr() is used when there is no IB CM ID or the port
 * space is datagram; ib_cm_init_qp_attr() is the other visible source. For
 * RTR the receive PSN is set to the ID's seq_num.
 * iWARP: without an iWARP CM ID only STATE and cleared ACCESS_FLAGS are
 * produced; iw_cm_init_qp_attr() is the other visible source.
 * NOTE(review): the rest of the parameter list, the else/break/default
 * lines and the return are not visible in this view.
 */
589 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
592 struct rdma_id_private *id_priv;
595 id_priv = container_of(id, struct rdma_id_private, id);
596 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
597 case RDMA_TRANSPORT_IB:
598 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
599 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
601 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
603 if (qp_attr->qp_state == IB_QPS_RTR)
604 qp_attr->rq_psn = id_priv->seq_num;
606 case RDMA_TRANSPORT_IWARP:
607 if (!id_priv->cm_id.iw) {
608 qp_attr->qp_access_flags = 0;
609 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
611 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
621 EXPORT_SYMBOL(rdma_init_qp_attr);
623 static inline int cma_zero_addr(struct sockaddr *addr)
625 struct in6_addr *ip6;
627 if (addr->sa_family == AF_INET)
628 return ipv4_is_zeronet(
629 ((struct sockaddr_in *)addr)->sin_addr.s_addr);
631 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
632 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
633 ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
637 static inline int cma_loopback_addr(struct sockaddr *addr)
639 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
/* Return 1 when @addr is either an all-zero or a loopback address. */
static inline int cma_any_addr(struct sockaddr *addr)
{
	if (cma_zero_addr(addr))
		return 1;

	return cma_loopback_addr(addr) != 0;
}
647 static inline __be16 cma_port(struct sockaddr *addr)
649 if (addr->sa_family == AF_INET)
650 return ((struct sockaddr_in *) addr)->sin_port;
652 return ((struct sockaddr_in6 *) addr)->sin6_port;
655 static inline int cma_any_port(struct sockaddr *addr)
657 return !cma_port(addr);
/*
 * cma_get_net_info - decode the addressing header found in CM private data.
 * @hdr is read either as an SDP hello header or as a CMA header; in both
 * cases the header version is checked and the IP version, port and pointers
 * to the source/destination addresses are returned through the out
 * parameters. Only IP versions 4 and 6 pass the final check.
 * NOTE(review): the switch on @ps, the comparison operand for the SDP
 * version, the error returns and the success return are not visible.
 */
660 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
661 u8 *ip_ver, __be16 *port,
662 union cma_ip_addr **src, union cma_ip_addr **dst)
666 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
670 *ip_ver = sdp_get_ip_ver(hdr);
671 *port = ((struct sdp_hh *) hdr)->port;
672 *src = &((struct sdp_hh *) hdr)->src_addr;
673 *dst = &((struct sdp_hh *) hdr)->dst_addr;
676 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
679 *ip_ver = cma_get_ip_ver(hdr);
680 *port = ((struct cma_hdr *) hdr)->port;
681 *src = &((struct cma_hdr *) hdr)->src_addr;
682 *dst = &((struct cma_hdr *) hdr)->dst_addr;
686 if (*ip_ver != 4 && *ip_ver != 6)
/*
 * cma_save_net_info - fill in the addresses of a newly accepted ID.
 * The header addresses are swapped: the new ID's source address is the
 * header's destination plus the listener's port, and its destination
 * address is the header's source plus the port from the header. The family
 * is copied from the listener. One group of assignments handles IPv4 and
 * one IPv6.
 * NOTE(review): the switch on @ip_ver that selects the group is not visible.
 */
691 static void cma_save_net_info(struct rdma_addr *addr,
692 struct rdma_addr *listen_addr,
693 u8 ip_ver, __be16 port,
694 union cma_ip_addr *src, union cma_ip_addr *dst)
696 struct sockaddr_in *listen4, *ip4;
697 struct sockaddr_in6 *listen6, *ip6;
701 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
702 ip4 = (struct sockaddr_in *) &addr->src_addr;
703 ip4->sin_family = listen4->sin_family;
704 ip4->sin_addr.s_addr = dst->ip4.addr;
705 ip4->sin_port = listen4->sin_port;
707 ip4 = (struct sockaddr_in *) &addr->dst_addr;
708 ip4->sin_family = listen4->sin_family;
709 ip4->sin_addr.s_addr = src->ip4.addr;
710 ip4->sin_port = port;
713 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
714 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
715 ip6->sin6_family = listen6->sin6_family;
716 ip6->sin6_addr = dst->ip6;
717 ip6->sin6_port = listen6->sin6_port;
719 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
720 ip6->sin6_family = listen6->sin6_family;
721 ip6->sin6_addr = src->ip6;
722 ip6->sin6_port = port;
/*
 * cma_user_data_offset - offset of the user's data within CM private data.
 * The visible return is the size of the CMA header.
 * NOTE(review): the switch on @ps and any other return values are not
 * visible in this view.
 */
729 static inline int cma_user_data_offset(enum rdma_port_space ps)
735 return sizeof(struct cma_hdr);
/*
 * cma_cancel_route - cancel the outstanding SA query of an ID on an IB
 * transport device.
 * NOTE(review): any guard around the cancel and the other switch cases are
 * not visible in this view.
 */
739 static void cma_cancel_route(struct rdma_id_private *id_priv)
741 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
742 case RDMA_TRANSPORT_IB:
744 ib_sa_cancel_query(id_priv->query_id, id_priv->query);
/*
 * cma_cancel_listens - tear down a listen-on-all-devices ID.
 * The ID is unlinked from its list, then each per-device listener on
 * listen_list is unlinked and destroyed with rdma_destroy_id().
 * NOTE(review): the locking around this sequence is not visible in this
 * view (the global 'lock' mutex is presumably taken and dropped around
 * rdma_destroy_id() - confirm).
 */
751 static void cma_cancel_listens(struct rdma_id_private *id_priv)
753 struct rdma_id_private *dev_id_priv;
756 * Remove from listen_any_list to prevent added devices from spawning
757 * additional listen requests.
760 list_del(&id_priv->list);
762 while (!list_empty(&id_priv->listen_list)) {
763 dev_id_priv = list_entry(id_priv->listen_list.next,
764 struct rdma_id_private, listen_list);
765 /* sync with device removal to avoid duplicate destruction */
766 list_del_init(&dev_id_priv->list);
767 list_del(&dev_id_priv->listen_list);
770 rdma_destroy_id(&dev_id_priv->id);
/*
 * cma_cancel_operation - cancel whatever asynchronous work corresponds to
 * @state: address resolution, a route query, or (for an any-address ID not
 * attached to a device) the per-device listens.
 * NOTE(review): the switch statement and the case labels other than
 * CMA_ROUTE_QUERY are not visible in this view.
 */
776 static void cma_cancel_operation(struct rdma_id_private *id_priv,
777 enum cma_state state)
781 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
783 case CMA_ROUTE_QUERY:
784 cma_cancel_route(id_priv);
787 if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
788 && !id_priv->cma_dev)
789 cma_cancel_listens(id_priv);
/*
 * cma_release_port - drop the ID's hold on its bound port.
 * The ID is removed from the bind list's owners; when no owners remain the
 * port is removed from the port-space IDR.
 * NOTE(review): the NULL check on bind_list, the locking and the freeing of
 * the bind list are not visible in this view.
 */
796 static void cma_release_port(struct rdma_id_private *id_priv)
798 struct rdma_bind_list *bind_list = id_priv->bind_list;
804 hlist_del(&id_priv->node);
805 if (hlist_empty(&bind_list->owners)) {
806 idr_remove(bind_list->ps, bind_list->port);
/*
 * cma_leave_mc_groups - release every multicast membership on the ID.
 * Entries are taken from the head of mc_list until it is empty and their SA
 * multicast handle is freed.
 * NOTE(review): the unlinking and freeing of each entry are not visible in
 * this view.
 */
812 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
814 struct cma_multicast *mc;
816 while (!list_empty(&id_priv->mc_list)) {
817 mc = container_of(id_priv->mc_list.next,
818 struct cma_multicast, list);
820 ib_sa_free_multicast(mc->multicast.ib);
/*
 * rdma_destroy_id - destroy an RDMA CM ID.
 * Visible sequence: switch the state to CMA_DESTROYING and cancel the
 * operation belonging to the previous state; if attached to a device,
 * destroy the transport CM ID (when set and not an error pointer), leave
 * multicast groups and detach from the device; release the bound port; drop
 * a reference and wait until all references are gone; for internal
 * per-device listeners drop the reference held on the parent ID (kept in
 * id.context); finally free the path records.
 * NOTE(review): the locking around the device section and the final free of
 * id_priv are not visible in this view.
 */
825 void rdma_destroy_id(struct rdma_cm_id *id)
827 struct rdma_id_private *id_priv;
828 enum cma_state state;
830 id_priv = container_of(id, struct rdma_id_private, id);
831 state = cma_exch(id_priv, CMA_DESTROYING);
832 cma_cancel_operation(id_priv, state);
835 if (id_priv->cma_dev) {
837 switch (rdma_node_get_transport(id->device->node_type)) {
838 case RDMA_TRANSPORT_IB:
839 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
840 ib_destroy_cm_id(id_priv->cm_id.ib);
842 case RDMA_TRANSPORT_IWARP:
843 if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
844 iw_destroy_cm_id(id_priv->cm_id.iw);
849 cma_leave_mc_groups(id_priv);
851 cma_detach_from_dev(id_priv);
855 cma_release_port(id_priv);
/* Drop a reference, then block until every other holder has dropped theirs. */
856 cma_deref_id(id_priv);
857 wait_for_completion(&id_priv->comp);
859 if (id_priv->internal_id)
860 cma_deref_id(id_priv->id.context);
862 kfree(id_priv->id.route.path_rec);
865 EXPORT_SYMBOL(rdma_destroy_id);
/*
 * cma_rep_recv - finish the active side of a connection after a REP.
 * The QP is moved to RTR and RTS (both helpers are called with a NULL
 * conn_param) and an RTU is sent. The visible failure path puts the QP in
 * the error state and sends a consumer-defined REJ.
 * NOTE(review): the error checks, labels and returns are not visible.
 */
867 static int cma_rep_recv(struct rdma_id_private *id_priv)
871 ret = cma_modify_qp_rtr(id_priv, NULL);
875 ret = cma_modify_qp_rts(id_priv, NULL);
879 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
885 cma_modify_qp_err(id_priv);
886 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
/*
 * cma_verify_rep - validate REP private data.
 * For the SDP port space the major version in the SDP hello-ack header is
 * compared with an expected value.
 * NOTE(review): the comparison operand and both return values are not
 * visible in this view.
 */
891 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
893 if (id_priv->id.ps == RDMA_PS_SDP &&
894 sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
/*
 * cma_set_rep_event_data - copy connection parameters from an IB CM REP
 * event into the RDMA CM event delivered to the user. The private data
 * length is always reported as IB_CM_REP_PRIVATE_DATA_SIZE.
 * NOTE(review): the last parameter (private_data) is declared on a line
 * that is not visible in this view.
 */
901 static void cma_set_rep_event_data(struct rdma_cm_event *event,
902 struct ib_cm_rep_event_param *rep_data,
905 event->param.conn.private_data = private_data;
906 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
907 event->param.conn.responder_resources = rep_data->responder_resources;
908 event->param.conn.initiator_depth = rep_data->initiator_depth;
909 event->param.conn.flow_control = rep_data->flow_control;
910 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
911 event->param.conn.srq = rep_data->srq;
912 event->param.conn.qp_num = rep_data->remote_qpn;
/*
 * cma_ib_handler - IB CM event callback for connected IDs.
 * Translates the IB CM event into an RDMA CM event and passes it to the
 * user's event handler. The callback only runs if the ID is in CMA_CONNECT
 * (or CMA_DISCONNECT for IB_CM_TIMEWAIT_EXIT); cma_disable_callback() also
 * takes handler_mutex, which is released before returning.
 * One visible path clears cm_id.ib, marks the ID CMA_DESTROYING and destroys
 * it; per the inline comment this is the case where the user handler
 * returned non-zero.
 * NOTE(review): several branch/goto/break lines and the return statements
 * are not visible in this view.
 */
915 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
917 struct rdma_id_private *id_priv = cm_id->context;
918 struct rdma_cm_event event;
921 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
922 cma_disable_callback(id_priv, CMA_CONNECT)) ||
923 (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
924 cma_disable_callback(id_priv, CMA_DISCONNECT)))
927 memset(&event, 0, sizeof event);
928 switch (ib_event->event) {
929 case IB_CM_REQ_ERROR:
930 case IB_CM_REP_ERROR:
931 event.event = RDMA_CM_EVENT_UNREACHABLE;
932 event.status = -ETIMEDOUT;
934 case IB_CM_REP_RECEIVED:
935 event.status = cma_verify_rep(id_priv, ib_event->private_data);
937 event.event = RDMA_CM_EVENT_CONNECT_ERROR;
/* With a QP on a non-SDP ID the handshake is completed here via cma_rep_recv(). */
938 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
939 event.status = cma_rep_recv(id_priv);
940 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
941 RDMA_CM_EVENT_ESTABLISHED;
943 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
944 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
945 ib_event->private_data);
947 case IB_CM_RTU_RECEIVED:
948 case IB_CM_USER_ESTABLISHED:
949 event.event = RDMA_CM_EVENT_ESTABLISHED;
951 case IB_CM_DREQ_ERROR:
952 event.status = -ETIMEDOUT; /* fall through */
953 case IB_CM_DREQ_RECEIVED:
954 case IB_CM_DREP_RECEIVED:
/* Only the event that wins the CONNECT -> DISCONNECT transition is reported. */
955 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
957 event.event = RDMA_CM_EVENT_DISCONNECTED;
959 case IB_CM_TIMEWAIT_EXIT:
960 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
962 case IB_CM_MRA_RECEIVED:
965 case IB_CM_REJ_RECEIVED:
966 cma_modify_qp_err(id_priv);
967 event.status = ib_event->param.rej_rcvd.reason;
968 event.event = RDMA_CM_EVENT_REJECTED;
969 event.param.conn.private_data = ib_event->private_data;
970 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
973 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
978 ret = id_priv->id.event_handler(&id_priv->id, &event);
980 /* Destroy the CM ID by returning a non-zero value. */
/* cm_id.ib is cleared first so rdma_destroy_id() does not destroy the CM ID itself. */
981 id_priv->cm_id.ib = NULL;
982 cma_exch(id_priv, CMA_DESTROYING);
983 mutex_unlock(&id_priv->handler_mutex);
984 rdma_destroy_id(&id_priv->id);
988 mutex_unlock(&id_priv->handler_mutex);
/*
 * cma_new_conn_id - build the ID for an incoming connection request.
 * Decodes the addressing header from the REQ private data, creates a new ID
 * that inherits the listener's handler and context, saves the addresses,
 * copies the primary (and, if present, alternate) path record from the
 * event, sets the destination GID from the primary path and translates the
 * source IP into device addressing. On success the ID is put in CMA_CONNECT.
 * The visible failure path destroys the new ID.
 * NOTE(review): error checks, labels and return statements are not visible.
 */
992 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
993 struct ib_cm_event *ib_event)
995 struct rdma_id_private *id_priv;
996 struct rdma_cm_id *id;
997 struct rdma_route *rt;
998 union cma_ip_addr *src, *dst;
1003 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1004 &ip_ver, &port, &src, &dst))
1007 id = rdma_create_id(listen_id->event_handler, listen_id->context,
1012 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1013 ip_ver, port, src, dst);
/* Two path records when the request carries an alternate path, otherwise one. */
1016 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1017 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1022 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1023 if (rt->num_paths == 2)
1024 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1026 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1027 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1028 &id->route.addr.dev_addr);
1032 id_priv = container_of(id, struct rdma_id_private, id);
1033 id_priv->state = CMA_CONNECT;
1037 rdma_destroy_id(id);
/*
 * cma_new_udp_id - build the ID for an incoming datagram (SIDR) request.
 * Creates a new ID that inherits the listener's handler and context, decodes
 * the addressing header from the private data, saves the addresses and
 * translates the source IP into device addressing. On success the ID is put
 * in CMA_CONNECT. The visible failure path destroys the new ID.
 * NOTE(review): error checks, labels and return statements are not visible.
 */
1042 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1043 struct ib_cm_event *ib_event)
1045 struct rdma_id_private *id_priv;
1046 struct rdma_cm_id *id;
1047 union cma_ip_addr *src, *dst;
1052 id = rdma_create_id(listen_id->event_handler, listen_id->context,
1058 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1059 &ip_ver, &port, &src, &dst))
1062 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1063 ip_ver, port, src, dst);
1065 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
1066 &id->route.addr.dev_addr);
1070 id_priv = container_of(id, struct rdma_id_private, id);
1071 id_priv->state = CMA_CONNECT;
1074 rdma_destroy_id(id);
1078 static void cma_set_req_event_data(struct rdma_cm_event *event,
1079 struct ib_cm_req_event_param *req_data,
1080 void *private_data, int offset)
1082 event->param.conn.private_data = private_data + offset;
1083 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1084 event->param.conn.responder_resources = req_data->responder_resources;
1085 event->param.conn.initiator_depth = req_data->initiator_depth;
1086 event->param.conn.flow_control = req_data->flow_control;
1087 event->param.conn.retry_count = req_data->retry_count;
1088 event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1089 event->param.conn.srq = req_data->srq;
1090 event->param.conn.qp_num = req_data->remote_qpn;
/*
 * cma_req_handler - IB CM callback for requests arriving on a listening ID.
 * Runs only while the listener is in CMA_LISTEN (otherwise -ECONNABORTED).
 * A new ID is built for the request (datagram or connected variant), bound
 * to a device with cma_acquire_dev(), wired to the IB CM ID with
 * cma_ib_handler as its handler, and then handed to the user's event
 * handler as RDMA_CM_EVENT_CONNECT_REQUEST. If the new ID is still in
 * CMA_CONNECT on a connected port space afterwards, an MRA is sent.
 * The visible cleanup path clears cm_id.ib, marks the new ID
 * CMA_DESTROYING and destroys it; the listener's handler_mutex is released
 * at the end.
 * NOTE(review): the mutex_lock(&lock) calls, error checks, labels and
 * return statements are not visible in this view.
 */
1093 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1095 struct rdma_id_private *listen_id, *conn_id;
1096 struct rdma_cm_event event;
1099 listen_id = cm_id->context;
1100 if (cma_disable_callback(listen_id, CMA_LISTEN))
1101 return -ECONNABORTED;
1103 memset(&event, 0, sizeof event);
1104 offset = cma_user_data_offset(listen_id->id.ps);
1105 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1106 if (cma_is_ud_ps(listen_id->id.ps)) {
1107 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1108 event.param.ud.private_data = ib_event->private_data + offset;
1109 event.param.ud.private_data_len =
1110 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1112 conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1113 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1114 ib_event->private_data, offset);
/* Nested annotation: the listener's handler_mutex is already held here. */
1121 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1123 ret = cma_acquire_dev(conn_id);
1124 mutex_unlock(&lock);
1126 goto release_conn_id;
1128 conn_id->cm_id.ib = cm_id;
1129 cm_id->context = conn_id;
1130 cm_id->cm_handler = cma_ib_handler;
1132 ret = conn_id->id.event_handler(&conn_id->id, &event);
1135 * Acquire mutex to prevent user executing rdma_destroy_id()
1136 * while we're accessing the cm_id.
1139 if (cma_comp(conn_id, CMA_CONNECT) &&
1140 !cma_is_ud_ps(conn_id->id.ps))
1141 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1142 mutex_unlock(&lock);
1143 mutex_unlock(&conn_id->handler_mutex);
1147 /* Destroy the CM ID by returning a non-zero value. */
1148 conn_id->cm_id.ib = NULL;
1151 cma_exch(conn_id, CMA_DESTROYING);
1152 mutex_unlock(&conn_id->handler_mutex);
1153 rdma_destroy_id(&conn_id->id);
1156 mutex_unlock(&listen_id->handler_mutex);
1160 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1162 return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
/*
 * cma_set_compare_data - build the private-data match pattern used when
 * listening on a specific address.
 * @compare is zeroed, then its data/mask buffers are viewed as either an
 * SDP hello header or a CMA header. For the address family of @addr the
 * IP-version nibble and the destination address are written into the data
 * buffer, with all-ones in the matching positions of the mask.
 * NOTE(review): the case labels, else lines and the ip4_addr declaration
 * are not visible in this view.
 */
1165 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1166 struct ib_cm_compare_data *compare)
1168 struct cma_hdr *cma_data, *cma_mask;
1169 struct sdp_hh *sdp_data, *sdp_mask;
1171 struct in6_addr ip6_addr;
1173 memset(compare, 0, sizeof *compare);
/* Both header views alias the same buffers; only one pair is written per call. */
1174 cma_data = (void *) compare->data;
1175 cma_mask = (void *) compare->mask;
1176 sdp_data = (void *) compare->data;
1177 sdp_mask = (void *) compare->mask;
1179 switch (addr->sa_family) {
1181 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1182 if (ps == RDMA_PS_SDP) {
1183 sdp_set_ip_ver(sdp_data, 4);
1184 sdp_set_ip_ver(sdp_mask, 0xF);
1185 sdp_data->dst_addr.ip4.addr = ip4_addr;
1186 sdp_mask->dst_addr.ip4.addr = htonl(~0);
1188 cma_set_ip_ver(cma_data, 4);
1189 cma_set_ip_ver(cma_mask, 0xF);
1190 cma_data->dst_addr.ip4.addr = ip4_addr;
1191 cma_mask->dst_addr.ip4.addr = htonl(~0);
1195 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1196 if (ps == RDMA_PS_SDP) {
1197 sdp_set_ip_ver(sdp_data, 6);
1198 sdp_set_ip_ver(sdp_mask, 0xF);
1199 sdp_data->dst_addr.ip6 = ip6_addr;
1200 memset(&sdp_mask->dst_addr.ip6, 0xFF,
1201 sizeof sdp_mask->dst_addr.ip6);
1203 cma_set_ip_ver(cma_data, 6);
1204 cma_set_ip_ver(cma_mask, 0xF);
1205 cma_data->dst_addr.ip6 = ip6_addr;
1206 memset(&cma_mask->dst_addr.ip6, 0xFF,
1207 sizeof cma_mask->dst_addr.ip6);
/*
 * cma_iw_handler - iWARP CM event callback for connected IDs.
 * Runs only while the ID is in CMA_CONNECT. The iWARP event is translated
 * into an RDMA CM event (for a connect reply the local and remote addresses
 * are first copied into the ID's route), the status and private data are
 * forwarded, and the user's event handler is called.
 * One visible path clears cm_id.iw, marks the ID CMA_DESTROYING and
 * destroys it; per the inline comment this is the case where the user
 * handler returned non-zero.
 * NOTE(review): the status case labels, break/default lines and the return
 * statements are not visible in this view.
 */
1215 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1217 struct rdma_id_private *id_priv = iw_id->context;
1218 struct rdma_cm_event event;
1219 struct sockaddr_in *sin;
1222 if (cma_disable_callback(id_priv, CMA_CONNECT))
1225 memset(&event, 0, sizeof event);
1226 switch (iw_event->event) {
1227 case IW_CM_EVENT_CLOSE:
1228 event.event = RDMA_CM_EVENT_DISCONNECTED;
1230 case IW_CM_EVENT_CONNECT_REPLY:
1231 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1232 *sin = iw_event->local_addr;
1233 sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1234 *sin = iw_event->remote_addr;
1235 switch (iw_event->status) {
1237 event.event = RDMA_CM_EVENT_ESTABLISHED;
1241 event.event = RDMA_CM_EVENT_REJECTED;
1244 event.event = RDMA_CM_EVENT_UNREACHABLE;
1247 event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1251 case IW_CM_EVENT_ESTABLISHED:
1252 event.event = RDMA_CM_EVENT_ESTABLISHED;
1258 event.status = iw_event->status;
1259 event.param.conn.private_data = iw_event->private_data;
1260 event.param.conn.private_data_len = iw_event->private_data_len;
1261 ret = id_priv->id.event_handler(&id_priv->id, &event);
1263 /* Destroy the CM ID by returning a non-zero value. */
1264 id_priv->cm_id.iw = NULL;
1265 cma_exch(id_priv, CMA_DESTROYING);
1266 mutex_unlock(&id_priv->handler_mutex);
1267 rdma_destroy_id(&id_priv->id);
1271 mutex_unlock(&id_priv->handler_mutex);
/*
 * iw_conn_req_handler - iWARP CM callback for requests arriving on a
 * listening ID.
 * Runs only while the listener is in CMA_LISTEN (otherwise -ECONNABORTED).
 * A new ID is created in CMA_CONNECT with its handler_mutex held, the local
 * net device is found from the event's local IPv4 address, device
 * addressing is copied from it, and the ID is bound to a device. The iWARP
 * CM ID is then wired to the new ID with cma_iw_handler as its handler, the
 * addresses are stored, device attributes are queried to fill in
 * initiator_depth/responder_resources, and the user's event handler is
 * called with RDMA_CM_EVENT_CONNECT_REQUEST.
 * Each visible failure path unlocks the new ID's handler_mutex and destroys
 * it; the listener's handler_mutex is released at the end.
 * NOTE(review): the error checks, goto targets, mutex_lock(&lock), any
 * release of the net device and the return statements are not visible.
 */
1275 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1276 struct iw_cm_event *iw_event)
1278 struct rdma_cm_id *new_cm_id;
1279 struct rdma_id_private *listen_id, *conn_id;
1280 struct sockaddr_in *sin;
1281 struct net_device *dev = NULL;
1282 struct rdma_cm_event event;
1284 struct ib_device_attr attr;
1286 listen_id = cm_id->context;
1287 if (cma_disable_callback(listen_id, CMA_LISTEN))
1288 return -ECONNABORTED;
1290 /* Create a new RDMA id for the new IW CM ID */
1291 new_cm_id = rdma_create_id(listen_id->id.event_handler,
1292 listen_id->id.context,
1294 if (IS_ERR(new_cm_id)) {
1298 conn_id = container_of(new_cm_id, struct rdma_id_private, id);
/* Nested annotation: the listener's handler_mutex is already held here. */
1299 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1300 conn_id->state = CMA_CONNECT;
1302 dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1304 ret = -EADDRNOTAVAIL;
1305 mutex_unlock(&conn_id->handler_mutex);
1306 rdma_destroy_id(new_cm_id);
1309 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1311 mutex_unlock(&conn_id->handler_mutex);
1312 rdma_destroy_id(new_cm_id);
1317 ret = cma_acquire_dev(conn_id);
1318 mutex_unlock(&lock);
1320 mutex_unlock(&conn_id->handler_mutex);
1321 rdma_destroy_id(new_cm_id);
1325 conn_id->cm_id.iw = cm_id;
1326 cm_id->context = conn_id;
1327 cm_id->cm_handler = cma_iw_handler;
1329 sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1330 *sin = iw_event->local_addr;
1331 sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1332 *sin = iw_event->remote_addr;
1334 ret = ib_query_device(conn_id->id.device, &attr);
1336 mutex_unlock(&conn_id->handler_mutex);
1337 rdma_destroy_id(new_cm_id);
1341 memset(&event, 0, sizeof event);
1342 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1343 event.param.conn.private_data = iw_event->private_data;
1344 event.param.conn.private_data_len = iw_event->private_data_len;
/* Report the device maximums as the connection's RDMA read limits. */
1345 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1346 event.param.conn.responder_resources = attr.max_qp_rd_atom;
1347 ret = conn_id->id.event_handler(&conn_id->id, &event);
1349 /* User wants to destroy the CM ID */
1350 conn_id->cm_id.iw = NULL;
1351 cma_exch(conn_id, CMA_DESTROYING);
1352 mutex_unlock(&conn_id->handler_mutex);
1353 rdma_destroy_id(&conn_id->id);
1357 mutex_unlock(&conn_id->handler_mutex);
1362 mutex_unlock(&listen_id->handler_mutex);
/*
 * cma_ib_listen - start listening on an IB device.
 * Creates an IB CM ID with cma_req_handler as its callback and listens on
 * the service ID derived from the port space and source address. For an
 * "any" address no private-data filter is used; a specific address is
 * matched through compare data. The visible failure path destroys the CM ID
 * and clears the pointer.
 * NOTE(review): the else/if (ret) lines, declarations and the return are
 * not visible in this view.
 */
1366 static int cma_ib_listen(struct rdma_id_private *id_priv)
1368 struct ib_cm_compare_data compare_data;
1369 struct sockaddr *addr;
1373 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1375 if (IS_ERR(id_priv->cm_id.ib))
1376 return PTR_ERR(id_priv->cm_id.ib);
1378 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1379 svc_id = cma_get_service_id(id_priv->id.ps, addr);
1380 if (cma_any_addr(addr))
1381 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1383 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1384 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1388 ib_destroy_cm_id(id_priv->cm_id.ib);
1389 id_priv->cm_id.ib = NULL;
/*
 * cma_iw_listen - start listening on an iWARP device.
 * Creates an iWARP CM ID with iw_conn_req_handler as its callback, copies
 * the ID's source address into it as a sockaddr_in, and listens with the
 * given backlog. The visible failure path destroys the CM ID and clears the
 * pointer.
 * NOTE(review): the if (ret) line and the return are not visible.
 */
1395 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1398 struct sockaddr_in *sin;
1400 id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1401 iw_conn_req_handler,
1403 if (IS_ERR(id_priv->cm_id.iw))
1404 return PTR_ERR(id_priv->cm_id.iw);
1406 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1407 id_priv->cm_id.iw->local_addr = *sin;
1409 ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1412 iw_destroy_cm_id(id_priv->cm_id.iw);
1413 id_priv->cm_id.iw = NULL;
/*
 * cma_listen_handler - event handler installed on internal per-device
 * listen IDs (see cma_listen_on_dev(), which passes this function and the
 * parent rdma_id_private as context to rdma_create_id()).
 *
 * Replaces the id's context and event_handler with the parent's user-supplied
 * values and then forwards the event to the parent's handler, returning its
 * result.
 */
1419 static int cma_listen_handler(struct rdma_cm_id *id,
1420 struct rdma_cm_event *event)
1422 struct rdma_id_private *id_priv = id->context;
1424 id->context = id_priv->id.context;
1425 id->event_handler = id_priv->id.event_handler;
1426 return id_priv->id.event_handler(id, event);
/*
 * cma_listen_on_dev - create an internal listen ID for one device on behalf
 * of the listening ID @id_priv.
 *
 * A new rdma_cm ID is created in the same port space with cma_listen_handler
 * as handler and @id_priv as context.  It is marked CMA_ADDR_BOUND, given a
 * copy of the parent's source address, attached to @cma_dev, linked on the
 * parent's listen_list, and flagged as internal; the parent's refcount is
 * raised.  rdma_listen() is then called with the parent's backlog, and a
 * warning carrying the error code and device name is printed.
 * NOTE(review): the checks around rdma_create_id() and the printk are not
 * visible in this extract.
 */
1429 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1430 struct cma_device *cma_dev)
1432 struct rdma_id_private *dev_id_priv;
1433 struct rdma_cm_id *id;
1436 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1440 dev_id_priv = container_of(id, struct rdma_id_private, id);
1442 dev_id_priv->state = CMA_ADDR_BOUND;
1443 memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1444 ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
1446 cma_attach_to_dev(dev_id_priv, cma_dev);
1447 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
/* The child keeps the parent alive for as long as it is linked. */
1448 atomic_inc(&id_priv->refcount);
1449 dev_id_priv->internal_id = 1;
1451 ret = rdma_listen(id, id_priv->backlog);
1453 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1454 "listening on device %s\n", ret, cma_dev->device->name);
/*
 * cma_listen_on_all - listen on every registered device.
 *
 * Adds @id_priv to listen_any_list and calls cma_listen_on_dev() for each
 * entry on dev_list.  The global mutex 'lock' is released at the end.
 * NOTE(review): the matching mutex_lock() line is not visible in this
 * extract; presumably it precedes the list_add_tail().
 */
1457 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1459 struct cma_device *cma_dev;
1462 list_add_tail(&id_priv->list, &listen_any_list);
1463 list_for_each_entry(cma_dev, &dev_list, list)
1464 cma_listen_on_dev(id_priv, cma_dev);
1465 mutex_unlock(&lock);
/*
 * cma_bind_any - bind @id to the wildcard address of family @af.
 *
 * Builds an all-zero sockaddr_in with only sin_family set and passes it to
 * rdma_bind_addr(), returning that call's result.
 * NOTE(review): the temporary is a sockaddr_in regardless of @af.
 */
1468 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1470 struct sockaddr_in addr_in;
1472 memset(&addr_in, 0, sizeof addr_in);
1473 addr_in.sin_family = af;
1474 return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
/*
 * rdma_listen - put an rdma_cm ID into the listening state.
 * @id: ID to listen on.
 * @backlog: stored in the ID and passed to the iWARP listen call.
 *
 * An ID still in CMA_IDLE is first bound to the AF_INET wildcard address.
 * The ID must then move CMA_ADDR_BOUND -> CMA_LISTEN.  Depending on the
 * device transport the IB or iWARP listen helper is called;
 * cma_listen_on_all() is also called on one path.  The trailing lines reset
 * the backlog and move the state back to CMA_ADDR_BOUND.
 * NOTE(review): the conditions selecting between the per-device listen,
 * cma_listen_on_all() and the rollback are not visible in this extract.
 */
1477 int rdma_listen(struct rdma_cm_id *id, int backlog)
1479 struct rdma_id_private *id_priv;
1482 id_priv = container_of(id, struct rdma_id_private, id);
1483 if (id_priv->state == CMA_IDLE) {
1484 ret = cma_bind_any(id, AF_INET);
1489 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1492 id_priv->backlog = backlog;
1494 switch (rdma_node_get_transport(id->device->node_type)) {
1495 case RDMA_TRANSPORT_IB:
1496 ret = cma_ib_listen(id_priv);
1500 case RDMA_TRANSPORT_IWARP:
1501 ret = cma_iw_listen(id_priv, backlog);
1510 cma_listen_on_all(id_priv);
/* Rollback: undo the backlog and the state transition made above. */
1514 id_priv->backlog = 0;
1515 cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1518 EXPORT_SYMBOL(rdma_listen);
/*
 * rdma_set_service_type - record the type of service for an rdma_cm ID.
 * @id: ID to update.
 * @tos: value stored in the ID; it is truncated to u8.
 *
 * The stored value is read later by cma_query_ib_route() when building the
 * path record query.
 */
1520 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1522 struct rdma_id_private *id_priv;
1524 id_priv = container_of(id, struct rdma_id_private, id);
1525 id_priv->tos = (u8) tos;
1527 EXPORT_SYMBOL(rdma_set_service_type);
/*
 * cma_query_handler - completion callback passed to ib_sa_path_rec_get() by
 * cma_query_ib_route(); the context is the struct cma_work for the request.
 *
 * The visible statements either store the returned path record as the
 * route's single path, or rewrite the work item so that it performs the
 * transition CMA_ROUTE_QUERY -> CMA_ADDR_RESOLVED and reports
 * RDMA_CM_EVENT_ROUTE_ERROR carrying @status.  The work item is then queued
 * on cma_wq.
 * NOTE(review): the if/else selecting between the two cases is not visible
 * in this extract; presumably it tests @status.
 */
1529 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1532 struct cma_work *work = context;
1533 struct rdma_route *route;
1535 route = &work->id->id.route;
1538 route->num_paths = 1;
1539 *route->path_rec = *path_rec;
1541 work->old_state = CMA_ROUTE_QUERY;
1542 work->new_state = CMA_ADDR_RESOLVED;
1543 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1544 work->event.status = status;
1547 queue_work(cma_wq, &work->work);
/*
 * cma_query_ib_route - issue an SA path record query for an IB route.
 * @id_priv: ID whose resolved addresses supply SGID, DGID and P_Key.
 * @timeout_ms: query timeout passed to ib_sa_path_rec_get().
 * @work: work item handed to cma_query_handler() as callback context.
 *
 * Requests one reversible path for the service ID derived from the
 * destination address.  For an AF_INET source address the QoS class is taken
 * from the ID's tos value; the other visible branch takes the traffic class
 * from bits 20 and up of the IPv6 flow info.
 *
 * Returns 0 if the query was started, or the negative query id on failure.
 */
1550 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1551 struct cma_work *work)
1553 struct rdma_addr *addr = &id_priv->id.route.addr;
1554 struct ib_sa_path_rec path_rec;
1555 ib_sa_comp_mask comp_mask;
1556 struct sockaddr_in6 *sin6;
1558 memset(&path_rec, 0, sizeof path_rec);
1559 ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1560 ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1561 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1562 path_rec.numb_path = 1;
1563 path_rec.reversible = 1;
1564 path_rec.service_id = cma_get_service_id(id_priv->id.ps,
1565 (struct sockaddr *) &addr->dst_addr);
/* Component mask: only the fields set above take part in the query. */
1567 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1568 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1569 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1571 if (addr->src_addr.ss_family == AF_INET) {
1572 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1573 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1575 sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1576 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1577 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1580 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1581 id_priv->id.port_num, &path_rec,
1582 comp_mask, timeout_ms,
1583 GFP_KERNEL, cma_query_handler,
1584 work, &id_priv->query);
1586 return (id_priv->query_id < 0) ? id_priv->query_id : 0;
/*
 * cma_work_handler - workqueue function for struct cma_work items.
 *
 * Under the ID's handler_mutex it attempts the state transition recorded in
 * the work item and delivers the work item's event to the user's handler.
 * A non-zero handler return moves the ID to CMA_DESTROYING.  The mutex is
 * then released and the reference held for the work item is dropped.
 * NOTE(review): the lines deciding when rdma_destroy_id() runs and the
 * freeing of the work item are not visible in this extract.
 */
1589 static void cma_work_handler(struct work_struct *_work)
1591 struct cma_work *work = container_of(_work, struct cma_work, work);
1592 struct rdma_id_private *id_priv = work->id;
1595 mutex_lock(&id_priv->handler_mutex);
1596 if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1599 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1600 cma_exch(id_priv, CMA_DESTROYING);
1604 mutex_unlock(&id_priv->handler_mutex);
1605 cma_deref_id(id_priv);
1607 rdma_destroy_id(&id_priv->id);
/*
 * cma_ndev_work_handler - workqueue function for struct cma_ndev_work items
 * (queued by cma_netdev_change()).
 *
 * Under the ID's handler_mutex it checks whether the ID is already in
 * CMA_DESTROYING or CMA_DEVICE_REMOVAL, and delivers the work item's event
 * to the user's handler.  A non-zero handler return moves the ID to
 * CMA_DESTROYING.  The mutex is then released and the work item's reference
 * on the ID is dropped.
 * NOTE(review): the branch targets, the condition for rdma_destroy_id() and
 * the freeing of the work item are not visible in this extract.
 */
1611 static void cma_ndev_work_handler(struct work_struct *_work)
1613 struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
1614 struct rdma_id_private *id_priv = work->id;
1617 mutex_lock(&id_priv->handler_mutex);
1618 if (id_priv->state == CMA_DESTROYING ||
1619 id_priv->state == CMA_DEVICE_REMOVAL)
1622 if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1623 cma_exch(id_priv, CMA_DESTROYING);
1628 mutex_unlock(&id_priv->handler_mutex);
1629 cma_deref_id(id_priv);
1631 rdma_destroy_id(&id_priv->id);
/*
 * cma_resolve_ib_route - start asynchronous route resolution on IB.
 *
 * Allocates a zeroed work item preset for the success case
 * (CMA_ROUTE_QUERY -> CMA_ROUTE_RESOLVED, RDMA_CM_EVENT_ROUTE_RESOLVED),
 * allocates storage for one path record in the route, and starts the SA
 * query through cma_query_ib_route().  The trailing lines free the path
 * record storage and clear the pointer.
 * NOTE(review): allocation-failure handling, the error labels and the
 * freeing of the work item are not visible in this extract.
 */
1635 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1637 struct rdma_route *route = &id_priv->id.route;
1638 struct cma_work *work;
1641 work = kzalloc(sizeof *work, GFP_KERNEL);
1646 INIT_WORK(&work->work, cma_work_handler);
1647 work->old_state = CMA_ROUTE_QUERY;
1648 work->new_state = CMA_ROUTE_RESOLVED;
1649 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1651 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1652 if (!route->path_rec) {
1657 ret = cma_query_ib_route(id_priv, timeout_ms, work);
1663 kfree(route->path_rec);
1664 route->path_rec = NULL;
1670 int rdma_set_ib_paths(struct rdma_cm_id *id,
1671 struct ib_sa_path_rec *path_rec, int num_paths)
1673 struct rdma_id_private *id_priv;
1676 id_priv = container_of(id, struct rdma_id_private, id);
1677 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1680 id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1681 if (!id->route.path_rec) {
1686 memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1689 cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1692 EXPORT_SYMBOL(rdma_set_ib_paths);
/*
 * cma_resolve_iw_route - complete route resolution for iWARP.
 *
 * No query is issued here: a zeroed work item is set up for the transition
 * CMA_ROUTE_QUERY -> CMA_ROUTE_RESOLVED with RDMA_CM_EVENT_ROUTE_RESOLVED and
 * queued on cma_wq, so the user callback runs from cma_work_handler().
 * @timeout_ms is not used by the visible code.
 * NOTE(review): the allocation-failure check and the return are not visible
 * in this extract.
 */
1694 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1696 struct cma_work *work;
1698 work = kzalloc(sizeof *work, GFP_KERNEL);
1703 INIT_WORK(&work->work, cma_work_handler);
1704 work->old_state = CMA_ROUTE_QUERY;
1705 work->new_state = CMA_ROUTE_RESOLVED;
1706 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1707 queue_work(cma_wq, &work->work);
/*
 * rdma_resolve_route - begin route resolution for an rdma_cm ID.
 * @id: ID whose address has been resolved (CMA_ADDR_RESOLVED).
 * @timeout_ms: passed to the transport-specific helper.
 *
 * Moves the ID to CMA_ROUTE_QUERY, takes a reference on it for the pending
 * operation, and dispatches to the IB or iWARP helper by device transport.
 * The trailing lines restore CMA_ADDR_RESOLVED and drop the reference.
 * NOTE(review): the return values and the condition guarding the rollback
 * are not visible in this extract.
 */
1711 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1713 struct rdma_id_private *id_priv;
1716 id_priv = container_of(id, struct rdma_id_private, id);
1717 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1720 atomic_inc(&id_priv->refcount);
1721 switch (rdma_node_get_transport(id->device->node_type)) {
1722 case RDMA_TRANSPORT_IB:
1723 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1725 case RDMA_TRANSPORT_IWARP:
1726 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1737 cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1738 cma_deref_id(id_priv);
1741 EXPORT_SYMBOL(rdma_resolve_route);
/*
 * cma_bind_loopback - pick a local device and port for a loopback ID.
 *
 * Scans every port of every device on dev_list for one whose queried state
 * is IB_PORT_ACTIVE; one visible line also selects the first device on the
 * list.  The cached GID and P_Key at index 0 of the chosen port are read and
 * stored in the ID's device address, the port number is recorded and the ID
 * is attached to the device.  The global mutex 'lock' is released at the end.
 * NOTE(review): the empty-list error, the jump taken when an active port is
 * found, the fallback port value and the mutex_lock() are not visible in
 * this extract.
 */
1743 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1745 struct cma_device *cma_dev;
1746 struct ib_port_attr port_attr;
1753 if (list_empty(&dev_list)) {
1757 list_for_each_entry(cma_dev, &dev_list, list)
1758 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1759 if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1760 port_attr.state == IB_PORT_ACTIVE)
1764 cma_dev = list_entry(dev_list.next, struct cma_device, list);
1767 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1771 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1775 ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1776 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1777 id_priv->id.port_num = p;
1778 cma_attach_to_dev(id_priv, cma_dev);
1780 mutex_unlock(&lock);
/*
 * addr_handler - completion callback passed to rdma_resolve_ip() by
 * rdma_resolve_addr(); @context is the rdma_id_private.
 *
 * Runs under the ID's handler_mutex.  It attempts the transition
 * CMA_ADDR_QUERY -> CMA_ADDR_RESOLVED and, when resolution succeeded and the
 * ID has no device yet, acquires one with cma_acquire_dev().  The visible
 * statements then either fall back to CMA_ADDR_BOUND and report
 * RDMA_CM_EVENT_ADDR_ERROR with the status, or copy the resolved source
 * address into the route and report RDMA_CM_EVENT_ADDR_RESOLVED.  If the
 * user's handler returns non-zero the ID is destroyed; in both outcomes the
 * mutex is released and the reference taken for the query is dropped.
 * NOTE(review): the if/else on status, the goto targets and the
 * mutex_lock(&lock) are not visible in this extract.
 */
1784 static void addr_handler(int status, struct sockaddr *src_addr,
1785 struct rdma_dev_addr *dev_addr, void *context)
1787 struct rdma_id_private *id_priv = context;
1788 struct rdma_cm_event event;
1790 memset(&event, 0, sizeof event);
1791 mutex_lock(&id_priv->handler_mutex);
1794 * Grab mutex to block rdma_destroy_id() from removing the device while
1795 * we're trying to acquire it.
1798 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1799 mutex_unlock(&lock);
1803 if (!status && !id_priv->cma_dev)
1804 status = cma_acquire_dev(id_priv);
1805 mutex_unlock(&lock);
1808 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1810 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1811 event.status = status;
1813 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1814 ip_addr_size(src_addr));
1815 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
/* A non-zero return from the user's handler asks for the ID to be destroyed. */
1818 if (id_priv->id.event_handler(&id_priv->id, &event)) {
1819 cma_exch(id_priv, CMA_DESTROYING);
1820 mutex_unlock(&id_priv->handler_mutex);
1821 cma_deref_id(id_priv);
1822 rdma_destroy_id(&id_priv->id);
1826 mutex_unlock(&id_priv->handler_mutex);
1827 cma_deref_id(id_priv);
/*
 * cma_resolve_loopback - resolve an address that refers to the local node.
 *
 * Allocates a work item, binds the ID to a local device through
 * cma_bind_loopback() if it has none, and uses the source GID as the
 * destination GID.  If the source address is still zero, its family and IPv4
 * address are copied from the destination.  The work item is set up for
 * CMA_ADDR_QUERY -> CMA_ADDR_RESOLVED with RDMA_CM_EVENT_ADDR_RESOLVED and
 * queued on cma_wq.
 * NOTE(review): allocation/bind failure handling and the return statements
 * are not visible in this extract.
 */
1830 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1832 struct cma_work *work;
1833 struct sockaddr_in *src_in, *dst_in;
1837 work = kzalloc(sizeof *work, GFP_KERNEL);
1841 if (!id_priv->cma_dev) {
1842 ret = cma_bind_loopback(id_priv);
1847 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1848 ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1850 if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
1851 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1852 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1853 src_in->sin_family = dst_in->sin_family;
1854 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1858 INIT_WORK(&work->work, cma_work_handler);
1859 work->old_state = CMA_ADDR_QUERY;
1860 work->new_state = CMA_ADDR_RESOLVED;
1861 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1862 queue_work(cma_wq, &work->work);
/*
 * cma_bind_addr - bind @id before address resolution.
 *
 * If the caller supplied a source address with a non-zero family, bind to
 * it; the other visible return binds to the wildcard address of the
 * destination's family.
 */
1869 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1870 struct sockaddr *dst_addr)
1872 if (src_addr && src_addr->sa_family)
1873 return rdma_bind_addr(id, src_addr);
1875 return cma_bind_any(id, dst_addr->sa_family);
/*
 * rdma_resolve_addr - begin resolving a destination address.
 * @id: ID to resolve for; bound here first if still in CMA_IDLE.
 * @src_addr: optional source address used for the implicit bind.
 * @dst_addr: destination address; copied into the route.
 * @timeout_ms: passed to rdma_resolve_ip().
 *
 * Moves the ID CMA_ADDR_BOUND -> CMA_ADDR_QUERY and takes a reference for
 * the pending operation.  A wildcard destination is resolved as loopback;
 * the other visible call hands the request to rdma_resolve_ip() with
 * addr_handler as completion callback.  The trailing lines restore
 * CMA_ADDR_BOUND and drop the reference.
 * NOTE(review): return values and the condition guarding the rollback are
 * not visible in this extract.
 */
1878 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1879 struct sockaddr *dst_addr, int timeout_ms)
1881 struct rdma_id_private *id_priv;
1884 id_priv = container_of(id, struct rdma_id_private, id);
1885 if (id_priv->state == CMA_IDLE) {
1886 ret = cma_bind_addr(id, src_addr, dst_addr);
1891 if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1894 atomic_inc(&id_priv->refcount);
1895 memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1896 if (cma_any_addr(dst_addr))
1897 ret = cma_resolve_loopback(id_priv);
1899 ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
1900 dst_addr, &id->route.addr.dev_addr,
1901 timeout_ms, addr_handler, id_priv);
1907 cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1908 cma_deref_id(id_priv);
1911 EXPORT_SYMBOL(rdma_resolve_addr);
/*
 * cma_bind_port - associate an ID with a port's bind list.
 *
 * Writes the bind list's port (converted to network byte order) into the
 * ID's source address, records the bind list in the ID, and adds the ID to
 * the bind list's owners.
 */
1913 static void cma_bind_port(struct rdma_bind_list *bind_list,
1914 struct rdma_id_private *id_priv)
1916 struct sockaddr_in *sin;
1918 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1919 sin->sin_port = htons(bind_list->port);
1920 id_priv->bind_list = bind_list;
1921 hlist_add_head(&id_priv->node, &bind_list->owners);
/*
 * cma_alloc_port - allocate a bind list for a specific port number.
 * @ps: idr for the port space.
 * @id_priv: ID to bind.
 * @snum: requested port number.
 *
 * Allocates a zeroed bind list and inserts it into the idr at or above
 * @snum, retrying while the idr reports -EAGAIN and idr_pre_get() succeeds.
 * The bind list then records the port and is attached to the ID with
 * cma_bind_port().  -EADDRNOTAVAIL and an idr_remove() appear on the visible
 * error path.
 * NOTE(review): the comparison of the returned port with @snum, the error
 * labels and the kfree of the bind list are not visible in this extract.
 */
1924 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1925 unsigned short snum)
1927 struct rdma_bind_list *bind_list;
1930 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1935 ret = idr_get_new_above(ps, bind_list, snum, &port);
1936 } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1942 ret = -EADDRNOTAVAIL;
1947 bind_list->port = (unsigned short) port;
1948 cma_bind_port(bind_list, id_priv);
1951 idr_remove(ps, port);
/*
 * cma_alloc_any_port - allocate a bind list on any free port.
 *
 * Allocates a zeroed bind list and inserts it into the idr at or above the
 * file-level next_port, retrying while the idr reports -EAGAIN and
 * idr_pre_get() succeeds.  The local port range is read with
 * inet_get_local_port_range(); the visible statements remove the idr entry
 * when next_port differs from the low bound, report -EADDRNOTAVAIL, or
 * advance next_port past the allocated port.  On success the bind list
 * records the port and is attached to the ID.
 * NOTE(review): the range comparison, the retry jump and the error labels
 * are not visible in this extract.
 */
1957 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
1959 struct rdma_bind_list *bind_list;
1960 int port, ret, low, high;
1962 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1967 /* FIXME: add proper port randomization per like inet_csk_get_port */
1969 ret = idr_get_new_above(ps, bind_list, next_port, &port);
1970 } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1975 inet_get_local_port_range(&low, &high);
1977 if (next_port != low) {
1978 idr_remove(ps, port);
1982 ret = -EADDRNOTAVAIL;
1989 next_port = port + 1;
1992 bind_list->port = (unsigned short) port;
1993 cma_bind_port(bind_list, id_priv);
1996 idr_remove(ps, port);
/*
 * cma_use_port - bind an ID to the specific port in its source address.
 *
 * Ports below PROT_SOCK require CAP_NET_BIND_SERVICE.  If no bind list
 * exists for the port, a new one is allocated with cma_alloc_port().
 * Otherwise a wildcard bind is refused with -EADDRNOTAVAIL, as is any bind
 * when an existing owner is bound to the wildcard address; each owner's IPv4
 * address is also compared with the requested one.  If the checks pass the
 * ID joins the existing bind list.
 * NOTE(review): the return values for the capability failure and for an
 * address match, and the final return, are not visible in this extract.
 */
2002 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2004 struct rdma_id_private *cur_id;
2005 struct sockaddr_in *sin, *cur_sin;
2006 struct rdma_bind_list *bind_list;
2007 struct hlist_node *node;
2008 unsigned short snum;
2010 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
2011 snum = ntohs(sin->sin_port);
2012 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2015 bind_list = idr_find(ps, snum);
2017 return cma_alloc_port(ps, id_priv, snum);
2020 * We don't support binding to any address if anyone is bound to
2021 * a specific address on the same port.
2023 if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2024 return -EADDRNOTAVAIL;
2026 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2027 if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr))
2028 return -EADDRNOTAVAIL;
2030 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
2031 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
2035 cma_bind_port(bind_list, id_priv);
/*
 * cma_get_port - reserve a port for an ID in its port space.
 *
 * Selects the idr by the ID's port space (an unsupported port space returns
 * -EPROTONOSUPPORT).  A wildcard port in the source address is served by
 * cma_alloc_any_port(); the other visible call uses cma_use_port().  The
 * global mutex 'lock' is released afterwards.
 * NOTE(review): the switch cases, the mutex_lock() and the return are not
 * visible in this extract.
 */
2039 static int cma_get_port(struct rdma_id_private *id_priv)
2044 switch (id_priv->id.ps) {
2058 return -EPROTONOSUPPORT;
2062 if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr))
2063 ret = cma_alloc_any_port(ps, id_priv);
2065 ret = cma_use_port(ps, id_priv);
2066 mutex_unlock(&lock);
/*
 * rdma_bind_addr - bind an rdma_cm ID to a local address.
 * @id: ID in the CMA_IDLE state.
 * @addr: local address; only AF_INET is accepted (-EAFNOSUPPORT otherwise).
 *
 * Moves the ID CMA_IDLE -> CMA_ADDR_BOUND.  For a non-wildcard address the
 * address is translated to a device address and a device is acquired.  The
 * address is then copied into the route and a port is reserved with
 * cma_get_port().  The trailing lines detach from the device (non-wildcard
 * only) and restore CMA_IDLE.
 * NOTE(review): error checks after each step, the labels, the
 * mutex_lock(&lock) calls and the return values are not visible in this
 * extract.
 */
2071 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2073 struct rdma_id_private *id_priv;
2076 if (addr->sa_family != AF_INET)
2077 return -EAFNOSUPPORT;
2079 id_priv = container_of(id, struct rdma_id_private, id);
2080 if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2083 if (!cma_any_addr(addr)) {
2084 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2089 ret = cma_acquire_dev(id_priv);
2090 mutex_unlock(&lock);
2095 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2096 ret = cma_get_port(id_priv);
2102 if (!cma_any_addr(addr)) {
2104 cma_detach_from_dev(id_priv);
2105 mutex_unlock(&lock);
2108 cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2111 EXPORT_SYMBOL(rdma_bind_addr);
/*
 * cma_format_hdr - fill in the connection-request header carried in the CM
 * private data.
 * @hdr: start of the private data buffer.
 * @ps: port space; selects between the SDP and the RDMA CM header layout.
 * @route: route supplying the IPv4 source/destination addresses and port.
 *
 * The SDP variant checks the major version already present in the header
 * and then stores IP version 4, both addresses and the source port.  The
 * RDMA CM variant stores CMA_VERSION, IP version 4, both addresses and the
 * source port.
 * NOTE(review): the switch on @ps, the pointer assignments from @hdr and the
 * return values are not visible in this extract.
 */
2113 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2114 struct rdma_route *route)
2116 struct sockaddr_in *src4, *dst4;
2117 struct cma_hdr *cma_hdr;
2118 struct sdp_hh *sdp_hdr;
2120 src4 = (struct sockaddr_in *) &route->addr.src_addr;
2121 dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2126 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2128 sdp_set_ip_ver(sdp_hdr, 4);
2129 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2130 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2131 sdp_hdr->port = src4->sin_port;
2135 cma_hdr->cma_version = CMA_VERSION;
2136 cma_set_ip_ver(cma_hdr, 4);
2137 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2138 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2139 cma_hdr->port = src4->sin_port;
/*
 * cma_sidr_rep_handler - IB CM callback for SIDR exchanges started by
 * cma_resolve_ib_udp().
 *
 * Callbacks are first gated by cma_disable_callback() for the CMA_CONNECT
 * state.  A SIDR request error is reported as RDMA_CM_EVENT_UNREACHABLE with
 * -ETIMEDOUT.  A received reply is reported as UNREACHABLE when its status
 * is not IB_SIDR_SUCCESS (status copied from the reply) or when its Q_Key
 * differs from the ID's (-EINVAL); otherwise an address handle is built from
 * the route's path record and RDMA_CM_EVENT_ESTABLISHED is reported with the
 * remote QPN and Q_Key.  Other CM events are logged as unexpected.  If the
 * user's handler returns non-zero, cm_id.ib is cleared and the ID destroyed.
 * NOTE(review): break/goto lines and return values are not visible in this
 * extract.
 */
2145 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2146 struct ib_cm_event *ib_event)
2148 struct rdma_id_private *id_priv = cm_id->context;
2149 struct rdma_cm_event event;
2150 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2153 if (cma_disable_callback(id_priv, CMA_CONNECT))
2156 memset(&event, 0, sizeof event);
2157 switch (ib_event->event) {
2158 case IB_CM_SIDR_REQ_ERROR:
2159 event.event = RDMA_CM_EVENT_UNREACHABLE;
2160 event.status = -ETIMEDOUT;
2162 case IB_CM_SIDR_REP_RECEIVED:
2163 event.param.ud.private_data = ib_event->private_data;
2164 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2165 if (rep->status != IB_SIDR_SUCCESS) {
2166 event.event = RDMA_CM_EVENT_UNREACHABLE;
2167 event.status = ib_event->param.sidr_rep_rcvd.status;
2170 if (id_priv->qkey != rep->qkey) {
2171 event.event = RDMA_CM_EVENT_UNREACHABLE;
2172 event.status = -EINVAL;
2175 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2176 id_priv->id.route.path_rec,
2177 &event.param.ud.ah_attr);
2178 event.param.ud.qp_num = rep->qpn;
2179 event.param.ud.qkey = rep->qkey;
2180 event.event = RDMA_CM_EVENT_ESTABLISHED;
2184 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2189 ret = id_priv->id.event_handler(&id_priv->id, &event);
2191 /* Destroy the CM ID by returning a non-zero value. */
2192 id_priv->cm_id.ib = NULL;
2193 cma_exch(id_priv, CMA_DESTROYING);
2194 mutex_unlock(&id_priv->handler_mutex);
2195 rdma_destroy_id(&id_priv->id);
2199 mutex_unlock(&id_priv->handler_mutex);
2203 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2204 struct rdma_conn_param *conn_param)
2206 struct ib_cm_sidr_req_param req;
2207 struct rdma_route *route;
2210 req.private_data_len = sizeof(struct cma_hdr) +
2211 conn_param->private_data_len;
2212 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2213 if (!req.private_data)
2216 if (conn_param->private_data && conn_param->private_data_len)
2217 memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2218 conn_param->private_data, conn_param->private_data_len);
2220 route = &id_priv->id.route;
2221 ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2225 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2226 cma_sidr_rep_handler, id_priv);
2227 if (IS_ERR(id_priv->cm_id.ib)) {
2228 ret = PTR_ERR(id_priv->cm_id.ib);
2232 req.path = route->path_rec;
2233 req.service_id = cma_get_service_id(id_priv->id.ps,
2234 (struct sockaddr *) &route->addr.dst_addr);
2235 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2236 req.max_cm_retries = CMA_MAX_CM_RETRIES;
2238 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2240 ib_destroy_cm_id(id_priv->cm_id.ib);
2241 id_priv->cm_id.ib = NULL;
2244 kfree(req.private_data);
2248 static int cma_connect_ib(struct rdma_id_private *id_priv,
2249 struct rdma_conn_param *conn_param)
2251 struct ib_cm_req_param req;
2252 struct rdma_route *route;
2256 memset(&req, 0, sizeof req);
2257 offset = cma_user_data_offset(id_priv->id.ps);
2258 req.private_data_len = offset + conn_param->private_data_len;
2259 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2263 if (conn_param->private_data && conn_param->private_data_len)
2264 memcpy(private_data + offset, conn_param->private_data,
2265 conn_param->private_data_len);
2267 id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2269 if (IS_ERR(id_priv->cm_id.ib)) {
2270 ret = PTR_ERR(id_priv->cm_id.ib);
2274 route = &id_priv->id.route;
2275 ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2278 req.private_data = private_data;
2280 req.primary_path = &route->path_rec[0];
2281 if (route->num_paths == 2)
2282 req.alternate_path = &route->path_rec[1];
2284 req.service_id = cma_get_service_id(id_priv->id.ps,
2285 (struct sockaddr *) &route->addr.dst_addr);
2286 req.qp_num = id_priv->qp_num;
2287 req.qp_type = IB_QPT_RC;
2288 req.starting_psn = id_priv->seq_num;
2289 req.responder_resources = conn_param->responder_resources;
2290 req.initiator_depth = conn_param->initiator_depth;
2291 req.flow_control = conn_param->flow_control;
2292 req.retry_count = conn_param->retry_count;
2293 req.rnr_retry_count = conn_param->rnr_retry_count;
2294 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2295 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2296 req.max_cm_retries = CMA_MAX_CM_RETRIES;
2297 req.srq = id_priv->srq ? 1 : 0;
2299 ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2301 if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2302 ib_destroy_cm_id(id_priv->cm_id.ib);
2303 id_priv->cm_id.ib = NULL;
2306 kfree(private_data);
/*
 * cma_connect_iw - start an iWARP connection.
 *
 * Creates an iWARP CM ID with cma_iw_handler as callback, copies the route's
 * source and destination addresses into it, moves the QP towards RTR with
 * cma_modify_qp_rtr(), and calls iw_cm_connect() with ORD/IRD and private
 * data taken from @conn_param.  The QP number comes either from the ID or
 * from @conn_param.  On failure a successfully created CM ID is destroyed
 * and the pointer cleared.
 * NOTE(review): the condition selecting the QP number source, the goto
 * lines and the return are not visible in this extract.
 */
2310 static int cma_connect_iw(struct rdma_id_private *id_priv,
2311 struct rdma_conn_param *conn_param)
2313 struct iw_cm_id *cm_id;
2314 struct sockaddr_in* sin;
2316 struct iw_cm_conn_param iw_param;
2318 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2319 if (IS_ERR(cm_id)) {
2320 ret = PTR_ERR(cm_id);
2324 id_priv->cm_id.iw = cm_id;
2326 sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2327 cm_id->local_addr = *sin;
2329 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2330 cm_id->remote_addr = *sin;
2332 ret = cma_modify_qp_rtr(id_priv, conn_param);
2336 iw_param.ord = conn_param->initiator_depth;
2337 iw_param.ird = conn_param->responder_resources;
2338 iw_param.private_data = conn_param->private_data;
2339 iw_param.private_data_len = conn_param->private_data_len;
2341 iw_param.qpn = id_priv->qp_num;
2343 iw_param.qpn = conn_param->qp_num;
2344 ret = iw_cm_connect(cm_id, &iw_param);
2346 if (ret && !IS_ERR(cm_id)) {
2347 iw_destroy_cm_id(cm_id);
2348 id_priv->cm_id.iw = NULL;
/*
 * rdma_connect - initiate an active connection (or UD service resolution).
 * @id: ID with a resolved route (CMA_ROUTE_RESOLVED).
 * @conn_param: connection parameters supplied by the caller.
 *
 * Moves the ID to CMA_CONNECT, records the caller's QP number and SRQ flag
 * in the ID, and dispatches by transport: on IB a UD port space uses the
 * SIDR path, otherwise a CM REQ is sent; iWARP uses cma_connect_iw().  The
 * trailing line restores CMA_ROUTE_RESOLVED.
 * NOTE(review): the condition guarding the qp_num/srq assignment, the
 * default case, the rollback condition and the returns are not visible in
 * this extract.
 */
2353 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2355 struct rdma_id_private *id_priv;
2358 id_priv = container_of(id, struct rdma_id_private, id);
2359 if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2363 id_priv->qp_num = conn_param->qp_num;
2364 id_priv->srq = conn_param->srq;
2367 switch (rdma_node_get_transport(id->device->node_type)) {
2368 case RDMA_TRANSPORT_IB:
2369 if (cma_is_ud_ps(id->ps))
2370 ret = cma_resolve_ib_udp(id_priv, conn_param);
2372 ret = cma_connect_ib(id_priv, conn_param);
2374 case RDMA_TRANSPORT_IWARP:
2375 ret = cma_connect_iw(id_priv, conn_param);
2386 cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2389 EXPORT_SYMBOL(rdma_connect);
/*
 * cma_accept_ib - accept an IB connection request by sending a CM REP.
 *
 * Moves the QP through RTR and RTS with the cma_modify_qp_* helpers, then
 * fills a zeroed REP from the ID (QP number, starting PSN, SRQ flag) and
 * from @conn_param (private data, responder resources, initiator depth,
 * flow control, RNR retry count) and sends it.  Failover is never accepted.
 * NOTE(review): the error checks between the steps and the return are not
 * visible in this extract.
 */
2391 static int cma_accept_ib(struct rdma_id_private *id_priv,
2392 struct rdma_conn_param *conn_param)
2394 struct ib_cm_rep_param rep;
2397 ret = cma_modify_qp_rtr(id_priv, conn_param);
2401 ret = cma_modify_qp_rts(id_priv, conn_param);
2405 memset(&rep, 0, sizeof rep);
2406 rep.qp_num = id_priv->qp_num;
2407 rep.starting_psn = id_priv->seq_num;
2408 rep.private_data = conn_param->private_data;
2409 rep.private_data_len = conn_param->private_data_len;
2410 rep.responder_resources = conn_param->responder_resources;
2411 rep.initiator_depth = conn_param->initiator_depth;
2412 rep.failover_accepted = 0;
2413 rep.flow_control = conn_param->flow_control;
2414 rep.rnr_retry_count = conn_param->rnr_retry_count;
2415 rep.srq = id_priv->srq ? 1 : 0;
2417 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2422 static int cma_accept_iw(struct rdma_id_private *id_priv,
2423 struct rdma_conn_param *conn_param)
2425 struct iw_cm_conn_param iw_param;
2428 ret = cma_modify_qp_rtr(id_priv, conn_param);
2432 iw_param.ord = conn_param->initiator_depth;
2433 iw_param.ird = conn_param->responder_resources;
2434 iw_param.private_data = conn_param->private_data;
2435 iw_param.private_data_len = conn_param->private_data_len;
2436 if (id_priv->id.qp) {
2437 iw_param.qpn = id_priv->qp_num;
2439 iw_param.qpn = conn_param->qp_num;
2441 return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
/*
 * cma_send_sidr_rep - send a SIDR reply on the ID's IB CM ID.
 * @id_priv: ID the reply is sent for.
 * @status: SIDR status placed in the reply.
 * @private_data: optional private data carried in the reply.
 * @private_data_len: length of @private_data.
 *
 * The reply is zero-initialised; the QP number and Q_Key are only filled in
 * from the ID when @status is IB_SIDR_SUCCESS.  Returns the result of
 * ib_send_cm_sidr_rep().
 */
2444 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2445 enum ib_cm_sidr_status status,
2446 const void *private_data, int private_data_len)
2448 struct ib_cm_sidr_rep_param rep;
2450 memset(&rep, 0, sizeof rep);
2451 rep.status = status;
2452 if (status == IB_SIDR_SUCCESS) {
2453 rep.qp_num = id_priv->qp_num;
2454 rep.qkey = id_priv->qkey;
2456 rep.private_data = private_data;
2457 rep.private_data_len = private_data_len;
2459 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2462 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2464 struct rdma_id_private *id_priv;
2467 id_priv = container_of(id, struct rdma_id_private, id);
2468 if (!cma_comp(id_priv, CMA_CONNECT))
2471 if (!id->qp && conn_param) {
2472 id_priv->qp_num = conn_param->qp_num;
2473 id_priv->srq = conn_param->srq;
2476 switch (rdma_node_get_transport(id->device->node_type)) {
2477 case RDMA_TRANSPORT_IB:
2478 if (cma_is_ud_ps(id->ps))
2479 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2480 conn_param->private_data,
2481 conn_param->private_data_len);
2482 else if (conn_param)
2483 ret = cma_accept_ib(id_priv, conn_param);
2485 ret = cma_rep_recv(id_priv);
2487 case RDMA_TRANSPORT_IWARP:
2488 ret = cma_accept_iw(id_priv, conn_param);
2500 cma_modify_qp_err(id_priv);
2501 rdma_reject(id, NULL, 0);
2504 EXPORT_SYMBOL(rdma_accept);
/*
 * rdma_notify - pass an asynchronous QP event to the underlying CM.
 * @id: ID the event relates to.
 * @event: IB event type to forward.
 *
 * Requires the ID to have a CM device (cma_has_cm_dev()).  For an IB channel
 * adapter the event is forwarded with ib_cm_notify().
 * NOTE(review): this switch tests node_type directly rather than the
 * transport; the default case and the returns are not visible in this
 * extract.
 */
2506 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2508 struct rdma_id_private *id_priv;
2511 id_priv = container_of(id, struct rdma_id_private, id);
2512 if (!cma_has_cm_dev(id_priv))
2515 switch (id->device->node_type) {
2516 case RDMA_NODE_IB_CA:
2517 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2525 EXPORT_SYMBOL(rdma_notify);
/*
 * rdma_reject - reject a connection or UD service request.
 * @id: ID associated with the request.
 * @private_data: optional private data to send with the rejection.
 * @private_data_len: length of @private_data.
 *
 * Requires the ID to have a CM device.  On IB, a UD port space sends a SIDR
 * reply with IB_SIDR_REJECT; the other visible call sends a CM REJ with
 * reason IB_CM_REJ_CONSUMER_DEFINED.  On iWARP iw_cm_reject() is used.
 * NOTE(review): the default case and the return values are not visible in
 * this extract.
 */
2527 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2528 u8 private_data_len)
2530 struct rdma_id_private *id_priv;
2533 id_priv = container_of(id, struct rdma_id_private, id);
2534 if (!cma_has_cm_dev(id_priv))
2537 switch (rdma_node_get_transport(id->device->node_type)) {
2538 case RDMA_TRANSPORT_IB:
2539 if (cma_is_ud_ps(id->ps))
2540 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2541 private_data, private_data_len);
2543 ret = ib_send_cm_rej(id_priv->cm_id.ib,
2544 IB_CM_REJ_CONSUMER_DEFINED, NULL,
2545 0, private_data, private_data_len);
2547 case RDMA_TRANSPORT_IWARP:
2548 ret = iw_cm_reject(id_priv->cm_id.iw,
2549 private_data, private_data_len);
2557 EXPORT_SYMBOL(rdma_reject);
/*
 * rdma_disconnect - disconnect the connection associated with @id.
 *
 * Requires the ID to have a CM device.  On IB the QP is moved to the error
 * state, then a DREQ is sent; if sending the DREQ fails a DREP is sent
 * instead (i.e. we respond to the peer's disconnect).  On iWARP
 * iw_cm_disconnect() is called with its second argument 0.
 * NOTE(review): the check of the QP transition result, the default case and
 * the returns are not visible in this extract.
 */
2559 int rdma_disconnect(struct rdma_cm_id *id)
2561 struct rdma_id_private *id_priv;
2564 id_priv = container_of(id, struct rdma_id_private, id);
2565 if (!cma_has_cm_dev(id_priv))
2568 switch (rdma_node_get_transport(id->device->node_type)) {
2569 case RDMA_TRANSPORT_IB:
2570 ret = cma_modify_qp_err(id_priv);
2573 /* Initiate or respond to a disconnect. */
2574 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2575 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2577 case RDMA_TRANSPORT_IWARP:
2578 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2587 EXPORT_SYMBOL(rdma_disconnect);
/*
 * cma_ib_mc_handler - SA multicast callback for joins started by
 * cma_join_ib_multicast(); multicast->context is the struct cma_multicast.
 *
 * Callbacks are gated by cma_disable_callback() for the CMA_ADDR_BOUND and
 * CMA_ADDR_RESOLVED states.  On a successful join with a QP present, the QP
 * is attached to the group under qp_mutex.  The user is then notified: the
 * event carries the (possibly updated) status and the join's user context
 * as private data.  The visible statements report either
 * RDMA_CM_EVENT_MULTICAST_JOIN with an address handle, QPN 0xFFFFFF and the
 * group's Q_Key, or RDMA_CM_EVENT_MULTICAST_ERROR.  A non-zero return from
 * the user's handler destroys the ID.
 * NOTE(review): the if/else on status and the return values are not visible
 * in this extract.
 */
2589 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2591 struct rdma_id_private *id_priv;
2592 struct cma_multicast *mc = multicast->context;
2593 struct rdma_cm_event event;
2596 id_priv = mc->id_priv;
2597 if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
2598 cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
2601 mutex_lock(&id_priv->qp_mutex);
2602 if (!status && id_priv->id.qp)
2603 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2604 multicast->rec.mlid);
2605 mutex_unlock(&id_priv->qp_mutex);
2607 memset(&event, 0, sizeof event);
2608 event.status = status;
2609 event.param.ud.private_data = mc->context;
2611 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2612 ib_init_ah_from_mcmember(id_priv->id.device,
2613 id_priv->id.port_num, &multicast->rec,
2614 &event.param.ud.ah_attr);
2615 event.param.ud.qp_num = 0xFFFFFF;
2616 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2618 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2620 ret = id_priv->id.event_handler(&id_priv->id, &event);
2622 cma_exch(id_priv, CMA_DESTROYING);
2623 mutex_unlock(&id_priv->handler_mutex);
2624 rdma_destroy_id(&id_priv->id);
2628 mutex_unlock(&id_priv->handler_mutex);
/*
 * cma_set_mgid - derive the multicast GID for a multicast IP address.
 * @id_priv: ID performing the join; supplies broadcast address/port space.
 * @addr: multicast address requested by the user.
 * @mgid: output GID.
 *
 * A wildcard address yields an all-zero MGID.  An IPv6 address whose first
 * 32 bits match a masked pattern is treated as an SA-assigned MGID and
 * copied verbatim.  The remaining visible branch maps the IPv4 address with
 * ip_ib_mc_map() against the device broadcast address, overrides byte 7 of
 * the mapping with 0x01 for RDMA_PS_UDP, and takes the GID from offset 4 of
 * the mapped hardware address.
 * NOTE(review): the constant compared against the masked IPv6 prefix is not
 * visible in this extract.
 */
2632 static void cma_set_mgid(struct rdma_id_private *id_priv,
2633 struct sockaddr *addr, union ib_gid *mgid)
2635 unsigned char mc_map[MAX_ADDR_LEN];
2636 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2637 struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2638 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2640 if (cma_any_addr(addr)) {
2641 memset(mgid, 0, sizeof *mgid);
2642 } else if ((addr->sa_family == AF_INET6) &&
2643 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2645 /* IPv6 address is an SA assigned MGID. */
2646 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2648 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2649 if (id_priv->id.ps == RDMA_PS_UDP)
2650 mc_map[7] = 0x01; /* Use RDMA CM signature */
2651 *mgid = *(union ib_gid *) (mc_map + 4);
/*
 * cma_join_ib_multicast - start an SA multicast join for @mc.
 *
 * Looks up the member record for the MGID stored in the device address with
 * ib_sa_get_mcmember_rec() to use as a template, then overrides the MGID
 * (derived from the requested address), the Q_Key (RDMA_UDP_QKEY for
 * RDMA_PS_UDP), the port GID and the P_Key.  The join is issued with
 * ib_sa_join_multicast() and cma_ib_mc_handler as callback; the returned
 * handle is stored in mc->multicast.ib.  Returns the PTR_ERR of the join if
 * it fails.
 * NOTE(review): the remaining arguments of the lookup, its error check, the
 * join_state assignment and the success return are not visible in this
 * extract.
 */
2655 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2656 struct cma_multicast *mc)
2658 struct ib_sa_mcmember_rec rec;
2659 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2660 ib_sa_comp_mask comp_mask;
2663 ib_addr_get_mgid(dev_addr, &rec.mgid);
2664 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2669 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
2670 if (id_priv->id.ps == RDMA_PS_UDP)
2671 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2672 ib_addr_get_sgid(dev_addr, &rec.port_gid);
2673 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2676 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2677 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2678 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2679 IB_SA_MCMEMBER_REC_FLOW_LABEL |
2680 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2682 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2683 id_priv->id.port_num, &rec,
2684 comp_mask, GFP_KERNEL,
2685 cma_ib_mc_handler, mc);
2686 if (IS_ERR(mc->multicast.ib))
2687 return PTR_ERR(mc->multicast.ib);
2692 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2695 struct rdma_id_private *id_priv;
2696 struct cma_multicast *mc;
2699 id_priv = container_of(id, struct rdma_id_private, id);
2700 if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2701 !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2704 mc = kmalloc(sizeof *mc, GFP_KERNEL);
2708 memcpy(&mc->addr, addr, ip_addr_size(addr));
2709 mc->context = context;
2710 mc->id_priv = id_priv;
2712 spin_lock(&id_priv->lock);
2713 list_add(&mc->list, &id_priv->mc_list);
2714 spin_unlock(&id_priv->lock);
2716 switch (rdma_node_get_transport(id->device->node_type)) {
2717 case RDMA_TRANSPORT_IB:
2718 ret = cma_join_ib_multicast(id_priv, mc);
2726 spin_lock_irq(&id_priv->lock);
2727 list_del(&mc->list);
2728 spin_unlock_irq(&id_priv->lock);
2733 EXPORT_SYMBOL(rdma_join_multicast);
/*
 * rdma_leave_multicast - leave a previously joined multicast group.
 * @id: ID that joined the group.
 * @addr: multicast address given to rdma_join_multicast().
 *
 * Searches the ID's mc_list under id_priv->lock for an entry whose stored
 * address matches @addr byte-for-byte.  The match is unlinked and the lock
 * dropped before the QP is detached from the group and the SA multicast
 * handle is freed.  If nothing matches, the lock is simply released.
 * NOTE(review): the check for id->qp before detaching, the kfree of the
 * entry and the return after a match are not visible in this extract.
 */
2735 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2737 struct rdma_id_private *id_priv;
2738 struct cma_multicast *mc;
2740 id_priv = container_of(id, struct rdma_id_private, id);
2741 spin_lock_irq(&id_priv->lock);
2742 list_for_each_entry(mc, &id_priv->mc_list, list) {
2743 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2744 list_del(&mc->list);
2745 spin_unlock_irq(&id_priv->lock);
2748 ib_detach_mcast(id->qp,
2749 &mc->multicast.ib->rec.mgid,
2750 mc->multicast.ib->rec.mlid);
2751 ib_sa_free_multicast(mc->multicast.ib);
2756 spin_unlock_irq(&id_priv->lock);
/*
 * cma_netdev_change - queue an address-change event for one ID.
 *
 * If the ID's source device is @ndev and the hardware address stored in the
 * ID differs from the device's current address, a message is logged and a
 * work item reporting RDMA_CM_EVENT_ADDR_CHANGE is queued on cma_wq; it will
 * run cma_ndev_work_handler().  A reference on the ID is taken for the
 * queued work.
 * NOTE(review): the allocation-failure check, the assignment of the ID to
 * the work item and the return values are not visible in this extract.
 */
2760 static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
2762 struct rdma_dev_addr *dev_addr;
2763 struct cma_ndev_work *work;
2765 dev_addr = &id_priv->id.route.addr.dev_addr;
2767 if ((dev_addr->src_dev == ndev) &&
2768 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
2769 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
2770 ndev->name, &id_priv->id);
2771 work = kzalloc(sizeof *work, GFP_KERNEL);
2775 INIT_WORK(&work->work, cma_ndev_work_handler);
2777 work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
2778 atomic_inc(&id_priv->refcount);
2779 queue_work(cma_wq, &work->work);
/*
 * cma_netdev_callback - netdevice notifier entry point (see cma_nb).
 * @self:  notifier block that was invoked.
 * @event: netdevice event code.
 *
 * The notifier payload is treated as a struct net_device.  The event is
 * tested against init_net, NETDEV_BONDING_FAILOVER and the
 * IFF_MASTER/IFF_BONDING flags, and cma_netdev_change() is called for every
 * id on every device in dev_list.  ret starts as NOTIFY_DONE.
 */
2785 static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
2788 struct net_device *ndev = (struct net_device *)ctx;
2789 struct cma_device *cma_dev;
2790 struct rdma_id_private *id_priv;
2791 int ret = NOTIFY_DONE;
/* Test whether the device belongs to a namespace other than init_net. */
2793 if (dev_net(ndev) != &init_net)
/* Test whether the event is something other than a bonding failover. */
2796 if (event != NETDEV_BONDING_FAILOVER)
/* Test whether the device lacks the bonding-master flags. */
2799 if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
/* Visit every id attached to every known cma device. */
2803 list_for_each_entry(cma_dev, &dev_list, list)
2804 list_for_each_entry(id_priv, &cma_dev->id_list, list) {
2805 ret = cma_netdev_change(ndev, id_priv);
2811 mutex_unlock(&lock);
/* Netdevice notifier block; cma_init() registers it and cma_cleanup() removes it. */
2815 static struct notifier_block cma_nb = {
2816 .notifier_call = cma_netdev_callback
/*
 * cma_add_one - set up CM state for an IB device.
 * @device: the ib_device to track.
 *
 * Allocates a struct cma_device, initialises it, stores it as this
 * client's data on @device, appends it to dev_list and calls
 * cma_listen_on_dev() for every id on listen_any_list.
 */
2819 static void cma_add_one(struct ib_device *device)
2821 struct cma_device *cma_dev;
2822 struct rdma_id_private *id_priv;
2824 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2828 cma_dev->device = device;
/* The refcount starts at 1; cma_process_remove() waits on ->comp. */
2830 init_completion(&cma_dev->comp);
2831 atomic_set(&cma_dev->refcount, 1);
2832 INIT_LIST_HEAD(&cma_dev->id_list);
/* Make the cma_device retrievable through ib_get_client_data(). */
2833 ib_set_client_data(device, &cma_client, cma_dev);
/* Add the device to the global list, then attach the wildcard listeners to it. */
2836 list_add_tail(&cma_dev->list, &dev_list);
2837 list_for_each_entry(id_priv, &listen_any_list, list)
2838 cma_listen_on_dev(id_priv, cma_dev);
2839 mutex_unlock(&lock);
/*
 * cma_remove_id_dev - deliver a device-removal event to one id.
 * @id_priv: id whose device is going away.
 *
 * Switches the id to CMA_DEVICE_REMOVAL, cancels whatever operation
 * belonged to the previous state, and calls the id's event handler with
 * RDMA_CM_EVENT_DEVICE_REMOVAL while holding handler_mutex.  The handler's
 * result is stored in ret.
 */
2842 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2844 struct rdma_cm_event event;
2845 enum cma_state state;
2848 /* Record that we want to remove the device */
2849 state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
/* Previous state CMA_DESTROYING is singled out here. */
2850 if (state == CMA_DESTROYING)
/* Cancel the operation associated with the state the id was in before. */
2853 cma_cancel_operation(id_priv, state);
2854 mutex_lock(&id_priv->handler_mutex);
2856 /* Check for destruction from another callback. */
2857 if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
/* Build a zeroed event carrying only the event type. */
2860 memset(&event, 0, sizeof event);
2861 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2862 ret = id_priv->id.event_handler(&id_priv->id, &event);
2864 mutex_unlock(&id_priv->handler_mutex);
/*
 * cma_process_remove - detach every id from a device that is being removed.
 * @cma_dev: device being torn down.
 *
 * Repeatedly takes the first id off cma_dev->id_list, notifies it through
 * cma_remove_id_dev() (ids with internal_id set skip the callback and get
 * ret = 1), then drops the device reference and waits on cma_dev->comp.
 */
2868 static void cma_process_remove(struct cma_device *cma_dev)
2870 struct rdma_id_private *id_priv;
2874 while (!list_empty(&cma_dev->id_list)) {
2875 id_priv = list_entry(cma_dev->id_list.next,
2876 struct rdma_id_private, list);
/* Unlink the id from both lists and take a reference before dropping the lock. */
2878 list_del(&id_priv->listen_list);
2879 list_del_init(&id_priv->list);
2880 atomic_inc(&id_priv->refcount);
2881 mutex_unlock(&lock);
2883 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
/* Drop the reference taken above. */
2884 cma_deref_id(id_priv);
/* Destroy the id (presumably only when ret is non-zero - TODO confirm). */
2886 rdma_destroy_id(&id_priv->id);
2890 mutex_unlock(&lock);
/* Drop a device reference, then block until cma_dev->comp is completed. */
2892 cma_deref_dev(cma_dev);
2893 wait_for_completion(&cma_dev->comp);
/*
 * cma_remove_one - tear down CM state for an IB device.
 * @device: the ib_device being removed.
 *
 * Looks up the cma_device stored as this client's data on @device, unlinks
 * it from dev_list and hands it to cma_process_remove().
 */
2896 static void cma_remove_one(struct ib_device *device)
2898 struct cma_device *cma_dev;
/* Fetch the pointer stored by cma_add_one() via ib_set_client_data(). */
2900 cma_dev = ib_get_client_data(device, &cma_client);
2905 list_del(&cma_dev->list);
2906 mutex_unlock(&lock);
2908 cma_process_remove(cma_dev);
/*
 * cma_init - module entry point (passed to module_init()).
 *
 * Picks a random starting value for next_port inside the local port range,
 * creates the "rdma_cm" workqueue, and registers the SA client, the address
 * client, the netdevice notifier and the IB client.  The result of
 * ib_register_client() is stored in ret.
 */
2912 static int cma_init(void)
2914 int ret, low, high, remaining;
/* Map random bytes onto the inclusive range [low, high]. */
2916 get_random_bytes(&next_port, sizeof next_port);
2917 inet_get_local_port_range(&low, &high);
2918 remaining = (high - low) + 1;
2919 next_port = ((unsigned int) next_port % remaining) + low;
2921 cma_wq = create_singlethread_workqueue("rdma_cm");
2925 ib_sa_register_client(&sa_client);
2926 rdma_addr_register_client(&addr_client);
2927 register_netdevice_notifier(&cma_nb);
2929 ret = ib_register_client(&cma_client);
/*
 * Undo the registrations above in reverse order (presumably the error path
 * for a failed ib_register_client() - TODO confirm).
 */
2935 unregister_netdevice_notifier(&cma_nb);
2936 rdma_addr_unregister_client(&addr_client);
2937 ib_sa_unregister_client(&sa_client);
2938 destroy_workqueue(cma_wq);
/*
 * cma_cleanup - module exit point (passed to module_exit()).
 *
 * Unregisters everything cma_init() registered, in reverse order, destroys
 * the workqueue and then destroys the four port-space IDRs.
 */
2942 static void cma_cleanup(void)
2944 ib_unregister_client(&cma_client);
2945 unregister_netdevice_notifier(&cma_nb);
2946 rdma_addr_unregister_client(&addr_client);
2947 ib_sa_unregister_client(&sa_client);
2948 destroy_workqueue(cma_wq);
/* Destroy the per-port-space IDRs. */
2949 idr_destroy(&sdp_ps);
2950 idr_destroy(&tcp_ps);
2951 idr_destroy(&udp_ps);
2952 idr_destroy(&ipoib_ps);
/* Module entry and exit hooks. */
2955 module_init(cma_init);
2956 module_exit(cma_cleanup);