drivers/infiniband/core/cma.c
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>

#include <net/tcp.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)

static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);

static struct ib_client cma_client = {
        .name   = "cma",
        .add    = cma_add_one,
        .remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static DEFINE_IDR(sdp_ps);
static DEFINE_IDR(tcp_ps);
static DEFINE_IDR(udp_ps);
static DEFINE_IDR(ipoib_ps);
static int next_port;

struct cma_device {
        struct list_head        list;
        struct ib_device        *device;
        struct completion       comp;
        atomic_t                refcount;
        struct list_head        id_list;
};

enum cma_state {
        CMA_IDLE,
        CMA_ADDR_QUERY,
        CMA_ADDR_RESOLVED,
        CMA_ROUTE_QUERY,
        CMA_ROUTE_RESOLVED,
        CMA_CONNECT,
        CMA_DISCONNECT,
        CMA_ADDR_BOUND,
        CMA_LISTEN,
        CMA_DEVICE_REMOVAL,
        CMA_DESTROYING
};

struct rdma_bind_list {
        struct idr              *ps;
        struct hlist_head       owners;
        unsigned short          port;
};

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
struct rdma_id_private {
        struct rdma_cm_id       id;

        struct rdma_bind_list   *bind_list;
        struct hlist_node       node;
        struct list_head        list; /* listen_any_list or cma_device.list */
        struct list_head        listen_list; /* per device listens */
        struct cma_device       *cma_dev;
        struct list_head        mc_list;

        int                     internal_id;
        enum cma_state          state;
        spinlock_t              lock;
        struct mutex            qp_mutex;

        struct completion       comp;
        atomic_t                refcount;
        struct mutex            handler_mutex;

        int                     backlog;
        int                     timeout_ms;
        struct ib_sa_query      *query;
        int                     query_id;
        union {
                struct ib_cm_id *ib;
                struct iw_cm_id *iw;
        } cm_id;

        u32                     seq_num;
        u32                     qkey;
        u32                     qp_num;
        u8                      srq;
        u8                      tos;
};

struct cma_multicast {
        struct rdma_id_private *id_priv;
        union {
                struct ib_sa_multicast *ib;
        } multicast;
        struct list_head        list;
        void                    *context;
        struct sockaddr         addr;
        u8                      pad[sizeof(struct sockaddr_in6) -
                                    sizeof(struct sockaddr)];
};

struct cma_work {
        struct work_struct      work;
        struct rdma_id_private  *id;
        enum cma_state          old_state;
        enum cma_state          new_state;
        struct rdma_cm_event    event;
};

union cma_ip_addr {
        struct in6_addr ip6;
        struct {
                __be32 pad[3];
                __be32 addr;
        } ip4;
};

struct cma_hdr {
        u8 cma_version;
        u8 ip_version;  /* IP version: 7:4 */
        __be16 port;
        union cma_ip_addr src_addr;
        union cma_ip_addr dst_addr;
};

struct sdp_hh {
        u8 bsdh[16];
        u8 sdp_version; /* Major version: 7:4 */
        u8 ip_version;  /* IP version: 7:4 */
        u8 sdp_specific1[10];
        __be16 port;
        __be16 sdp_specific2;
        union cma_ip_addr src_addr;
        union cma_ip_addr dst_addr;
};

struct sdp_hah {
        u8 bsdh[16];
        u8 sdp_version;
};

#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2

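/*
 * State helpers: cma_comp() tests the current state, cma_comp_exch()
 * moves from one expected state to another, and cma_exch() swaps in a
 * new state unconditionally, all under id_priv->lock.
 */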
static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&id_priv->lock, flags);
        ret = (id_priv->state == comp);
        spin_unlock_irqrestore(&id_priv->lock, flags);
        return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
                         enum cma_state comp, enum cma_state exch)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&id_priv->lock, flags);
        if ((ret = (id_priv->state == comp)))
                id_priv->state = exch;
        spin_unlock_irqrestore(&id_priv->lock, flags);
        return ret;
}

static enum cma_state cma_exch(struct rdma_id_private *id_priv,
                               enum cma_state exch)
{
        unsigned long flags;
        enum cma_state old;

        spin_lock_irqsave(&id_priv->lock, flags);
        old = id_priv->state;
        id_priv->state = exch;
        spin_unlock_irqrestore(&id_priv->lock, flags);
        return old;
}

static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
{
        return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static inline u8 sdp_get_majv(u8 sdp_version)
{
        return sdp_version >> 4;
}

static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
{
        return hh->ip_version >> 4;
}

static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
        hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}

static inline int cma_is_ud_ps(enum rdma_port_space ps)
{
        return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
                              struct cma_device *cma_dev)
{
        atomic_inc(&cma_dev->refcount);
        id_priv->cma_dev = cma_dev;
        id_priv->id.device = cma_dev->device;
        list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static inline void cma_deref_dev(struct cma_device *cma_dev)
{
        if (atomic_dec_and_test(&cma_dev->refcount))
                complete(&cma_dev->comp);
}

static void cma_detach_from_dev(struct rdma_id_private *id_priv)
{
        list_del(&id_priv->list);
        cma_deref_dev(id_priv->cma_dev);
        id_priv->cma_dev = NULL;
}

static int cma_set_qkey(struct ib_device *device, u8 port_num,
                        enum rdma_port_space ps,
                        struct rdma_dev_addr *dev_addr, u32 *qkey)
{
        struct ib_sa_mcmember_rec rec;
        int ret = 0;

        switch (ps) {
        case RDMA_PS_UDP:
                *qkey = RDMA_UDP_QKEY;
                break;
        case RDMA_PS_IPOIB:
                ib_addr_get_mgid(dev_addr, &rec.mgid);
                ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
                *qkey = be32_to_cpu(rec.qkey);
                break;
        default:
                break;
        }
        return ret;
}

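/*
 * Bind the id to the first registered device whose cached GID table
 * contains the source GID, and derive the qkey for UD port spaces.
 * Callers hold the global 'lock' mutex so that walking dev_list is safe.
 */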
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        struct cma_device *cma_dev;
        union ib_gid gid;
        int ret = -ENODEV;

        switch (rdma_node_get_transport(dev_addr->dev_type)) {
        case RDMA_TRANSPORT_IB:
                ib_addr_get_sgid(dev_addr, &gid);
                break;
        case RDMA_TRANSPORT_IWARP:
                iw_addr_get_sgid(dev_addr, &gid);
                break;
        default:
                return -ENODEV;
        }

        list_for_each_entry(cma_dev, &dev_list, list) {
                ret = ib_find_cached_gid(cma_dev->device, &gid,
                                         &id_priv->id.port_num, NULL);
                if (!ret) {
                        ret = cma_set_qkey(cma_dev->device,
                                           id_priv->id.port_num,
                                           id_priv->id.ps, dev_addr,
                                           &id_priv->qkey);
                        if (!ret)
                                cma_attach_to_dev(id_priv, cma_dev);
                        break;
                }
        }
        return ret;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
        if (atomic_dec_and_test(&id_priv->refcount))
                complete(&id_priv->comp);
}

static int cma_disable_callback(struct rdma_id_private *id_priv,
                              enum cma_state state)
{
        mutex_lock(&id_priv->handler_mutex);
        if (id_priv->state != state) {
                mutex_unlock(&id_priv->handler_mutex);
                return -EINVAL;
        }
        return 0;
}

static int cma_has_cm_dev(struct rdma_id_private *id_priv)
{
        return (id_priv->id.device && id_priv->cm_id.ib);
}

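/*
 * Allocate an rdma_cm_id in the CMA_IDLE state.  The public id is
 * embedded in struct rdma_id_private, which carries the reference
 * count, state lock and handler mutex used throughout this file.
 */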
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
                                  void *context, enum rdma_port_space ps)
{
        struct rdma_id_private *id_priv;

        id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
        if (!id_priv)
                return ERR_PTR(-ENOMEM);

        id_priv->state = CMA_IDLE;
        id_priv->id.context = context;
        id_priv->id.event_handler = event_handler;
        id_priv->id.ps = ps;
        spin_lock_init(&id_priv->lock);
        mutex_init(&id_priv->qp_mutex);
        init_completion(&id_priv->comp);
        atomic_set(&id_priv->refcount, 1);
        mutex_init(&id_priv->handler_mutex);
        INIT_LIST_HEAD(&id_priv->listen_list);
        INIT_LIST_HEAD(&id_priv->mc_list);
        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);

        return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
        if (ret)
                return ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
        if (ret)
                return ret;

        qp_attr.qp_state = IB_QPS_RTS;
        qp_attr.sq_psn = 0;
        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

        return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
                   struct ib_qp_init_attr *qp_init_attr)
{
        struct rdma_id_private *id_priv;
        struct ib_qp *qp;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (id->device != pd->device)
                return -EINVAL;

        qp = ib_create_qp(pd, qp_init_attr);
        if (IS_ERR(qp))
                return PTR_ERR(qp);

        if (cma_is_ud_ps(id_priv->id.ps))
                ret = cma_init_ud_qp(id_priv, qp);
        else
                ret = cma_init_conn_qp(id_priv, qp);
        if (ret)
                goto err;

        id->qp = qp;
        id_priv->qp_num = qp->qp_num;
        id_priv->srq = (qp->srq != NULL);
        return 0;
err:
        ib_destroy_qp(qp);
        return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
        struct rdma_id_private *id_priv;

        id_priv = container_of(id, struct rdma_id_private, id);
        mutex_lock(&id_priv->qp_mutex);
        ib_destroy_qp(id_priv->id.qp);
        id_priv->id.qp = NULL;
        mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

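/*
 * QP transition helpers: move an attached QP to RTR/RTS using attributes
 * supplied by the CM, or into the error state.  Each takes qp_mutex and
 * is a no-op when no QP is attached to the id.
 */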
static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
                             struct rdma_conn_param *conn_param)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        mutex_lock(&id_priv->qp_mutex);
        if (!id_priv->id.qp) {
                ret = 0;
                goto out;
        }

        /* Need to update QP attributes from default values. */
        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                goto out;

        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
        if (ret)
                goto out;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                goto out;

        if (conn_param)
                qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
        mutex_unlock(&id_priv->qp_mutex);
        return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
                             struct rdma_conn_param *conn_param)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        mutex_lock(&id_priv->qp_mutex);
        if (!id_priv->id.qp) {
                ret = 0;
                goto out;
        }

        qp_attr.qp_state = IB_QPS_RTS;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                goto out;

        if (conn_param)
                qp_attr.max_rd_atomic = conn_param->initiator_depth;
        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
        mutex_unlock(&id_priv->qp_mutex);
        return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
        struct ib_qp_attr qp_attr;
        int ret;

        mutex_lock(&id_priv->qp_mutex);
        if (!id_priv->id.qp) {
                ret = 0;
                goto out;
        }

        qp_attr.qp_state = IB_QPS_ERR;
        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
        mutex_unlock(&id_priv->qp_mutex);
        return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
                               struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        int ret;

        ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
                                  ib_addr_get_pkey(dev_addr),
                                  &qp_attr->pkey_index);
        if (ret)
                return ret;

        qp_attr->port_num = id_priv->id.port_num;
        *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

        if (cma_is_ud_ps(id_priv->id.ps)) {
                qp_attr->qkey = id_priv->qkey;
                *qp_attr_mask |= IB_QP_QKEY;
        } else {
                qp_attr->qp_access_flags = 0;
                *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
        }
        return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
                       int *qp_attr_mask)
{
        struct rdma_id_private *id_priv;
        int ret = 0;

        id_priv = container_of(id, struct rdma_id_private, id);
        switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
                        ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
                else
                        ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
                                                 qp_attr_mask);
                if (qp_attr->qp_state == IB_QPS_RTR)
                        qp_attr->rq_psn = id_priv->seq_num;
                break;
        case RDMA_TRANSPORT_IWARP:
                if (!id_priv->cm_id.iw) {
                        qp_attr->qp_access_flags = 0;
                        *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
                } else
                        ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
                                                 qp_attr_mask);
                break;
        default:
                ret = -ENOSYS;
                break;
        }

        return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
        struct in6_addr *ip6;

        if (addr->sa_family == AF_INET)
                return ipv4_is_zeronet(
                        ((struct sockaddr_in *)addr)->sin_addr.s_addr);
        else {
                ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
                return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
                        ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
        }
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
        return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
}

static inline int cma_any_addr(struct sockaddr *addr)
{
        return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static inline __be16 cma_port(struct sockaddr *addr)
{
        if (addr->sa_family == AF_INET)
                return ((struct sockaddr_in *) addr)->sin_port;
        else
                return ((struct sockaddr_in6 *) addr)->sin6_port;
}

static inline int cma_any_port(struct sockaddr *addr)
{
        return !cma_port(addr);
}

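/*
 * Parse the private data carried in a connection request: SDP uses its
 * own hello header, everything else uses struct cma_hdr.  Returns the IP
 * version, port and source/destination addresses found in the header.
 */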
static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
                            u8 *ip_ver, __be16 *port,
                            union cma_ip_addr **src, union cma_ip_addr **dst)
{
        switch (ps) {
        case RDMA_PS_SDP:
                if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
                    SDP_MAJ_VERSION)
                        return -EINVAL;

                *ip_ver = sdp_get_ip_ver(hdr);
                *port   = ((struct sdp_hh *) hdr)->port;
                *src    = &((struct sdp_hh *) hdr)->src_addr;
                *dst    = &((struct sdp_hh *) hdr)->dst_addr;
                break;
        default:
                if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
                        return -EINVAL;

                *ip_ver = cma_get_ip_ver(hdr);
                *port   = ((struct cma_hdr *) hdr)->port;
                *src    = &((struct cma_hdr *) hdr)->src_addr;
                *dst    = &((struct cma_hdr *) hdr)->dst_addr;
                break;
        }

        if (*ip_ver != 4 && *ip_ver != 6)
                return -EINVAL;
        return 0;
}

static void cma_save_net_info(struct rdma_addr *addr,
                              struct rdma_addr *listen_addr,
                              u8 ip_ver, __be16 port,
                              union cma_ip_addr *src, union cma_ip_addr *dst)
{
        struct sockaddr_in *listen4, *ip4;
        struct sockaddr_in6 *listen6, *ip6;

        switch (ip_ver) {
        case 4:
                listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
                ip4 = (struct sockaddr_in *) &addr->src_addr;
                ip4->sin_family = listen4->sin_family;
                ip4->sin_addr.s_addr = dst->ip4.addr;
                ip4->sin_port = listen4->sin_port;

                ip4 = (struct sockaddr_in *) &addr->dst_addr;
                ip4->sin_family = listen4->sin_family;
                ip4->sin_addr.s_addr = src->ip4.addr;
                ip4->sin_port = port;
                break;
        case 6:
                listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
                ip6 = (struct sockaddr_in6 *) &addr->src_addr;
                ip6->sin6_family = listen6->sin6_family;
                ip6->sin6_addr = dst->ip6;
                ip6->sin6_port = listen6->sin6_port;

                ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
                ip6->sin6_family = listen6->sin6_family;
                ip6->sin6_addr = src->ip6;
                ip6->sin6_port = port;
                break;
        default:
                break;
        }
}

static inline int cma_user_data_offset(enum rdma_port_space ps)
{
        switch (ps) {
        case RDMA_PS_SDP:
                return 0;
        default:
                return sizeof(struct cma_hdr);
        }
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
        switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (id_priv->query)
                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
                break;
        default:
                break;
        }
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
        struct rdma_id_private *dev_id_priv;

        /*
         * Remove from listen_any_list to prevent added devices from spawning
         * additional listen requests.
         */
        mutex_lock(&lock);
        list_del(&id_priv->list);

        while (!list_empty(&id_priv->listen_list)) {
                dev_id_priv = list_entry(id_priv->listen_list.next,
                                         struct rdma_id_private, listen_list);
                /* sync with device removal to avoid duplicate destruction */
                list_del_init(&dev_id_priv->list);
                list_del(&dev_id_priv->listen_list);
                mutex_unlock(&lock);

                rdma_destroy_id(&dev_id_priv->id);
                mutex_lock(&lock);
        }
        mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
                                 enum cma_state state)
{
        switch (state) {
        case CMA_ADDR_QUERY:
                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
                break;
        case CMA_ROUTE_QUERY:
                cma_cancel_route(id_priv);
                break;
        case CMA_LISTEN:
                if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
                    !id_priv->cma_dev)
                        cma_cancel_listens(id_priv);
                break;
        default:
                break;
        }
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
        struct rdma_bind_list *bind_list = id_priv->bind_list;

        if (!bind_list)
                return;

        mutex_lock(&lock);
        hlist_del(&id_priv->node);
        if (hlist_empty(&bind_list->owners)) {
                idr_remove(bind_list->ps, bind_list->port);
                kfree(bind_list);
        }
        mutex_unlock(&lock);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
        struct cma_multicast *mc;

        while (!list_empty(&id_priv->mc_list)) {
                mc = container_of(id_priv->mc_list.next,
                                  struct cma_multicast, list);
                list_del(&mc->list);
                ib_sa_free_multicast(mc->multicast.ib);
                kfree(mc);
        }
}

void rdma_destroy_id(struct rdma_cm_id *id)
{
        struct rdma_id_private *id_priv;
        enum cma_state state;

        id_priv = container_of(id, struct rdma_id_private, id);
        state = cma_exch(id_priv, CMA_DESTROYING);
        cma_cancel_operation(id_priv, state);

        mutex_lock(&lock);
        if (id_priv->cma_dev) {
                mutex_unlock(&lock);
                switch (rdma_node_get_transport(id->device->node_type)) {
                case RDMA_TRANSPORT_IB:
                        if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
                case RDMA_TRANSPORT_IWARP:
                        if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
                                iw_destroy_cm_id(id_priv->cm_id.iw);
                        break;
                default:
                        break;
                }
                cma_leave_mc_groups(id_priv);
                mutex_lock(&lock);
                cma_detach_from_dev(id_priv);
        }
        mutex_unlock(&lock);

        cma_release_port(id_priv);
        cma_deref_id(id_priv);
        wait_for_completion(&id_priv->comp);

        if (id_priv->internal_id)
                cma_deref_id(id_priv->id.context);

        kfree(id_priv->id.route.path_rec);
        kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
        int ret;

        ret = cma_modify_qp_rtr(id_priv, NULL);
        if (ret)
                goto reject;

        ret = cma_modify_qp_rts(id_priv, NULL);
        if (ret)
                goto reject;

        ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
        if (ret)
                goto reject;

        return 0;
reject:
        cma_modify_qp_err(id_priv);
        ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
                       NULL, 0, NULL, 0);
        return ret;
}

static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
{
        if (id_priv->id.ps == RDMA_PS_SDP &&
            sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
            SDP_MAJ_VERSION)
                return -EINVAL;

        return 0;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
                                   struct ib_cm_rep_event_param *rep_data,
                                   void *private_data)
{
        event->param.conn.private_data = private_data;
        event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
        event->param.conn.responder_resources = rep_data->responder_resources;
        event->param.conn.initiator_depth = rep_data->initiator_depth;
        event->param.conn.flow_control = rep_data->flow_control;
        event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
        event->param.conn.srq = rep_data->srq;
        event->param.conn.qp_num = rep_data->remote_qpn;
}

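/*
 * IB CM callback for connected ids: translate ib_cm events into
 * rdma_cm events and forward them to the user's event handler.
 */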
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv = cm_id->context;
        struct rdma_cm_event event;
        int ret = 0;

        if (cma_disable_callback(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (ib_event->event) {
        case IB_CM_REQ_ERROR:
        case IB_CM_REP_ERROR:
                event.event = RDMA_CM_EVENT_UNREACHABLE;
                event.status = -ETIMEDOUT;
                break;
        case IB_CM_REP_RECEIVED:
                event.status = cma_verify_rep(id_priv, ib_event->private_data);
                if (event.status)
                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
                else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
                        event.status = cma_rep_recv(id_priv);
                        event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
                                                     RDMA_CM_EVENT_ESTABLISHED;
                } else
                        event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
                cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
                                       ib_event->private_data);
                break;
        case IB_CM_RTU_RECEIVED:
        case IB_CM_USER_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        case IB_CM_DREQ_ERROR:
                event.status = -ETIMEDOUT; /* fall through */
        case IB_CM_DREQ_RECEIVED:
        case IB_CM_DREP_RECEIVED:
                if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
                        goto out;
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IB_CM_TIMEWAIT_EXIT:
        case IB_CM_MRA_RECEIVED:
                /* ignore event */
                goto out;
        case IB_CM_REJ_RECEIVED:
                cma_modify_qp_err(id_priv);
                event.status = ib_event->param.rej_rcvd.reason;
                event.event = RDMA_CM_EVENT_REJECTED;
                event.param.conn.private_data = ib_event->private_data;
                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
                break;
        default:
                printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
                       ib_event->event);
                goto out;
        }

        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.ib = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
                mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
out:
        mutex_unlock(&id_priv->handler_mutex);
        return ret;
}

static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
                                               struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        struct rdma_route *rt;
        union cma_ip_addr *src, *dst;
        __be16 port;
        u8 ip_ver;
        int ret;

        if (cma_get_net_info(ib_event->private_data, listen_id->ps,
                             &ip_ver, &port, &src, &dst))
                goto err;

        id = rdma_create_id(listen_id->event_handler, listen_id->context,
                            listen_id->ps);
        if (IS_ERR(id))
                goto err;

        cma_save_net_info(&id->route.addr, &listen_id->route.addr,
                          ip_ver, port, src, dst);

        rt = &id->route;
        rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
        rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
                               GFP_KERNEL);
        if (!rt->path_rec)
                goto destroy_id;

        rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
        if (rt->num_paths == 2)
                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

        ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
        ret = rdma_translate_ip(&id->route.addr.src_addr,
                                &id->route.addr.dev_addr);
        if (ret)
                goto destroy_id;

        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
        return id_priv;

destroy_id:
        rdma_destroy_id(id);
err:
        return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
                                              struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        union cma_ip_addr *src, *dst;
        __be16 port;
        u8 ip_ver;
        int ret;

        id = rdma_create_id(listen_id->event_handler, listen_id->context,
                            listen_id->ps);
        if (IS_ERR(id))
                return NULL;


        if (cma_get_net_info(ib_event->private_data, listen_id->ps,
                             &ip_ver, &port, &src, &dst))
                goto err;

        cma_save_net_info(&id->route.addr, &listen_id->route.addr,
                          ip_ver, port, src, dst);

        ret = rdma_translate_ip(&id->route.addr.src_addr,
                                &id->route.addr.dev_addr);
        if (ret)
                goto err;

        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
        return id_priv;
err:
        rdma_destroy_id(id);
        return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
                                   struct ib_cm_req_event_param *req_data,
                                   void *private_data, int offset)
{
        event->param.conn.private_data = private_data + offset;
        event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
        event->param.conn.responder_resources = req_data->responder_resources;
        event->param.conn.initiator_depth = req_data->initiator_depth;
        event->param.conn.flow_control = req_data->flow_control;
        event->param.conn.retry_count = req_data->retry_count;
        event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
        event->param.conn.srq = req_data->srq;
        event->param.conn.qp_num = req_data->remote_qpn;
}

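/*
 * IB CM callback for listening ids.  Build a new child id from the
 * incoming REQ (or SIDR REQ for UD port spaces), bind it to a device
 * and report RDMA_CM_EVENT_CONNECT_REQUEST to the listener's handler.
 */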
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int offset, ret;

        listen_id = cm_id->context;
        if (cma_disable_callback(listen_id, CMA_LISTEN))
                return -ECONNABORTED;

        memset(&event, 0, sizeof event);
        offset = cma_user_data_offset(listen_id->id.ps);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        if (cma_is_ud_ps(listen_id->id.ps)) {
                conn_id = cma_new_udp_id(&listen_id->id, ib_event);
                event.param.ud.private_data = ib_event->private_data + offset;
                event.param.ud.private_data_len =
                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
        } else {
                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
                                       ib_event->private_data, offset);
        }
        if (!conn_id) {
                ret = -ENOMEM;
                goto out;
        }

        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        mutex_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mutex_unlock(&lock);
        if (ret)
                goto release_conn_id;

        conn_id->cm_id.ib = cm_id;
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_ib_handler;

        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (!ret) {
                /*
                 * Acquire mutex to prevent user executing rdma_destroy_id()
                 * while we're accessing the cm_id.
                 */
                mutex_lock(&lock);
                if (cma_comp(conn_id, CMA_CONNECT) &&
                    !cma_is_ud_ps(conn_id->id.ps))
                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
                mutex_unlock(&lock);
                mutex_unlock(&conn_id->handler_mutex);
                goto out;
        }

        /* Destroy the CM ID by returning a non-zero value. */
        conn_id->cm_id.ib = NULL;

release_conn_id:
        cma_exch(conn_id, CMA_DESTROYING);
        mutex_unlock(&conn_id->handler_mutex);
        rdma_destroy_id(&conn_id->id);

out:
        mutex_unlock(&listen_id->handler_mutex);
        return ret;
}

static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
{
        return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
}

static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
                                 struct ib_cm_compare_data *compare)
{
        struct cma_hdr *cma_data, *cma_mask;
        struct sdp_hh *sdp_data, *sdp_mask;
        __be32 ip4_addr;
        struct in6_addr ip6_addr;

        memset(compare, 0, sizeof *compare);
        cma_data = (void *) compare->data;
        cma_mask = (void *) compare->mask;
        sdp_data = (void *) compare->data;
        sdp_mask = (void *) compare->mask;

        switch (addr->sa_family) {
        case AF_INET:
                ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
                if (ps == RDMA_PS_SDP) {
                        sdp_set_ip_ver(sdp_data, 4);
                        sdp_set_ip_ver(sdp_mask, 0xF);
                        sdp_data->dst_addr.ip4.addr = ip4_addr;
                        sdp_mask->dst_addr.ip4.addr = htonl(~0);
                } else {
                        cma_set_ip_ver(cma_data, 4);
                        cma_set_ip_ver(cma_mask, 0xF);
                        cma_data->dst_addr.ip4.addr = ip4_addr;
                        cma_mask->dst_addr.ip4.addr = htonl(~0);
                }
                break;
        case AF_INET6:
                ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
                if (ps == RDMA_PS_SDP) {
                        sdp_set_ip_ver(sdp_data, 6);
                        sdp_set_ip_ver(sdp_mask, 0xF);
                        sdp_data->dst_addr.ip6 = ip6_addr;
                        memset(&sdp_mask->dst_addr.ip6, 0xFF,
                               sizeof sdp_mask->dst_addr.ip6);
                } else {
                        cma_set_ip_ver(cma_data, 6);
                        cma_set_ip_ver(cma_mask, 0xF);
                        cma_data->dst_addr.ip6 = ip6_addr;
                        memset(&cma_mask->dst_addr.ip6, 0xFF,
                               sizeof cma_mask->dst_addr.ip6);
                }
                break;
        default:
                break;
        }
}

static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
        struct rdma_id_private *id_priv = iw_id->context;
        struct rdma_cm_event event;
        struct sockaddr_in *sin;
        int ret = 0;

        if (cma_disable_callback(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (iw_event->event) {
        case IW_CM_EVENT_CLOSE:
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
                sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
                *sin = iw_event->local_addr;
                sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
                *sin = iw_event->remote_addr;
                switch (iw_event->status) {
                case 0:
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
                        break;
                case -ECONNRESET:
                case -ECONNREFUSED:
                        event.event = RDMA_CM_EVENT_REJECTED;
                        break;
                case -ETIMEDOUT:
                        event.event = RDMA_CM_EVENT_UNREACHABLE;
                        break;
                default:
                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
                        break;
                }
                break;
        case IW_CM_EVENT_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        default:
                BUG_ON(1);
        }

        event.status = iw_event->status;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;
        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.iw = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
                mutex_unlock(&id_priv->handler_mutex);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }

        mutex_unlock(&id_priv->handler_mutex);
        return ret;
}

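/*
 * iWARP connection request handler: create a child id for the incoming
 * connection, resolve the local device from the connection's local
 * address and report RDMA_CM_EVENT_CONNECT_REQUEST.
 */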
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                               struct iw_cm_event *iw_event)
{
        struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
        struct sockaddr_in *sin;
        struct net_device *dev = NULL;
        struct rdma_cm_event event;
        int ret;
        struct ib_device_attr attr;

        listen_id = cm_id->context;
        if (cma_disable_callback(listen_id, CMA_LISTEN))
                return -ECONNABORTED;

        /* Create a new RDMA id for the new IW CM ID */
        new_cm_id = rdma_create_id(listen_id->id.event_handler,
                                   listen_id->id.context,
                                   RDMA_PS_TCP);
        if (IS_ERR(new_cm_id)) {
                ret = -ENOMEM;
                goto out;
        }
        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        conn_id->state = CMA_CONNECT;

        dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
        if (!dev) {
                ret = -EADDRNOTAVAIL;
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
        ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }

        mutex_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mutex_unlock(&lock);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }

        conn_id->cm_id.iw = cm_id;
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_iw_handler;

        sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
        *sin = iw_event->local_addr;
        sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
        *sin = iw_event->remote_addr;

        ret = ib_query_device(conn_id->id.device, &attr);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
                goto out;
        }

        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;
        event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
        event.param.conn.responder_resources = attr.max_qp_rd_atom;
        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (ret) {
                /* User wants to destroy the CM ID */
                conn_id->cm_id.iw = NULL;
                cma_exch(conn_id, CMA_DESTROYING);
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(&conn_id->id);
                goto out;
        }

        mutex_unlock(&conn_id->handler_mutex);

out:
        if (dev)
                dev_put(dev);
        mutex_unlock(&listen_id->handler_mutex);
        return ret;
}

static int cma_ib_listen(struct rdma_id_private *id_priv)
{
        struct ib_cm_compare_data compare_data;
        struct sockaddr *addr;
        __be64 svc_id;
        int ret;

        id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
                                            id_priv);
        if (IS_ERR(id_priv->cm_id.ib))
                return PTR_ERR(id_priv->cm_id.ib);

        addr = &id_priv->id.route.addr.src_addr;
        svc_id = cma_get_service_id(id_priv->id.ps, addr);
        if (cma_any_addr(addr))
                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
        else {
                cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
        }

        if (ret) {
                ib_destroy_cm_id(id_priv->cm_id.ib);
                id_priv->cm_id.ib = NULL;
        }

        return ret;
}

static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
        int ret;
        struct sockaddr_in *sin;

        id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
                                            iw_conn_req_handler,
                                            id_priv);
        if (IS_ERR(id_priv->cm_id.iw))
                return PTR_ERR(id_priv->cm_id.iw);

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
        id_priv->cm_id.iw->local_addr = *sin;

        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);

        if (ret) {
                iw_destroy_cm_id(id_priv->cm_id.iw);
                id_priv->cm_id.iw = NULL;
        }

        return ret;
}

static int cma_listen_handler(struct rdma_cm_id *id,
                              struct rdma_cm_event *event)
{
        struct rdma_id_private *id_priv = id->context;

        id->context = id_priv->id.context;
        id->event_handler = id_priv->id.event_handler;
        return id_priv->id.event_handler(id, event);
}

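/*
 * Mirror a wildcard listen onto a specific device by creating an
 * internal child id bound to that device.  cma_listen_on_all() repeats
 * this for every registered device.
 */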
1420 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1421                               struct cma_device *cma_dev)
1422 {
1423         struct rdma_id_private *dev_id_priv;
1424         struct rdma_cm_id *id;
1425         int ret;
1426
1427         id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1428         if (IS_ERR(id))
1429                 return;
1430
1431         dev_id_priv = container_of(id, struct rdma_id_private, id);
1432
1433         dev_id_priv->state = CMA_ADDR_BOUND;
1434         memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1435                ip_addr_size(&id_priv->id.route.addr.src_addr));
1436
1437         cma_attach_to_dev(dev_id_priv, cma_dev);
1438         list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1439         atomic_inc(&id_priv->refcount);
1440         dev_id_priv->internal_id = 1;
1441
1442         ret = rdma_listen(id, id_priv->backlog);
1443         if (ret)
1444                 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1445                        "listening on device %s\n", ret, cma_dev->device->name);
1446 }
1447
1448 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1449 {
1450         struct cma_device *cma_dev;
1451
1452         mutex_lock(&lock);
1453         list_add_tail(&id_priv->list, &listen_any_list);
1454         list_for_each_entry(cma_dev, &dev_list, list)
1455                 cma_listen_on_dev(id_priv, cma_dev);
1456         mutex_unlock(&lock);
1457 }
1458
1459 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1460 {
1461         struct sockaddr_in addr_in;
1462
1463         memset(&addr_in, 0, sizeof addr_in);
1464         addr_in.sin_family = af;
1465         return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1466 }
1467
1468 int rdma_listen(struct rdma_cm_id *id, int backlog)
1469 {
1470         struct rdma_id_private *id_priv;
1471         int ret;
1472
1473         id_priv = container_of(id, struct rdma_id_private, id);
1474         if (id_priv->state == CMA_IDLE) {
1475                 ret = cma_bind_any(id, AF_INET);
1476                 if (ret)
1477                         return ret;
1478         }
1479
1480         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1481                 return -EINVAL;
1482
1483         id_priv->backlog = backlog;
1484         if (id->device) {
1485                 switch (rdma_node_get_transport(id->device->node_type)) {
1486                 case RDMA_TRANSPORT_IB:
1487                         ret = cma_ib_listen(id_priv);
1488                         if (ret)
1489                                 goto err;
1490                         break;
1491                 case RDMA_TRANSPORT_IWARP:
1492                         ret = cma_iw_listen(id_priv, backlog);
1493                         if (ret)
1494                                 goto err;
1495                         break;
1496                 default:
1497                         ret = -ENOSYS;
1498                         goto err;
1499                 }
1500         } else
1501                 cma_listen_on_all(id_priv);
1502
1503         return 0;
1504 err:
1505         id_priv->backlog = 0;
1506         cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1507         return ret;
1508 }
1509 EXPORT_SYMBOL(rdma_listen);
1510
1511 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1512 {
1513         struct rdma_id_private *id_priv;
1514
1515         id_priv = container_of(id, struct rdma_id_private, id);
1516         id_priv->tos = (u8) tos;
1517 }
1518 EXPORT_SYMBOL(rdma_set_service_type);
1519
1520 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1521                               void *context)
1522 {
1523         struct cma_work *work = context;
1524         struct rdma_route *route;
1525
1526         route = &work->id->id.route;
1527
1528         if (!status) {
1529                 route->num_paths = 1;
1530                 *route->path_rec = *path_rec;
1531         } else {
1532                 work->old_state = CMA_ROUTE_QUERY;
1533                 work->new_state = CMA_ADDR_RESOLVED;
1534                 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1535                 work->event.status = status;
1536         }
1537
1538         queue_work(cma_wq, &work->work);
1539 }
1540
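/*
 * Build an SA path record query from the resolved source/destination GIDs,
 * pkey and service id.  For IPv4 the QoS class is taken from the id's TOS;
 * for IPv6 the traffic class is taken from the upper bits of sin6_flowinfo.
 */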
1541 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1542                               struct cma_work *work)
1543 {
1544         struct rdma_addr *addr = &id_priv->id.route.addr;
1545         struct ib_sa_path_rec path_rec;
1546         ib_sa_comp_mask comp_mask;
1547         struct sockaddr_in6 *sin6;
1548
1549         memset(&path_rec, 0, sizeof path_rec);
1550         ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1551         ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1552         path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1553         path_rec.numb_path = 1;
1554         path_rec.reversible = 1;
1555         path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
1556
1557         comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1558                     IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1559                     IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1560
1561         if (addr->src_addr.sa_family == AF_INET) {
1562                 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1563                 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1564         } else {
1565                 sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1566                 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1567                 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1568         }
1569
1570         id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1571                                                id_priv->id.port_num, &path_rec,
1572                                                comp_mask, timeout_ms,
1573                                                GFP_KERNEL, cma_query_handler,
1574                                                work, &id_priv->query);
1575
1576         return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1577 }
1578
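/*
 * Generic work handler: performs the deferred old_state -> new_state
 * transition, delivers the queued event to the user's callback and, if the
 * callback returns non-zero, destroys the id.  Drops the id reference held
 * for the work.
 */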
1579 static void cma_work_handler(struct work_struct *_work)
1580 {
1581         struct cma_work *work = container_of(_work, struct cma_work, work);
1582         struct rdma_id_private *id_priv = work->id;
1583         int destroy = 0;
1584
1585         mutex_lock(&id_priv->handler_mutex);
1586         if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1587                 goto out;
1588
1589         if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1590                 cma_exch(id_priv, CMA_DESTROYING);
1591                 destroy = 1;
1592         }
1593 out:
1594         mutex_unlock(&id_priv->handler_mutex);
1595         cma_deref_id(id_priv);
1596         if (destroy)
1597                 rdma_destroy_id(&id_priv->id);
1598         kfree(work);
1599 }
1600
1601 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1602 {
1603         struct rdma_route *route = &id_priv->id.route;
1604         struct cma_work *work;
1605         int ret;
1606
1607         work = kzalloc(sizeof *work, GFP_KERNEL);
1608         if (!work)
1609                 return -ENOMEM;
1610
1611         work->id = id_priv;
1612         INIT_WORK(&work->work, cma_work_handler);
1613         work->old_state = CMA_ROUTE_QUERY;
1614         work->new_state = CMA_ROUTE_RESOLVED;
1615         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1616
1617         route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1618         if (!route->path_rec) {
1619                 ret = -ENOMEM;
1620                 goto err1;
1621         }
1622
1623         ret = cma_query_ib_route(id_priv, timeout_ms, work);
1624         if (ret)
1625                 goto err2;
1626
1627         return 0;
1628 err2:
1629         kfree(route->path_rec);
1630         route->path_rec = NULL;
1631 err1:
1632         kfree(work);
1633         return ret;
1634 }
1635
1636 int rdma_set_ib_paths(struct rdma_cm_id *id,
1637                       struct ib_sa_path_rec *path_rec, int num_paths)
1638 {
1639         struct rdma_id_private *id_priv;
1640         int ret;
1641
1642         id_priv = container_of(id, struct rdma_id_private, id);
1643         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1644                 return -EINVAL;
1645
1646         id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1647         if (!id->route.path_rec) {
1648                 ret = -ENOMEM;
1649                 goto err;
1650         }
1651
1652         memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1653         return 0;
1654 err:
1655         cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1656         return ret;
1657 }
1658 EXPORT_SYMBOL(rdma_set_ib_paths);
1659
1660 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1661 {
1662         struct cma_work *work;
1663
1664         work = kzalloc(sizeof *work, GFP_KERNEL);
1665         if (!work)
1666                 return -ENOMEM;
1667
1668         work->id = id_priv;
1669         INIT_WORK(&work->work, cma_work_handler);
1670         work->old_state = CMA_ROUTE_QUERY;
1671         work->new_state = CMA_ROUTE_RESOLVED;
1672         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1673         queue_work(cma_wq, &work->work);
1674         return 0;
1675 }
1676
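/*
 * rdma_resolve_route - resolve the path to the destination address.  For IB
 * this triggers an SA path record query; for iWARP there is nothing to
 * resolve, so the ROUTE_RESOLVED event is simply queued.
 */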
1677 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1678 {
1679         struct rdma_id_private *id_priv;
1680         int ret;
1681
1682         id_priv = container_of(id, struct rdma_id_private, id);
1683         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1684                 return -EINVAL;
1685
1686         atomic_inc(&id_priv->refcount);
1687         switch (rdma_node_get_transport(id->device->node_type)) {
1688         case RDMA_TRANSPORT_IB:
1689                 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1690                 break;
1691         case RDMA_TRANSPORT_IWARP:
1692                 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1693                 break;
1694         default:
1695                 ret = -ENOSYS;
1696                 break;
1697         }
1698         if (ret)
1699                 goto err;
1700
1701         return 0;
1702 err:
1703         cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1704         cma_deref_id(id_priv);
1705         return ret;
1706 }
1707 EXPORT_SYMBOL(rdma_resolve_route);
1708
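/*
 * Bind an unbound id to a local port for loopback/wildcard resolution: pick
 * the first active port on any registered device (falling back to port 1 of
 * the first device) and take that port's GID and pkey as the source.
 */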
1709 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1710 {
1711         struct cma_device *cma_dev;
1712         struct ib_port_attr port_attr;
1713         union ib_gid gid;
1714         u16 pkey;
1715         int ret;
1716         u8 p;
1717
1718         mutex_lock(&lock);
1719         if (list_empty(&dev_list)) {
1720                 ret = -ENODEV;
1721                 goto out;
1722         }
1723         list_for_each_entry(cma_dev, &dev_list, list)
1724                 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1725                         if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1726                             port_attr.state == IB_PORT_ACTIVE)
1727                                 goto port_found;
1728
1729         p = 1;
1730         cma_dev = list_entry(dev_list.next, struct cma_device, list);
1731
1732 port_found:
1733         ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1734         if (ret)
1735                 goto out;
1736
1737         ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1738         if (ret)
1739                 goto out;
1740
1741         ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1742         ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1743         id_priv->id.port_num = p;
1744         cma_attach_to_dev(id_priv, cma_dev);
1745 out:
1746         mutex_unlock(&lock);
1747         return ret;
1748 }
1749
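/*
 * Completion callback for rdma_resolve_ip().  On success the id is attached
 * to a matching device if it isn't already (under the global lock, so device
 * removal cannot race with the lookup) and RDMA_CM_EVENT_ADDR_RESOLVED is
 * reported; on failure the id returns to CMA_ADDR_BOUND and
 * RDMA_CM_EVENT_ADDR_ERROR is reported.
 */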
1750 static void addr_handler(int status, struct sockaddr *src_addr,
1751                          struct rdma_dev_addr *dev_addr, void *context)
1752 {
1753         struct rdma_id_private *id_priv = context;
1754         struct rdma_cm_event event;
1755
1756         memset(&event, 0, sizeof event);
1757         mutex_lock(&id_priv->handler_mutex);
1758
1759         /*
1760          * Grab mutex to block rdma_destroy_id() from removing the device while
1761          * we're trying to acquire it.
1762          */
1763         mutex_lock(&lock);
1764         if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1765                 mutex_unlock(&lock);
1766                 goto out;
1767         }
1768
1769         if (!status && !id_priv->cma_dev)
1770                 status = cma_acquire_dev(id_priv);
1771         mutex_unlock(&lock);
1772
1773         if (status) {
1774                 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1775                         goto out;
1776                 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1777                 event.status = status;
1778         } else {
1779                 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1780                        ip_addr_size(src_addr));
1781                 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1782         }
1783
1784         if (id_priv->id.event_handler(&id_priv->id, &event)) {
1785                 cma_exch(id_priv, CMA_DESTROYING);
1786                 mutex_unlock(&id_priv->handler_mutex);
1787                 cma_deref_id(id_priv);
1788                 rdma_destroy_id(&id_priv->id);
1789                 return;
1790         }
1791 out:
1792         mutex_unlock(&id_priv->handler_mutex);
1793         cma_deref_id(id_priv);
1794 }
1795
1796 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1797 {
1798         struct cma_work *work;
1799         struct sockaddr_in *src_in, *dst_in;
1800         union ib_gid gid;
1801         int ret;
1802
1803         work = kzalloc(sizeof *work, GFP_KERNEL);
1804         if (!work)
1805                 return -ENOMEM;
1806
1807         if (!id_priv->cma_dev) {
1808                 ret = cma_bind_loopback(id_priv);
1809                 if (ret)
1810                         goto err;
1811         }
1812
1813         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1814         ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1815
1816         if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1817                 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1818                 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1819                 src_in->sin_family = dst_in->sin_family;
1820                 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1821         }
1822
1823         work->id = id_priv;
1824         INIT_WORK(&work->work, cma_work_handler);
1825         work->old_state = CMA_ADDR_QUERY;
1826         work->new_state = CMA_ADDR_RESOLVED;
1827         work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1828         queue_work(cma_wq, &work->work);
1829         return 0;
1830 err:
1831         kfree(work);
1832         return ret;
1833 }
1834
1835 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1836                          struct sockaddr *dst_addr)
1837 {
1838         if (src_addr && src_addr->sa_family)
1839                 return rdma_bind_addr(id, src_addr);
1840         else
1841                 return cma_bind_any(id, dst_addr->sa_family);
1842 }
1843
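/*
 * rdma_resolve_addr - map the destination IP address to an RDMA address.  An
 * idle id is bound first (to src_addr if given, otherwise to the wildcard
 * address of dst_addr's family).  A wildcard destination is resolved locally
 * through cma_resolve_loopback(); anything else goes through
 * rdma_resolve_ip().
 */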
1844 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1845                       struct sockaddr *dst_addr, int timeout_ms)
1846 {
1847         struct rdma_id_private *id_priv;
1848         int ret;
1849
1850         id_priv = container_of(id, struct rdma_id_private, id);
1851         if (id_priv->state == CMA_IDLE) {
1852                 ret = cma_bind_addr(id, src_addr, dst_addr);
1853                 if (ret)
1854                         return ret;
1855         }
1856
1857         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1858                 return -EINVAL;
1859
1860         atomic_inc(&id_priv->refcount);
1861         memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1862         if (cma_any_addr(dst_addr))
1863                 ret = cma_resolve_loopback(id_priv);
1864         else
1865                 ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1866                                       dst_addr, &id->route.addr.dev_addr,
1867                                       timeout_ms, addr_handler, id_priv);
1868         if (ret)
1869                 goto err;
1870
1871         return 0;
1872 err:
1873         cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1874         cma_deref_id(id_priv);
1875         return ret;
1876 }
1877 EXPORT_SYMBOL(rdma_resolve_addr);
1878
1879 static void cma_bind_port(struct rdma_bind_list *bind_list,
1880                           struct rdma_id_private *id_priv)
1881 {
1882         struct sockaddr_in *sin;
1883
1884         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1885         sin->sin_port = htons(bind_list->port);
1886         id_priv->bind_list = bind_list;
1887         hlist_add_head(&id_priv->node, &bind_list->owners);
1888 }
1889
1890 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1891                           unsigned short snum)
1892 {
1893         struct rdma_bind_list *bind_list;
1894         int port, ret;
1895
1896         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1897         if (!bind_list)
1898                 return -ENOMEM;
1899
1900         do {
1901                 ret = idr_get_new_above(ps, bind_list, snum, &port);
1902         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1903
1904         if (ret)
1905                 goto err1;
1906
1907         if (port != snum) {
1908                 ret = -EADDRNOTAVAIL;
1909                 goto err2;
1910         }
1911
1912         bind_list->ps = ps;
1913         bind_list->port = (unsigned short) port;
1914         cma_bind_port(bind_list, id_priv);
1915         return 0;
1916 err2:
1917         idr_remove(ps, port);
1918 err1:
1919         kfree(bind_list);
1920         return ret;
1921 }
1922
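/*
 * Allocate an ephemeral port, starting the search at next_port and wrapping
 * around to the bottom of the local port range once before giving up.
 */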
1923 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
1924 {
1925         struct rdma_bind_list *bind_list;
1926         int port, ret, low, high;
1927
1928         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1929         if (!bind_list)
1930                 return -ENOMEM;
1931
1932 retry:
        /* FIXME: add proper port randomization, as inet_csk_get_port() does */
1934         do {
1935                 ret = idr_get_new_above(ps, bind_list, next_port, &port);
1936         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1937
1938         if (ret)
1939                 goto err1;
1940
1941         inet_get_local_port_range(&low, &high);
1942         if (port > high) {
1943                 if (next_port != low) {
1944                         idr_remove(ps, port);
1945                         next_port = low;
1946                         goto retry;
1947                 }
1948                 ret = -EADDRNOTAVAIL;
1949                 goto err2;
1950         }
1951
1952         if (port == high)
1953                 next_port = low;
1954         else
1955                 next_port = port + 1;
1956
1957         bind_list->ps = ps;
1958         bind_list->port = (unsigned short) port;
1959         cma_bind_port(bind_list, id_priv);
1960         return 0;
1961 err2:
1962         idr_remove(ps, port);
1963 err1:
1964         kfree(bind_list);
1965         return ret;
1966 }
1967
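/*
 * Bind to an explicitly requested port.  Privileged ports require
 * CAP_NET_BIND_SERVICE.  Sharing a port is allowed only between ids bound to
 * different specific addresses; any wildcard binding on the port conflicts.
 */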
1968 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1969 {
1970         struct rdma_id_private *cur_id;
1971         struct sockaddr_in *sin, *cur_sin;
1972         struct rdma_bind_list *bind_list;
1973         struct hlist_node *node;
1974         unsigned short snum;
1975
1976         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1977         snum = ntohs(sin->sin_port);
1978         if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1979                 return -EACCES;
1980
1981         bind_list = idr_find(ps, snum);
1982         if (!bind_list)
1983                 return cma_alloc_port(ps, id_priv, snum);
1984
1985         /*
1986          * We don't support binding to any address if anyone is bound to
1987          * a specific address on the same port.
1988          */
1989         if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1990                 return -EADDRNOTAVAIL;
1991
1992         hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1993                 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1994                         return -EADDRNOTAVAIL;
1995
1996                 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1997                 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1998                         return -EADDRINUSE;
1999         }
2000
2001         cma_bind_port(bind_list, id_priv);
2002         return 0;
2003 }
2004
2005 static int cma_get_port(struct rdma_id_private *id_priv)
2006 {
2007         struct idr *ps;
2008         int ret;
2009
2010         switch (id_priv->id.ps) {
2011         case RDMA_PS_SDP:
2012                 ps = &sdp_ps;
2013                 break;
2014         case RDMA_PS_TCP:
2015                 ps = &tcp_ps;
2016                 break;
2017         case RDMA_PS_UDP:
2018                 ps = &udp_ps;
2019                 break;
2020         case RDMA_PS_IPOIB:
2021                 ps = &ipoib_ps;
2022                 break;
2023         default:
2024                 return -EPROTONOSUPPORT;
2025         }
2026
2027         mutex_lock(&lock);
2028         if (cma_any_port(&id_priv->id.route.addr.src_addr))
2029                 ret = cma_alloc_any_port(ps, id_priv);
2030         else
2031                 ret = cma_use_port(ps, id_priv);
2032         mutex_unlock(&lock);
2033
2034         return ret;
2035 }
2036
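/*
 * rdma_bind_addr - bind the id to a local IPv4 address and port.  A
 * non-wildcard address is translated to its RDMA device address and the id
 * is attached to the owning device before a port is reserved.
 */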
2037 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2038 {
2039         struct rdma_id_private *id_priv;
2040         int ret;
2041
2042         if (addr->sa_family != AF_INET)
2043                 return -EAFNOSUPPORT;
2044
2045         id_priv = container_of(id, struct rdma_id_private, id);
2046         if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2047                 return -EINVAL;
2048
2049         if (!cma_any_addr(addr)) {
2050                 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2051                 if (ret)
2052                         goto err1;
2053
2054                 mutex_lock(&lock);
2055                 ret = cma_acquire_dev(id_priv);
2056                 mutex_unlock(&lock);
2057                 if (ret)
2058                         goto err1;
2059         }
2060
2061         memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2062         ret = cma_get_port(id_priv);
2063         if (ret)
2064                 goto err2;
2065
2066         return 0;
2067 err2:
2068         if (!cma_any_addr(addr)) {
2069                 mutex_lock(&lock);
2070                 cma_detach_from_dev(id_priv);
2071                 mutex_unlock(&lock);
2072         }
2073 err1:
2074         cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2075         return ret;
2076 }
2077 EXPORT_SYMBOL(rdma_bind_addr);
2078
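/*
 * Fill in the private-data header carried in the CM request.  For RDMA_PS_SDP
 * the caller-supplied SDP hello header (whose version must already be set) is
 * updated in place; all other port spaces get a generic CMA header.  Only
 * IPv4 addresses are filled in.
 */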
2079 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2080                           struct rdma_route *route)
2081 {
2082         struct sockaddr_in *src4, *dst4;
2083         struct cma_hdr *cma_hdr;
2084         struct sdp_hh *sdp_hdr;
2085
2086         src4 = (struct sockaddr_in *) &route->addr.src_addr;
2087         dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2088
2089         switch (ps) {
2090         case RDMA_PS_SDP:
2091                 sdp_hdr = hdr;
2092                 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2093                         return -EINVAL;
2094                 sdp_set_ip_ver(sdp_hdr, 4);
2095                 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2096                 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2097                 sdp_hdr->port = src4->sin_port;
2098                 break;
2099         default:
2100                 cma_hdr = hdr;
2101                 cma_hdr->cma_version = CMA_VERSION;
2102                 cma_set_ip_ver(cma_hdr, 4);
2103                 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2104                 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2105                 cma_hdr->port = src4->sin_port;
2106                 break;
2107         }
2108         return 0;
2109 }
2110
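/*
 * IB CM callback used for UD (SIDR) resolution.  A request error, a rejected
 * reply, or a qkey mismatch is reported as RDMA_CM_EVENT_UNREACHABLE; a good
 * reply yields an address handle, QPN and qkey, and the id is reported
 * ESTABLISHED.
 */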
2111 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2112                                 struct ib_cm_event *ib_event)
2113 {
2114         struct rdma_id_private *id_priv = cm_id->context;
2115         struct rdma_cm_event event;
2116         struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2117         int ret = 0;
2118
2119         if (cma_disable_callback(id_priv, CMA_CONNECT))
2120                 return 0;
2121
2122         memset(&event, 0, sizeof event);
2123         switch (ib_event->event) {
2124         case IB_CM_SIDR_REQ_ERROR:
2125                 event.event = RDMA_CM_EVENT_UNREACHABLE;
2126                 event.status = -ETIMEDOUT;
2127                 break;
2128         case IB_CM_SIDR_REP_RECEIVED:
2129                 event.param.ud.private_data = ib_event->private_data;
2130                 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2131                 if (rep->status != IB_SIDR_SUCCESS) {
2132                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2133                         event.status = ib_event->param.sidr_rep_rcvd.status;
2134                         break;
2135                 }
2136                 if (id_priv->qkey != rep->qkey) {
2137                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2138                         event.status = -EINVAL;
2139                         break;
2140                 }
2141                 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2142                                      id_priv->id.route.path_rec,
2143                                      &event.param.ud.ah_attr);
2144                 event.param.ud.qp_num = rep->qpn;
2145                 event.param.ud.qkey = rep->qkey;
2146                 event.event = RDMA_CM_EVENT_ESTABLISHED;
2147                 event.status = 0;
2148                 break;
2149         default:
2150                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
2151                        ib_event->event);
2152                 goto out;
2153         }
2154
2155         ret = id_priv->id.event_handler(&id_priv->id, &event);
2156         if (ret) {
2157                 /* Destroy the CM ID by returning a non-zero value. */
2158                 id_priv->cm_id.ib = NULL;
2159                 cma_exch(id_priv, CMA_DESTROYING);
2160                 mutex_unlock(&id_priv->handler_mutex);
2161                 rdma_destroy_id(&id_priv->id);
2162                 return ret;
2163         }
2164 out:
2165         mutex_unlock(&id_priv->handler_mutex);
2166         return ret;
2167 }
2168
2169 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2170                               struct rdma_conn_param *conn_param)
2171 {
2172         struct ib_cm_sidr_req_param req;
2173         struct rdma_route *route;
2174         int ret;
2175
        memset(&req, 0, sizeof req);
        req.private_data_len = sizeof(struct cma_hdr) +
                               conn_param->private_data_len;
2178         req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2179         if (!req.private_data)
2180                 return -ENOMEM;
2181
2182         if (conn_param->private_data && conn_param->private_data_len)
2183                 memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2184                        conn_param->private_data, conn_param->private_data_len);
2185
2186         route = &id_priv->id.route;
2187         ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2188         if (ret)
2189                 goto out;
2190
2191         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2192                                             cma_sidr_rep_handler, id_priv);
2193         if (IS_ERR(id_priv->cm_id.ib)) {
2194                 ret = PTR_ERR(id_priv->cm_id.ib);
2195                 goto out;
2196         }
2197
2198         req.path = route->path_rec;
2199         req.service_id = cma_get_service_id(id_priv->id.ps,
2200                                             &route->addr.dst_addr);
2201         req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2202         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2203
2204         ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2205         if (ret) {
2206                 ib_destroy_cm_id(id_priv->cm_id.ib);
2207                 id_priv->cm_id.ib = NULL;
2208         }
2209 out:
2210         kfree(req.private_data);
2211         return ret;
2212 }
2213
2214 static int cma_connect_ib(struct rdma_id_private *id_priv,
2215                           struct rdma_conn_param *conn_param)
2216 {
2217         struct ib_cm_req_param req;
2218         struct rdma_route *route;
2219         void *private_data;
2220         int offset, ret;
2221
2222         memset(&req, 0, sizeof req);
2223         offset = cma_user_data_offset(id_priv->id.ps);
2224         req.private_data_len = offset + conn_param->private_data_len;
2225         private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2226         if (!private_data)
2227                 return -ENOMEM;
2228
2229         if (conn_param->private_data && conn_param->private_data_len)
2230                 memcpy(private_data + offset, conn_param->private_data,
2231                        conn_param->private_data_len);
2232
2233         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2234                                             id_priv);
2235         if (IS_ERR(id_priv->cm_id.ib)) {
2236                 ret = PTR_ERR(id_priv->cm_id.ib);
2237                 goto out;
2238         }
2239
2240         route = &id_priv->id.route;
2241         ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2242         if (ret)
2243                 goto out;
2244         req.private_data = private_data;
2245
2246         req.primary_path = &route->path_rec[0];
2247         if (route->num_paths == 2)
2248                 req.alternate_path = &route->path_rec[1];
2249
2250         req.service_id = cma_get_service_id(id_priv->id.ps,
2251                                             &route->addr.dst_addr);
2252         req.qp_num = id_priv->qp_num;
2253         req.qp_type = IB_QPT_RC;
2254         req.starting_psn = id_priv->seq_num;
2255         req.responder_resources = conn_param->responder_resources;
2256         req.initiator_depth = conn_param->initiator_depth;
2257         req.flow_control = conn_param->flow_control;
2258         req.retry_count = conn_param->retry_count;
2259         req.rnr_retry_count = conn_param->rnr_retry_count;
2260         req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2261         req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2262         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2263         req.srq = id_priv->srq ? 1 : 0;
2264
2265         ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2266 out:
2267         if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2268                 ib_destroy_cm_id(id_priv->cm_id.ib);
2269                 id_priv->cm_id.ib = NULL;
2270         }
2271
2272         kfree(private_data);
2273         return ret;
2274 }
2275
2276 static int cma_connect_iw(struct rdma_id_private *id_priv,
2277                           struct rdma_conn_param *conn_param)
2278 {
2279         struct iw_cm_id *cm_id;
2280         struct sockaddr_in* sin;
2281         int ret;
2282         struct iw_cm_conn_param iw_param;
2283
2284         cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2285         if (IS_ERR(cm_id)) {
2286                 ret = PTR_ERR(cm_id);
2287                 goto out;
2288         }
2289
2290         id_priv->cm_id.iw = cm_id;
2291
2292         sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2293         cm_id->local_addr = *sin;
2294
2295         sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2296         cm_id->remote_addr = *sin;
2297
2298         ret = cma_modify_qp_rtr(id_priv, conn_param);
2299         if (ret)
2300                 goto out;
2301
2302         iw_param.ord = conn_param->initiator_depth;
2303         iw_param.ird = conn_param->responder_resources;
2304         iw_param.private_data = conn_param->private_data;
2305         iw_param.private_data_len = conn_param->private_data_len;
2306         if (id_priv->id.qp)
2307                 iw_param.qpn = id_priv->qp_num;
2308         else
2309                 iw_param.qpn = conn_param->qp_num;
2310         ret = iw_cm_connect(cm_id, &iw_param);
2311 out:
2312         if (ret && !IS_ERR(cm_id)) {
2313                 iw_destroy_cm_id(cm_id);
2314                 id_priv->cm_id.iw = NULL;
2315         }
2316         return ret;
2317 }
2318
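/*
 * rdma_connect - initiate an active connection request.  The id must have a
 * resolved route.  UD port spaces over IB use SIDR resolution, other IB port
 * spaces send a full CM REQ, and iWARP goes through the iw_cm.
 */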
2319 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2320 {
2321         struct rdma_id_private *id_priv;
2322         int ret;
2323
2324         id_priv = container_of(id, struct rdma_id_private, id);
2325         if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2326                 return -EINVAL;
2327
2328         if (!id->qp) {
2329                 id_priv->qp_num = conn_param->qp_num;
2330                 id_priv->srq = conn_param->srq;
2331         }
2332
2333         switch (rdma_node_get_transport(id->device->node_type)) {
2334         case RDMA_TRANSPORT_IB:
2335                 if (cma_is_ud_ps(id->ps))
2336                         ret = cma_resolve_ib_udp(id_priv, conn_param);
2337                 else
2338                         ret = cma_connect_ib(id_priv, conn_param);
2339                 break;
2340         case RDMA_TRANSPORT_IWARP:
2341                 ret = cma_connect_iw(id_priv, conn_param);
2342                 break;
2343         default:
2344                 ret = -ENOSYS;
2345                 break;
2346         }
2347         if (ret)
2348                 goto err;
2349
2350         return 0;
2351 err:
2352         cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2353         return ret;
2354 }
2355 EXPORT_SYMBOL(rdma_connect);
2356
2357 static int cma_accept_ib(struct rdma_id_private *id_priv,
2358                          struct rdma_conn_param *conn_param)
2359 {
2360         struct ib_cm_rep_param rep;
2361         int ret;
2362
2363         ret = cma_modify_qp_rtr(id_priv, conn_param);
2364         if (ret)
2365                 goto out;
2366
2367         ret = cma_modify_qp_rts(id_priv, conn_param);
2368         if (ret)
2369                 goto out;
2370
2371         memset(&rep, 0, sizeof rep);
2372         rep.qp_num = id_priv->qp_num;
2373         rep.starting_psn = id_priv->seq_num;
2374         rep.private_data = conn_param->private_data;
2375         rep.private_data_len = conn_param->private_data_len;
2376         rep.responder_resources = conn_param->responder_resources;
2377         rep.initiator_depth = conn_param->initiator_depth;
2378         rep.failover_accepted = 0;
2379         rep.flow_control = conn_param->flow_control;
2380         rep.rnr_retry_count = conn_param->rnr_retry_count;
2381         rep.srq = id_priv->srq ? 1 : 0;
2382
2383         ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2384 out:
2385         return ret;
2386 }
2387
2388 static int cma_accept_iw(struct rdma_id_private *id_priv,
2389                   struct rdma_conn_param *conn_param)
2390 {
2391         struct iw_cm_conn_param iw_param;
2392         int ret;
2393
2394         ret = cma_modify_qp_rtr(id_priv, conn_param);
2395         if (ret)
2396                 return ret;
2397
2398         iw_param.ord = conn_param->initiator_depth;
2399         iw_param.ird = conn_param->responder_resources;
2400         iw_param.private_data = conn_param->private_data;
2401         iw_param.private_data_len = conn_param->private_data_len;
2402         if (id_priv->id.qp) {
2403                 iw_param.qpn = id_priv->qp_num;
2404         } else
2405                 iw_param.qpn = conn_param->qp_num;
2406
2407         return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2408 }
2409
2410 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2411                              enum ib_cm_sidr_status status,
2412                              const void *private_data, int private_data_len)
2413 {
2414         struct ib_cm_sidr_rep_param rep;
2415
2416         memset(&rep, 0, sizeof rep);
2417         rep.status = status;
2418         if (status == IB_SIDR_SUCCESS) {
2419                 rep.qp_num = id_priv->qp_num;
2420                 rep.qkey = id_priv->qkey;
2421         }
2422         rep.private_data = private_data;
2423         rep.private_data_len = private_data_len;
2424
2425         return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2426 }
2427
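/*
 * rdma_accept - accept a connection on the passive side.  UD port spaces
 * answer with a SIDR reply; IB connections are accepted through
 * cma_accept_ib() when conn_param is supplied and through cma_rep_recv()
 * otherwise; iWARP uses iw_cm_accept().  On any failure the QP is moved to
 * the error state and the connection is rejected.
 */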
2428 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2429 {
2430         struct rdma_id_private *id_priv;
2431         int ret;
2432
2433         id_priv = container_of(id, struct rdma_id_private, id);
2434         if (!cma_comp(id_priv, CMA_CONNECT))
2435                 return -EINVAL;
2436
2437         if (!id->qp && conn_param) {
2438                 id_priv->qp_num = conn_param->qp_num;
2439                 id_priv->srq = conn_param->srq;
2440         }
2441
2442         switch (rdma_node_get_transport(id->device->node_type)) {
2443         case RDMA_TRANSPORT_IB:
                if (cma_is_ud_ps(id->ps))
                        ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
                                conn_param ? conn_param->private_data : NULL,
                                conn_param ? conn_param->private_data_len : 0);
2448                 else if (conn_param)
2449                         ret = cma_accept_ib(id_priv, conn_param);
2450                 else
2451                         ret = cma_rep_recv(id_priv);
2452                 break;
2453         case RDMA_TRANSPORT_IWARP:
2454                 ret = cma_accept_iw(id_priv, conn_param);
2455                 break;
2456         default:
2457                 ret = -ENOSYS;
2458                 break;
2459         }
2460
2461         if (ret)
2462                 goto reject;
2463
2464         return 0;
2465 reject:
2466         cma_modify_qp_err(id_priv);
2467         rdma_reject(id, NULL, 0);
2468         return ret;
2469 }
2470 EXPORT_SYMBOL(rdma_accept);
2471
2472 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2473 {
2474         struct rdma_id_private *id_priv;
2475         int ret;
2476
2477         id_priv = container_of(id, struct rdma_id_private, id);
2478         if (!cma_has_cm_dev(id_priv))
2479                 return -EINVAL;
2480
2481         switch (id->device->node_type) {
2482         case RDMA_NODE_IB_CA:
2483                 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2484                 break;
2485         default:
2486                 ret = 0;
2487                 break;
2488         }
2489         return ret;
2490 }
2491 EXPORT_SYMBOL(rdma_notify);
2492
2493 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2494                 u8 private_data_len)
2495 {
2496         struct rdma_id_private *id_priv;
2497         int ret;
2498
2499         id_priv = container_of(id, struct rdma_id_private, id);
2500         if (!cma_has_cm_dev(id_priv))
2501                 return -EINVAL;
2502
2503         switch (rdma_node_get_transport(id->device->node_type)) {
2504         case RDMA_TRANSPORT_IB:
2505                 if (cma_is_ud_ps(id->ps))
2506                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2507                                                 private_data, private_data_len);
2508                 else
2509                         ret = ib_send_cm_rej(id_priv->cm_id.ib,
2510                                              IB_CM_REJ_CONSUMER_DEFINED, NULL,
2511                                              0, private_data, private_data_len);
2512                 break;
2513         case RDMA_TRANSPORT_IWARP:
2514                 ret = iw_cm_reject(id_priv->cm_id.iw,
2515                                    private_data, private_data_len);
2516                 break;
2517         default:
2518                 ret = -ENOSYS;
2519                 break;
2520         }
2521         return ret;
2522 }
2523 EXPORT_SYMBOL(rdma_reject);
2524
2525 int rdma_disconnect(struct rdma_cm_id *id)
2526 {
2527         struct rdma_id_private *id_priv;
2528         int ret;
2529
2530         id_priv = container_of(id, struct rdma_id_private, id);
2531         if (!cma_has_cm_dev(id_priv))
2532                 return -EINVAL;
2533
2534         switch (rdma_node_get_transport(id->device->node_type)) {
2535         case RDMA_TRANSPORT_IB:
2536                 ret = cma_modify_qp_err(id_priv);
2537                 if (ret)
2538                         goto out;
2539                 /* Initiate or respond to a disconnect. */
2540                 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2541                         ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2542                 break;
2543         case RDMA_TRANSPORT_IWARP:
2544                 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2545                 break;
2546         default:
2547                 ret = -EINVAL;
2548                 break;
2549         }
2550 out:
2551         return ret;
2552 }
2553 EXPORT_SYMBOL(rdma_disconnect);
2554
2555 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2556 {
2557         struct rdma_id_private *id_priv;
2558         struct cma_multicast *mc = multicast->context;
2559         struct rdma_cm_event event;
2560         int ret;
2561
2562         id_priv = mc->id_priv;
2563         if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) &&
2564             cma_disable_callback(id_priv, CMA_ADDR_RESOLVED))
2565                 return 0;
2566
2567         mutex_lock(&id_priv->qp_mutex);
2568         if (!status && id_priv->id.qp)
2569                 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2570                                          multicast->rec.mlid);
2571         mutex_unlock(&id_priv->qp_mutex);
2572
2573         memset(&event, 0, sizeof event);
2574         event.status = status;
2575         event.param.ud.private_data = mc->context;
2576         if (!status) {
2577                 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2578                 ib_init_ah_from_mcmember(id_priv->id.device,
2579                                          id_priv->id.port_num, &multicast->rec,
2580                                          &event.param.ud.ah_attr);
2581                 event.param.ud.qp_num = 0xFFFFFF;
2582                 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2583         } else
2584                 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2585
2586         ret = id_priv->id.event_handler(&id_priv->id, &event);
2587         if (ret) {
2588                 cma_exch(id_priv, CMA_DESTROYING);
2589                 mutex_unlock(&id_priv->handler_mutex);
2590                 rdma_destroy_id(&id_priv->id);
2591                 return 0;
2592         }
2593
2594         mutex_unlock(&id_priv->handler_mutex);
2595         return 0;
2596 }
2597
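/*
 * Derive the IB multicast GID from the requested address: a wildcard address
 * maps to the zero MGID, an IPv6 address that already looks like an
 * SA-assigned MGID is used directly, and IPv4 addresses are mapped with
 * ip_ib_mc_map() (with the RDMA CM signature byte for RDMA_PS_UDP).
 */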
2598 static void cma_set_mgid(struct rdma_id_private *id_priv,
2599                          struct sockaddr *addr, union ib_gid *mgid)
2600 {
2601         unsigned char mc_map[MAX_ADDR_LEN];
2602         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2603         struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2604         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2605
2606         if (cma_any_addr(addr)) {
2607                 memset(mgid, 0, sizeof *mgid);
2608         } else if ((addr->sa_family == AF_INET6) &&
2609                    ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2610                                                                  0xFF10A01B)) {
2611                 /* IPv6 address is an SA assigned MGID. */
2612                 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2613         } else {
2614                 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2615                 if (id_priv->id.ps == RDMA_PS_UDP)
2616                         mc_map[7] = 0x01;       /* Use RDMA CM signature */
2617                 *mgid = *(union ib_gid *) (mc_map + 4);
2618         }
2619 }
2620
2621 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2622                                  struct cma_multicast *mc)
2623 {
2624         struct ib_sa_mcmember_rec rec;
2625         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2626         ib_sa_comp_mask comp_mask;
2627         int ret;
2628
2629         ib_addr_get_mgid(dev_addr, &rec.mgid);
2630         ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2631                                      &rec.mgid, &rec);
2632         if (ret)
2633                 return ret;
2634
2635         cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2636         if (id_priv->id.ps == RDMA_PS_UDP)
2637                 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2638         ib_addr_get_sgid(dev_addr, &rec.port_gid);
2639         rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2640         rec.join_state = 1;
2641
2642         comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2643                     IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2644                     IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2645                     IB_SA_MCMEMBER_REC_FLOW_LABEL |
2646                     IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2647
2648         mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2649                                                 id_priv->id.port_num, &rec,
2650                                                 comp_mask, GFP_KERNEL,
2651                                                 cma_ib_mc_handler, mc);
2652         if (IS_ERR(mc->multicast.ib))
2653                 return PTR_ERR(mc->multicast.ib);
2654
2655         return 0;
2656 }
2657
2658 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2659                         void *context)
2660 {
2661         struct rdma_id_private *id_priv;
2662         struct cma_multicast *mc;
2663         int ret;
2664
2665         id_priv = container_of(id, struct rdma_id_private, id);
2666         if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2667             !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2668                 return -EINVAL;
2669
2670         mc = kmalloc(sizeof *mc, GFP_KERNEL);
2671         if (!mc)
2672                 return -ENOMEM;
2673
2674         memcpy(&mc->addr, addr, ip_addr_size(addr));
2675         mc->context = context;
2676         mc->id_priv = id_priv;
2677
2678         spin_lock(&id_priv->lock);
2679         list_add(&mc->list, &id_priv->mc_list);
2680         spin_unlock(&id_priv->lock);
2681
2682         switch (rdma_node_get_transport(id->device->node_type)) {
2683         case RDMA_TRANSPORT_IB:
2684                 ret = cma_join_ib_multicast(id_priv, mc);
2685                 break;
2686         default:
2687                 ret = -ENOSYS;
2688                 break;
2689         }
2690
2691         if (ret) {
2692                 spin_lock_irq(&id_priv->lock);
2693                 list_del(&mc->list);
2694                 spin_unlock_irq(&id_priv->lock);
2695                 kfree(mc);
2696         }
2697         return ret;
2698 }
2699 EXPORT_SYMBOL(rdma_join_multicast);
2700
2701 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2702 {
2703         struct rdma_id_private *id_priv;
2704         struct cma_multicast *mc;
2705
2706         id_priv = container_of(id, struct rdma_id_private, id);
2707         spin_lock_irq(&id_priv->lock);
2708         list_for_each_entry(mc, &id_priv->mc_list, list) {
2709                 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2710                         list_del(&mc->list);
2711                         spin_unlock_irq(&id_priv->lock);
2712
2713                         if (id->qp)
2714                                 ib_detach_mcast(id->qp,
2715                                                 &mc->multicast.ib->rec.mgid,
2716                                                 mc->multicast.ib->rec.mlid);
2717                         ib_sa_free_multicast(mc->multicast.ib);
2718                         kfree(mc);
2719                         return;
2720                 }
2721         }
2722         spin_unlock_irq(&id_priv->lock);
2723 }
2724 EXPORT_SYMBOL(rdma_leave_multicast);
2725
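/*
 * ib_client add callback: track the new device on dev_list and replicate any
 * existing wildcard listens onto it.
 */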
2726 static void cma_add_one(struct ib_device *device)
2727 {
2728         struct cma_device *cma_dev;
2729         struct rdma_id_private *id_priv;
2730
2731         cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2732         if (!cma_dev)
2733                 return;
2734
2735         cma_dev->device = device;
2736
2737         init_completion(&cma_dev->comp);
2738         atomic_set(&cma_dev->refcount, 1);
2739         INIT_LIST_HEAD(&cma_dev->id_list);
2740         ib_set_client_data(device, &cma_client, cma_dev);
2741
2742         mutex_lock(&lock);
2743         list_add_tail(&cma_dev->list, &dev_list);
2744         list_for_each_entry(id_priv, &listen_any_list, list)
2745                 cma_listen_on_dev(id_priv, cma_dev);
2746         mutex_unlock(&lock);
2747 }
2748
2749 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2750 {
2751         struct rdma_cm_event event;
2752         enum cma_state state;
2753         int ret = 0;
2754
2755         /* Record that we want to remove the device */
2756         state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2757         if (state == CMA_DESTROYING)
2758                 return 0;
2759
2760         cma_cancel_operation(id_priv, state);
2761         mutex_lock(&id_priv->handler_mutex);
2762
2763         /* Check for destruction from another callback. */
2764         if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2765                 goto out;
2766
2767         memset(&event, 0, sizeof event);
2768         event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2769         ret = id_priv->id.event_handler(&id_priv->id, &event);
2770 out:
2771         mutex_unlock(&id_priv->handler_mutex);
2772         return ret;
2773 }
2774
2775 static void cma_process_remove(struct cma_device *cma_dev)
2776 {
2777         struct rdma_id_private *id_priv;
2778         int ret;
2779
2780         mutex_lock(&lock);
2781         while (!list_empty(&cma_dev->id_list)) {
2782                 id_priv = list_entry(cma_dev->id_list.next,
2783                                      struct rdma_id_private, list);
2784
2785                 list_del(&id_priv->listen_list);
2786                 list_del_init(&id_priv->list);
2787                 atomic_inc(&id_priv->refcount);
2788                 mutex_unlock(&lock);
2789
2790                 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
2791                 cma_deref_id(id_priv);
2792                 if (ret)
2793                         rdma_destroy_id(&id_priv->id);
2794
2795                 mutex_lock(&lock);
2796         }
2797         mutex_unlock(&lock);
2798
2799         cma_deref_dev(cma_dev);
2800         wait_for_completion(&cma_dev->comp);
2801 }
2802
2803 static void cma_remove_one(struct ib_device *device)
2804 {
2805         struct cma_device *cma_dev;
2806
2807         cma_dev = ib_get_client_data(device, &cma_client);
2808         if (!cma_dev)
2809                 return;
2810
2811         mutex_lock(&lock);
2812         list_del(&cma_dev->list);
2813         mutex_unlock(&lock);
2814
2815         cma_process_remove(cma_dev);
2816         kfree(cma_dev);
2817 }
2818
2819 static int cma_init(void)
2820 {
2821         int ret, low, high, remaining;
2822
2823         get_random_bytes(&next_port, sizeof next_port);
2824         inet_get_local_port_range(&low, &high);
2825         remaining = (high - low) + 1;
2826         next_port = ((unsigned int) next_port % remaining) + low;
2827
2828         cma_wq = create_singlethread_workqueue("rdma_cm");
2829         if (!cma_wq)
2830                 return -ENOMEM;
2831
2832         ib_sa_register_client(&sa_client);
2833         rdma_addr_register_client(&addr_client);
2834
2835         ret = ib_register_client(&cma_client);
2836         if (ret)
2837                 goto err;
2838         return 0;
2839
2840 err:
2841         rdma_addr_unregister_client(&addr_client);
2842         ib_sa_unregister_client(&sa_client);
2843         destroy_workqueue(cma_wq);
2844         return ret;
2845 }
2846
2847 static void cma_cleanup(void)
2848 {
2849         ib_unregister_client(&cma_client);
2850         rdma_addr_unregister_client(&addr_client);
2851         ib_sa_unregister_client(&sa_client);
2852         destroy_workqueue(cma_wq);
2853         idr_destroy(&sdp_ps);
2854         idr_destroy(&tcp_ps);
2855         idr_destroy(&udp_ps);
2856         idr_destroy(&ipoib_ps);
2857 }
2858
2859 module_init(cma_init);
2860 module_exit(cma_cleanup);