Merge commit 'v2.6.26-rc8' into x86/mce
[linux-2.6] / drivers / infiniband / core / cma.c
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31
32 #include <linux/completion.h>
33 #include <linux/in.h>
34 #include <linux/in6.h>
35 #include <linux/mutex.h>
36 #include <linux/random.h>
37 #include <linux/idr.h>
38 #include <linux/inetdevice.h>
39
40 #include <net/tcp.h>
41
42 #include <rdma/rdma_cm.h>
43 #include <rdma/rdma_cm_ib.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_cm.h>
46 #include <rdma/ib_sa.h>
47 #include <rdma/iw_cm.h>
48
49 MODULE_AUTHOR("Sean Hefty");
50 MODULE_DESCRIPTION("Generic RDMA CM Agent");
51 MODULE_LICENSE("Dual BSD/GPL");
52
53 #define CMA_CM_RESPONSE_TIMEOUT 20
54 #define CMA_MAX_CM_RETRIES 15
55 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
56
57 static void cma_add_one(struct ib_device *device);
58 static void cma_remove_one(struct ib_device *device);
59
/* ib_client descriptor for this module; its add/remove hooks are cma_add_one()/cma_remove_one(). */
60 static struct ib_client cma_client = {
61         .name   = "cma",
62         .add    = cma_add_one,
63         .remove = cma_remove_one
64 };
65
/* File-scope state shared by the functions below. */
66 static struct ib_sa_client sa_client;
67 static struct rdma_addr_client addr_client;
68 static LIST_HEAD(dev_list);             /* list of struct cma_device, linked via cma_device.list */
69 static LIST_HEAD(listen_any_list);      /* see rdma_id_private.list */
70 static DEFINE_MUTEX(lock);              /* held around device/listen list and bind-list updates below */
71 static struct workqueue_struct *cma_wq;
72 static DEFINE_IDR(sdp_ps);              /* NOTE(review): presumably one idr per RDMA port space - confirm */
73 static DEFINE_IDR(tcp_ps);
74 static DEFINE_IDR(udp_ps);
75 static DEFINE_IDR(ipoib_ps);
76 static int next_port;
77
/* Per-device bookkeeping for an ib_device tracked on dev_list. */
78 struct cma_device {
79         struct list_head        list;           /* link in dev_list */
80         struct ib_device        *device;
81         struct completion       comp;           /* completed by cma_deref_dev() when refcount hits zero */
82         atomic_t                refcount;       /* bumped for each id attached in cma_attach_to_dev() */
83         struct list_head        id_list;        /* attached ids, linked via rdma_id_private.list */
84 };
85
/*
 * States stored in rdma_id_private.state.  The helpers cma_comp(),
 * cma_comp_exch() and cma_exch() read and update the state under
 * rdma_id_private.lock.
 */
86 enum cma_state {
87         CMA_IDLE,               /* initial state set by rdma_create_id() */
88         CMA_ADDR_QUERY,
89         CMA_ADDR_RESOLVED,
90         CMA_ROUTE_QUERY,
91         CMA_ROUTE_RESOLVED,
92         CMA_CONNECT,
93         CMA_DISCONNECT,
94         CMA_ADDR_BOUND,
95         CMA_LISTEN,
96         CMA_DEVICE_REMOVAL,
97         CMA_DESTROYING          /* entered at the start of rdma_destroy_id() */
98 };
99
/* One bound port and the ids sharing it; freed by cma_release_port() once owners is empty. */
100 struct rdma_bind_list {
101         struct idr              *ps;            /* idr that "port" is removed from on release */
102         struct hlist_head       owners;         /* ids bound here, linked via rdma_id_private.node */
103         unsigned short          port;
104 };
105
106 /*
107  * Device removal can occur at any time, so we need extra handling to
108  * serialize notifying the user of device removal with other callbacks.
109  * We do this by disabling removal notification while a callback is in progress,
110  * and reporting it after the callback completes.
111  */
/* Private wrapper around the public rdma_cm_id; recovered from it with container_of(). */
112 struct rdma_id_private {
113         struct rdma_cm_id       id;
114
115         struct rdma_bind_list   *bind_list;     /* NULL when no port is bound; see cma_release_port() */
116         struct hlist_node       node;           /* link in bind_list->owners */
117         struct list_head        list; /* listen_any_list or cma_device.list */
118         struct list_head        listen_list; /* per device listens */
119         struct cma_device       *cma_dev;       /* set by cma_attach_to_dev(), cleared on detach */
120         struct list_head        mc_list;        /* struct cma_multicast entries */
121
122         int                     internal_id;    /* if set, rdma_destroy_id() drops a ref on id.context */
123         enum cma_state          state;          /* accessed under "lock" */
124         spinlock_t              lock;
125         struct mutex            qp_mutex;       /* held while id.qp is modified or destroyed */
126
127         struct completion       comp;           /* completed when refcount reaches zero */
128         atomic_t                refcount;
129         wait_queue_head_t       wait_remove;    /* woken when dev_remove drops to zero */
130         atomic_t                dev_remove;     /* raised by cma_disable_remove(), lowered by cma_enable_remove() */
131
132         int                     backlog;
133         int                     timeout_ms;
134         struct ib_sa_query      *query;         /* cancelled together with query_id in cma_cancel_route() */
135         int                     query_id;
136         union {
137                 struct ib_cm_id *ib;
138                 struct iw_cm_id *iw;
139         } cm_id;
140
141         u32                     seq_num;        /* random at creation; used as rq_psn for RTR on IB */
142         u32                     qkey;           /* filled in by cma_set_qkey() */
143         u32                     qp_num;         /* recorded by rdma_create_qp() */
144         u8                      srq;            /* nonzero if the created QP uses an SRQ */
145         u8                      tos;
146 };
147
/* One multicast membership held on rdma_id_private.mc_list; released in cma_leave_mc_groups(). */
148 struct cma_multicast {
149         struct rdma_id_private *id_priv;
150         union {
151                 struct ib_sa_multicast *ib;     /* handed to ib_sa_free_multicast() on leave */
152         } multicast;
153         struct list_head        list;           /* link in id_priv->mc_list */
154         void                    *context;
155         struct sockaddr         addr;
            /* pad extends the storage that starts at addr to sizeof(struct sockaddr_in6) */
156         u8                      pad[sizeof(struct sockaddr_in6) -
157                                     sizeof(struct sockaddr)];
158 };
159
/*
 * Work item carrying an id, a pair of states and an event.
 * NOTE(review): its users are outside this chunk; presumably the work
 * handler moves the id from old_state to new_state and reports "event" -
 * confirm against the handler.
 */
160 struct cma_work {
161         struct work_struct      work;
162         struct rdma_id_private  *id;
163         enum cma_state          old_state;
164         enum cma_state          new_state;
165         struct rdma_cm_event    event;
166 };
167
/*
 * 16-byte address slot used in the CMA and SDP headers.  An IPv6 address
 * fills the whole union; an IPv4 address sits in the last four bytes,
 * after three 32-bit pad words.
 */
168 union cma_ip_addr {
169         struct in6_addr ip6;
170         struct {
171                 __be32 pad[3];
172                 __be32 addr;
173         } ip4;
174 };
175
/* Header parsed by cma_get_net_info() for every port space other than SDP. */
176 struct cma_hdr {
177         u8 cma_version;         /* must equal CMA_VERSION */
178         u8 ip_version;  /* IP version: 7:4 */
179         __be16 port;
180         union cma_ip_addr src_addr;
181         union cma_ip_addr dst_addr;
182 };
183
/* Header parsed by cma_get_net_info() for the SDP port space. */
184 struct sdp_hh {
185         u8 bsdh[16];
186         u8 sdp_version; /* Major version: 7:4 */
187         u8 ip_version;  /* IP version: 7:4 */
188         u8 sdp_specific1[10];
189         __be16 port;
190         __be16 sdp_specific2;
191         union cma_ip_addr src_addr;
192         union cma_ip_addr dst_addr;
193 };
194
/* Leading part of the SDP reply data; cma_verify_rep() checks the major version in sdp_version. */
195 struct sdp_hah {
196         u8 bsdh[16];
197         u8 sdp_version;
198 };
199
200 #define CMA_VERSION 0x00
201 #define SDP_MAJ_VERSION 0x2
202
203 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
204 {
205         unsigned long flags;
206         int ret;
207
208         spin_lock_irqsave(&id_priv->lock, flags);
209         ret = (id_priv->state == comp);
210         spin_unlock_irqrestore(&id_priv->lock, flags);
211         return ret;
212 }
213
214 static int cma_comp_exch(struct rdma_id_private *id_priv,
215                          enum cma_state comp, enum cma_state exch)
216 {
217         unsigned long flags;
218         int ret;
219
220         spin_lock_irqsave(&id_priv->lock, flags);
221         if ((ret = (id_priv->state == comp)))
222                 id_priv->state = exch;
223         spin_unlock_irqrestore(&id_priv->lock, flags);
224         return ret;
225 }
226
227 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
228                                enum cma_state exch)
229 {
230         unsigned long flags;
231         enum cma_state old;
232
233         spin_lock_irqsave(&id_priv->lock, flags);
234         old = id_priv->state;
235         id_priv->state = exch;
236         spin_unlock_irqrestore(&id_priv->lock, flags);
237         return old;
238 }
239
240 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
241 {
242         return hdr->ip_version >> 4;
243 }
244
245 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
246 {
247         hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
248 }
249
250 static inline u8 sdp_get_majv(u8 sdp_version)
251 {
252         return sdp_version >> 4;
253 }
254
255 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
256 {
257         return hh->ip_version >> 4;
258 }
259
260 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
261 {
262         hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
263 }
264
265 static inline int cma_is_ud_ps(enum rdma_port_space ps)
266 {
267         return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
268 }
269
270 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
271                               struct cma_device *cma_dev)
272 {
273         atomic_inc(&cma_dev->refcount);
274         id_priv->cma_dev = cma_dev;
275         id_priv->id.device = cma_dev->device;
276         list_add_tail(&id_priv->list, &cma_dev->id_list);
277 }
278
279 static inline void cma_deref_dev(struct cma_device *cma_dev)
280 {
281         if (atomic_dec_and_test(&cma_dev->refcount))
282                 complete(&cma_dev->comp);
283 }
284
285 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
286 {
287         list_del(&id_priv->list);
288         cma_deref_dev(id_priv->cma_dev);
289         id_priv->cma_dev = NULL;
290 }
291
292 static int cma_set_qkey(struct ib_device *device, u8 port_num,
293                         enum rdma_port_space ps,
294                         struct rdma_dev_addr *dev_addr, u32 *qkey)
295 {
296         struct ib_sa_mcmember_rec rec;
297         int ret = 0;
298
299         switch (ps) {
300         case RDMA_PS_UDP:
301                 *qkey = RDMA_UDP_QKEY;
302                 break;
303         case RDMA_PS_IPOIB:
304                 ib_addr_get_mgid(dev_addr, &rec.mgid);
305                 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
306                 *qkey = be32_to_cpu(rec.qkey);
307                 break;
308         default:
309                 break;
310         }
311         return ret;
312 }
313
314 static int cma_acquire_dev(struct rdma_id_private *id_priv)
315 {
316         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
317         struct cma_device *cma_dev;
318         union ib_gid gid;
319         int ret = -ENODEV;
320
321         switch (rdma_node_get_transport(dev_addr->dev_type)) {
322         case RDMA_TRANSPORT_IB:
323                 ib_addr_get_sgid(dev_addr, &gid);
324                 break;
325         case RDMA_TRANSPORT_IWARP:
326                 iw_addr_get_sgid(dev_addr, &gid);
327                 break;
328         default:
329                 return -ENODEV;
330         }
331
332         list_for_each_entry(cma_dev, &dev_list, list) {
333                 ret = ib_find_cached_gid(cma_dev->device, &gid,
334                                          &id_priv->id.port_num, NULL);
335                 if (!ret) {
336                         ret = cma_set_qkey(cma_dev->device,
337                                            id_priv->id.port_num,
338                                            id_priv->id.ps, dev_addr,
339                                            &id_priv->qkey);
340                         if (!ret)
341                                 cma_attach_to_dev(id_priv, cma_dev);
342                         break;
343                 }
344         }
345         return ret;
346 }
347
348 static void cma_deref_id(struct rdma_id_private *id_priv)
349 {
350         if (atomic_dec_and_test(&id_priv->refcount))
351                 complete(&id_priv->comp);
352 }
353
354 static int cma_disable_remove(struct rdma_id_private *id_priv,
355                               enum cma_state state)
356 {
357         unsigned long flags;
358         int ret;
359
360         spin_lock_irqsave(&id_priv->lock, flags);
361         if (id_priv->state == state) {
362                 atomic_inc(&id_priv->dev_remove);
363                 ret = 0;
364         } else
365                 ret = -EINVAL;
366         spin_unlock_irqrestore(&id_priv->lock, flags);
367         return ret;
368 }
369
370 static void cma_enable_remove(struct rdma_id_private *id_priv)
371 {
372         if (atomic_dec_and_test(&id_priv->dev_remove))
373                 wake_up(&id_priv->wait_remove);
374 }
375
376 static int cma_has_cm_dev(struct rdma_id_private *id_priv)
377 {
378         return (id_priv->id.device && id_priv->cm_id.ib);
379 }
380
381 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
382                                   void *context, enum rdma_port_space ps)
383 {
384         struct rdma_id_private *id_priv;
385
386         id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
387         if (!id_priv)
388                 return ERR_PTR(-ENOMEM);
389
390         id_priv->state = CMA_IDLE;
391         id_priv->id.context = context;
392         id_priv->id.event_handler = event_handler;
393         id_priv->id.ps = ps;
394         spin_lock_init(&id_priv->lock);
395         mutex_init(&id_priv->qp_mutex);
396         init_completion(&id_priv->comp);
397         atomic_set(&id_priv->refcount, 1);
398         init_waitqueue_head(&id_priv->wait_remove);
399         atomic_set(&id_priv->dev_remove, 0);
400         INIT_LIST_HEAD(&id_priv->listen_list);
401         INIT_LIST_HEAD(&id_priv->mc_list);
402         get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
403
404         return &id_priv->id;
405 }
406 EXPORT_SYMBOL(rdma_create_id);
407
408 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
409 {
410         struct ib_qp_attr qp_attr;
411         int qp_attr_mask, ret;
412
413         qp_attr.qp_state = IB_QPS_INIT;
414         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
415         if (ret)
416                 return ret;
417
418         ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
419         if (ret)
420                 return ret;
421
422         qp_attr.qp_state = IB_QPS_RTR;
423         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
424         if (ret)
425                 return ret;
426
427         qp_attr.qp_state = IB_QPS_RTS;
428         qp_attr.sq_psn = 0;
429         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
430
431         return ret;
432 }
433
434 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
435 {
436         struct ib_qp_attr qp_attr;
437         int qp_attr_mask, ret;
438
439         qp_attr.qp_state = IB_QPS_INIT;
440         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
441         if (ret)
442                 return ret;
443
444         return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
445 }
446
447 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
448                    struct ib_qp_init_attr *qp_init_attr)
449 {
450         struct rdma_id_private *id_priv;
451         struct ib_qp *qp;
452         int ret;
453
454         id_priv = container_of(id, struct rdma_id_private, id);
455         if (id->device != pd->device)
456                 return -EINVAL;
457
458         qp = ib_create_qp(pd, qp_init_attr);
459         if (IS_ERR(qp))
460                 return PTR_ERR(qp);
461
462         if (cma_is_ud_ps(id_priv->id.ps))
463                 ret = cma_init_ud_qp(id_priv, qp);
464         else
465                 ret = cma_init_conn_qp(id_priv, qp);
466         if (ret)
467                 goto err;
468
469         id->qp = qp;
470         id_priv->qp_num = qp->qp_num;
471         id_priv->srq = (qp->srq != NULL);
472         return 0;
473 err:
474         ib_destroy_qp(qp);
475         return ret;
476 }
477 EXPORT_SYMBOL(rdma_create_qp);
478
479 void rdma_destroy_qp(struct rdma_cm_id *id)
480 {
481         struct rdma_id_private *id_priv;
482
483         id_priv = container_of(id, struct rdma_id_private, id);
484         mutex_lock(&id_priv->qp_mutex);
485         ib_destroy_qp(id_priv->id.qp);
486         id_priv->id.qp = NULL;
487         mutex_unlock(&id_priv->qp_mutex);
488 }
489 EXPORT_SYMBOL(rdma_destroy_qp);
490
491 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
492                              struct rdma_conn_param *conn_param)
493 {
494         struct ib_qp_attr qp_attr;
495         int qp_attr_mask, ret;
496
497         mutex_lock(&id_priv->qp_mutex);
498         if (!id_priv->id.qp) {
499                 ret = 0;
500                 goto out;
501         }
502
503         /* Need to update QP attributes from default values. */
504         qp_attr.qp_state = IB_QPS_INIT;
505         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
506         if (ret)
507                 goto out;
508
509         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
510         if (ret)
511                 goto out;
512
513         qp_attr.qp_state = IB_QPS_RTR;
514         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
515         if (ret)
516                 goto out;
517
518         if (conn_param)
519                 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
520         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
521 out:
522         mutex_unlock(&id_priv->qp_mutex);
523         return ret;
524 }
525
526 static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
527                              struct rdma_conn_param *conn_param)
528 {
529         struct ib_qp_attr qp_attr;
530         int qp_attr_mask, ret;
531
532         mutex_lock(&id_priv->qp_mutex);
533         if (!id_priv->id.qp) {
534                 ret = 0;
535                 goto out;
536         }
537
538         qp_attr.qp_state = IB_QPS_RTS;
539         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
540         if (ret)
541                 goto out;
542
543         if (conn_param)
544                 qp_attr.max_rd_atomic = conn_param->initiator_depth;
545         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
546 out:
547         mutex_unlock(&id_priv->qp_mutex);
548         return ret;
549 }
550
551 static int cma_modify_qp_err(struct rdma_id_private *id_priv)
552 {
553         struct ib_qp_attr qp_attr;
554         int ret;
555
556         mutex_lock(&id_priv->qp_mutex);
557         if (!id_priv->id.qp) {
558                 ret = 0;
559                 goto out;
560         }
561
562         qp_attr.qp_state = IB_QPS_ERR;
563         ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
564 out:
565         mutex_unlock(&id_priv->qp_mutex);
566         return ret;
567 }
568
569 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
570                                struct ib_qp_attr *qp_attr, int *qp_attr_mask)
571 {
572         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
573         int ret;
574
575         ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
576                                   ib_addr_get_pkey(dev_addr),
577                                   &qp_attr->pkey_index);
578         if (ret)
579                 return ret;
580
581         qp_attr->port_num = id_priv->id.port_num;
582         *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
583
584         if (cma_is_ud_ps(id_priv->id.ps)) {
585                 qp_attr->qkey = id_priv->qkey;
586                 *qp_attr_mask |= IB_QP_QKEY;
587         } else {
588                 qp_attr->qp_access_flags = 0;
589                 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
590         }
591         return 0;
592 }
593
594 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
595                        int *qp_attr_mask)
596 {
597         struct rdma_id_private *id_priv;
598         int ret = 0;
599
600         id_priv = container_of(id, struct rdma_id_private, id);
601         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
602         case RDMA_TRANSPORT_IB:
603                 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
604                         ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
605                 else
606                         ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
607                                                  qp_attr_mask);
608                 if (qp_attr->qp_state == IB_QPS_RTR)
609                         qp_attr->rq_psn = id_priv->seq_num;
610                 break;
611         case RDMA_TRANSPORT_IWARP:
612                 if (!id_priv->cm_id.iw) {
613                         qp_attr->qp_access_flags = 0;
614                         *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
615                 } else
616                         ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
617                                                  qp_attr_mask);
618                 break;
619         default:
620                 ret = -ENOSYS;
621                 break;
622         }
623
624         return ret;
625 }
626 EXPORT_SYMBOL(rdma_init_qp_attr);
627
628 static inline int cma_zero_addr(struct sockaddr *addr)
629 {
630         struct in6_addr *ip6;
631
632         if (addr->sa_family == AF_INET)
633                 return ipv4_is_zeronet(
634                         ((struct sockaddr_in *)addr)->sin_addr.s_addr);
635         else {
636                 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
637                 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
638                         ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
639         }
640 }
641
642 static inline int cma_loopback_addr(struct sockaddr *addr)
643 {
644         return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
645 }
646
/* Return 1 if @addr is either a zero address or a loopback address, else 0. */
static inline int cma_any_addr(struct sockaddr *addr)
{
	if (cma_zero_addr(addr))
		return 1;

	return cma_loopback_addr(addr) != 0;
}
651
652 static inline __be16 cma_port(struct sockaddr *addr)
653 {
654         if (addr->sa_family == AF_INET)
655                 return ((struct sockaddr_in *) addr)->sin_port;
656         else
657                 return ((struct sockaddr_in6 *) addr)->sin6_port;
658 }
659
/* Return 1 if @addr carries port 0, else 0. */
static inline int cma_any_port(struct sockaddr *addr)
{
	return cma_port(addr) == 0;
}
664
665 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
666                             u8 *ip_ver, __be16 *port,
667                             union cma_ip_addr **src, union cma_ip_addr **dst)
668 {
669         switch (ps) {
670         case RDMA_PS_SDP:
671                 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
672                     SDP_MAJ_VERSION)
673                         return -EINVAL;
674
675                 *ip_ver = sdp_get_ip_ver(hdr);
676                 *port   = ((struct sdp_hh *) hdr)->port;
677                 *src    = &((struct sdp_hh *) hdr)->src_addr;
678                 *dst    = &((struct sdp_hh *) hdr)->dst_addr;
679                 break;
680         default:
681                 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
682                         return -EINVAL;
683
684                 *ip_ver = cma_get_ip_ver(hdr);
685                 *port   = ((struct cma_hdr *) hdr)->port;
686                 *src    = &((struct cma_hdr *) hdr)->src_addr;
687                 *dst    = &((struct cma_hdr *) hdr)->dst_addr;
688                 break;
689         }
690
691         if (*ip_ver != 4 && *ip_ver != 6)
692                 return -EINVAL;
693         return 0;
694 }
695
696 static void cma_save_net_info(struct rdma_addr *addr,
697                               struct rdma_addr *listen_addr,
698                               u8 ip_ver, __be16 port,
699                               union cma_ip_addr *src, union cma_ip_addr *dst)
700 {
701         struct sockaddr_in *listen4, *ip4;
702         struct sockaddr_in6 *listen6, *ip6;
703
704         switch (ip_ver) {
705         case 4:
706                 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
707                 ip4 = (struct sockaddr_in *) &addr->src_addr;
708                 ip4->sin_family = listen4->sin_family;
709                 ip4->sin_addr.s_addr = dst->ip4.addr;
710                 ip4->sin_port = listen4->sin_port;
711
712                 ip4 = (struct sockaddr_in *) &addr->dst_addr;
713                 ip4->sin_family = listen4->sin_family;
714                 ip4->sin_addr.s_addr = src->ip4.addr;
715                 ip4->sin_port = port;
716                 break;
717         case 6:
718                 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
719                 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
720                 ip6->sin6_family = listen6->sin6_family;
721                 ip6->sin6_addr = dst->ip6;
722                 ip6->sin6_port = listen6->sin6_port;
723
724                 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
725                 ip6->sin6_family = listen6->sin6_family;
726                 ip6->sin6_addr = src->ip6;
727                 ip6->sin6_port = port;
728                 break;
729         default:
730                 break;
731         }
732 }
733
734 static inline int cma_user_data_offset(enum rdma_port_space ps)
735 {
736         switch (ps) {
737         case RDMA_PS_SDP:
738                 return 0;
739         default:
740                 return sizeof(struct cma_hdr);
741         }
742 }
743
744 static void cma_cancel_route(struct rdma_id_private *id_priv)
745 {
746         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
747         case RDMA_TRANSPORT_IB:
748                 if (id_priv->query)
749                         ib_sa_cancel_query(id_priv->query_id, id_priv->query);
750                 break;
751         default:
752                 break;
753         }
754 }
755
756 static void cma_cancel_listens(struct rdma_id_private *id_priv)
757 {
758         struct rdma_id_private *dev_id_priv;
759
760         /*
761          * Remove from listen_any_list to prevent added devices from spawning
762          * additional listen requests.
763          */
764         mutex_lock(&lock);
765         list_del(&id_priv->list);
766
767         while (!list_empty(&id_priv->listen_list)) {
768                 dev_id_priv = list_entry(id_priv->listen_list.next,
769                                          struct rdma_id_private, listen_list);
770                 /* sync with device removal to avoid duplicate destruction */
771                 list_del_init(&dev_id_priv->list);
772                 list_del(&dev_id_priv->listen_list);
773                 mutex_unlock(&lock);
774
775                 rdma_destroy_id(&dev_id_priv->id);
776                 mutex_lock(&lock);
777         }
778         mutex_unlock(&lock);
779 }
780
781 static void cma_cancel_operation(struct rdma_id_private *id_priv,
782                                  enum cma_state state)
783 {
784         switch (state) {
785         case CMA_ADDR_QUERY:
786                 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
787                 break;
788         case CMA_ROUTE_QUERY:
789                 cma_cancel_route(id_priv);
790                 break;
791         case CMA_LISTEN:
792                 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
793                     !id_priv->cma_dev)
794                         cma_cancel_listens(id_priv);
795                 break;
796         default:
797                 break;
798         }
799 }
800
801 static void cma_release_port(struct rdma_id_private *id_priv)
802 {
803         struct rdma_bind_list *bind_list = id_priv->bind_list;
804
805         if (!bind_list)
806                 return;
807
808         mutex_lock(&lock);
809         hlist_del(&id_priv->node);
810         if (hlist_empty(&bind_list->owners)) {
811                 idr_remove(bind_list->ps, bind_list->port);
812                 kfree(bind_list);
813         }
814         mutex_unlock(&lock);
815 }
816
817 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
818 {
819         struct cma_multicast *mc;
820
821         while (!list_empty(&id_priv->mc_list)) {
822                 mc = container_of(id_priv->mc_list.next,
823                                   struct cma_multicast, list);
824                 list_del(&mc->list);
825                 ib_sa_free_multicast(mc->multicast.ib);
826                 kfree(mc);
827         }
828 }
829
830 void rdma_destroy_id(struct rdma_cm_id *id)
831 {
832         struct rdma_id_private *id_priv;
833         enum cma_state state;
834
835         id_priv = container_of(id, struct rdma_id_private, id);
836         state = cma_exch(id_priv, CMA_DESTROYING);
837         cma_cancel_operation(id_priv, state);
838
839         mutex_lock(&lock);
840         if (id_priv->cma_dev) {
841                 mutex_unlock(&lock);
842                 switch (rdma_node_get_transport(id->device->node_type)) {
843                 case RDMA_TRANSPORT_IB:
844                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
845                                 ib_destroy_cm_id(id_priv->cm_id.ib);
846                         break;
847                 case RDMA_TRANSPORT_IWARP:
848                         if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
849                                 iw_destroy_cm_id(id_priv->cm_id.iw);
850                         break;
851                 default:
852                         break;
853                 }
854                 cma_leave_mc_groups(id_priv);
855                 mutex_lock(&lock);
856                 cma_detach_from_dev(id_priv);
857         }
858         mutex_unlock(&lock);
859
860         cma_release_port(id_priv);
861         cma_deref_id(id_priv);
862         wait_for_completion(&id_priv->comp);
863
864         if (id_priv->internal_id)
865                 cma_deref_id(id_priv->id.context);
866
867         kfree(id_priv->id.route.path_rec);
868         kfree(id_priv);
869 }
870 EXPORT_SYMBOL(rdma_destroy_id);
871
872 static int cma_rep_recv(struct rdma_id_private *id_priv)
873 {
874         int ret;
875
876         ret = cma_modify_qp_rtr(id_priv, NULL);
877         if (ret)
878                 goto reject;
879
880         ret = cma_modify_qp_rts(id_priv, NULL);
881         if (ret)
882                 goto reject;
883
884         ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
885         if (ret)
886                 goto reject;
887
888         return 0;
889 reject:
890         cma_modify_qp_err(id_priv);
891         ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
892                        NULL, 0, NULL, 0);
893         return ret;
894 }
895
896 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
897 {
898         if (id_priv->id.ps == RDMA_PS_SDP &&
899             sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
900             SDP_MAJ_VERSION)
901                 return -EINVAL;
902
903         return 0;
904 }
905
906 static void cma_set_rep_event_data(struct rdma_cm_event *event,
907                                    struct ib_cm_rep_event_param *rep_data,
908                                    void *private_data)
909 {
910         event->param.conn.private_data = private_data;
911         event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
912         event->param.conn.responder_resources = rep_data->responder_resources;
913         event->param.conn.initiator_depth = rep_data->initiator_depth;
914         event->param.conn.flow_control = rep_data->flow_control;
915         event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
916         event->param.conn.srq = rep_data->srq;
917         event->param.conn.qp_num = rep_data->remote_qpn;
918 }
919
920 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
921 {
922         struct rdma_id_private *id_priv = cm_id->context;
923         struct rdma_cm_event event;
924         int ret = 0;
925
926         if (cma_disable_remove(id_priv, CMA_CONNECT))
927                 return 0;
928
929         memset(&event, 0, sizeof event);
930         switch (ib_event->event) {
931         case IB_CM_REQ_ERROR:
932         case IB_CM_REP_ERROR:
933                 event.event = RDMA_CM_EVENT_UNREACHABLE;
934                 event.status = -ETIMEDOUT;
935                 break;
936         case IB_CM_REP_RECEIVED:
937                 event.status = cma_verify_rep(id_priv, ib_event->private_data);
938                 if (event.status)
939                         event.event = RDMA_CM_EVENT_CONNECT_ERROR;
940                 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
941                         event.status = cma_rep_recv(id_priv);
942                         event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
943                                                      RDMA_CM_EVENT_ESTABLISHED;
944                 } else
945                         event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
946                 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
947                                        ib_event->private_data);
948                 break;
949         case IB_CM_RTU_RECEIVED:
950         case IB_CM_USER_ESTABLISHED:
951                 event.event = RDMA_CM_EVENT_ESTABLISHED;
952                 break;
953         case IB_CM_DREQ_ERROR:
954                 event.status = -ETIMEDOUT; /* fall through */
955         case IB_CM_DREQ_RECEIVED:
956         case IB_CM_DREP_RECEIVED:
957                 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
958                         goto out;
959                 event.event = RDMA_CM_EVENT_DISCONNECTED;
960                 break;
961         case IB_CM_TIMEWAIT_EXIT:
962         case IB_CM_MRA_RECEIVED:
963                 /* ignore event */
964                 goto out;
965         case IB_CM_REJ_RECEIVED:
966                 cma_modify_qp_err(id_priv);
967                 event.status = ib_event->param.rej_rcvd.reason;
968                 event.event = RDMA_CM_EVENT_REJECTED;
969                 event.param.conn.private_data = ib_event->private_data;
970                 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
971                 break;
972         default:
973                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
974                        ib_event->event);
975                 goto out;
976         }
977
978         ret = id_priv->id.event_handler(&id_priv->id, &event);
979         if (ret) {
980                 /* Destroy the CM ID by returning a non-zero value. */
981                 id_priv->cm_id.ib = NULL;
982                 cma_exch(id_priv, CMA_DESTROYING);
983                 cma_enable_remove(id_priv);
984                 rdma_destroy_id(&id_priv->id);
985                 return ret;
986         }
987 out:
988         cma_enable_remove(id_priv);
989         return ret;
990 }
991
992 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
993                                                struct ib_cm_event *ib_event)
994 {
995         struct rdma_id_private *id_priv;
996         struct rdma_cm_id *id;
997         struct rdma_route *rt;
998         union cma_ip_addr *src, *dst;
999         __be16 port;
1000         u8 ip_ver;
1001
1002         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1003                              &ip_ver, &port, &src, &dst))
1004                 goto err;
1005
1006         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1007                             listen_id->ps);
1008         if (IS_ERR(id))
1009                 goto err;
1010
1011         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1012                           ip_ver, port, src, dst);
1013
1014         rt = &id->route;
1015         rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1016         rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
1017                                GFP_KERNEL);
1018         if (!rt->path_rec)
1019                 goto destroy_id;
1020
1021         rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1022         if (rt->num_paths == 2)
1023                 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1024
1025         ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1026         ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1027         ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1028         rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
1029
1030         id_priv = container_of(id, struct rdma_id_private, id);
1031         id_priv->state = CMA_CONNECT;
1032         return id_priv;
1033
1034 destroy_id:
1035         rdma_destroy_id(id);
1036 err:
1037         return NULL;
1038 }
1039
1040 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1041                                               struct ib_cm_event *ib_event)
1042 {
1043         struct rdma_id_private *id_priv;
1044         struct rdma_cm_id *id;
1045         union cma_ip_addr *src, *dst;
1046         __be16 port;
1047         u8 ip_ver;
1048         int ret;
1049
1050         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1051                             listen_id->ps);
1052         if (IS_ERR(id))
1053                 return NULL;
1054
1055
1056         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1057                              &ip_ver, &port, &src, &dst))
1058                 goto err;
1059
1060         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1061                           ip_ver, port, src, dst);
1062
1063         ret = rdma_translate_ip(&id->route.addr.src_addr,
1064                                 &id->route.addr.dev_addr);
1065         if (ret)
1066                 goto err;
1067
1068         id_priv = container_of(id, struct rdma_id_private, id);
1069         id_priv->state = CMA_CONNECT;
1070         return id_priv;
1071 err:
1072         rdma_destroy_id(id);
1073         return NULL;
1074 }
1075
1076 static void cma_set_req_event_data(struct rdma_cm_event *event,
1077                                    struct ib_cm_req_event_param *req_data,
1078                                    void *private_data, int offset)
1079 {
1080         event->param.conn.private_data = private_data + offset;
1081         event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1082         event->param.conn.responder_resources = req_data->responder_resources;
1083         event->param.conn.initiator_depth = req_data->initiator_depth;
1084         event->param.conn.flow_control = req_data->flow_control;
1085         event->param.conn.retry_count = req_data->retry_count;
1086         event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1087         event->param.conn.srq = req_data->srq;
1088         event->param.conn.qp_num = req_data->remote_qpn;
1089 }
1090
/*
 * IB CM callback installed on listening IDs (see cma_ib_listen()).  Handles
 * an incoming request: creates a new rdma_cm_id for it, binds it to a
 * device, re-targets @cm_id at the new ID and reports
 * RDMA_CM_EVENT_CONNECT_REQUEST to the user.
 *
 * Returns -ECONNABORTED if the listener is no longer in CMA_LISTEN,
 * -ENOMEM if no new ID could be created, the cma_acquire_dev() error, or
 * the user handler's return value.  On a non-zero handler return the new
 * rdma_cm_id is destroyed here.
 */
static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int offset, ret;

        listen_id = cm_id->context;
        if (cma_disable_remove(listen_id, CMA_LISTEN))
                return -ECONNABORTED;

        memset(&event, 0, sizeof event);
        /* The user's private data starts after a port-space specific header. */
        offset = cma_user_data_offset(listen_id->id.ps);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        if (cma_is_ud_ps(listen_id->id.ps)) {
                conn_id = cma_new_udp_id(&listen_id->id, ib_event);
                event.param.ud.private_data = ib_event->private_data + offset;
                event.param.ud.private_data_len =
                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
        } else {
                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
                                       ib_event->private_data, offset);
        }
        /* Every creation failure is reported as -ENOMEM. */
        if (!conn_id) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * NOTE(review): presumably balanced by the cma_enable_remove(conn_id)
         * calls below - confirm against cma_enable_remove().
         */
        atomic_inc(&conn_id->dev_remove);
        mutex_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mutex_unlock(&lock);
        if (ret)
                goto release_conn_id;

        /* From here on, events for this cm_id go to the new ID. */
        conn_id->cm_id.ib = cm_id;
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_ib_handler;

        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (!ret) {
                /*
                 * Acquire mutex to prevent user executing rdma_destroy_id()
                 * while we're accessing the cm_id.
                 */
                mutex_lock(&lock);
                /* Send an MRA only if still connecting and not a UD ID. */
                if (cma_comp(conn_id, CMA_CONNECT) &&
                    !cma_is_ud_ps(conn_id->id.ps))
                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
                mutex_unlock(&lock);
                cma_enable_remove(conn_id);
                goto out;
        }

        /* Destroy the CM ID by returning a non-zero value. */
        conn_id->cm_id.ib = NULL;

release_conn_id:
        /* Shared teardown of the new ID for both failure paths. */
        cma_exch(conn_id, CMA_DESTROYING);
        cma_enable_remove(conn_id);
        rdma_destroy_id(&conn_id->id);

out:
        cma_enable_remove(listen_id);
        return ret;
}
1157
1158 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1159 {
1160         return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1161 }
1162
1163 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1164                                  struct ib_cm_compare_data *compare)
1165 {
1166         struct cma_hdr *cma_data, *cma_mask;
1167         struct sdp_hh *sdp_data, *sdp_mask;
1168         __be32 ip4_addr;
1169         struct in6_addr ip6_addr;
1170
1171         memset(compare, 0, sizeof *compare);
1172         cma_data = (void *) compare->data;
1173         cma_mask = (void *) compare->mask;
1174         sdp_data = (void *) compare->data;
1175         sdp_mask = (void *) compare->mask;
1176
1177         switch (addr->sa_family) {
1178         case AF_INET:
1179                 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1180                 if (ps == RDMA_PS_SDP) {
1181                         sdp_set_ip_ver(sdp_data, 4);
1182                         sdp_set_ip_ver(sdp_mask, 0xF);
1183                         sdp_data->dst_addr.ip4.addr = ip4_addr;
1184                         sdp_mask->dst_addr.ip4.addr = htonl(~0);
1185                 } else {
1186                         cma_set_ip_ver(cma_data, 4);
1187                         cma_set_ip_ver(cma_mask, 0xF);
1188                         cma_data->dst_addr.ip4.addr = ip4_addr;
1189                         cma_mask->dst_addr.ip4.addr = htonl(~0);
1190                 }
1191                 break;
1192         case AF_INET6:
1193                 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1194                 if (ps == RDMA_PS_SDP) {
1195                         sdp_set_ip_ver(sdp_data, 6);
1196                         sdp_set_ip_ver(sdp_mask, 0xF);
1197                         sdp_data->dst_addr.ip6 = ip6_addr;
1198                         memset(&sdp_mask->dst_addr.ip6, 0xFF,
1199                                sizeof sdp_mask->dst_addr.ip6);
1200                 } else {
1201                         cma_set_ip_ver(cma_data, 6);
1202                         cma_set_ip_ver(cma_mask, 0xF);
1203                         cma_data->dst_addr.ip6 = ip6_addr;
1204                         memset(&cma_mask->dst_addr.ip6, 0xFF,
1205                                sizeof cma_mask->dst_addr.ip6);
1206                 }
1207                 break;
1208         default:
1209                 break;
1210         }
1211 }
1212
/*
 * iWARP CM callback for connected IDs.  Maps an iw_cm event to an rdma_cm
 * event and passes it to the user's event handler.
 *
 * Events are only processed while the ID is in CMA_CONNECT.  For a connect
 * reply the local and remote addresses from the event are stored in the
 * ID's route, and the reply status selects ESTABLISHED, REJECTED,
 * UNREACHABLE or CONNECT_ERROR.  An unknown event type triggers BUG_ON().
 *
 * Returns 0 normally.  If the user's handler returns non-zero the
 * rdma_cm_id is destroyed here and that value is returned.
 */
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
        struct rdma_id_private *id_priv = iw_id->context;
        struct rdma_cm_event event;
        struct rdma_addr *addr;
        int ret;

        if (cma_disable_remove(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (iw_event->event) {
        case IW_CM_EVENT_CLOSE:
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
                addr = &id_priv->id.route.addr;
                *(struct sockaddr_in *) &addr->src_addr = iw_event->local_addr;
                *(struct sockaddr_in *) &addr->dst_addr = iw_event->remote_addr;

                if (!iw_event->status)
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
                else if (iw_event->status == -ECONNRESET ||
                         iw_event->status == -ECONNREFUSED)
                        event.event = RDMA_CM_EVENT_REJECTED;
                else if (iw_event->status == -ETIMEDOUT)
                        event.event = RDMA_CM_EVENT_UNREACHABLE;
                else
                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
                break;
        case IW_CM_EVENT_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        default:
                BUG_ON(1);
        }

        event.status = iw_event->status;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;

        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (!ret) {
                cma_enable_remove(id_priv);
                return 0;
        }

        /* Destroy the CM ID by returning a non-zero value. */
        id_priv->cm_id.iw = NULL;
        cma_exch(id_priv, CMA_DESTROYING);
        cma_enable_remove(id_priv);
        rdma_destroy_id(&id_priv->id);
        return ret;
}
1272
1273 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1274                                struct iw_cm_event *iw_event)
1275 {
1276         struct rdma_cm_id *new_cm_id;
1277         struct rdma_id_private *listen_id, *conn_id;
1278         struct sockaddr_in *sin;
1279         struct net_device *dev = NULL;
1280         struct rdma_cm_event event;
1281         int ret;
1282         struct ib_device_attr attr;
1283
1284         listen_id = cm_id->context;
1285         if (cma_disable_remove(listen_id, CMA_LISTEN))
1286                 return -ECONNABORTED;
1287
1288         /* Create a new RDMA id for the new IW CM ID */
1289         new_cm_id = rdma_create_id(listen_id->id.event_handler,
1290                                    listen_id->id.context,
1291                                    RDMA_PS_TCP);
1292         if (IS_ERR(new_cm_id)) {
1293                 ret = -ENOMEM;
1294                 goto out;
1295         }
1296         conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1297         atomic_inc(&conn_id->dev_remove);
1298         conn_id->state = CMA_CONNECT;
1299
1300         dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
1301         if (!dev) {
1302                 ret = -EADDRNOTAVAIL;
1303                 cma_enable_remove(conn_id);
1304                 rdma_destroy_id(new_cm_id);
1305                 goto out;
1306         }
1307         ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1308         if (ret) {
1309                 cma_enable_remove(conn_id);
1310                 rdma_destroy_id(new_cm_id);
1311                 goto out;
1312         }
1313
1314         mutex_lock(&lock);
1315         ret = cma_acquire_dev(conn_id);
1316         mutex_unlock(&lock);
1317         if (ret) {
1318                 cma_enable_remove(conn_id);
1319                 rdma_destroy_id(new_cm_id);
1320                 goto out;
1321         }
1322
1323         conn_id->cm_id.iw = cm_id;
1324         cm_id->context = conn_id;
1325         cm_id->cm_handler = cma_iw_handler;
1326
1327         sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1328         *sin = iw_event->local_addr;
1329         sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1330         *sin = iw_event->remote_addr;
1331
1332         ret = ib_query_device(conn_id->id.device, &attr);
1333         if (ret) {
1334                 cma_enable_remove(conn_id);
1335                 rdma_destroy_id(new_cm_id);
1336                 goto out;
1337         }
1338
1339         memset(&event, 0, sizeof event);
1340         event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1341         event.param.conn.private_data = iw_event->private_data;
1342         event.param.conn.private_data_len = iw_event->private_data_len;
1343         event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1344         event.param.conn.responder_resources = attr.max_qp_rd_atom;
1345         ret = conn_id->id.event_handler(&conn_id->id, &event);
1346         if (ret) {
1347                 /* User wants to destroy the CM ID */
1348                 conn_id->cm_id.iw = NULL;
1349                 cma_exch(conn_id, CMA_DESTROYING);
1350                 cma_enable_remove(conn_id);
1351                 rdma_destroy_id(&conn_id->id);
1352         }
1353
1354 out:
1355         if (dev)
1356                 dev_put(dev);
1357         cma_enable_remove(listen_id);
1358         return ret;
1359 }
1360
1361 static int cma_ib_listen(struct rdma_id_private *id_priv)
1362 {
1363         struct ib_cm_compare_data compare_data;
1364         struct sockaddr *addr;
1365         __be64 svc_id;
1366         int ret;
1367
1368         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1369                                             id_priv);
1370         if (IS_ERR(id_priv->cm_id.ib))
1371                 return PTR_ERR(id_priv->cm_id.ib);
1372
1373         addr = &id_priv->id.route.addr.src_addr;
1374         svc_id = cma_get_service_id(id_priv->id.ps, addr);
1375         if (cma_any_addr(addr))
1376                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1377         else {
1378                 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1379                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1380         }
1381
1382         if (ret) {
1383                 ib_destroy_cm_id(id_priv->cm_id.ib);
1384                 id_priv->cm_id.ib = NULL;
1385         }
1386
1387         return ret;
1388 }
1389
1390 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1391 {
1392         int ret;
1393         struct sockaddr_in *sin;
1394
1395         id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1396                                             iw_conn_req_handler,
1397                                             id_priv);
1398         if (IS_ERR(id_priv->cm_id.iw))
1399                 return PTR_ERR(id_priv->cm_id.iw);
1400
1401         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1402         id_priv->cm_id.iw->local_addr = *sin;
1403
1404         ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1405
1406         if (ret) {
1407                 iw_destroy_cm_id(id_priv->cm_id.iw);
1408                 id_priv->cm_id.iw = NULL;
1409         }
1410
1411         return ret;
1412 }
1413
1414 static int cma_listen_handler(struct rdma_cm_id *id,
1415                               struct rdma_cm_event *event)
1416 {
1417         struct rdma_id_private *id_priv = id->context;
1418
1419         id->context = id_priv->id.context;
1420         id->event_handler = id_priv->id.event_handler;
1421         return id_priv->id.event_handler(id, event);
1422 }
1423
1424 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1425                               struct cma_device *cma_dev)
1426 {
1427         struct rdma_id_private *dev_id_priv;
1428         struct rdma_cm_id *id;
1429         int ret;
1430
1431         id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1432         if (IS_ERR(id))
1433                 return;
1434
1435         dev_id_priv = container_of(id, struct rdma_id_private, id);
1436
1437         dev_id_priv->state = CMA_ADDR_BOUND;
1438         memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1439                ip_addr_size(&id_priv->id.route.addr.src_addr));
1440
1441         cma_attach_to_dev(dev_id_priv, cma_dev);
1442         list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1443         atomic_inc(&id_priv->refcount);
1444         dev_id_priv->internal_id = 1;
1445
1446         ret = rdma_listen(id, id_priv->backlog);
1447         if (ret)
1448                 printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
1449                        "listening on device %s", ret, cma_dev->device->name);
1450 }
1451
1452 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1453 {
1454         struct cma_device *cma_dev;
1455
1456         mutex_lock(&lock);
1457         list_add_tail(&id_priv->list, &listen_any_list);
1458         list_for_each_entry(cma_dev, &dev_list, list)
1459                 cma_listen_on_dev(id_priv, cma_dev);
1460         mutex_unlock(&lock);
1461 }
1462
1463 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1464 {
1465         struct sockaddr_in addr_in;
1466
1467         memset(&addr_in, 0, sizeof addr_in);
1468         addr_in.sin_family = af;
1469         return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1470 }
1471
1472 int rdma_listen(struct rdma_cm_id *id, int backlog)
1473 {
1474         struct rdma_id_private *id_priv;
1475         int ret;
1476
1477         id_priv = container_of(id, struct rdma_id_private, id);
1478         if (id_priv->state == CMA_IDLE) {
1479                 ret = cma_bind_any(id, AF_INET);
1480                 if (ret)
1481                         return ret;
1482         }
1483
1484         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1485                 return -EINVAL;
1486
1487         id_priv->backlog = backlog;
1488         if (id->device) {
1489                 switch (rdma_node_get_transport(id->device->node_type)) {
1490                 case RDMA_TRANSPORT_IB:
1491                         ret = cma_ib_listen(id_priv);
1492                         if (ret)
1493                                 goto err;
1494                         break;
1495                 case RDMA_TRANSPORT_IWARP:
1496                         ret = cma_iw_listen(id_priv, backlog);
1497                         if (ret)
1498                                 goto err;
1499                         break;
1500                 default:
1501                         ret = -ENOSYS;
1502                         goto err;
1503                 }
1504         } else
1505                 cma_listen_on_all(id_priv);
1506
1507         return 0;
1508 err:
1509         id_priv->backlog = 0;
1510         cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1511         return ret;
1512 }
1513 EXPORT_SYMBOL(rdma_listen);
1514
1515 void rdma_set_service_type(struct rdma_cm_id *id, int tos)
1516 {
1517         struct rdma_id_private *id_priv;
1518
1519         id_priv = container_of(id, struct rdma_id_private, id);
1520         id_priv->tos = (u8) tos;
1521 }
1522 EXPORT_SYMBOL(rdma_set_service_type);
1523
1524 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1525                               void *context)
1526 {
1527         struct cma_work *work = context;
1528         struct rdma_route *route;
1529
1530         route = &work->id->id.route;
1531
1532         if (!status) {
1533                 route->num_paths = 1;
1534                 *route->path_rec = *path_rec;
1535         } else {
1536                 work->old_state = CMA_ROUTE_QUERY;
1537                 work->new_state = CMA_ADDR_RESOLVED;
1538                 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1539                 work->event.status = status;
1540         }
1541
1542         queue_work(cma_wq, &work->work);
1543 }
1544
1545 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1546                               struct cma_work *work)
1547 {
1548         struct rdma_addr *addr = &id_priv->id.route.addr;
1549         struct ib_sa_path_rec path_rec;
1550         ib_sa_comp_mask comp_mask;
1551         struct sockaddr_in6 *sin6;
1552
1553         memset(&path_rec, 0, sizeof path_rec);
1554         ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
1555         ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
1556         path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
1557         path_rec.numb_path = 1;
1558         path_rec.reversible = 1;
1559         path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
1560
1561         comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1562                     IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1563                     IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1564
1565         if (addr->src_addr.sa_family == AF_INET) {
1566                 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1567                 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1568         } else {
1569                 sin6 = (struct sockaddr_in6 *) &addr->src_addr;
1570                 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1571                 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1572         }
1573
1574         id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1575                                                id_priv->id.port_num, &path_rec,
1576                                                comp_mask, timeout_ms,
1577                                                GFP_KERNEL, cma_query_handler,
1578                                                work, &id_priv->query);
1579
1580         return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1581 }
1582
1583 static void cma_work_handler(struct work_struct *_work)
1584 {
1585         struct cma_work *work = container_of(_work, struct cma_work, work);
1586         struct rdma_id_private *id_priv = work->id;
1587         int destroy = 0;
1588
1589         atomic_inc(&id_priv->dev_remove);
1590         if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1591                 goto out;
1592
1593         if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1594                 cma_exch(id_priv, CMA_DESTROYING);
1595                 destroy = 1;
1596         }
1597 out:
1598         cma_enable_remove(id_priv);
1599         cma_deref_id(id_priv);
1600         if (destroy)
1601                 rdma_destroy_id(&id_priv->id);
1602         kfree(work);
1603 }
1604
1605 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1606 {
1607         struct rdma_route *route = &id_priv->id.route;
1608         struct cma_work *work;
1609         int ret;
1610
1611         work = kzalloc(sizeof *work, GFP_KERNEL);
1612         if (!work)
1613                 return -ENOMEM;
1614
1615         work->id = id_priv;
1616         INIT_WORK(&work->work, cma_work_handler);
1617         work->old_state = CMA_ROUTE_QUERY;
1618         work->new_state = CMA_ROUTE_RESOLVED;
1619         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1620
1621         route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1622         if (!route->path_rec) {
1623                 ret = -ENOMEM;
1624                 goto err1;
1625         }
1626
1627         ret = cma_query_ib_route(id_priv, timeout_ms, work);
1628         if (ret)
1629                 goto err2;
1630
1631         return 0;
1632 err2:
1633         kfree(route->path_rec);
1634         route->path_rec = NULL;
1635 err1:
1636         kfree(work);
1637         return ret;
1638 }
1639
1640 int rdma_set_ib_paths(struct rdma_cm_id *id,
1641                       struct ib_sa_path_rec *path_rec, int num_paths)
1642 {
1643         struct rdma_id_private *id_priv;
1644         int ret;
1645
1646         id_priv = container_of(id, struct rdma_id_private, id);
1647         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1648                 return -EINVAL;
1649
1650         id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1651         if (!id->route.path_rec) {
1652                 ret = -ENOMEM;
1653                 goto err;
1654         }
1655
1656         memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1657         return 0;
1658 err:
1659         cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1660         return ret;
1661 }
1662 EXPORT_SYMBOL(rdma_set_ib_paths);
1663
1664 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1665 {
1666         struct cma_work *work;
1667
1668         work = kzalloc(sizeof *work, GFP_KERNEL);
1669         if (!work)
1670                 return -ENOMEM;
1671
1672         work->id = id_priv;
1673         INIT_WORK(&work->work, cma_work_handler);
1674         work->old_state = CMA_ROUTE_QUERY;
1675         work->new_state = CMA_ROUTE_RESOLVED;
1676         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1677         queue_work(cma_wq, &work->work);
1678         return 0;
1679 }
1680
1681 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1682 {
1683         struct rdma_id_private *id_priv;
1684         int ret;
1685
1686         id_priv = container_of(id, struct rdma_id_private, id);
1687         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1688                 return -EINVAL;
1689
1690         atomic_inc(&id_priv->refcount);
1691         switch (rdma_node_get_transport(id->device->node_type)) {
1692         case RDMA_TRANSPORT_IB:
1693                 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1694                 break;
1695         case RDMA_TRANSPORT_IWARP:
1696                 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1697                 break;
1698         default:
1699                 ret = -ENOSYS;
1700                 break;
1701         }
1702         if (ret)
1703                 goto err;
1704
1705         return 0;
1706 err:
1707         cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1708         cma_deref_id(id_priv);
1709         return ret;
1710 }
1711 EXPORT_SYMBOL(rdma_resolve_route);
1712
/*
 * Bind @id_priv to a local device and port without an address lookup.
 *
 * Scans every port of every device in dev_list and picks the first one
 * whose state is IB_PORT_ACTIVE.  If none is active, port 1 of the first
 * device in the list is used.  The cached GID and P_Key at index 0 of the
 * chosen port become the ID's source GID and P_Key, and the ID is attached
 * to the device.
 *
 * Returns 0 on success, -ENODEV if dev_list is empty, or the error from
 * the GID / P_Key cache lookup.  Runs entirely under the global 'lock'.
 */
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
        struct cma_device *cma_dev;
        struct ib_port_attr port_attr;
        union ib_gid gid;
        u16 pkey;
        int ret;
        u8 p;

        mutex_lock(&lock);
        if (list_empty(&dev_list)) {
                ret = -ENODEV;
                goto out;
        }
        /* Port numbers start at 1; a failed port query is simply skipped. */
        list_for_each_entry(cma_dev, &dev_list, list)
                for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
                        if (!ib_query_port(cma_dev->device, p, &port_attr) &&
                            port_attr.state == IB_PORT_ACTIVE)
                                goto port_found;

        /* No active port anywhere: fall back to port 1 of the first device. */
        p = 1;
        cma_dev = list_entry(dev_list.next, struct cma_device, list);

port_found:
        /* cma_dev and p now identify the chosen device and port. */
        ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
        if (ret)
                goto out;

        ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
        if (ret)
                goto out;

        ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
        id_priv->id.port_num = p;
        cma_attach_to_dev(id_priv, cma_dev);
out:
        mutex_unlock(&lock);
        return ret;
}
1753
1754 static void addr_handler(int status, struct sockaddr *src_addr,
1755                          struct rdma_dev_addr *dev_addr, void *context)
1756 {
1757         struct rdma_id_private *id_priv = context;
1758         struct rdma_cm_event event;
1759
1760         memset(&event, 0, sizeof event);
1761         atomic_inc(&id_priv->dev_remove);
1762
1763         /*
1764          * Grab mutex to block rdma_destroy_id() from removing the device while
1765          * we're trying to acquire it.
1766          */
1767         mutex_lock(&lock);
1768         if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1769                 mutex_unlock(&lock);
1770                 goto out;
1771         }
1772
1773         if (!status && !id_priv->cma_dev)
1774                 status = cma_acquire_dev(id_priv);
1775         mutex_unlock(&lock);
1776
1777         if (status) {
1778                 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1779                         goto out;
1780                 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1781                 event.status = status;
1782         } else {
1783                 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1784                        ip_addr_size(src_addr));
1785                 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1786         }
1787
1788         if (id_priv->id.event_handler(&id_priv->id, &event)) {
1789                 cma_exch(id_priv, CMA_DESTROYING);
1790                 cma_enable_remove(id_priv);
1791                 cma_deref_id(id_priv);
1792                 rdma_destroy_id(&id_priv->id);
1793                 return;
1794         }
1795 out:
1796         cma_enable_remove(id_priv);
1797         cma_deref_id(id_priv);
1798 }
1799
1800 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1801 {
1802         struct cma_work *work;
1803         struct sockaddr_in *src_in, *dst_in;
1804         union ib_gid gid;
1805         int ret;
1806
1807         work = kzalloc(sizeof *work, GFP_KERNEL);
1808         if (!work)
1809                 return -ENOMEM;
1810
1811         if (!id_priv->cma_dev) {
1812                 ret = cma_bind_loopback(id_priv);
1813                 if (ret)
1814                         goto err;
1815         }
1816
1817         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1818         ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1819
1820         if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1821                 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1822                 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1823                 src_in->sin_family = dst_in->sin_family;
1824                 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1825         }
1826
1827         work->id = id_priv;
1828         INIT_WORK(&work->work, cma_work_handler);
1829         work->old_state = CMA_ADDR_QUERY;
1830         work->new_state = CMA_ADDR_RESOLVED;
1831         work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1832         queue_work(cma_wq, &work->work);
1833         return 0;
1834 err:
1835         kfree(work);
1836         return ret;
1837 }
1838
1839 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1840                          struct sockaddr *dst_addr)
1841 {
1842         if (src_addr && src_addr->sa_family)
1843                 return rdma_bind_addr(id, src_addr);
1844         else
1845                 return cma_bind_any(id, dst_addr->sa_family);
1846 }
1847
1848 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1849                       struct sockaddr *dst_addr, int timeout_ms)
1850 {
1851         struct rdma_id_private *id_priv;
1852         int ret;
1853
1854         id_priv = container_of(id, struct rdma_id_private, id);
1855         if (id_priv->state == CMA_IDLE) {
1856                 ret = cma_bind_addr(id, src_addr, dst_addr);
1857                 if (ret)
1858                         return ret;
1859         }
1860
1861         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1862                 return -EINVAL;
1863
1864         atomic_inc(&id_priv->refcount);
1865         memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1866         if (cma_any_addr(dst_addr))
1867                 ret = cma_resolve_loopback(id_priv);
1868         else
1869                 ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1870                                       dst_addr, &id->route.addr.dev_addr,
1871                                       timeout_ms, addr_handler, id_priv);
1872         if (ret)
1873                 goto err;
1874
1875         return 0;
1876 err:
1877         cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1878         cma_deref_id(id_priv);
1879         return ret;
1880 }
1881 EXPORT_SYMBOL(rdma_resolve_addr);
1882
1883 static void cma_bind_port(struct rdma_bind_list *bind_list,
1884                           struct rdma_id_private *id_priv)
1885 {
1886         struct sockaddr_in *sin;
1887
1888         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1889         sin->sin_port = htons(bind_list->port);
1890         id_priv->bind_list = bind_list;
1891         hlist_add_head(&id_priv->node, &bind_list->owners);
1892 }
1893
1894 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1895                           unsigned short snum)
1896 {
1897         struct rdma_bind_list *bind_list;
1898         int port, ret;
1899
1900         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1901         if (!bind_list)
1902                 return -ENOMEM;
1903
1904         do {
1905                 ret = idr_get_new_above(ps, bind_list, snum, &port);
1906         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1907
1908         if (ret)
1909                 goto err1;
1910
1911         if (port != snum) {
1912                 ret = -EADDRNOTAVAIL;
1913                 goto err2;
1914         }
1915
1916         bind_list->ps = ps;
1917         bind_list->port = (unsigned short) port;
1918         cma_bind_port(bind_list, id_priv);
1919         return 0;
1920 err2:
1921         idr_remove(ps, port);
1922 err1:
1923         kfree(bind_list);
1924         return ret;
1925 }
1926
1927 static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
1928 {
1929         struct rdma_bind_list *bind_list;
1930         int port, ret, low, high;
1931
1932         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1933         if (!bind_list)
1934                 return -ENOMEM;
1935
1936 retry:
1937         /* FIXME: add proper port randomization per like inet_csk_get_port */
1938         do {
1939                 ret = idr_get_new_above(ps, bind_list, next_port, &port);
1940         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1941
1942         if (ret)
1943                 goto err1;
1944
1945         inet_get_local_port_range(&low, &high);
1946         if (port > high) {
1947                 if (next_port != low) {
1948                         idr_remove(ps, port);
1949                         next_port = low;
1950                         goto retry;
1951                 }
1952                 ret = -EADDRNOTAVAIL;
1953                 goto err2;
1954         }
1955
1956         if (port == high)
1957                 next_port = low;
1958         else
1959                 next_port = port + 1;
1960
1961         bind_list->ps = ps;
1962         bind_list->port = (unsigned short) port;
1963         cma_bind_port(bind_list, id_priv);
1964         return 0;
1965 err2:
1966         idr_remove(ps, port);
1967 err1:
1968         kfree(bind_list);
1969         return ret;
1970 }
1971
1972 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1973 {
1974         struct rdma_id_private *cur_id;
1975         struct sockaddr_in *sin, *cur_sin;
1976         struct rdma_bind_list *bind_list;
1977         struct hlist_node *node;
1978         unsigned short snum;
1979
1980         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1981         snum = ntohs(sin->sin_port);
1982         if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1983                 return -EACCES;
1984
1985         bind_list = idr_find(ps, snum);
1986         if (!bind_list)
1987                 return cma_alloc_port(ps, id_priv, snum);
1988
1989         /*
1990          * We don't support binding to any address if anyone is bound to
1991          * a specific address on the same port.
1992          */
1993         if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1994                 return -EADDRNOTAVAIL;
1995
1996         hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1997                 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1998                         return -EADDRNOTAVAIL;
1999
2000                 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
2001                 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
2002                         return -EADDRINUSE;
2003         }
2004
2005         cma_bind_port(bind_list, id_priv);
2006         return 0;
2007 }
2008
2009 static int cma_get_port(struct rdma_id_private *id_priv)
2010 {
2011         struct idr *ps;
2012         int ret;
2013
2014         switch (id_priv->id.ps) {
2015         case RDMA_PS_SDP:
2016                 ps = &sdp_ps;
2017                 break;
2018         case RDMA_PS_TCP:
2019                 ps = &tcp_ps;
2020                 break;
2021         case RDMA_PS_UDP:
2022                 ps = &udp_ps;
2023                 break;
2024         case RDMA_PS_IPOIB:
2025                 ps = &ipoib_ps;
2026                 break;
2027         default:
2028                 return -EPROTONOSUPPORT;
2029         }
2030
2031         mutex_lock(&lock);
2032         if (cma_any_port(&id_priv->id.route.addr.src_addr))
2033                 ret = cma_alloc_any_port(ps, id_priv);
2034         else
2035                 ret = cma_use_port(ps, id_priv);
2036         mutex_unlock(&lock);
2037
2038         return ret;
2039 }
2040
2041 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2042 {
2043         struct rdma_id_private *id_priv;
2044         int ret;
2045
2046         if (addr->sa_family != AF_INET)
2047                 return -EAFNOSUPPORT;
2048
2049         id_priv = container_of(id, struct rdma_id_private, id);
2050         if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2051                 return -EINVAL;
2052
2053         if (!cma_any_addr(addr)) {
2054                 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2055                 if (ret)
2056                         goto err1;
2057
2058                 mutex_lock(&lock);
2059                 ret = cma_acquire_dev(id_priv);
2060                 mutex_unlock(&lock);
2061                 if (ret)
2062                         goto err1;
2063         }
2064
2065         memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2066         ret = cma_get_port(id_priv);
2067         if (ret)
2068                 goto err2;
2069
2070         return 0;
2071 err2:
2072         if (!cma_any_addr(addr)) {
2073                 mutex_lock(&lock);
2074                 cma_detach_from_dev(id_priv);
2075                 mutex_unlock(&lock);
2076         }
2077 err1:
2078         cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2079         return ret;
2080 }
2081 EXPORT_SYMBOL(rdma_bind_addr);
2082
2083 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2084                           struct rdma_route *route)
2085 {
2086         struct sockaddr_in *src4, *dst4;
2087         struct cma_hdr *cma_hdr;
2088         struct sdp_hh *sdp_hdr;
2089
2090         src4 = (struct sockaddr_in *) &route->addr.src_addr;
2091         dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2092
2093         switch (ps) {
2094         case RDMA_PS_SDP:
2095                 sdp_hdr = hdr;
2096                 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2097                         return -EINVAL;
2098                 sdp_set_ip_ver(sdp_hdr, 4);
2099                 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2100                 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2101                 sdp_hdr->port = src4->sin_port;
2102                 break;
2103         default:
2104                 cma_hdr = hdr;
2105                 cma_hdr->cma_version = CMA_VERSION;
2106                 cma_set_ip_ver(cma_hdr, 4);
2107                 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2108                 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2109                 cma_hdr->port = src4->sin_port;
2110                 break;
2111         }
2112         return 0;
2113 }
2114
2115 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2116                                 struct ib_cm_event *ib_event)
2117 {
2118         struct rdma_id_private *id_priv = cm_id->context;
2119         struct rdma_cm_event event;
2120         struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2121         int ret = 0;
2122
2123         if (cma_disable_remove(id_priv, CMA_CONNECT))
2124                 return 0;
2125
2126         memset(&event, 0, sizeof event);
2127         switch (ib_event->event) {
2128         case IB_CM_SIDR_REQ_ERROR:
2129                 event.event = RDMA_CM_EVENT_UNREACHABLE;
2130                 event.status = -ETIMEDOUT;
2131                 break;
2132         case IB_CM_SIDR_REP_RECEIVED:
2133                 event.param.ud.private_data = ib_event->private_data;
2134                 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2135                 if (rep->status != IB_SIDR_SUCCESS) {
2136                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2137                         event.status = ib_event->param.sidr_rep_rcvd.status;
2138                         break;
2139                 }
2140                 if (id_priv->qkey != rep->qkey) {
2141                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2142                         event.status = -EINVAL;
2143                         break;
2144                 }
2145                 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2146                                      id_priv->id.route.path_rec,
2147                                      &event.param.ud.ah_attr);
2148                 event.param.ud.qp_num = rep->qpn;
2149                 event.param.ud.qkey = rep->qkey;
2150                 event.event = RDMA_CM_EVENT_ESTABLISHED;
2151                 event.status = 0;
2152                 break;
2153         default:
2154                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
2155                        ib_event->event);
2156                 goto out;
2157         }
2158
2159         ret = id_priv->id.event_handler(&id_priv->id, &event);
2160         if (ret) {
2161                 /* Destroy the CM ID by returning a non-zero value. */
2162                 id_priv->cm_id.ib = NULL;
2163                 cma_exch(id_priv, CMA_DESTROYING);
2164                 cma_enable_remove(id_priv);
2165                 rdma_destroy_id(&id_priv->id);
2166                 return ret;
2167         }
2168 out:
2169         cma_enable_remove(id_priv);
2170         return ret;
2171 }
2172
2173 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2174                               struct rdma_conn_param *conn_param)
2175 {
2176         struct ib_cm_sidr_req_param req;
2177         struct rdma_route *route;
2178         int ret;
2179
2180         req.private_data_len = sizeof(struct cma_hdr) +
2181                                conn_param->private_data_len;
2182         req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2183         if (!req.private_data)
2184                 return -ENOMEM;
2185
2186         if (conn_param->private_data && conn_param->private_data_len)
2187                 memcpy((void *) req.private_data + sizeof(struct cma_hdr),
2188                        conn_param->private_data, conn_param->private_data_len);
2189
2190         route = &id_priv->id.route;
2191         ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2192         if (ret)
2193                 goto out;
2194
2195         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2196                                             cma_sidr_rep_handler, id_priv);
2197         if (IS_ERR(id_priv->cm_id.ib)) {
2198                 ret = PTR_ERR(id_priv->cm_id.ib);
2199                 goto out;
2200         }
2201
2202         req.path = route->path_rec;
2203         req.service_id = cma_get_service_id(id_priv->id.ps,
2204                                             &route->addr.dst_addr);
2205         req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2206         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2207
2208         ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2209         if (ret) {
2210                 ib_destroy_cm_id(id_priv->cm_id.ib);
2211                 id_priv->cm_id.ib = NULL;
2212         }
2213 out:
2214         kfree(req.private_data);
2215         return ret;
2216 }
2217
2218 static int cma_connect_ib(struct rdma_id_private *id_priv,
2219                           struct rdma_conn_param *conn_param)
2220 {
2221         struct ib_cm_req_param req;
2222         struct rdma_route *route;
2223         void *private_data;
2224         int offset, ret;
2225
2226         memset(&req, 0, sizeof req);
2227         offset = cma_user_data_offset(id_priv->id.ps);
2228         req.private_data_len = offset + conn_param->private_data_len;
2229         private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2230         if (!private_data)
2231                 return -ENOMEM;
2232
2233         if (conn_param->private_data && conn_param->private_data_len)
2234                 memcpy(private_data + offset, conn_param->private_data,
2235                        conn_param->private_data_len);
2236
2237         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2238                                             id_priv);
2239         if (IS_ERR(id_priv->cm_id.ib)) {
2240                 ret = PTR_ERR(id_priv->cm_id.ib);
2241                 goto out;
2242         }
2243
2244         route = &id_priv->id.route;
2245         ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2246         if (ret)
2247                 goto out;
2248         req.private_data = private_data;
2249
2250         req.primary_path = &route->path_rec[0];
2251         if (route->num_paths == 2)
2252                 req.alternate_path = &route->path_rec[1];
2253
2254         req.service_id = cma_get_service_id(id_priv->id.ps,
2255                                             &route->addr.dst_addr);
2256         req.qp_num = id_priv->qp_num;
2257         req.qp_type = IB_QPT_RC;
2258         req.starting_psn = id_priv->seq_num;
2259         req.responder_resources = conn_param->responder_resources;
2260         req.initiator_depth = conn_param->initiator_depth;
2261         req.flow_control = conn_param->flow_control;
2262         req.retry_count = conn_param->retry_count;
2263         req.rnr_retry_count = conn_param->rnr_retry_count;
2264         req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2265         req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2266         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2267         req.srq = id_priv->srq ? 1 : 0;
2268
2269         ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2270 out:
2271         if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2272                 ib_destroy_cm_id(id_priv->cm_id.ib);
2273                 id_priv->cm_id.ib = NULL;
2274         }
2275
2276         kfree(private_data);
2277         return ret;
2278 }
2279
2280 static int cma_connect_iw(struct rdma_id_private *id_priv,
2281                           struct rdma_conn_param *conn_param)
2282 {
2283         struct iw_cm_id *cm_id;
2284         struct sockaddr_in* sin;
2285         int ret;
2286         struct iw_cm_conn_param iw_param;
2287
2288         cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
2289         if (IS_ERR(cm_id)) {
2290                 ret = PTR_ERR(cm_id);
2291                 goto out;
2292         }
2293
2294         id_priv->cm_id.iw = cm_id;
2295
2296         sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2297         cm_id->local_addr = *sin;
2298
2299         sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2300         cm_id->remote_addr = *sin;
2301
2302         ret = cma_modify_qp_rtr(id_priv, conn_param);
2303         if (ret)
2304                 goto out;
2305
2306         iw_param.ord = conn_param->initiator_depth;
2307         iw_param.ird = conn_param->responder_resources;
2308         iw_param.private_data = conn_param->private_data;
2309         iw_param.private_data_len = conn_param->private_data_len;
2310         if (id_priv->id.qp)
2311                 iw_param.qpn = id_priv->qp_num;
2312         else
2313                 iw_param.qpn = conn_param->qp_num;
2314         ret = iw_cm_connect(cm_id, &iw_param);
2315 out:
2316         if (ret && !IS_ERR(cm_id)) {
2317                 iw_destroy_cm_id(cm_id);
2318                 id_priv->cm_id.iw = NULL;
2319         }
2320         return ret;
2321 }
2322
2323 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2324 {
2325         struct rdma_id_private *id_priv;
2326         int ret;
2327
2328         id_priv = container_of(id, struct rdma_id_private, id);
2329         if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2330                 return -EINVAL;
2331
2332         if (!id->qp) {
2333                 id_priv->qp_num = conn_param->qp_num;
2334                 id_priv->srq = conn_param->srq;
2335         }
2336
2337         switch (rdma_node_get_transport(id->device->node_type)) {
2338         case RDMA_TRANSPORT_IB:
2339                 if (cma_is_ud_ps(id->ps))
2340                         ret = cma_resolve_ib_udp(id_priv, conn_param);
2341                 else
2342                         ret = cma_connect_ib(id_priv, conn_param);
2343                 break;
2344         case RDMA_TRANSPORT_IWARP:
2345                 ret = cma_connect_iw(id_priv, conn_param);
2346                 break;
2347         default:
2348                 ret = -ENOSYS;
2349                 break;
2350         }
2351         if (ret)
2352                 goto err;
2353
2354         return 0;
2355 err:
2356         cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2357         return ret;
2358 }
2359 EXPORT_SYMBOL(rdma_connect);
2360
2361 static int cma_accept_ib(struct rdma_id_private *id_priv,
2362                          struct rdma_conn_param *conn_param)
2363 {
2364         struct ib_cm_rep_param rep;
2365         int ret;
2366
2367         ret = cma_modify_qp_rtr(id_priv, conn_param);
2368         if (ret)
2369                 goto out;
2370
2371         ret = cma_modify_qp_rts(id_priv, conn_param);
2372         if (ret)
2373                 goto out;
2374
2375         memset(&rep, 0, sizeof rep);
2376         rep.qp_num = id_priv->qp_num;
2377         rep.starting_psn = id_priv->seq_num;
2378         rep.private_data = conn_param->private_data;
2379         rep.private_data_len = conn_param->private_data_len;
2380         rep.responder_resources = conn_param->responder_resources;
2381         rep.initiator_depth = conn_param->initiator_depth;
2382         rep.failover_accepted = 0;
2383         rep.flow_control = conn_param->flow_control;
2384         rep.rnr_retry_count = conn_param->rnr_retry_count;
2385         rep.srq = id_priv->srq ? 1 : 0;
2386
2387         ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2388 out:
2389         return ret;
2390 }
2391
2392 static int cma_accept_iw(struct rdma_id_private *id_priv,
2393                   struct rdma_conn_param *conn_param)
2394 {
2395         struct iw_cm_conn_param iw_param;
2396         int ret;
2397
2398         ret = cma_modify_qp_rtr(id_priv, conn_param);
2399         if (ret)
2400                 return ret;
2401
2402         iw_param.ord = conn_param->initiator_depth;
2403         iw_param.ird = conn_param->responder_resources;
2404         iw_param.private_data = conn_param->private_data;
2405         iw_param.private_data_len = conn_param->private_data_len;
2406         if (id_priv->id.qp) {
2407                 iw_param.qpn = id_priv->qp_num;
2408         } else
2409                 iw_param.qpn = conn_param->qp_num;
2410
2411         return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2412 }
2413
2414 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2415                              enum ib_cm_sidr_status status,
2416                              const void *private_data, int private_data_len)
2417 {
2418         struct ib_cm_sidr_rep_param rep;
2419
2420         memset(&rep, 0, sizeof rep);
2421         rep.status = status;
2422         if (status == IB_SIDR_SUCCESS) {
2423                 rep.qp_num = id_priv->qp_num;
2424                 rep.qkey = id_priv->qkey;
2425         }
2426         rep.private_data = private_data;
2427         rep.private_data_len = private_data_len;
2428
2429         return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2430 }
2431
2432 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2433 {
2434         struct rdma_id_private *id_priv;
2435         int ret;
2436
2437         id_priv = container_of(id, struct rdma_id_private, id);
2438         if (!cma_comp(id_priv, CMA_CONNECT))
2439                 return -EINVAL;
2440
2441         if (!id->qp && conn_param) {
2442                 id_priv->qp_num = conn_param->qp_num;
2443                 id_priv->srq = conn_param->srq;
2444         }
2445
2446         switch (rdma_node_get_transport(id->device->node_type)) {
2447         case RDMA_TRANSPORT_IB:
2448                 if (cma_is_ud_ps(id->ps))
2449                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2450                                                 conn_param->private_data,
2451                                                 conn_param->private_data_len);
2452                 else if (conn_param)
2453                         ret = cma_accept_ib(id_priv, conn_param);
2454                 else
2455                         ret = cma_rep_recv(id_priv);
2456                 break;
2457         case RDMA_TRANSPORT_IWARP:
2458                 ret = cma_accept_iw(id_priv, conn_param);
2459                 break;
2460         default:
2461                 ret = -ENOSYS;
2462                 break;
2463         }
2464
2465         if (ret)
2466                 goto reject;
2467
2468         return 0;
2469 reject:
2470         cma_modify_qp_err(id_priv);
2471         rdma_reject(id, NULL, 0);
2472         return ret;
2473 }
2474 EXPORT_SYMBOL(rdma_accept);
2475
2476 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2477 {
2478         struct rdma_id_private *id_priv;
2479         int ret;
2480
2481         id_priv = container_of(id, struct rdma_id_private, id);
2482         if (!cma_has_cm_dev(id_priv))
2483                 return -EINVAL;
2484
2485         switch (id->device->node_type) {
2486         case RDMA_NODE_IB_CA:
2487                 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2488                 break;
2489         default:
2490                 ret = 0;
2491                 break;
2492         }
2493         return ret;
2494 }
2495 EXPORT_SYMBOL(rdma_notify);
2496
2497 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2498                 u8 private_data_len)
2499 {
2500         struct rdma_id_private *id_priv;
2501         int ret;
2502
2503         id_priv = container_of(id, struct rdma_id_private, id);
2504         if (!cma_has_cm_dev(id_priv))
2505                 return -EINVAL;
2506
2507         switch (rdma_node_get_transport(id->device->node_type)) {
2508         case RDMA_TRANSPORT_IB:
2509                 if (cma_is_ud_ps(id->ps))
2510                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2511                                                 private_data, private_data_len);
2512                 else
2513                         ret = ib_send_cm_rej(id_priv->cm_id.ib,
2514                                              IB_CM_REJ_CONSUMER_DEFINED, NULL,
2515                                              0, private_data, private_data_len);
2516                 break;
2517         case RDMA_TRANSPORT_IWARP:
2518                 ret = iw_cm_reject(id_priv->cm_id.iw,
2519                                    private_data, private_data_len);
2520                 break;
2521         default:
2522                 ret = -ENOSYS;
2523                 break;
2524         }
2525         return ret;
2526 }
2527 EXPORT_SYMBOL(rdma_reject);
2528
2529 int rdma_disconnect(struct rdma_cm_id *id)
2530 {
2531         struct rdma_id_private *id_priv;
2532         int ret;
2533
2534         id_priv = container_of(id, struct rdma_id_private, id);
2535         if (!cma_has_cm_dev(id_priv))
2536                 return -EINVAL;
2537
2538         switch (rdma_node_get_transport(id->device->node_type)) {
2539         case RDMA_TRANSPORT_IB:
2540                 ret = cma_modify_qp_err(id_priv);
2541                 if (ret)
2542                         goto out;
2543                 /* Initiate or respond to a disconnect. */
2544                 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2545                         ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2546                 break;
2547         case RDMA_TRANSPORT_IWARP:
2548                 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2549                 break;
2550         default:
2551                 ret = -EINVAL;
2552                 break;
2553         }
2554 out:
2555         return ret;
2556 }
2557 EXPORT_SYMBOL(rdma_disconnect);
2558
2559 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2560 {
2561         struct rdma_id_private *id_priv;
2562         struct cma_multicast *mc = multicast->context;
2563         struct rdma_cm_event event;
2564         int ret;
2565
2566         id_priv = mc->id_priv;
2567         if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
2568             cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2569                 return 0;
2570
2571         mutex_lock(&id_priv->qp_mutex);
2572         if (!status && id_priv->id.qp)
2573                 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2574                                          multicast->rec.mlid);
2575         mutex_unlock(&id_priv->qp_mutex);
2576
2577         memset(&event, 0, sizeof event);
2578         event.status = status;
2579         event.param.ud.private_data = mc->context;
2580         if (!status) {
2581                 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2582                 ib_init_ah_from_mcmember(id_priv->id.device,
2583                                          id_priv->id.port_num, &multicast->rec,
2584                                          &event.param.ud.ah_attr);
2585                 event.param.ud.qp_num = 0xFFFFFF;
2586                 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2587         } else
2588                 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2589
2590         ret = id_priv->id.event_handler(&id_priv->id, &event);
2591         if (ret) {
2592                 cma_exch(id_priv, CMA_DESTROYING);
2593                 cma_enable_remove(id_priv);
2594                 rdma_destroy_id(&id_priv->id);
2595                 return 0;
2596         }
2597
2598         cma_enable_remove(id_priv);
2599         return 0;
2600 }
2601
2602 static void cma_set_mgid(struct rdma_id_private *id_priv,
2603                          struct sockaddr *addr, union ib_gid *mgid)
2604 {
2605         unsigned char mc_map[MAX_ADDR_LEN];
2606         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2607         struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2608         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2609
2610         if (cma_any_addr(addr)) {
2611                 memset(mgid, 0, sizeof *mgid);
2612         } else if ((addr->sa_family == AF_INET6) &&
2613                    ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2614                                                                  0xFF10A01B)) {
2615                 /* IPv6 address is an SA assigned MGID. */
2616                 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2617         } else {
2618                 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
2619                 if (id_priv->id.ps == RDMA_PS_UDP)
2620                         mc_map[7] = 0x01;       /* Use RDMA CM signature */
2621                 *mgid = *(union ib_gid *) (mc_map + 4);
2622         }
2623 }
2624
2625 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2626                                  struct cma_multicast *mc)
2627 {
2628         struct ib_sa_mcmember_rec rec;
2629         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2630         ib_sa_comp_mask comp_mask;
2631         int ret;
2632
2633         ib_addr_get_mgid(dev_addr, &rec.mgid);
2634         ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2635                                      &rec.mgid, &rec);
2636         if (ret)
2637                 return ret;
2638
2639         cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2640         if (id_priv->id.ps == RDMA_PS_UDP)
2641                 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2642         ib_addr_get_sgid(dev_addr, &rec.port_gid);
2643         rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2644         rec.join_state = 1;
2645
2646         comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2647                     IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2648                     IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2649                     IB_SA_MCMEMBER_REC_FLOW_LABEL |
2650                     IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2651
2652         mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2653                                                 id_priv->id.port_num, &rec,
2654                                                 comp_mask, GFP_KERNEL,
2655                                                 cma_ib_mc_handler, mc);
2656         if (IS_ERR(mc->multicast.ib))
2657                 return PTR_ERR(mc->multicast.ib);
2658
2659         return 0;
2660 }
2661
2662 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2663                         void *context)
2664 {
2665         struct rdma_id_private *id_priv;
2666         struct cma_multicast *mc;
2667         int ret;
2668
2669         id_priv = container_of(id, struct rdma_id_private, id);
2670         if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2671             !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2672                 return -EINVAL;
2673
2674         mc = kmalloc(sizeof *mc, GFP_KERNEL);
2675         if (!mc)
2676                 return -ENOMEM;
2677
2678         memcpy(&mc->addr, addr, ip_addr_size(addr));
2679         mc->context = context;
2680         mc->id_priv = id_priv;
2681
2682         spin_lock(&id_priv->lock);
2683         list_add(&mc->list, &id_priv->mc_list);
2684         spin_unlock(&id_priv->lock);
2685
2686         switch (rdma_node_get_transport(id->device->node_type)) {
2687         case RDMA_TRANSPORT_IB:
2688                 ret = cma_join_ib_multicast(id_priv, mc);
2689                 break;
2690         default:
2691                 ret = -ENOSYS;
2692                 break;
2693         }
2694
2695         if (ret) {
2696                 spin_lock_irq(&id_priv->lock);
2697                 list_del(&mc->list);
2698                 spin_unlock_irq(&id_priv->lock);
2699                 kfree(mc);
2700         }
2701         return ret;
2702 }
2703 EXPORT_SYMBOL(rdma_join_multicast);
2704
2705 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2706 {
2707         struct rdma_id_private *id_priv;
2708         struct cma_multicast *mc;
2709
2710         id_priv = container_of(id, struct rdma_id_private, id);
2711         spin_lock_irq(&id_priv->lock);
2712         list_for_each_entry(mc, &id_priv->mc_list, list) {
2713                 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2714                         list_del(&mc->list);
2715                         spin_unlock_irq(&id_priv->lock);
2716
2717                         if (id->qp)
2718                                 ib_detach_mcast(id->qp,
2719                                                 &mc->multicast.ib->rec.mgid,
2720                                                 mc->multicast.ib->rec.mlid);
2721                         ib_sa_free_multicast(mc->multicast.ib);
2722                         kfree(mc);
2723                         return;
2724                 }
2725         }
2726         spin_unlock_irq(&id_priv->lock);
2727 }
2728 EXPORT_SYMBOL(rdma_leave_multicast);
2729
2730 static void cma_add_one(struct ib_device *device)
2731 {
2732         struct cma_device *cma_dev;
2733         struct rdma_id_private *id_priv;
2734
2735         cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
2736         if (!cma_dev)
2737                 return;
2738
2739         cma_dev->device = device;
2740
2741         init_completion(&cma_dev->comp);
2742         atomic_set(&cma_dev->refcount, 1);
2743         INIT_LIST_HEAD(&cma_dev->id_list);
2744         ib_set_client_data(device, &cma_client, cma_dev);
2745
2746         mutex_lock(&lock);
2747         list_add_tail(&cma_dev->list, &dev_list);
2748         list_for_each_entry(id_priv, &listen_any_list, list)
2749                 cma_listen_on_dev(id_priv, cma_dev);
2750         mutex_unlock(&lock);
2751 }
2752
2753 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2754 {
2755         struct rdma_cm_event event;
2756         enum cma_state state;
2757
2758         /* Record that we want to remove the device */
2759         state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2760         if (state == CMA_DESTROYING)
2761                 return 0;
2762
2763         cma_cancel_operation(id_priv, state);
2764         wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
2765
2766         /* Check for destruction from another callback. */
2767         if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2768                 return 0;
2769
2770         memset(&event, 0, sizeof event);
2771         event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2772         return id_priv->id.event_handler(&id_priv->id, &event);
2773 }
2774
2775 static void cma_process_remove(struct cma_device *cma_dev)
2776 {
2777         struct rdma_id_private *id_priv;
2778         int ret;
2779
2780         mutex_lock(&lock);
2781         while (!list_empty(&cma_dev->id_list)) {
2782                 id_priv = list_entry(cma_dev->id_list.next,
2783                                      struct rdma_id_private, list);
2784
2785                 list_del(&id_priv->listen_list);
2786                 list_del_init(&id_priv->list);
2787                 atomic_inc(&id_priv->refcount);
2788                 mutex_unlock(&lock);
2789
2790                 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
2791                 cma_deref_id(id_priv);
2792                 if (ret)
2793                         rdma_destroy_id(&id_priv->id);
2794
2795                 mutex_lock(&lock);
2796         }
2797         mutex_unlock(&lock);
2798
2799         cma_deref_dev(cma_dev);
2800         wait_for_completion(&cma_dev->comp);
2801 }
2802
2803 static void cma_remove_one(struct ib_device *device)
2804 {
2805         struct cma_device *cma_dev;
2806
2807         cma_dev = ib_get_client_data(device, &cma_client);
2808         if (!cma_dev)
2809                 return;
2810
2811         mutex_lock(&lock);
2812         list_del(&cma_dev->list);
2813         mutex_unlock(&lock);
2814
2815         cma_process_remove(cma_dev);
2816         kfree(cma_dev);
2817 }
2818
2819 static int cma_init(void)
2820 {
2821         int ret, low, high, remaining;
2822
2823         get_random_bytes(&next_port, sizeof next_port);
2824         inet_get_local_port_range(&low, &high);
2825         remaining = (high - low) + 1;
2826         next_port = ((unsigned int) next_port % remaining) + low;
2827
2828         cma_wq = create_singlethread_workqueue("rdma_cm");
2829         if (!cma_wq)
2830                 return -ENOMEM;
2831
2832         ib_sa_register_client(&sa_client);
2833         rdma_addr_register_client(&addr_client);
2834
2835         ret = ib_register_client(&cma_client);
2836         if (ret)
2837                 goto err;
2838         return 0;
2839
2840 err:
2841         rdma_addr_unregister_client(&addr_client);
2842         ib_sa_unregister_client(&sa_client);
2843         destroy_workqueue(cma_wq);
2844         return ret;
2845 }
2846
2847 static void cma_cleanup(void)
2848 {
2849         ib_unregister_client(&cma_client);
2850         rdma_addr_unregister_client(&addr_client);
2851         ib_sa_unregister_client(&sa_client);
2852         destroy_workqueue(cma_wq);
2853         idr_destroy(&sdp_ps);
2854         idr_destroy(&tcp_ps);
2855         idr_destroy(&udp_ps);
2856         idr_destroy(&ipoib_ps);
2857 }
2858
/* Module entry points: cma_init() on load, cma_cleanup() on unload. */
2859 module_init(cma_init);
2860 module_exit(cma_cleanup);