Merge git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
[linux-2.6] / drivers / infiniband / core / cma.c
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31
32 #include <linux/completion.h>
33 #include <linux/in.h>
34 #include <linux/in6.h>
35 #include <linux/mutex.h>
36 #include <linux/random.h>
37 #include <linux/idr.h>
38
39 #include <net/tcp.h>
40
41 #include <rdma/rdma_cm.h>
42 #include <rdma/rdma_cm_ib.h>
43 #include <rdma/ib_cache.h>
44 #include <rdma/ib_cm.h>
45 #include <rdma/ib_sa.h>
46
/* Module metadata. */
47 MODULE_AUTHOR("Sean Hefty");
48 MODULE_DESCRIPTION("Generic RDMA CM Agent");
49 MODULE_LICENSE("Dual BSD/GPL");
50
/*
 * IB CM tunables.  NOTE(review): neither constant is used in the part of
 * the file reviewed here; units/encoding of the timeout should be confirmed
 * against the code that builds the CM request parameters.
 */
51 #define CMA_CM_RESPONSE_TIMEOUT 20
52 #define CMA_MAX_CM_RETRIES 15
53
/* IB client callbacks; declared early so cma_client can reference them. */
54 static void cma_add_one(struct ib_device *device);
55 static void cma_remove_one(struct ib_device *device);
56
/* IB client descriptor naming this module "cma" and its add/remove hooks. */
57 static struct ib_client cma_client = {
58         .name   = "cma",
59         .add    = cma_add_one,
60         .remove = cma_remove_one
61 };
62
63 static LIST_HEAD(dev_list);             /* all struct cma_device, linked via ->list */
64 static LIST_HEAD(listen_any_list);      /* NOTE(review): presumably wildcard listeners; confirm */
65 static DEFINE_MUTEX(lock);              /* held around dev_list walks, id detach and port release */
66 static struct workqueue_struct *cma_wq;
67 static DEFINE_IDR(sdp_ps);              /* NOTE(review): presumably SDP port space bindings; confirm */
68 static DEFINE_IDR(tcp_ps);              /* NOTE(review): presumably TCP port space bindings; confirm */
69
/* Per-device state: one entry on dev_list for each IB device in use. */
70 struct cma_device {
71         struct list_head        list;           /* link in dev_list */
72         struct ib_device        *device;        /* underlying IB device */
73         __be64                  node_guid;
74         struct completion       comp;           /* completed when refcount drops to zero */
75         atomic_t                refcount;       /* incremented for each attached id */
76         struct list_head        id_list;        /* attached rdma_id_private, via their ->list */
77 };
78
/*
 * State of an rdma_id_private.  Read and changed under id_priv->lock through
 * cma_comp(), cma_comp_exch() and cma_exch().
 */
79 enum cma_state {
80         CMA_IDLE,               /* initial state set by rdma_create_id() */
81         CMA_ADDR_QUERY,
82         CMA_ADDR_RESOLVED,
83         CMA_ROUTE_QUERY,
84         CMA_ROUTE_RESOLVED,
85         CMA_CONNECT,            /* required for cma_ib_handler() to process events */
86         CMA_DISCONNECT,         /* entered from CMA_CONNECT on DREQ/DREP events */
87         CMA_ADDR_BOUND,
88         CMA_LISTEN,             /* required for cma_req_handler() to accept a request */
89         CMA_DEVICE_REMOVAL,
90         CMA_DESTROYING          /* set once destruction of the id has begun */
91 };
92
/* A bound port and the ids sharing it; freed when the last owner leaves. */
93 struct rdma_bind_list {
94         struct idr              *ps;            /* idr this entry is removed from on release */
95         struct hlist_head       owners;         /* rdma_id_private entries, via their ->node */
96         unsigned short          port;           /* key used with idr_remove() on ->ps */
97 };
98
99 /*
100  * Device removal can occur at any time, so we need extra handling to
101  * serialize notifying the user of device removal with other callbacks.
102  * We do this by disabling removal notification while a callback is in
103  * progress, and reporting it after the callback completes.
104  */
105 struct rdma_id_private {
106         struct rdma_cm_id       id;             /* public id; container_of() recovers this struct */
107
108         struct rdma_bind_list   *bind_list;     /* port binding, NULL if none */
109         struct hlist_node       node;           /* link in bind_list->owners */
110         struct list_head        list;           /* link in cma_dev->id_list once attached */
111         struct list_head        listen_list;    /* list walked by cma_cancel_listens() */
112         struct cma_device       *cma_dev;       /* attached device, NULL when detached */
113
114         enum cma_state          state;          /* accessed under ->lock */
115         spinlock_t              lock;
116         struct completion       comp;           /* completed when refcount drops to zero */
117         atomic_t                refcount;       /* starts at 1 in rdma_create_id() */
118         wait_queue_head_t       wait_remove;    /* woken when dev_remove drops to zero */
119         atomic_t                dev_remove;     /* incremented while an event handler runs */
120
121         int                     backlog;
122         int                     timeout_ms;
123         struct ib_sa_query      *query;         /* outstanding SA query, if any */
124         int                     query_id;       /* id passed to ib_sa_cancel_query() */
125         union {
126                 struct ib_cm_id *ib;
127         } cm_id;
128
129         u32                     seq_num;        /* random at creation; used as rq_psn for RTR */
130         u32                     qp_num;         /* copied from the QP in rdma_create_qp() */
131         enum ib_qp_type         qp_type;        /* copied from the QP in rdma_create_qp() */
132         u8                      srq;            /* non-zero if the QP has an SRQ */
133 };
134
/*
 * Deferred work item for an id.
 * NOTE(review): not used in the part of the file reviewed here; presumably
 * moves the id from old_state to new_state and reports event - confirm.
 */
135 struct cma_work {
136         struct work_struct      work;
137         struct rdma_id_private  *id;
138         enum cma_state          old_state;
139         enum cma_state          new_state;
140         struct rdma_cm_event    event;
141 };
142
/*
 * 16-byte IP address slot: a full IPv6 address, or an IPv4 address stored
 * in the last 32 bits after three words of padding.
 */
143 union cma_ip_addr {
144         struct in6_addr ip6;
145         struct {
146                 __u32 pad[3];
147                 __u32 addr;
148         } ip4;
149 };
150
/*
 * Header at the start of the CM private data for port spaces other than SDP
 * (see cma_get_net_info() and cma_user_data_offset()).
 */
151 struct cma_hdr {
152         u8 cma_version;         /* must equal CMA_VERSION */
153         u8 ip_version;  /* IP version: 7:4 */
154         __u16 port;
155         union cma_ip_addr src_addr;
156         union cma_ip_addr dst_addr;
157 };
158
/* Private-data header parsed by cma_get_net_info() for RDMA_PS_SDP requests. */
159 struct sdp_hh {
160         u8 bsdh[16];
161         u8 sdp_version; /* Major version: 7:4 */
162         u8 ip_version;  /* IP version: 7:4 */
163         u8 sdp_specific1[10];
164         __u16 port;
165         __u16 sdp_specific2;
166         union cma_ip_addr src_addr;
167         union cma_ip_addr dst_addr;
168 };
169
/* Leading part of SDP reply private data; version checked in cma_verify_rep(). */
170 struct sdp_hah {
171         u8 bsdh[16];
172         u8 sdp_version;         /* major version read via sdp_get_majv() */
173 };
174
175 #define CMA_VERSION 0x00        /* only cma_hdr.cma_version value accepted */
176 #define SDP_MAJ_VERSION 0x2     /* only SDP major version accepted */
177
178 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
179 {
180         unsigned long flags;
181         int ret;
182
183         spin_lock_irqsave(&id_priv->lock, flags);
184         ret = (id_priv->state == comp);
185         spin_unlock_irqrestore(&id_priv->lock, flags);
186         return ret;
187 }
188
189 static int cma_comp_exch(struct rdma_id_private *id_priv,
190                          enum cma_state comp, enum cma_state exch)
191 {
192         unsigned long flags;
193         int ret;
194
195         spin_lock_irqsave(&id_priv->lock, flags);
196         if ((ret = (id_priv->state == comp)))
197                 id_priv->state = exch;
198         spin_unlock_irqrestore(&id_priv->lock, flags);
199         return ret;
200 }
201
202 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
203                                enum cma_state exch)
204 {
205         unsigned long flags;
206         enum cma_state old;
207
208         spin_lock_irqsave(&id_priv->lock, flags);
209         old = id_priv->state;
210         id_priv->state = exch;
211         spin_unlock_irqrestore(&id_priv->lock, flags);
212         return old;
213 }
214
215 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
216 {
217         return hdr->ip_version >> 4;
218 }
219
220 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
221 {
222         hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
223 }
224
225 static inline u8 sdp_get_majv(u8 sdp_version)
226 {
227         return sdp_version >> 4;
228 }
229
230 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
231 {
232         return hh->ip_version >> 4;
233 }
234
235 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
236 {
237         hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
238 }
239
240 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
241                               struct cma_device *cma_dev)
242 {
243         atomic_inc(&cma_dev->refcount);
244         id_priv->cma_dev = cma_dev;
245         id_priv->id.device = cma_dev->device;
246         list_add_tail(&id_priv->list, &cma_dev->id_list);
247 }
248
249 static inline void cma_deref_dev(struct cma_device *cma_dev)
250 {
251         if (atomic_dec_and_test(&cma_dev->refcount))
252                 complete(&cma_dev->comp);
253 }
254
255 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
256 {
257         list_del(&id_priv->list);
258         cma_deref_dev(id_priv->cma_dev);
259         id_priv->cma_dev = NULL;
260 }
261
262 static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
263 {
264         struct cma_device *cma_dev;
265         union ib_gid gid;
266         int ret = -ENODEV;
267
268         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
269
270         mutex_lock(&lock);
271         list_for_each_entry(cma_dev, &dev_list, list) {
272                 ret = ib_find_cached_gid(cma_dev->device, &gid,
273                                          &id_priv->id.port_num, NULL);
274                 if (!ret) {
275                         cma_attach_to_dev(id_priv, cma_dev);
276                         break;
277                 }
278         }
279         mutex_unlock(&lock);
280         return ret;
281 }
282
283 static int cma_acquire_dev(struct rdma_id_private *id_priv)
284 {
285         switch (id_priv->id.route.addr.dev_addr.dev_type) {
286         case IB_NODE_CA:
287                 return cma_acquire_ib_dev(id_priv);
288         default:
289                 return -ENODEV;
290         }
291 }
292
293 static void cma_deref_id(struct rdma_id_private *id_priv)
294 {
295         if (atomic_dec_and_test(&id_priv->refcount))
296                 complete(&id_priv->comp);
297 }
298
299 static void cma_release_remove(struct rdma_id_private *id_priv)
300 {
301         if (atomic_dec_and_test(&id_priv->dev_remove))
302                 wake_up(&id_priv->wait_remove);
303 }
304
305 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
306                                   void *context, enum rdma_port_space ps)
307 {
308         struct rdma_id_private *id_priv;
309
310         id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
311         if (!id_priv)
312                 return ERR_PTR(-ENOMEM);
313
314         id_priv->state = CMA_IDLE;
315         id_priv->id.context = context;
316         id_priv->id.event_handler = event_handler;
317         id_priv->id.ps = ps;
318         spin_lock_init(&id_priv->lock);
319         init_completion(&id_priv->comp);
320         atomic_set(&id_priv->refcount, 1);
321         init_waitqueue_head(&id_priv->wait_remove);
322         atomic_set(&id_priv->dev_remove, 0);
323         INIT_LIST_HEAD(&id_priv->listen_list);
324         get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
325
326         return &id_priv->id;
327 }
328 EXPORT_SYMBOL(rdma_create_id);
329
330 static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
331 {
332         struct ib_qp_attr qp_attr;
333         struct rdma_dev_addr *dev_addr;
334         int ret;
335
336         dev_addr = &id_priv->id.route.addr.dev_addr;
337         ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
338                                   ib_addr_get_pkey(dev_addr),
339                                   &qp_attr.pkey_index);
340         if (ret)
341                 return ret;
342
343         qp_attr.qp_state = IB_QPS_INIT;
344         qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
345         qp_attr.port_num = id_priv->id.port_num;
346         return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
347                                           IB_QP_PKEY_INDEX | IB_QP_PORT);
348 }
349
350 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
351                    struct ib_qp_init_attr *qp_init_attr)
352 {
353         struct rdma_id_private *id_priv;
354         struct ib_qp *qp;
355         int ret;
356
357         id_priv = container_of(id, struct rdma_id_private, id);
358         if (id->device != pd->device)
359                 return -EINVAL;
360
361         qp = ib_create_qp(pd, qp_init_attr);
362         if (IS_ERR(qp))
363                 return PTR_ERR(qp);
364
365         switch (id->device->node_type) {
366         case IB_NODE_CA:
367                 ret = cma_init_ib_qp(id_priv, qp);
368                 break;
369         default:
370                 ret = -ENOSYS;
371                 break;
372         }
373
374         if (ret)
375                 goto err;
376
377         id->qp = qp;
378         id_priv->qp_num = qp->qp_num;
379         id_priv->qp_type = qp->qp_type;
380         id_priv->srq = (qp->srq != NULL);
381         return 0;
382 err:
383         ib_destroy_qp(qp);
384         return ret;
385 }
386 EXPORT_SYMBOL(rdma_create_qp);
387
388 void rdma_destroy_qp(struct rdma_cm_id *id)
389 {
390         ib_destroy_qp(id->qp);
391 }
392 EXPORT_SYMBOL(rdma_destroy_qp);
393
394 static int cma_modify_qp_rtr(struct rdma_cm_id *id)
395 {
396         struct ib_qp_attr qp_attr;
397         int qp_attr_mask, ret;
398
399         if (!id->qp)
400                 return 0;
401
402         /* Need to update QP attributes from default values. */
403         qp_attr.qp_state = IB_QPS_INIT;
404         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
405         if (ret)
406                 return ret;
407
408         ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
409         if (ret)
410                 return ret;
411
412         qp_attr.qp_state = IB_QPS_RTR;
413         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
414         if (ret)
415                 return ret;
416
417         return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
418 }
419
420 static int cma_modify_qp_rts(struct rdma_cm_id *id)
421 {
422         struct ib_qp_attr qp_attr;
423         int qp_attr_mask, ret;
424
425         if (!id->qp)
426                 return 0;
427
428         qp_attr.qp_state = IB_QPS_RTS;
429         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
430         if (ret)
431                 return ret;
432
433         return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
434 }
435
436 static int cma_modify_qp_err(struct rdma_cm_id *id)
437 {
438         struct ib_qp_attr qp_attr;
439
440         if (!id->qp)
441                 return 0;
442
443         qp_attr.qp_state = IB_QPS_ERR;
444         return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
445 }
446
447 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
448                        int *qp_attr_mask)
449 {
450         struct rdma_id_private *id_priv;
451         int ret;
452
453         id_priv = container_of(id, struct rdma_id_private, id);
454         switch (id_priv->id.device->node_type) {
455         case IB_NODE_CA:
456                 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
457                                          qp_attr_mask);
458                 if (qp_attr->qp_state == IB_QPS_RTR)
459                         qp_attr->rq_psn = id_priv->seq_num;
460                 break;
461         default:
462                 ret = -ENOSYS;
463                 break;
464         }
465
466         return ret;
467 }
468 EXPORT_SYMBOL(rdma_init_qp_attr);
469
470 static inline int cma_zero_addr(struct sockaddr *addr)
471 {
472         struct in6_addr *ip6;
473
474         if (addr->sa_family == AF_INET)
475                 return ZERONET(((struct sockaddr_in *) addr)->sin_addr.s_addr);
476         else {
477                 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
478                 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
479                         ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
480         }
481 }
482
483 static inline int cma_loopback_addr(struct sockaddr *addr)
484 {
485         return LOOPBACK(((struct sockaddr_in *) addr)->sin_addr.s_addr);
486 }
487
/* Return 1 if @addr is a zero or loopback address, 0 otherwise. */
static inline int cma_any_addr(struct sockaddr *addr)
{
	if (cma_zero_addr(addr))
		return 1;

	return cma_loopback_addr(addr) ? 1 : 0;
}
492
493 static inline int cma_any_port(struct sockaddr *addr)
494 {
495         return !((struct sockaddr_in *) addr)->sin_port;
496 }
497
498 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
499                             u8 *ip_ver, __u16 *port,
500                             union cma_ip_addr **src, union cma_ip_addr **dst)
501 {
502         switch (ps) {
503         case RDMA_PS_SDP:
504                 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
505                     SDP_MAJ_VERSION)
506                         return -EINVAL;
507
508                 *ip_ver = sdp_get_ip_ver(hdr);
509                 *port   = ((struct sdp_hh *) hdr)->port;
510                 *src    = &((struct sdp_hh *) hdr)->src_addr;
511                 *dst    = &((struct sdp_hh *) hdr)->dst_addr;
512                 break;
513         default:
514                 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
515                         return -EINVAL;
516
517                 *ip_ver = cma_get_ip_ver(hdr);
518                 *port   = ((struct cma_hdr *) hdr)->port;
519                 *src    = &((struct cma_hdr *) hdr)->src_addr;
520                 *dst    = &((struct cma_hdr *) hdr)->dst_addr;
521                 break;
522         }
523
524         if (*ip_ver != 4 && *ip_ver != 6)
525                 return -EINVAL;
526         return 0;
527 }
528
529 static void cma_save_net_info(struct rdma_addr *addr,
530                               struct rdma_addr *listen_addr,
531                               u8 ip_ver, __u16 port,
532                               union cma_ip_addr *src, union cma_ip_addr *dst)
533 {
534         struct sockaddr_in *listen4, *ip4;
535         struct sockaddr_in6 *listen6, *ip6;
536
537         switch (ip_ver) {
538         case 4:
539                 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
540                 ip4 = (struct sockaddr_in *) &addr->src_addr;
541                 ip4->sin_family = listen4->sin_family;
542                 ip4->sin_addr.s_addr = dst->ip4.addr;
543                 ip4->sin_port = listen4->sin_port;
544
545                 ip4 = (struct sockaddr_in *) &addr->dst_addr;
546                 ip4->sin_family = listen4->sin_family;
547                 ip4->sin_addr.s_addr = src->ip4.addr;
548                 ip4->sin_port = port;
549                 break;
550         case 6:
551                 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
552                 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
553                 ip6->sin6_family = listen6->sin6_family;
554                 ip6->sin6_addr = dst->ip6;
555                 ip6->sin6_port = listen6->sin6_port;
556
557                 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
558                 ip6->sin6_family = listen6->sin6_family;
559                 ip6->sin6_addr = src->ip6;
560                 ip6->sin6_port = port;
561                 break;
562         default:
563                 break;
564         }
565 }
566
567 static inline int cma_user_data_offset(enum rdma_port_space ps)
568 {
569         switch (ps) {
570         case RDMA_PS_SDP:
571                 return 0;
572         default:
573                 return sizeof(struct cma_hdr);
574         }
575 }
576
577 static int cma_notify_user(struct rdma_id_private *id_priv,
578                            enum rdma_cm_event_type type, int status,
579                            void *data, u8 data_len)
580 {
581         struct rdma_cm_event event;
582
583         event.event = type;
584         event.status = status;
585         event.private_data = data;
586         event.private_data_len = data_len;
587
588         return id_priv->id.event_handler(&id_priv->id, &event);
589 }
590
591 static void cma_cancel_route(struct rdma_id_private *id_priv)
592 {
593         switch (id_priv->id.device->node_type) {
594         case IB_NODE_CA:
595                 if (id_priv->query)
596                         ib_sa_cancel_query(id_priv->query_id, id_priv->query);
597                 break;
598         default:
599                 break;
600         }
601 }
602
603 static inline int cma_internal_listen(struct rdma_id_private *id_priv)
604 {
605         return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
606                cma_any_addr(&id_priv->id.route.addr.src_addr);
607 }
608
609 static void cma_destroy_listen(struct rdma_id_private *id_priv)
610 {
611         cma_exch(id_priv, CMA_DESTROYING);
612
613         if (id_priv->cma_dev) {
614                 switch (id_priv->id.device->node_type) {
615                 case IB_NODE_CA:
616                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
617                                 ib_destroy_cm_id(id_priv->cm_id.ib);
618                         break;
619                 default:
620                         break;
621                 }
622                 cma_detach_from_dev(id_priv);
623         }
624         list_del(&id_priv->listen_list);
625
626         cma_deref_id(id_priv);
627         wait_for_completion(&id_priv->comp);
628
629         kfree(id_priv);
630 }
631
632 static void cma_cancel_listens(struct rdma_id_private *id_priv)
633 {
634         struct rdma_id_private *dev_id_priv;
635
636         mutex_lock(&lock);
637         list_del(&id_priv->list);
638
639         while (!list_empty(&id_priv->listen_list)) {
640                 dev_id_priv = list_entry(id_priv->listen_list.next,
641                                          struct rdma_id_private, listen_list);
642                 cma_destroy_listen(dev_id_priv);
643         }
644         mutex_unlock(&lock);
645 }
646
647 static void cma_cancel_operation(struct rdma_id_private *id_priv,
648                                  enum cma_state state)
649 {
650         switch (state) {
651         case CMA_ADDR_QUERY:
652                 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
653                 break;
654         case CMA_ROUTE_QUERY:
655                 cma_cancel_route(id_priv);
656                 break;
657         case CMA_LISTEN:
658                 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
659                     !id_priv->cma_dev)
660                         cma_cancel_listens(id_priv);
661                 break;
662         default:
663                 break;
664         }
665 }
666
667 static void cma_release_port(struct rdma_id_private *id_priv)
668 {
669         struct rdma_bind_list *bind_list = id_priv->bind_list;
670
671         if (!bind_list)
672                 return;
673
674         mutex_lock(&lock);
675         hlist_del(&id_priv->node);
676         if (hlist_empty(&bind_list->owners)) {
677                 idr_remove(bind_list->ps, bind_list->port);
678                 kfree(bind_list);
679         }
680         mutex_unlock(&lock);
681 }
682
683 void rdma_destroy_id(struct rdma_cm_id *id)
684 {
685         struct rdma_id_private *id_priv;
686         enum cma_state state;
687
688         id_priv = container_of(id, struct rdma_id_private, id);
689         state = cma_exch(id_priv, CMA_DESTROYING);
690         cma_cancel_operation(id_priv, state);
691
692         if (id_priv->cma_dev) {
693                 switch (id->device->node_type) {
694                 case IB_NODE_CA:
695                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
696                                 ib_destroy_cm_id(id_priv->cm_id.ib);
697                         break;
698                 default:
699                         break;
700                 }
701                 mutex_lock(&lock);
702                 cma_detach_from_dev(id_priv);
703                 mutex_unlock(&lock);
704         }
705
706         cma_release_port(id_priv);
707         cma_deref_id(id_priv);
708         wait_for_completion(&id_priv->comp);
709
710         kfree(id_priv->id.route.path_rec);
711         kfree(id_priv);
712 }
713 EXPORT_SYMBOL(rdma_destroy_id);
714
715 static int cma_rep_recv(struct rdma_id_private *id_priv)
716 {
717         int ret;
718
719         ret = cma_modify_qp_rtr(&id_priv->id);
720         if (ret)
721                 goto reject;
722
723         ret = cma_modify_qp_rts(&id_priv->id);
724         if (ret)
725                 goto reject;
726
727         ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
728         if (ret)
729                 goto reject;
730
731         return 0;
732 reject:
733         cma_modify_qp_err(&id_priv->id);
734         ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
735                        NULL, 0, NULL, 0);
736         return ret;
737 }
738
739 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
740 {
741         if (id_priv->id.ps == RDMA_PS_SDP &&
742             sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
743             SDP_MAJ_VERSION)
744                 return -EINVAL;
745
746         return 0;
747 }
748
749 static int cma_rtu_recv(struct rdma_id_private *id_priv)
750 {
751         int ret;
752
753         ret = cma_modify_qp_rts(&id_priv->id);
754         if (ret)
755                 goto reject;
756
757         return 0;
758 reject:
759         cma_modify_qp_err(&id_priv->id);
760         ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
761                        NULL, 0, NULL, 0);
762         return ret;
763 }
764
765 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
766 {
767         struct rdma_id_private *id_priv = cm_id->context;
768         enum rdma_cm_event_type event;
769         u8 private_data_len = 0;
770         int ret = 0, status = 0;
771
772         atomic_inc(&id_priv->dev_remove);
773         if (!cma_comp(id_priv, CMA_CONNECT))
774                 goto out;
775
776         switch (ib_event->event) {
777         case IB_CM_REQ_ERROR:
778         case IB_CM_REP_ERROR:
779                 event = RDMA_CM_EVENT_UNREACHABLE;
780                 status = -ETIMEDOUT;
781                 break;
782         case IB_CM_REP_RECEIVED:
783                 status = cma_verify_rep(id_priv, ib_event->private_data);
784                 if (status)
785                         event = RDMA_CM_EVENT_CONNECT_ERROR;
786                 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
787                         status = cma_rep_recv(id_priv);
788                         event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
789                                          RDMA_CM_EVENT_ESTABLISHED;
790                 } else
791                         event = RDMA_CM_EVENT_CONNECT_RESPONSE;
792                 private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
793                 break;
794         case IB_CM_RTU_RECEIVED:
795                 status = cma_rtu_recv(id_priv);
796                 event = status ? RDMA_CM_EVENT_CONNECT_ERROR :
797                                  RDMA_CM_EVENT_ESTABLISHED;
798                 break;
799         case IB_CM_DREQ_ERROR:
800                 status = -ETIMEDOUT; /* fall through */
801         case IB_CM_DREQ_RECEIVED:
802         case IB_CM_DREP_RECEIVED:
803                 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
804                         goto out;
805                 event = RDMA_CM_EVENT_DISCONNECTED;
806                 break;
807         case IB_CM_TIMEWAIT_EXIT:
808         case IB_CM_MRA_RECEIVED:
809                 /* ignore event */
810                 goto out;
811         case IB_CM_REJ_RECEIVED:
812                 cma_modify_qp_err(&id_priv->id);
813                 status = ib_event->param.rej_rcvd.reason;
814                 event = RDMA_CM_EVENT_REJECTED;
815                 private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
816                 break;
817         default:
818                 printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d",
819                        ib_event->event);
820                 goto out;
821         }
822
823         ret = cma_notify_user(id_priv, event, status, ib_event->private_data,
824                               private_data_len);
825         if (ret) {
826                 /* Destroy the CM ID by returning a non-zero value. */
827                 id_priv->cm_id.ib = NULL;
828                 cma_exch(id_priv, CMA_DESTROYING);
829                 cma_release_remove(id_priv);
830                 rdma_destroy_id(&id_priv->id);
831                 return ret;
832         }
833 out:
834         cma_release_remove(id_priv);
835         return ret;
836 }
837
838 static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
839                                           struct ib_cm_event *ib_event)
840 {
841         struct rdma_id_private *id_priv;
842         struct rdma_cm_id *id;
843         struct rdma_route *rt;
844         union cma_ip_addr *src, *dst;
845         __u16 port;
846         u8 ip_ver;
847
848         id = rdma_create_id(listen_id->event_handler, listen_id->context,
849                             listen_id->ps);
850         if (IS_ERR(id))
851                 return NULL;
852
853         rt = &id->route;
854         rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
855         rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL);
856         if (!rt->path_rec)
857                 goto err;
858
859         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
860                              &ip_ver, &port, &src, &dst))
861                 goto err;
862
863         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
864                           ip_ver, port, src, dst);
865         rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
866         if (rt->num_paths == 2)
867                 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
868
869         ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
870         ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
871         ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
872         rt->addr.dev_addr.dev_type = IB_NODE_CA;
873
874         id_priv = container_of(id, struct rdma_id_private, id);
875         id_priv->state = CMA_CONNECT;
876         return id_priv;
877 err:
878         rdma_destroy_id(id);
879         return NULL;
880 }
881
882 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
883 {
884         struct rdma_id_private *listen_id, *conn_id;
885         int offset, ret;
886
887         listen_id = cm_id->context;
888         atomic_inc(&listen_id->dev_remove);
889         if (!cma_comp(listen_id, CMA_LISTEN)) {
890                 ret = -ECONNABORTED;
891                 goto out;
892         }
893
894         conn_id = cma_new_id(&listen_id->id, ib_event);
895         if (!conn_id) {
896                 ret = -ENOMEM;
897                 goto out;
898         }
899
900         atomic_inc(&conn_id->dev_remove);
901         ret = cma_acquire_ib_dev(conn_id);
902         if (ret) {
903                 ret = -ENODEV;
904                 cma_release_remove(conn_id);
905                 rdma_destroy_id(&conn_id->id);
906                 goto out;
907         }
908
909         conn_id->cm_id.ib = cm_id;
910         cm_id->context = conn_id;
911         cm_id->cm_handler = cma_ib_handler;
912
913         offset = cma_user_data_offset(listen_id->id.ps);
914         ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
915                               ib_event->private_data + offset,
916                               IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
917         if (ret) {
918                 /* Destroy the CM ID by returning a non-zero value. */
919                 conn_id->cm_id.ib = NULL;
920                 cma_exch(conn_id, CMA_DESTROYING);
921                 cma_release_remove(conn_id);
922                 rdma_destroy_id(&conn_id->id);
923         }
924 out:
925         cma_release_remove(listen_id);
926         return ret;
927 }
928
929 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
930 {
931         return cpu_to_be64(((u64)ps << 16) +
932                be16_to_cpu(((struct sockaddr_in *) addr)->sin_port));
933 }
934
935 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
936                                  struct ib_cm_compare_data *compare)
937 {
938         struct cma_hdr *cma_data, *cma_mask;
939         struct sdp_hh *sdp_data, *sdp_mask;
940         __u32 ip4_addr;
941         struct in6_addr ip6_addr;
942
943         memset(compare, 0, sizeof *compare);
944         cma_data = (void *) compare->data;
945         cma_mask = (void *) compare->mask;
946         sdp_data = (void *) compare->data;
947         sdp_mask = (void *) compare->mask;
948
949         switch (addr->sa_family) {
950         case AF_INET:
951                 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
952                 if (ps == RDMA_PS_SDP) {
953                         sdp_set_ip_ver(sdp_data, 4);
954                         sdp_set_ip_ver(sdp_mask, 0xF);
955                         sdp_data->dst_addr.ip4.addr = ip4_addr;
956                         sdp_mask->dst_addr.ip4.addr = ~0;
957                 } else {
958                         cma_set_ip_ver(cma_data, 4);
959                         cma_set_ip_ver(cma_mask, 0xF);
960                         cma_data->dst_addr.ip4.addr = ip4_addr;
961                         cma_mask->dst_addr.ip4.addr = ~0;
962                 }
963                 break;
964         case AF_INET6:
965                 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
966                 if (ps == RDMA_PS_SDP) {
967                         sdp_set_ip_ver(sdp_data, 6);
968                         sdp_set_ip_ver(sdp_mask, 0xF);
969                         sdp_data->dst_addr.ip6 = ip6_addr;
970                         memset(&sdp_mask->dst_addr.ip6, 0xFF,
971                                sizeof sdp_mask->dst_addr.ip6);
972                 } else {
973                         cma_set_ip_ver(cma_data, 6);
974                         cma_set_ip_ver(cma_mask, 0xF);
975                         cma_data->dst_addr.ip6 = ip6_addr;
976                         memset(&cma_mask->dst_addr.ip6, 0xFF,
977                                sizeof cma_mask->dst_addr.ip6);
978                 }
979                 break;
980         default:
981                 break;
982         }
983 }
984
985 static int cma_ib_listen(struct rdma_id_private *id_priv)
986 {
987         struct ib_cm_compare_data compare_data;
988         struct sockaddr *addr;
989         __be64 svc_id;
990         int ret;
991
992         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
993                                             id_priv);
994         if (IS_ERR(id_priv->cm_id.ib))
995                 return PTR_ERR(id_priv->cm_id.ib);
996
997         addr = &id_priv->id.route.addr.src_addr;
998         svc_id = cma_get_service_id(id_priv->id.ps, addr);
999         if (cma_any_addr(addr))
1000                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1001         else {
1002                 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1003                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1004         }
1005
1006         if (ret) {
1007                 ib_destroy_cm_id(id_priv->cm_id.ib);
1008                 id_priv->cm_id.ib = NULL;
1009         }
1010
1011         return ret;
1012 }
1013
1014 static int cma_listen_handler(struct rdma_cm_id *id,
1015                               struct rdma_cm_event *event)
1016 {
1017         struct rdma_id_private *id_priv = id->context;
1018
1019         id->context = id_priv->id.context;
1020         id->event_handler = id_priv->id.event_handler;
1021         return id_priv->id.event_handler(id, event);
1022 }
1023
1024 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1025                               struct cma_device *cma_dev)
1026 {
1027         struct rdma_id_private *dev_id_priv;
1028         struct rdma_cm_id *id;
1029         int ret;
1030
1031         id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1032         if (IS_ERR(id))
1033                 return;
1034
1035         dev_id_priv = container_of(id, struct rdma_id_private, id);
1036
1037         dev_id_priv->state = CMA_ADDR_BOUND;
1038         memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1039                ip_addr_size(&id_priv->id.route.addr.src_addr));
1040
1041         cma_attach_to_dev(dev_id_priv, cma_dev);
1042         list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1043
1044         ret = rdma_listen(id, id_priv->backlog);
1045         if (ret)
1046                 goto err;
1047
1048         return;
1049 err:
1050         cma_destroy_listen(dev_id_priv);
1051 }
1052
1053 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1054 {
1055         struct cma_device *cma_dev;
1056
1057         mutex_lock(&lock);
1058         list_add_tail(&id_priv->list, &listen_any_list);
1059         list_for_each_entry(cma_dev, &dev_list, list)
1060                 cma_listen_on_dev(id_priv, cma_dev);
1061         mutex_unlock(&lock);
1062 }
1063
1064 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1065 {
1066         struct sockaddr_in addr_in;
1067
1068         memset(&addr_in, 0, sizeof addr_in);
1069         addr_in.sin_family = af;
1070         return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1071 }
1072
1073 int rdma_listen(struct rdma_cm_id *id, int backlog)
1074 {
1075         struct rdma_id_private *id_priv;
1076         int ret;
1077
1078         id_priv = container_of(id, struct rdma_id_private, id);
1079         if (id_priv->state == CMA_IDLE) {
1080                 ret = cma_bind_any(id, AF_INET);
1081                 if (ret)
1082                         return ret;
1083         }
1084
1085         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1086                 return -EINVAL;
1087
1088         id_priv->backlog = backlog;
1089         if (id->device) {
1090                 switch (id->device->node_type) {
1091                 case IB_NODE_CA:
1092                         ret = cma_ib_listen(id_priv);
1093                         if (ret)
1094                                 goto err;
1095                         break;
1096                 default:
1097                         ret = -ENOSYS;
1098                         goto err;
1099                 }
1100         } else
1101                 cma_listen_on_all(id_priv);
1102
1103         return 0;
1104 err:
1105         id_priv->backlog = 0;
1106         cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1107         return ret;
1108 }
1109 EXPORT_SYMBOL(rdma_listen);
1110
1111 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1112                               void *context)
1113 {
1114         struct cma_work *work = context;
1115         struct rdma_route *route;
1116
1117         route = &work->id->id.route;
1118
1119         if (!status) {
1120                 route->num_paths = 1;
1121                 *route->path_rec = *path_rec;
1122         } else {
1123                 work->old_state = CMA_ROUTE_QUERY;
1124                 work->new_state = CMA_ADDR_RESOLVED;
1125                 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1126         }
1127
1128         queue_work(cma_wq, &work->work);
1129 }
1130
1131 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1132                               struct cma_work *work)
1133 {
1134         struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1135         struct ib_sa_path_rec path_rec;
1136
1137         memset(&path_rec, 0, sizeof path_rec);
1138         ib_addr_get_sgid(addr, &path_rec.sgid);
1139         ib_addr_get_dgid(addr, &path_rec.dgid);
1140         path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1141         path_rec.numb_path = 1;
1142
1143         id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
1144                                 id_priv->id.port_num, &path_rec,
1145                                 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1146                                 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
1147                                 timeout_ms, GFP_KERNEL,
1148                                 cma_query_handler, work, &id_priv->query);
1149
1150         return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1151 }
1152
1153 static void cma_work_handler(void *data)
1154 {
1155         struct cma_work *work = data;
1156         struct rdma_id_private *id_priv = work->id;
1157         int destroy = 0;
1158
1159         atomic_inc(&id_priv->dev_remove);
1160         if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1161                 goto out;
1162
1163         if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1164                 cma_exch(id_priv, CMA_DESTROYING);
1165                 destroy = 1;
1166         }
1167 out:
1168         cma_release_remove(id_priv);
1169         cma_deref_id(id_priv);
1170         if (destroy)
1171                 rdma_destroy_id(&id_priv->id);
1172         kfree(work);
1173 }
1174
1175 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1176 {
1177         struct rdma_route *route = &id_priv->id.route;
1178         struct cma_work *work;
1179         int ret;
1180
1181         work = kzalloc(sizeof *work, GFP_KERNEL);
1182         if (!work)
1183                 return -ENOMEM;
1184
1185         work->id = id_priv;
1186         INIT_WORK(&work->work, cma_work_handler, work);
1187         work->old_state = CMA_ROUTE_QUERY;
1188         work->new_state = CMA_ROUTE_RESOLVED;
1189         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1190
1191         route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
1192         if (!route->path_rec) {
1193                 ret = -ENOMEM;
1194                 goto err1;
1195         }
1196
1197         ret = cma_query_ib_route(id_priv, timeout_ms, work);
1198         if (ret)
1199                 goto err2;
1200
1201         return 0;
1202 err2:
1203         kfree(route->path_rec);
1204         route->path_rec = NULL;
1205 err1:
1206         kfree(work);
1207         return ret;
1208 }
1209
1210 int rdma_set_ib_paths(struct rdma_cm_id *id,
1211                       struct ib_sa_path_rec *path_rec, int num_paths)
1212 {
1213         struct rdma_id_private *id_priv;
1214         int ret;
1215
1216         id_priv = container_of(id, struct rdma_id_private, id);
1217         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1218                 return -EINVAL;
1219
1220         id->route.path_rec = kmalloc(sizeof *path_rec * num_paths, GFP_KERNEL);
1221         if (!id->route.path_rec) {
1222                 ret = -ENOMEM;
1223                 goto err;
1224         }
1225
1226         memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
1227         return 0;
1228 err:
1229         cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1230         return ret;
1231 }
1232 EXPORT_SYMBOL(rdma_set_ib_paths);
1233
1234 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1235 {
1236         struct rdma_id_private *id_priv;
1237         int ret;
1238
1239         id_priv = container_of(id, struct rdma_id_private, id);
1240         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1241                 return -EINVAL;
1242
1243         atomic_inc(&id_priv->refcount);
1244         switch (id->device->node_type) {
1245         case IB_NODE_CA:
1246                 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1247                 break;
1248         default:
1249                 ret = -ENOSYS;
1250                 break;
1251         }
1252         if (ret)
1253                 goto err;
1254
1255         return 0;
1256 err:
1257         cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1258         cma_deref_id(id_priv);
1259         return ret;
1260 }
1261 EXPORT_SYMBOL(rdma_resolve_route);
1262
1263 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1264 {
1265         struct cma_device *cma_dev;
1266         struct ib_port_attr port_attr;
1267         union ib_gid gid;
1268         u16 pkey;
1269         int ret;
1270         u8 p;
1271
1272         mutex_lock(&lock);
1273         list_for_each_entry(cma_dev, &dev_list, list)
1274                 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1275                         if (!ib_query_port (cma_dev->device, p, &port_attr) &&
1276                             port_attr.state == IB_PORT_ACTIVE)
1277                                 goto port_found;
1278
1279         if (!list_empty(&dev_list)) {
1280                 p = 1;
1281                 cma_dev = list_entry(dev_list.next, struct cma_device, list);
1282         } else {
1283                 ret = -ENODEV;
1284                 goto out;
1285         }
1286
1287 port_found:
1288         ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1289         if (ret)
1290                 goto out;
1291
1292         ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1293         if (ret)
1294                 goto out;
1295
1296         ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1297         ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1298         id_priv->id.port_num = p;
1299         cma_attach_to_dev(id_priv, cma_dev);
1300 out:
1301         mutex_unlock(&lock);
1302         return ret;
1303 }
1304
1305 static void addr_handler(int status, struct sockaddr *src_addr,
1306                          struct rdma_dev_addr *dev_addr, void *context)
1307 {
1308         struct rdma_id_private *id_priv = context;
1309         enum rdma_cm_event_type event;
1310
1311         atomic_inc(&id_priv->dev_remove);
1312         if (!id_priv->cma_dev && !status)
1313                 status = cma_acquire_dev(id_priv);
1314
1315         if (status) {
1316                 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
1317                         goto out;
1318                 event = RDMA_CM_EVENT_ADDR_ERROR;
1319         } else {
1320                 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1321                         goto out;
1322                 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1323                        ip_addr_size(src_addr));
1324                 event = RDMA_CM_EVENT_ADDR_RESOLVED;
1325         }
1326
1327         if (cma_notify_user(id_priv, event, status, NULL, 0)) {
1328                 cma_exch(id_priv, CMA_DESTROYING);
1329                 cma_release_remove(id_priv);
1330                 cma_deref_id(id_priv);
1331                 rdma_destroy_id(&id_priv->id);
1332                 return;
1333         }
1334 out:
1335         cma_release_remove(id_priv);
1336         cma_deref_id(id_priv);
1337 }
1338
1339 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1340 {
1341         struct cma_work *work;
1342         struct sockaddr_in *src_in, *dst_in;
1343         union ib_gid gid;
1344         int ret;
1345
1346         work = kzalloc(sizeof *work, GFP_KERNEL);
1347         if (!work)
1348                 return -ENOMEM;
1349
1350         if (!id_priv->cma_dev) {
1351                 ret = cma_bind_loopback(id_priv);
1352                 if (ret)
1353                         goto err;
1354         }
1355
1356         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1357         ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1358
1359         if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1360                 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1361                 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1362                 src_in->sin_family = dst_in->sin_family;
1363                 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1364         }
1365
1366         work->id = id_priv;
1367         INIT_WORK(&work->work, cma_work_handler, work);
1368         work->old_state = CMA_ADDR_QUERY;
1369         work->new_state = CMA_ADDR_RESOLVED;
1370         work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1371         queue_work(cma_wq, &work->work);
1372         return 0;
1373 err:
1374         kfree(work);
1375         return ret;
1376 }
1377
1378 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1379                          struct sockaddr *dst_addr)
1380 {
1381         if (src_addr && src_addr->sa_family)
1382                 return rdma_bind_addr(id, src_addr);
1383         else
1384                 return cma_bind_any(id, dst_addr->sa_family);
1385 }
1386
1387 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1388                       struct sockaddr *dst_addr, int timeout_ms)
1389 {
1390         struct rdma_id_private *id_priv;
1391         int ret;
1392
1393         id_priv = container_of(id, struct rdma_id_private, id);
1394         if (id_priv->state == CMA_IDLE) {
1395                 ret = cma_bind_addr(id, src_addr, dst_addr);
1396                 if (ret)
1397                         return ret;
1398         }
1399
1400         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1401                 return -EINVAL;
1402
1403         atomic_inc(&id_priv->refcount);
1404         memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1405         if (cma_any_addr(dst_addr))
1406                 ret = cma_resolve_loopback(id_priv);
1407         else
1408                 ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr,
1409                                       &id->route.addr.dev_addr,
1410                                       timeout_ms, addr_handler, id_priv);
1411         if (ret)
1412                 goto err;
1413
1414         return 0;
1415 err:
1416         cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1417         cma_deref_id(id_priv);
1418         return ret;
1419 }
1420 EXPORT_SYMBOL(rdma_resolve_addr);
1421
1422 static void cma_bind_port(struct rdma_bind_list *bind_list,
1423                           struct rdma_id_private *id_priv)
1424 {
1425         struct sockaddr_in *sin;
1426
1427         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1428         sin->sin_port = htons(bind_list->port);
1429         id_priv->bind_list = bind_list;
1430         hlist_add_head(&id_priv->node, &bind_list->owners);
1431 }
1432
1433 static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
1434                           unsigned short snum)
1435 {
1436         struct rdma_bind_list *bind_list;
1437         int port, start, ret;
1438
1439         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
1440         if (!bind_list)
1441                 return -ENOMEM;
1442
1443         start = snum ? snum : sysctl_local_port_range[0];
1444
1445         do {
1446                 ret = idr_get_new_above(ps, bind_list, start, &port);
1447         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
1448
1449         if (ret)
1450                 goto err;
1451
1452         if ((snum && port != snum) ||
1453             (!snum && port > sysctl_local_port_range[1])) {
1454                 idr_remove(ps, port);
1455                 ret = -EADDRNOTAVAIL;
1456                 goto err;
1457         }
1458
1459         bind_list->ps = ps;
1460         bind_list->port = (unsigned short) port;
1461         cma_bind_port(bind_list, id_priv);
1462         return 0;
1463 err:
1464         kfree(bind_list);
1465         return ret;
1466 }
1467
1468 static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1469 {
1470         struct rdma_id_private *cur_id;
1471         struct sockaddr_in *sin, *cur_sin;
1472         struct rdma_bind_list *bind_list;
1473         struct hlist_node *node;
1474         unsigned short snum;
1475
1476         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1477         snum = ntohs(sin->sin_port);
1478         if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
1479                 return -EACCES;
1480
1481         bind_list = idr_find(ps, snum);
1482         if (!bind_list)
1483                 return cma_alloc_port(ps, id_priv, snum);
1484
1485         /*
1486          * We don't support binding to any address if anyone is bound to
1487          * a specific address on the same port.
1488          */
1489         if (cma_any_addr(&id_priv->id.route.addr.src_addr))
1490                 return -EADDRNOTAVAIL;
1491
1492         hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1493                 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1494                         return -EADDRNOTAVAIL;
1495                 
1496                 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1497                 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1498                         return -EADDRINUSE;
1499         }
1500
1501         cma_bind_port(bind_list, id_priv);
1502         return 0;
1503 }
1504
1505 static int cma_get_port(struct rdma_id_private *id_priv)
1506 {
1507         struct idr *ps;
1508         int ret;
1509
1510         switch (id_priv->id.ps) {
1511         case RDMA_PS_SDP:
1512                 ps = &sdp_ps;
1513                 break;
1514         case RDMA_PS_TCP:
1515                 ps = &tcp_ps;
1516                 break;
1517         default:
1518                 return -EPROTONOSUPPORT;
1519         }
1520
1521         mutex_lock(&lock);
1522         if (cma_any_port(&id_priv->id.route.addr.src_addr))
1523                 ret = cma_alloc_port(ps, id_priv, 0);
1524         else
1525                 ret = cma_use_port(ps, id_priv);
1526         mutex_unlock(&lock);
1527
1528         return ret;
1529 }
1530
1531 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1532 {
1533         struct rdma_id_private *id_priv;
1534         int ret;
1535
1536         if (addr->sa_family != AF_INET)
1537                 return -EAFNOSUPPORT;
1538
1539         id_priv = container_of(id, struct rdma_id_private, id);
1540         if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
1541                 return -EINVAL;
1542
1543         if (!cma_any_addr(addr)) {
1544                 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1545                 if (!ret)
1546                         ret = cma_acquire_dev(id_priv);
1547                 if (ret)
1548                         goto err;
1549         }
1550
1551         memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
1552         ret = cma_get_port(id_priv);
1553         if (ret)
1554                 goto err;
1555
1556         return 0;
1557 err:
1558         cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
1559         return ret;
1560 }
1561 EXPORT_SYMBOL(rdma_bind_addr);
1562
1563 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
1564                           struct rdma_route *route)
1565 {
1566         struct sockaddr_in *src4, *dst4;
1567         struct cma_hdr *cma_hdr;
1568         struct sdp_hh *sdp_hdr;
1569
1570         src4 = (struct sockaddr_in *) &route->addr.src_addr;
1571         dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
1572
1573         switch (ps) {
1574         case RDMA_PS_SDP:
1575                 sdp_hdr = hdr;
1576                 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
1577                         return -EINVAL;
1578                 sdp_set_ip_ver(sdp_hdr, 4);
1579                 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1580                 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1581                 sdp_hdr->port = src4->sin_port;
1582                 break;
1583         default:
1584                 cma_hdr = hdr;
1585                 cma_hdr->cma_version = CMA_VERSION;
1586                 cma_set_ip_ver(cma_hdr, 4);
1587                 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
1588                 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
1589                 cma_hdr->port = src4->sin_port;
1590                 break;
1591         }
1592         return 0;
1593 }
1594
1595 static int cma_connect_ib(struct rdma_id_private *id_priv,
1596                           struct rdma_conn_param *conn_param)
1597 {
1598         struct ib_cm_req_param req;
1599         struct rdma_route *route;
1600         void *private_data;
1601         int offset, ret;
1602
1603         memset(&req, 0, sizeof req);
1604         offset = cma_user_data_offset(id_priv->id.ps);
1605         req.private_data_len = offset + conn_param->private_data_len;
1606         private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
1607         if (!private_data)
1608                 return -ENOMEM;
1609
1610         if (conn_param->private_data && conn_param->private_data_len)
1611                 memcpy(private_data + offset, conn_param->private_data,
1612                        conn_param->private_data_len);
1613
1614         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
1615                                             id_priv);
1616         if (IS_ERR(id_priv->cm_id.ib)) {
1617                 ret = PTR_ERR(id_priv->cm_id.ib);
1618                 goto out;
1619         }
1620
1621         route = &id_priv->id.route;
1622         ret = cma_format_hdr(private_data, id_priv->id.ps, route);
1623         if (ret)
1624                 goto out;
1625         req.private_data = private_data;
1626
1627         req.primary_path = &route->path_rec[0];
1628         if (route->num_paths == 2)
1629                 req.alternate_path = &route->path_rec[1];
1630
1631         req.service_id = cma_get_service_id(id_priv->id.ps,
1632                                             &route->addr.dst_addr);
1633         req.qp_num = id_priv->qp_num;
1634         req.qp_type = id_priv->qp_type;
1635         req.starting_psn = id_priv->seq_num;
1636         req.responder_resources = conn_param->responder_resources;
1637         req.initiator_depth = conn_param->initiator_depth;
1638         req.flow_control = conn_param->flow_control;
1639         req.retry_count = conn_param->retry_count;
1640         req.rnr_retry_count = conn_param->rnr_retry_count;
1641         req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1642         req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
1643         req.max_cm_retries = CMA_MAX_CM_RETRIES;
1644         req.srq = id_priv->srq ? 1 : 0;
1645
1646         ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
1647 out:
1648         kfree(private_data);
1649         return ret;
1650 }
1651
1652 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1653 {
1654         struct rdma_id_private *id_priv;
1655         int ret;
1656
1657         id_priv = container_of(id, struct rdma_id_private, id);
1658         if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
1659                 return -EINVAL;
1660
1661         if (!id->qp) {
1662                 id_priv->qp_num = conn_param->qp_num;
1663                 id_priv->qp_type = conn_param->qp_type;
1664                 id_priv->srq = conn_param->srq;
1665         }
1666
1667         switch (id->device->node_type) {
1668         case IB_NODE_CA:
1669                 ret = cma_connect_ib(id_priv, conn_param);
1670                 break;
1671         default:
1672                 ret = -ENOSYS;
1673                 break;
1674         }
1675         if (ret)
1676                 goto err;
1677
1678         return 0;
1679 err:
1680         cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
1681         return ret;
1682 }
1683 EXPORT_SYMBOL(rdma_connect);
1684
1685 static int cma_accept_ib(struct rdma_id_private *id_priv,
1686                          struct rdma_conn_param *conn_param)
1687 {
1688         struct ib_cm_rep_param rep;
1689         int ret;
1690
1691         ret = cma_modify_qp_rtr(&id_priv->id);
1692         if (ret)
1693                 return ret;
1694
1695         memset(&rep, 0, sizeof rep);
1696         rep.qp_num = id_priv->qp_num;
1697         rep.starting_psn = id_priv->seq_num;
1698         rep.private_data = conn_param->private_data;
1699         rep.private_data_len = conn_param->private_data_len;
1700         rep.responder_resources = conn_param->responder_resources;
1701         rep.initiator_depth = conn_param->initiator_depth;
1702         rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
1703         rep.failover_accepted = 0;
1704         rep.flow_control = conn_param->flow_control;
1705         rep.rnr_retry_count = conn_param->rnr_retry_count;
1706         rep.srq = id_priv->srq ? 1 : 0;
1707
1708         return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
1709 }
1710
1711 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1712 {
1713         struct rdma_id_private *id_priv;
1714         int ret;
1715
1716         id_priv = container_of(id, struct rdma_id_private, id);
1717         if (!cma_comp(id_priv, CMA_CONNECT))
1718                 return -EINVAL;
1719
1720         if (!id->qp && conn_param) {
1721                 id_priv->qp_num = conn_param->qp_num;
1722                 id_priv->qp_type = conn_param->qp_type;
1723                 id_priv->srq = conn_param->srq;
1724         }
1725
1726         switch (id->device->node_type) {
1727         case IB_NODE_CA:
1728                 if (conn_param)
1729                         ret = cma_accept_ib(id_priv, conn_param);
1730                 else
1731                         ret = cma_rep_recv(id_priv);
1732                 break;
1733         default:
1734                 ret = -ENOSYS;
1735                 break;
1736         }
1737
1738         if (ret)
1739                 goto reject;
1740
1741         return 0;
1742 reject:
1743         cma_modify_qp_err(id);
1744         rdma_reject(id, NULL, 0);
1745         return ret;
1746 }
1747 EXPORT_SYMBOL(rdma_accept);
1748
1749 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
1750                 u8 private_data_len)
1751 {
1752         struct rdma_id_private *id_priv;
1753         int ret;
1754
1755         id_priv = container_of(id, struct rdma_id_private, id);
1756         if (!cma_comp(id_priv, CMA_CONNECT))
1757                 return -EINVAL;
1758
1759         switch (id->device->node_type) {
1760         case IB_NODE_CA:
1761                 ret = ib_send_cm_rej(id_priv->cm_id.ib,
1762                                      IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1763                                      private_data, private_data_len);
1764                 break;
1765         default:
1766                 ret = -ENOSYS;
1767                 break;
1768         }
1769         return ret;
1770 }
1771 EXPORT_SYMBOL(rdma_reject);
1772
1773 int rdma_disconnect(struct rdma_cm_id *id)
1774 {
1775         struct rdma_id_private *id_priv;
1776         int ret;
1777
1778         id_priv = container_of(id, struct rdma_id_private, id);
1779         if (!cma_comp(id_priv, CMA_CONNECT) &&
1780             !cma_comp(id_priv, CMA_DISCONNECT))
1781                 return -EINVAL;
1782
1783         ret = cma_modify_qp_err(id);
1784         if (ret)
1785                 goto out;
1786
1787         switch (id->device->node_type) {
1788         case IB_NODE_CA:
1789                 /* Initiate or respond to a disconnect. */
1790                 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
1791                         ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
1792                 break;
1793         default:
1794                 break;
1795         }
1796 out:
1797         return ret;
1798 }
1799 EXPORT_SYMBOL(rdma_disconnect);
1800
1801 static void cma_add_one(struct ib_device *device)
1802 {
1803         struct cma_device *cma_dev;
1804         struct rdma_id_private *id_priv;
1805
1806         cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
1807         if (!cma_dev)
1808                 return;
1809
1810         cma_dev->device = device;
1811         cma_dev->node_guid = device->node_guid;
1812         if (!cma_dev->node_guid)
1813                 goto err;
1814
1815         init_completion(&cma_dev->comp);
1816         atomic_set(&cma_dev->refcount, 1);
1817         INIT_LIST_HEAD(&cma_dev->id_list);
1818         ib_set_client_data(device, &cma_client, cma_dev);
1819
1820         mutex_lock(&lock);
1821         list_add_tail(&cma_dev->list, &dev_list);
1822         list_for_each_entry(id_priv, &listen_any_list, list)
1823                 cma_listen_on_dev(id_priv, cma_dev);
1824         mutex_unlock(&lock);
1825         return;
1826 err:
1827         kfree(cma_dev);
1828 }
1829
1830 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
1831 {
1832         enum cma_state state;
1833
1834         /* Record that we want to remove the device */
1835         state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
1836         if (state == CMA_DESTROYING)
1837                 return 0;
1838
1839         cma_cancel_operation(id_priv, state);
1840         wait_event(id_priv->wait_remove, !atomic_read(&id_priv->dev_remove));
1841
1842         /* Check for destruction from another callback. */
1843         if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
1844                 return 0;
1845
1846         return cma_notify_user(id_priv, RDMA_CM_EVENT_DEVICE_REMOVAL,
1847                                0, NULL, 0);
1848 }
1849
1850 static void cma_process_remove(struct cma_device *cma_dev)
1851 {
1852         struct list_head remove_list;
1853         struct rdma_id_private *id_priv;
1854         int ret;
1855
1856         INIT_LIST_HEAD(&remove_list);
1857
1858         mutex_lock(&lock);
1859         while (!list_empty(&cma_dev->id_list)) {
1860                 id_priv = list_entry(cma_dev->id_list.next,
1861                                      struct rdma_id_private, list);
1862
1863                 if (cma_internal_listen(id_priv)) {
1864                         cma_destroy_listen(id_priv);
1865                         continue;
1866                 }
1867
1868                 list_del(&id_priv->list);
1869                 list_add_tail(&id_priv->list, &remove_list);
1870                 atomic_inc(&id_priv->refcount);
1871                 mutex_unlock(&lock);
1872
1873                 ret = cma_remove_id_dev(id_priv);
1874                 cma_deref_id(id_priv);
1875                 if (ret)
1876                         rdma_destroy_id(&id_priv->id);
1877
1878                 mutex_lock(&lock);
1879         }
1880         mutex_unlock(&lock);
1881
1882         cma_deref_dev(cma_dev);
1883         wait_for_completion(&cma_dev->comp);
1884 }
1885
1886 static void cma_remove_one(struct ib_device *device)
1887 {
1888         struct cma_device *cma_dev;
1889
1890         cma_dev = ib_get_client_data(device, &cma_client);
1891         if (!cma_dev)
1892                 return;
1893
1894         mutex_lock(&lock);
1895         list_del(&cma_dev->list);
1896         mutex_unlock(&lock);
1897
1898         cma_process_remove(cma_dev);
1899         kfree(cma_dev);
1900 }
1901
1902 static int cma_init(void)
1903 {
1904         int ret;
1905
1906         cma_wq = create_singlethread_workqueue("rdma_cm_wq");
1907         if (!cma_wq)
1908                 return -ENOMEM;
1909
1910         ret = ib_register_client(&cma_client);
1911         if (ret)
1912                 goto err;
1913         return 0;
1914
1915 err:
1916         destroy_workqueue(cma_wq);
1917         return ret;
1918 }
1919
1920 static void cma_cleanup(void)
1921 {
1922         ib_unregister_client(&cma_client);
1923         destroy_workqueue(cma_wq);
1924         idr_destroy(&sdp_ps);
1925         idr_destroy(&tcp_ps);
1926 }
1927
/* Hook cma_init/cma_cleanup up as the module's load and unload handlers. */
module_init(cma_init);
module_exit(cma_cleanup);