Merge branch 'fix/usx2y' into for-linus
[linux-2.6] / drivers / infiniband / core / cm.c
1 /*
2  * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
3  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
4  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
5  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/completion.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/device.h>
39 #include <linux/err.h>
40 #include <linux/idr.h>
41 #include <linux/interrupt.h>
42 #include <linux/random.h>
43 #include <linux/rbtree.h>
44 #include <linux/spinlock.h>
45 #include <linux/sysfs.h>
46 #include <linux/workqueue.h>
47 #include <linux/kdev_t.h>
48
49 #include <rdma/ib_cache.h>
50 #include <rdma/ib_cm.h>
51 #include "cm_msgs.h"
52
/* Module metadata: author, one-line description and licence string. */
53 MODULE_AUTHOR("Sean Hefty");
54 MODULE_DESCRIPTION("InfiniBand CM");
55 MODULE_LICENSE("Dual BSD/GPL");
56
/* Forward declarations of the per-device callbacks referenced by cm_client. */
57 static void cm_add_one(struct ib_device *device);
58 static void cm_remove_one(struct ib_device *device);
59
/*
 * Client descriptor for this module: named "cm", with cm_add_one() and
 * cm_remove_one() as its device add/remove callbacks.
 */
60 static struct ib_client cm_client = {
61         .name   = "cm",
62         .add    = cm_add_one,
63         .remove = cm_remove_one
64 };
65
/*
 * Module-wide CM state; the single instance is "cm".
 *
 * In the code visible here, cm.lock is held around updates to
 * local_id_table, listen_service_table, the remote_id/remote_qp trees and
 * timewait_list, and cm.device_lock is read-held while walking device_list.
 */
66 static struct ib_cm {
67         spinlock_t lock;
68         struct list_head device_list;   /* list of struct cm_device (linked via ->list) */
69         rwlock_t device_lock;
70         struct rb_root listen_service_table;    /* cm_id_private, keyed via ->service_node */
71         u64 listen_service_id;
72         /* struct rb_root peer_service_table; todo: fix peer to peer */
73         struct rb_root remote_qp_table; /* cm_timewait_info via ->remote_qp_node */
74         struct rb_root remote_id_table; /* cm_timewait_info via ->remote_id_node */
75         struct rb_root remote_sidr_table;       /* cm_id_private via ->sidr_id_node */
76         struct idr local_id_table;      /* idr id -> cm_id_private */
77         __be32 random_id_operand;       /* XORed with the idr id to form id.local_id */
78         struct list_head timewait_list;
79         struct workqueue_struct *wq;    /* queue used for timewait delayed work */
80 } cm;
81
82 /* Counter indexes ordered by attribute ID */
/*
 * One index per counted CM message type.  CM_ATTR_COUNT is the number of
 * indexes and sizes cm_counter_group.counter[].
 */
83 enum {
84         CM_REQ_COUNTER,
85         CM_MRA_COUNTER,
86         CM_REJ_COUNTER,
87         CM_REP_COUNTER,
88         CM_RTU_COUNTER,
89         CM_DREQ_COUNTER,
90         CM_DREP_COUNTER,
91         CM_SIDR_REQ_COUNTER,
92         CM_SIDR_REP_COUNTER,
93         CM_LAP_COUNTER,
94         CM_APR_COUNTER,
95         CM_ATTR_COUNT,
96         CM_ATTR_ID_OFFSET = 0x0010,     /* NOTE(review): presumably attribute ID of index 0 - confirm against users */
97 };
98
/*
 * Counter group indexes; the order matches counter_group_names[].
 * CM_COUNTER_GROUPS is the number of groups.
 */
99 enum {
100         CM_XMIT,
101         CM_XMIT_RETRIES,
102         CM_RECV,
103         CM_RECV_DUPLICATES,
104         CM_COUNTER_GROUPS
105 };
106
/*
 * Name of each counter group, indexed by the CM_XMIT..CM_RECV_DUPLICATES
 * enum.  Each row is sized to hold the longest name, "cm_rx_duplicates",
 * including its terminating NUL.
 */
107 static char const counter_group_names[CM_COUNTER_GROUPS]
108                                      [sizeof("cm_rx_duplicates")] = {
109         "cm_tx_msgs", "cm_tx_retries",
110         "cm_rx_msgs", "cm_rx_duplicates"
111 };
112
/* A kobject plus one atomic counter per CM message type (CM_*_COUNTER index). */
113 struct cm_counter_group {
114         struct kobject obj;
115         atomic_long_t counter[CM_ATTR_COUNT];
116 };
117
/* An attribute paired with the counter index (CM_*_COUNTER) it refers to. */
118 struct cm_counter_attribute {
119         struct attribute attr;
120         int index;
121 };
122
/*
 * Define a struct cm_counter_attribute named cm_<_name>_counter_attr whose
 * attribute name is the stringified _name, whose mode is 0444 (read-only)
 * and whose counter index is _index.  Storage class is supplied by the
 * user of the macro (see the "static CM_COUNTER_ATTR(...)" uses).
 */
123 #define CM_COUNTER_ATTR(_name, _index) \
124 struct cm_counter_attribute cm_##_name##_counter_attr = { \
125         .attr = { .name = __stringify(_name), .mode = 0444 }, \
126         .index = _index \
127 }
128
/* One attribute object per CM message type, each bound to its counter index. */
129 static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
130 static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
131 static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
132 static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
133 static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
134 static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
135 static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
136 static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
137 static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
138 static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
139 static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
140
/*
 * NULL-terminated list of the counter attributes defined above, in the
 * same order as the CM_*_COUNTER indexes.
 */
141 static struct attribute *cm_counter_default_attrs[] = {
142         &cm_req_counter_attr.attr,
143         &cm_mra_counter_attr.attr,
144         &cm_rej_counter_attr.attr,
145         &cm_rep_counter_attr.attr,
146         &cm_rtu_counter_attr.attr,
147         &cm_dreq_counter_attr.attr,
148         &cm_drep_counter_attr.attr,
149         &cm_sidr_req_counter_attr.attr,
150         &cm_sidr_rep_counter_attr.attr,
151         &cm_lap_counter_attr.attr,
152         &cm_apr_counter_attr.attr,
153         NULL
154 };
155
/* Per-port CM state. */
156 struct cm_port {
157         struct cm_device *cm_dev;       /* owning device */
158         struct ib_mad_agent *mad_agent; /* agent used to create AHs and send MADs */
159         struct kobject port_obj;
160         u8 port_num;
161         struct cm_counter_group counter_group[CM_COUNTER_GROUPS];       /* one group per CM_XMIT..CM_RECV_DUPLICATES */
162 };
163
/* Per-device CM state; linked on cm.device_list through ->list. */
164 struct cm_device {
165         struct list_head list;
166         struct ib_device *ib_device;
167         struct device *device;
168         u8 ack_delay;
169         struct cm_port *port[0];        /* trailing array; cm_init_av_by_path() indexes it with port number - 1 */
170 };
171
/* Addressing information used to reach a peer through a given port. */
172 struct cm_av {
173         struct cm_port *port;
174         union ib_gid dgid;
175         struct ib_ah_attr ah_attr;
176         u16 pkey_index;
177         u8 timeout;     /* cm_init_av_by_path() stores packet_life_time + 1 here */
178 };
179
/*
 * A unit of deferred CM work.  cm_free_work() releases mad_recv_wc (when
 * set) together with the structure itself.
 */
180 struct cm_work {
181         struct delayed_work work;
182         struct list_head list;
183         struct cm_port *port;
184         struct ib_mad_recv_wc *mad_recv_wc;     /* Received MADs */
185         __be32 local_id;                        /* Established / timewait */
186         __be32 remote_id;
187         struct ib_cm_event cm_event;
188         struct ib_sa_path_rec path[0];
189 };
190
/*
 * Timewait tracking for a connection.  The inserted_* flags record whether
 * the matching rb_node is currently linked into cm.remote_qp_table /
 * cm.remote_id_table, so cm_cleanup_timewait() knows what to erase.
 */
191 struct cm_timewait_info {
192         struct cm_work work;                    /* Must be first. */
193         struct list_head list;
194         struct rb_node remote_qp_node;
195         struct rb_node remote_id_node;
196         __be64 remote_ca_guid;
197         __be32 remote_qpn;
198         u8 inserted_remote_qp;
199         u8 inserted_remote_id;
200 };
201
/*
 * Private state behind a public struct ib_cm_id.  The public part is
 * embedded as ->id, so container_of() recovers this structure from an
 * ib_cm_id pointer.
 */
202 struct cm_id_private {
203         struct ib_cm_id id;
204
205         struct rb_node service_node;    /* link in cm.listen_service_table */
206         struct rb_node sidr_id_node;    /* link in cm.remote_sidr_table */
207         spinlock_t lock;        /* Do not acquire inside cm.lock */
208         struct completion comp; /* completed by cm_deref_id() when refcount hits zero */
209         atomic_t refcount;      /* starts at 1 in ib_create_cm_id() */
210
211         struct ib_mad_send_buf *msg;
212         struct cm_timewait_info *timewait_info;
213         /* todo: use alternate port on send failure */
214         struct cm_av av;
215         struct cm_av alt_av;
216         struct ib_cm_compare_data *compare_data;        /* optional; NULL is treated as "matches" by the compare helpers */
217
218         void *private_data;     /* replaced (old copy kfree'd) by cm_set_private_data() */
219         __be64 tid;
220         __be32 local_qpn;
221         __be32 remote_qpn;
222         enum ib_qp_type qp_type;
223         __be32 sq_psn;
224         __be32 rq_psn;
225         int timeout_ms;
226         enum ib_mtu path_mtu;
227         __be16 pkey;
228         u8 private_data_len;
229         u8 max_cm_retries;      /* copied into each send buffer's ->retries by cm_alloc_msg() */
230         u8 peer_to_peer;
231         u8 responder_resources;
232         u8 initiator_depth;
233         u8 retry_count;
234         u8 rnr_retry_count;
235         u8 service_timeout;
236         u8 target_ack_delay;
237
238         struct list_head work_list;     /* queued struct cm_work, see cm_dequeue_work() */
239         atomic_t work_count;    /* starts at -1 in ib_create_cm_id() */
240 };
241
/* Forward declaration: handler installed on timewait work by cm_create_timewait_info(). */
242 static void cm_work_handler(struct work_struct *work);
243
244 static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
245 {
246         if (atomic_dec_and_test(&cm_id_priv->refcount))
247                 complete(&cm_id_priv->comp);
248 }
249
250 static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
251                         struct ib_mad_send_buf **msg)
252 {
253         struct ib_mad_agent *mad_agent;
254         struct ib_mad_send_buf *m;
255         struct ib_ah *ah;
256
257         mad_agent = cm_id_priv->av.port->mad_agent;
258         ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
259         if (IS_ERR(ah))
260                 return PTR_ERR(ah);
261
262         m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
263                                cm_id_priv->av.pkey_index,
264                                0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
265                                GFP_ATOMIC);
266         if (IS_ERR(m)) {
267                 ib_destroy_ah(ah);
268                 return PTR_ERR(m);
269         }
270
271         /* Timeout set by caller if response is expected. */
272         m->ah = ah;
273         m->retries = cm_id_priv->max_cm_retries;
274
275         atomic_inc(&cm_id_priv->refcount);
276         m->context[0] = cm_id_priv;
277         *msg = m;
278         return 0;
279 }
280
281 static int cm_alloc_response_msg(struct cm_port *port,
282                                  struct ib_mad_recv_wc *mad_recv_wc,
283                                  struct ib_mad_send_buf **msg)
284 {
285         struct ib_mad_send_buf *m;
286         struct ib_ah *ah;
287
288         ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
289                                   mad_recv_wc->recv_buf.grh, port->port_num);
290         if (IS_ERR(ah))
291                 return PTR_ERR(ah);
292
293         m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
294                                0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
295                                GFP_ATOMIC);
296         if (IS_ERR(m)) {
297                 ib_destroy_ah(ah);
298                 return PTR_ERR(m);
299         }
300         m->ah = ah;
301         *msg = m;
302         return 0;
303 }
304
305 static void cm_free_msg(struct ib_mad_send_buf *msg)
306 {
307         ib_destroy_ah(msg->ah);
308         if (msg->context[0])
309                 cm_deref_id(msg->context[0]);
310         ib_free_send_mad(msg);
311 }
312
313 static void * cm_copy_private_data(const void *private_data,
314                                    u8 private_data_len)
315 {
316         void *data;
317
318         if (!private_data || !private_data_len)
319                 return NULL;
320
321         data = kmemdup(private_data, private_data_len, GFP_KERNEL);
322         if (!data)
323                 return ERR_PTR(-ENOMEM);
324
325         return data;
326 }
327
328 static void cm_set_private_data(struct cm_id_private *cm_id_priv,
329                                  void *private_data, u8 private_data_len)
330 {
331         if (cm_id_priv->private_data && cm_id_priv->private_data_len)
332                 kfree(cm_id_priv->private_data);
333
334         cm_id_priv->private_data = private_data;
335         cm_id_priv->private_data_len = private_data_len;
336 }
337
338 static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
339                                     struct ib_grh *grh, struct cm_av *av)
340 {
341         av->port = port;
342         av->pkey_index = wc->pkey_index;
343         ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
344                            grh, &av->ah_attr);
345 }
346
347 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
348 {
349         struct cm_device *cm_dev;
350         struct cm_port *port = NULL;
351         unsigned long flags;
352         int ret;
353         u8 p;
354
355         read_lock_irqsave(&cm.device_lock, flags);
356         list_for_each_entry(cm_dev, &cm.device_list, list) {
357                 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
358                                         &p, NULL)) {
359                         port = cm_dev->port[p-1];
360                         break;
361                 }
362         }
363         read_unlock_irqrestore(&cm.device_lock, flags);
364
365         if (!port)
366                 return -EINVAL;
367
368         ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
369                                   be16_to_cpu(path->pkey), &av->pkey_index);
370         if (ret)
371                 return ret;
372
373         av->port = port;
374         ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
375                              &av->ah_attr);
376         av->timeout = path->packet_life_time + 1;
377         return 0;
378 }
379
380 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
381 {
382         unsigned long flags;
383         int ret, id;
384         static int next_id;
385
386         do {
387                 spin_lock_irqsave(&cm.lock, flags);
388                 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
389                                         next_id, &id);
390                 if (!ret)
391                         next_id = ((unsigned) id + 1) & MAX_ID_MASK;
392                 spin_unlock_irqrestore(&cm.lock, flags);
393         } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
394
395         cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
396         return ret;
397 }
398
399 static void cm_free_id(__be32 local_id)
400 {
401         spin_lock_irq(&cm.lock);
402         idr_remove(&cm.local_id_table,
403                    (__force int) (local_id ^ cm.random_id_operand));
404         spin_unlock_irq(&cm.lock);
405 }
406
407 static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
408 {
409         struct cm_id_private *cm_id_priv;
410
411         cm_id_priv = idr_find(&cm.local_id_table,
412                               (__force int) (local_id ^ cm.random_id_operand));
413         if (cm_id_priv) {
414                 if (cm_id_priv->id.remote_id == remote_id)
415                         atomic_inc(&cm_id_priv->refcount);
416                 else
417                         cm_id_priv = NULL;
418         }
419
420         return cm_id_priv;
421 }
422
423 static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
424 {
425         struct cm_id_private *cm_id_priv;
426
427         spin_lock_irq(&cm.lock);
428         cm_id_priv = cm_get_id(local_id, remote_id);
429         spin_unlock_irq(&cm.lock);
430
431         return cm_id_priv;
432 }
433
434 static void cm_mask_copy(u8 *dst, u8 *src, u8 *mask)
435 {
436         int i;
437
438         for (i = 0; i < IB_CM_COMPARE_SIZE / sizeof(unsigned long); i++)
439                 ((unsigned long *) dst)[i] = ((unsigned long *) src)[i] &
440                                              ((unsigned long *) mask)[i];
441 }
442
443 static int cm_compare_data(struct ib_cm_compare_data *src_data,
444                            struct ib_cm_compare_data *dst_data)
445 {
446         u8 src[IB_CM_COMPARE_SIZE];
447         u8 dst[IB_CM_COMPARE_SIZE];
448
449         if (!src_data || !dst_data)
450                 return 0;
451
452         cm_mask_copy(src, src_data->data, dst_data->mask);
453         cm_mask_copy(dst, dst_data->data, src_data->mask);
454         return memcmp(src, dst, IB_CM_COMPARE_SIZE);
455 }
456
457 static int cm_compare_private_data(u8 *private_data,
458                                    struct ib_cm_compare_data *dst_data)
459 {
460         u8 src[IB_CM_COMPARE_SIZE];
461
462         if (!dst_data)
463                 return 0;
464
465         cm_mask_copy(src, private_data, dst_data->mask);
466         return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE);
467 }
468
469 /*
470  * Trivial helpers to strip endian annotation and compare; the
471  * endianness doesn't actually matter since we just need a stable
472  * order for the RB tree.
473  */
474 static int be32_lt(__be32 a, __be32 b)
475 {
476         return (__force u32) a < (__force u32) b;
477 }
478
479 static int be32_gt(__be32 a, __be32 b)
480 {
481         return (__force u32) a > (__force u32) b;
482 }
483
484 static int be64_lt(__be64 a, __be64 b)
485 {
486         return (__force u64) a < (__force u64) b;
487 }
488
489 static int be64_gt(__be64 a, __be64 b)
490 {
491         return (__force u64) a > (__force u64) b;
492 }
493
494 static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
495 {
496         struct rb_node **link = &cm.listen_service_table.rb_node;
497         struct rb_node *parent = NULL;
498         struct cm_id_private *cur_cm_id_priv;
499         __be64 service_id = cm_id_priv->id.service_id;
500         __be64 service_mask = cm_id_priv->id.service_mask;
501         int data_cmp;
502
503         while (*link) {
504                 parent = *link;
505                 cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
506                                           service_node);
507                 data_cmp = cm_compare_data(cm_id_priv->compare_data,
508                                            cur_cm_id_priv->compare_data);
509                 if ((cur_cm_id_priv->id.service_mask & service_id) ==
510                     (service_mask & cur_cm_id_priv->id.service_id) &&
511                     (cm_id_priv->id.device == cur_cm_id_priv->id.device) &&
512                     !data_cmp)
513                         return cur_cm_id_priv;
514
515                 if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
516                         link = &(*link)->rb_left;
517                 else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
518                         link = &(*link)->rb_right;
519                 else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
520                         link = &(*link)->rb_left;
521                 else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
522                         link = &(*link)->rb_right;
523                 else if (data_cmp < 0)
524                         link = &(*link)->rb_left;
525                 else
526                         link = &(*link)->rb_right;
527         }
528         rb_link_node(&cm_id_priv->service_node, parent, link);
529         rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
530         return NULL;
531 }
532
533 static struct cm_id_private * cm_find_listen(struct ib_device *device,
534                                              __be64 service_id,
535                                              u8 *private_data)
536 {
537         struct rb_node *node = cm.listen_service_table.rb_node;
538         struct cm_id_private *cm_id_priv;
539         int data_cmp;
540
541         while (node) {
542                 cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
543                 data_cmp = cm_compare_private_data(private_data,
544                                                    cm_id_priv->compare_data);
545                 if ((cm_id_priv->id.service_mask & service_id) ==
546                      cm_id_priv->id.service_id &&
547                     (cm_id_priv->id.device == device) && !data_cmp)
548                         return cm_id_priv;
549
550                 if (device < cm_id_priv->id.device)
551                         node = node->rb_left;
552                 else if (device > cm_id_priv->id.device)
553                         node = node->rb_right;
554                 else if (be64_lt(service_id, cm_id_priv->id.service_id))
555                         node = node->rb_left;
556                 else if (be64_gt(service_id, cm_id_priv->id.service_id))
557                         node = node->rb_right;
558                 else if (data_cmp < 0)
559                         node = node->rb_left;
560                 else
561                         node = node->rb_right;
562         }
563         return NULL;
564 }
565
566 static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
567                                                      *timewait_info)
568 {
569         struct rb_node **link = &cm.remote_id_table.rb_node;
570         struct rb_node *parent = NULL;
571         struct cm_timewait_info *cur_timewait_info;
572         __be64 remote_ca_guid = timewait_info->remote_ca_guid;
573         __be32 remote_id = timewait_info->work.remote_id;
574
575         while (*link) {
576                 parent = *link;
577                 cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
578                                              remote_id_node);
579                 if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
580                         link = &(*link)->rb_left;
581                 else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
582                         link = &(*link)->rb_right;
583                 else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
584                         link = &(*link)->rb_left;
585                 else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
586                         link = &(*link)->rb_right;
587                 else
588                         return cur_timewait_info;
589         }
590         timewait_info->inserted_remote_id = 1;
591         rb_link_node(&timewait_info->remote_id_node, parent, link);
592         rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
593         return NULL;
594 }
595
596 static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
597                                                    __be32 remote_id)
598 {
599         struct rb_node *node = cm.remote_id_table.rb_node;
600         struct cm_timewait_info *timewait_info;
601
602         while (node) {
603                 timewait_info = rb_entry(node, struct cm_timewait_info,
604                                          remote_id_node);
605                 if (be32_lt(remote_id, timewait_info->work.remote_id))
606                         node = node->rb_left;
607                 else if (be32_gt(remote_id, timewait_info->work.remote_id))
608                         node = node->rb_right;
609                 else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
610                         node = node->rb_left;
611                 else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
612                         node = node->rb_right;
613                 else
614                         return timewait_info;
615         }
616         return NULL;
617 }
618
619 static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
620                                                       *timewait_info)
621 {
622         struct rb_node **link = &cm.remote_qp_table.rb_node;
623         struct rb_node *parent = NULL;
624         struct cm_timewait_info *cur_timewait_info;
625         __be64 remote_ca_guid = timewait_info->remote_ca_guid;
626         __be32 remote_qpn = timewait_info->remote_qpn;
627
628         while (*link) {
629                 parent = *link;
630                 cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
631                                              remote_qp_node);
632                 if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
633                         link = &(*link)->rb_left;
634                 else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
635                         link = &(*link)->rb_right;
636                 else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
637                         link = &(*link)->rb_left;
638                 else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
639                         link = &(*link)->rb_right;
640                 else
641                         return cur_timewait_info;
642         }
643         timewait_info->inserted_remote_qp = 1;
644         rb_link_node(&timewait_info->remote_qp_node, parent, link);
645         rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
646         return NULL;
647 }
648
649 static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
650                                                     *cm_id_priv)
651 {
652         struct rb_node **link = &cm.remote_sidr_table.rb_node;
653         struct rb_node *parent = NULL;
654         struct cm_id_private *cur_cm_id_priv;
655         union ib_gid *port_gid = &cm_id_priv->av.dgid;
656         __be32 remote_id = cm_id_priv->id.remote_id;
657
658         while (*link) {
659                 parent = *link;
660                 cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
661                                           sidr_id_node);
662                 if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
663                         link = &(*link)->rb_left;
664                 else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
665                         link = &(*link)->rb_right;
666                 else {
667                         int cmp;
668                         cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
669                                      sizeof *port_gid);
670                         if (cmp < 0)
671                                 link = &(*link)->rb_left;
672                         else if (cmp > 0)
673                                 link = &(*link)->rb_right;
674                         else
675                                 return cur_cm_id_priv;
676                 }
677         }
678         rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
679         rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
680         return NULL;
681 }
682
683 static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
684                                enum ib_cm_sidr_status status)
685 {
686         struct ib_cm_sidr_rep_param param;
687
688         memset(&param, 0, sizeof param);
689         param.status = status;
690         ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
691 }
692
693 struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
694                                  ib_cm_handler cm_handler,
695                                  void *context)
696 {
697         struct cm_id_private *cm_id_priv;
698         int ret;
699
700         cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
701         if (!cm_id_priv)
702                 return ERR_PTR(-ENOMEM);
703
704         cm_id_priv->id.state = IB_CM_IDLE;
705         cm_id_priv->id.device = device;
706         cm_id_priv->id.cm_handler = cm_handler;
707         cm_id_priv->id.context = context;
708         cm_id_priv->id.remote_cm_qpn = 1;
709         ret = cm_alloc_id(cm_id_priv);
710         if (ret)
711                 goto error;
712
713         spin_lock_init(&cm_id_priv->lock);
714         init_completion(&cm_id_priv->comp);
715         INIT_LIST_HEAD(&cm_id_priv->work_list);
716         atomic_set(&cm_id_priv->work_count, -1);
717         atomic_set(&cm_id_priv->refcount, 1);
718         return &cm_id_priv->id;
719
720 error:
721         kfree(cm_id_priv);
722         return ERR_PTR(-ENOMEM);
723 }
724 EXPORT_SYMBOL(ib_create_cm_id);
725
726 static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
727 {
728         struct cm_work *work;
729
730         if (list_empty(&cm_id_priv->work_list))
731                 return NULL;
732
733         work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
734         list_del(&work->list);
735         return work;
736 }
737
738 static void cm_free_work(struct cm_work *work)
739 {
740         if (work->mad_recv_wc)
741                 ib_free_recv_mad(work->mad_recv_wc);
742         kfree(work);
743 }
744
/*
 * Approximate conversion to milliseconds of an IBA timeout, which encodes
 * 4.096us x 2^iba_time.  4.096us x 2^8 is roughly 1ms, so the result is
 * 2^(iba_time - 8), with a floor of 1ms for iba_time <= 8.
 *
 * The exponent is capped so that the shift always stays within the range
 * of a positive int; without the cap an iba_time of 39 or more would
 * shift by the width of int or beyond, which is undefined behaviour.
 */
static inline int cm_convert_to_ms(int iba_time)
{
        /* Largest shift for which 1 << shift is still a positive int. */
        const int max_shift = (int) (sizeof(int) * 8) - 2;
        int shift;

        if (iba_time <= 8)
                return 1;

        shift = iba_time - 8;
        if (shift > max_shift)
                shift = max_shift;

        return 1 << shift;
}
750
751 /*
752  * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
753  * Because of how ack_timeout is stored, adding one doubles the timeout.
754  * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
755  * increment it (round up) only if the other is within 50%.
756  */
757 static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
758 {
759         int ack_timeout = packet_life_time + 1;
760
761         if (ack_timeout >= ca_ack_delay)
762                 ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
763         else
764                 ack_timeout = ca_ack_delay +
765                               (ack_timeout >= (ca_ack_delay - 1));
766
767         return min(31, ack_timeout);
768 }
769
770 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
771 {
772         if (timewait_info->inserted_remote_id) {
773                 rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
774                 timewait_info->inserted_remote_id = 0;
775         }
776
777         if (timewait_info->inserted_remote_qp) {
778                 rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
779                 timewait_info->inserted_remote_qp = 0;
780         }
781 }
782
783 static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
784 {
785         struct cm_timewait_info *timewait_info;
786
787         timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
788         if (!timewait_info)
789                 return ERR_PTR(-ENOMEM);
790
791         timewait_info->work.local_id = local_id;
792         INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
793         timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
794         return timewait_info;
795 }
796
797 static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
798 {
799         int wait_time;
800         unsigned long flags;
801
802         spin_lock_irqsave(&cm.lock, flags);
803         cm_cleanup_timewait(cm_id_priv->timewait_info);
804         list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
805         spin_unlock_irqrestore(&cm.lock, flags);
806
807         /*
808          * The cm_id could be destroyed by the user before we exit timewait.
809          * To protect against this, we search for the cm_id after exiting
810          * timewait before notifying the user that we've exited timewait.
811          */
812         cm_id_priv->id.state = IB_CM_TIMEWAIT;
813         wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
814         queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
815                            msecs_to_jiffies(wait_time));
816         cm_id_priv->timewait_info = NULL;
817 }
818
819 static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
820 {
821         unsigned long flags;
822
823         cm_id_priv->id.state = IB_CM_IDLE;
824         if (cm_id_priv->timewait_info) {
825                 spin_lock_irqsave(&cm.lock, flags);
826                 cm_cleanup_timewait(cm_id_priv->timewait_info);
827                 spin_unlock_irqrestore(&cm.lock, flags);
828                 kfree(cm_id_priv->timewait_info);
829                 cm_id_priv->timewait_info = NULL;
830         }
831 }
832
833 static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
834 {
835         struct cm_id_private *cm_id_priv;
836         struct cm_work *work;
837
838         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
839 retest:
840         spin_lock_irq(&cm_id_priv->lock);
841         switch (cm_id->state) {
842         case IB_CM_LISTEN:
843                 cm_id->state = IB_CM_IDLE;
844                 spin_unlock_irq(&cm_id_priv->lock);
845                 spin_lock_irq(&cm.lock);
846                 rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
847                 spin_unlock_irq(&cm.lock);
848                 break;
849         case IB_CM_SIDR_REQ_SENT:
850                 cm_id->state = IB_CM_IDLE;
851                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
852                 spin_unlock_irq(&cm_id_priv->lock);
853                 break;
854         case IB_CM_SIDR_REQ_RCVD:
855                 spin_unlock_irq(&cm_id_priv->lock);
856                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
857                 break;
858         case IB_CM_REQ_SENT:
859                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
860                 spin_unlock_irq(&cm_id_priv->lock);
861                 ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
862                                &cm_id_priv->id.device->node_guid,
863                                sizeof cm_id_priv->id.device->node_guid,
864                                NULL, 0);
865                 break;
866         case IB_CM_REQ_RCVD:
867                 if (err == -ENOMEM) {
868                         /* Do not reject to allow future retries. */
869                         cm_reset_to_idle(cm_id_priv);
870                         spin_unlock_irq(&cm_id_priv->lock);
871                 } else {
872                         spin_unlock_irq(&cm_id_priv->lock);
873                         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
874                                        NULL, 0, NULL, 0);
875                 }
876                 break;
877         case IB_CM_MRA_REQ_RCVD:
878         case IB_CM_REP_SENT:
879         case IB_CM_MRA_REP_RCVD:
880                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
881                 /* Fall through */
882         case IB_CM_MRA_REQ_SENT:
883         case IB_CM_REP_RCVD:
884         case IB_CM_MRA_REP_SENT:
885                 spin_unlock_irq(&cm_id_priv->lock);
886                 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
887                                NULL, 0, NULL, 0);
888                 break;
889         case IB_CM_ESTABLISHED:
890                 spin_unlock_irq(&cm_id_priv->lock);
891                 ib_send_cm_dreq(cm_id, NULL, 0);
892                 goto retest;
893         case IB_CM_DREQ_SENT:
894                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
895                 cm_enter_timewait(cm_id_priv);
896                 spin_unlock_irq(&cm_id_priv->lock);
897                 break;
898         case IB_CM_DREQ_RCVD:
899                 spin_unlock_irq(&cm_id_priv->lock);
900                 ib_send_cm_drep(cm_id, NULL, 0);
901                 break;
902         default:
903                 spin_unlock_irq(&cm_id_priv->lock);
904                 break;
905         }
906
907         cm_free_id(cm_id->local_id);
908         cm_deref_id(cm_id_priv);
909         wait_for_completion(&cm_id_priv->comp);
910         while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
911                 cm_free_work(work);
912         kfree(cm_id_priv->compare_data);
913         kfree(cm_id_priv->private_data);
914         kfree(cm_id_priv);
915 }
916
/*
 * ib_destroy_cm_id - exported entry point for destroying a cm_id.
 * @cm_id: id to destroy
 *
 * Thin wrapper around cm_destroy_id() that passes an error code of 0.
 */
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	const int no_error = 0;

	cm_destroy_id(cm_id, no_error);
}
EXPORT_SYMBOL(ib_destroy_cm_id);
922
923 int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
924                  struct ib_cm_compare_data *compare_data)
925 {
926         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
927         unsigned long flags;
928         int ret = 0;
929
930         service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
931         service_id &= service_mask;
932         if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
933             (service_id != IB_CM_ASSIGN_SERVICE_ID))
934                 return -EINVAL;
935
936         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
937         if (cm_id->state != IB_CM_IDLE)
938                 return -EINVAL;
939
940         if (compare_data) {
941                 cm_id_priv->compare_data = kzalloc(sizeof *compare_data,
942                                                    GFP_KERNEL);
943                 if (!cm_id_priv->compare_data)
944                         return -ENOMEM;
945                 cm_mask_copy(cm_id_priv->compare_data->data,
946                              compare_data->data, compare_data->mask);
947                 memcpy(cm_id_priv->compare_data->mask, compare_data->mask,
948                        IB_CM_COMPARE_SIZE);
949         }
950
951         cm_id->state = IB_CM_LISTEN;
952
953         spin_lock_irqsave(&cm.lock, flags);
954         if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
955                 cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
956                 cm_id->service_mask = ~cpu_to_be64(0);
957         } else {
958                 cm_id->service_id = service_id;
959                 cm_id->service_mask = service_mask;
960         }
961         cur_cm_id_priv = cm_insert_listen(cm_id_priv);
962         spin_unlock_irqrestore(&cm.lock, flags);
963
964         if (cur_cm_id_priv) {
965                 cm_id->state = IB_CM_IDLE;
966                 kfree(cm_id_priv->compare_data);
967                 cm_id_priv->compare_data = NULL;
968                 ret = -EBUSY;
969         }
970         return ret;
971 }
972 EXPORT_SYMBOL(ib_cm_listen);
973
974 static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
975                           enum cm_msg_sequence msg_seq)
976 {
977         u64 hi_tid, low_tid;
978
979         hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
980         low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
981                           (msg_seq << 30));
982         return cpu_to_be64(hi_tid | low_tid);
983 }
984
985 static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
986                               __be16 attr_id, __be64 tid)
987 {
988         hdr->base_version  = IB_MGMT_BASE_VERSION;
989         hdr->mgmt_class    = IB_MGMT_CLASS_CM;
990         hdr->class_version = IB_CM_CLASS_VERSION;
991         hdr->method        = IB_MGMT_METHOD_SEND;
992         hdr->attr_id       = attr_id;
993         hdr->tid           = tid;
994 }
995
996 static void cm_format_req(struct cm_req_msg *req_msg,
997                           struct cm_id_private *cm_id_priv,
998                           struct ib_cm_req_param *param)
999 {
1000         struct ib_sa_path_rec *pri_path = param->primary_path;
1001         struct ib_sa_path_rec *alt_path = param->alternate_path;
1002
1003         cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1004                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
1005
1006         req_msg->local_comm_id = cm_id_priv->id.local_id;
1007         req_msg->service_id = param->service_id;
1008         req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1009         cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1010         cm_req_set_resp_res(req_msg, param->responder_resources);
1011         cm_req_set_init_depth(req_msg, param->initiator_depth);
1012         cm_req_set_remote_resp_timeout(req_msg,
1013                                        param->remote_cm_response_timeout);
1014         cm_req_set_qp_type(req_msg, param->qp_type);
1015         cm_req_set_flow_ctrl(req_msg, param->flow_control);
1016         cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1017         cm_req_set_local_resp_timeout(req_msg,
1018                                       param->local_cm_response_timeout);
1019         cm_req_set_retry_count(req_msg, param->retry_count);
1020         req_msg->pkey = param->primary_path->pkey;
1021         cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1022         cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1023         cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1024         cm_req_set_srq(req_msg, param->srq);
1025
1026         if (pri_path->hop_limit <= 1) {
1027                 req_msg->primary_local_lid = pri_path->slid;
1028                 req_msg->primary_remote_lid = pri_path->dlid;
1029         } else {
1030                 /* Work-around until there's a way to obtain remote LID info */
1031                 req_msg->primary_local_lid = IB_LID_PERMISSIVE;
1032                 req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
1033         }
1034         req_msg->primary_local_gid = pri_path->sgid;
1035         req_msg->primary_remote_gid = pri_path->dgid;
1036         cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
1037         cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
1038         req_msg->primary_traffic_class = pri_path->traffic_class;
1039         req_msg->primary_hop_limit = pri_path->hop_limit;
1040         cm_req_set_primary_sl(req_msg, pri_path->sl);
1041         cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
1042         cm_req_set_primary_local_ack_timeout(req_msg,
1043                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1044                                pri_path->packet_life_time));
1045
1046         if (alt_path) {
1047                 if (alt_path->hop_limit <= 1) {
1048                         req_msg->alt_local_lid = alt_path->slid;
1049                         req_msg->alt_remote_lid = alt_path->dlid;
1050                 } else {
1051                         req_msg->alt_local_lid = IB_LID_PERMISSIVE;
1052                         req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
1053                 }
1054                 req_msg->alt_local_gid = alt_path->sgid;
1055                 req_msg->alt_remote_gid = alt_path->dgid;
1056                 cm_req_set_alt_flow_label(req_msg,
1057                                           alt_path->flow_label);
1058                 cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
1059                 req_msg->alt_traffic_class = alt_path->traffic_class;
1060                 req_msg->alt_hop_limit = alt_path->hop_limit;
1061                 cm_req_set_alt_sl(req_msg, alt_path->sl);
1062                 cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
1063                 cm_req_set_alt_local_ack_timeout(req_msg,
1064                         cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
1065                                        alt_path->packet_life_time));
1066         }
1067
1068         if (param->private_data && param->private_data_len)
1069                 memcpy(req_msg->private_data, param->private_data,
1070                        param->private_data_len);
1071 }
1072
1073 static int cm_validate_req_param(struct ib_cm_req_param *param)
1074 {
1075         /* peer-to-peer not supported */
1076         if (param->peer_to_peer)
1077                 return -EINVAL;
1078
1079         if (!param->primary_path)
1080                 return -EINVAL;
1081
1082         if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
1083                 return -EINVAL;
1084
1085         if (param->private_data &&
1086             param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
1087                 return -EINVAL;
1088
1089         if (param->alternate_path &&
1090             (param->alternate_path->pkey != param->primary_path->pkey ||
1091              param->alternate_path->mtu != param->primary_path->mtu))
1092                 return -EINVAL;
1093
1094         return 0;
1095 }
1096
1097 int ib_send_cm_req(struct ib_cm_id *cm_id,
1098                    struct ib_cm_req_param *param)
1099 {
1100         struct cm_id_private *cm_id_priv;
1101         struct cm_req_msg *req_msg;
1102         unsigned long flags;
1103         int ret;
1104
1105         ret = cm_validate_req_param(param);
1106         if (ret)
1107                 return ret;
1108
1109         /* Verify that we're not in timewait. */
1110         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1111         spin_lock_irqsave(&cm_id_priv->lock, flags);
1112         if (cm_id->state != IB_CM_IDLE) {
1113                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1114                 ret = -EINVAL;
1115                 goto out;
1116         }
1117         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1118
1119         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1120                                                             id.local_id);
1121         if (IS_ERR(cm_id_priv->timewait_info)) {
1122                 ret = PTR_ERR(cm_id_priv->timewait_info);
1123                 goto out;
1124         }
1125
1126         ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
1127         if (ret)
1128                 goto error1;
1129         if (param->alternate_path) {
1130                 ret = cm_init_av_by_path(param->alternate_path,
1131                                          &cm_id_priv->alt_av);
1132                 if (ret)
1133                         goto error1;
1134         }
1135         cm_id->service_id = param->service_id;
1136         cm_id->service_mask = ~cpu_to_be64(0);
1137         cm_id_priv->timeout_ms = cm_convert_to_ms(
1138                                     param->primary_path->packet_life_time) * 2 +
1139                                  cm_convert_to_ms(
1140                                     param->remote_cm_response_timeout);
1141         cm_id_priv->max_cm_retries = param->max_cm_retries;
1142         cm_id_priv->initiator_depth = param->initiator_depth;
1143         cm_id_priv->responder_resources = param->responder_resources;
1144         cm_id_priv->retry_count = param->retry_count;
1145         cm_id_priv->path_mtu = param->primary_path->mtu;
1146         cm_id_priv->pkey = param->primary_path->pkey;
1147         cm_id_priv->qp_type = param->qp_type;
1148
1149         ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
1150         if (ret)
1151                 goto error1;
1152
1153         req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
1154         cm_format_req(req_msg, cm_id_priv, param);
1155         cm_id_priv->tid = req_msg->hdr.tid;
1156         cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
1157         cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
1158
1159         cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
1160         cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
1161
1162         spin_lock_irqsave(&cm_id_priv->lock, flags);
1163         ret = ib_post_send_mad(cm_id_priv->msg, NULL);
1164         if (ret) {
1165                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1166                 goto error2;
1167         }
1168         BUG_ON(cm_id->state != IB_CM_IDLE);
1169         cm_id->state = IB_CM_REQ_SENT;
1170         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1171         return 0;
1172
1173 error2: cm_free_msg(cm_id_priv->msg);
1174 error1: kfree(cm_id_priv->timewait_info);
1175 out:    return ret;
1176 }
1177 EXPORT_SYMBOL(ib_send_cm_req);
1178
1179 static int cm_issue_rej(struct cm_port *port,
1180                         struct ib_mad_recv_wc *mad_recv_wc,
1181                         enum ib_cm_rej_reason reason,
1182                         enum cm_msg_response msg_rejected,
1183                         void *ari, u8 ari_length)
1184 {
1185         struct ib_mad_send_buf *msg = NULL;
1186         struct cm_rej_msg *rej_msg, *rcv_msg;
1187         int ret;
1188
1189         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
1190         if (ret)
1191                 return ret;
1192
1193         /* We just need common CM header information.  Cast to any message. */
1194         rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
1195         rej_msg = (struct cm_rej_msg *) msg->mad;
1196
1197         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
1198         rej_msg->remote_comm_id = rcv_msg->local_comm_id;
1199         rej_msg->local_comm_id = rcv_msg->remote_comm_id;
1200         cm_rej_set_msg_rejected(rej_msg, msg_rejected);
1201         rej_msg->reason = cpu_to_be16(reason);
1202
1203         if (ari && ari_length) {
1204                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1205                 memcpy(rej_msg->ari, ari, ari_length);
1206         }
1207
1208         ret = ib_post_send_mad(msg, NULL);
1209         if (ret)
1210                 cm_free_msg(msg);
1211
1212         return ret;
1213 }
1214
1215 static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
1216                                     __be32 local_qpn, __be32 remote_qpn)
1217 {
1218         return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
1219                 ((local_ca_guid == remote_ca_guid) &&
1220                  (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
1221 }
1222
1223 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1224                                             struct ib_sa_path_rec *primary_path,
1225                                             struct ib_sa_path_rec *alt_path)
1226 {
1227         memset(primary_path, 0, sizeof *primary_path);
1228         primary_path->dgid = req_msg->primary_local_gid;
1229         primary_path->sgid = req_msg->primary_remote_gid;
1230         primary_path->dlid = req_msg->primary_local_lid;
1231         primary_path->slid = req_msg->primary_remote_lid;
1232         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1233         primary_path->hop_limit = req_msg->primary_hop_limit;
1234         primary_path->traffic_class = req_msg->primary_traffic_class;
1235         primary_path->reversible = 1;
1236         primary_path->pkey = req_msg->pkey;
1237         primary_path->sl = cm_req_get_primary_sl(req_msg);
1238         primary_path->mtu_selector = IB_SA_EQ;
1239         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1240         primary_path->rate_selector = IB_SA_EQ;
1241         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1242         primary_path->packet_life_time_selector = IB_SA_EQ;
1243         primary_path->packet_life_time =
1244                 cm_req_get_primary_local_ack_timeout(req_msg);
1245         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1246
1247         if (req_msg->alt_local_lid) {
1248                 memset(alt_path, 0, sizeof *alt_path);
1249                 alt_path->dgid = req_msg->alt_local_gid;
1250                 alt_path->sgid = req_msg->alt_remote_gid;
1251                 alt_path->dlid = req_msg->alt_local_lid;
1252                 alt_path->slid = req_msg->alt_remote_lid;
1253                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1254                 alt_path->hop_limit = req_msg->alt_hop_limit;
1255                 alt_path->traffic_class = req_msg->alt_traffic_class;
1256                 alt_path->reversible = 1;
1257                 alt_path->pkey = req_msg->pkey;
1258                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1259                 alt_path->mtu_selector = IB_SA_EQ;
1260                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1261                 alt_path->rate_selector = IB_SA_EQ;
1262                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1263                 alt_path->packet_life_time_selector = IB_SA_EQ;
1264                 alt_path->packet_life_time =
1265                         cm_req_get_alt_local_ack_timeout(req_msg);
1266                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1267         }
1268 }
1269
1270 static void cm_format_req_event(struct cm_work *work,
1271                                 struct cm_id_private *cm_id_priv,
1272                                 struct ib_cm_id *listen_id)
1273 {
1274         struct cm_req_msg *req_msg;
1275         struct ib_cm_req_event_param *param;
1276
1277         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1278         param = &work->cm_event.param.req_rcvd;
1279         param->listen_id = listen_id;
1280         param->port = cm_id_priv->av.port->port_num;
1281         param->primary_path = &work->path[0];
1282         if (req_msg->alt_local_lid)
1283                 param->alternate_path = &work->path[1];
1284         else
1285                 param->alternate_path = NULL;
1286         param->remote_ca_guid = req_msg->local_ca_guid;
1287         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1288         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1289         param->qp_type = cm_req_get_qp_type(req_msg);
1290         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1291         param->responder_resources = cm_req_get_init_depth(req_msg);
1292         param->initiator_depth = cm_req_get_resp_res(req_msg);
1293         param->local_cm_response_timeout =
1294                                         cm_req_get_remote_resp_timeout(req_msg);
1295         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1296         param->remote_cm_response_timeout =
1297                                         cm_req_get_local_resp_timeout(req_msg);
1298         param->retry_count = cm_req_get_retry_count(req_msg);
1299         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1300         param->srq = cm_req_get_srq(req_msg);
1301         work->cm_event.private_data = &req_msg->private_data;
1302 }
1303
1304 static void cm_process_work(struct cm_id_private *cm_id_priv,
1305                             struct cm_work *work)
1306 {
1307         int ret;
1308
1309         /* We will typically only have the current event to report. */
1310         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1311         cm_free_work(work);
1312
1313         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1314                 spin_lock_irq(&cm_id_priv->lock);
1315                 work = cm_dequeue_work(cm_id_priv);
1316                 spin_unlock_irq(&cm_id_priv->lock);
1317                 BUG_ON(!work);
1318                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1319                                                 &work->cm_event);
1320                 cm_free_work(work);
1321         }
1322         cm_deref_id(cm_id_priv);
1323         if (ret)
1324                 cm_destroy_id(&cm_id_priv->id, ret);
1325 }
1326
1327 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1328                           struct cm_id_private *cm_id_priv,
1329                           enum cm_msg_response msg_mraed, u8 service_timeout,
1330                           const void *private_data, u8 private_data_len)
1331 {
1332         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1333         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1334         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1335         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1336         cm_mra_set_service_timeout(mra_msg, service_timeout);
1337
1338         if (private_data && private_data_len)
1339                 memcpy(mra_msg->private_data, private_data, private_data_len);
1340 }
1341
1342 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1343                           struct cm_id_private *cm_id_priv,
1344                           enum ib_cm_rej_reason reason,
1345                           void *ari,
1346                           u8 ari_length,
1347                           const void *private_data,
1348                           u8 private_data_len)
1349 {
1350         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1351         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1352
1353         switch(cm_id_priv->id.state) {
1354         case IB_CM_REQ_RCVD:
1355                 rej_msg->local_comm_id = 0;
1356                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1357                 break;
1358         case IB_CM_MRA_REQ_SENT:
1359                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1360                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1361                 break;
1362         case IB_CM_REP_RCVD:
1363         case IB_CM_MRA_REP_SENT:
1364                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1365                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1366                 break;
1367         default:
1368                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1369                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1370                 break;
1371         }
1372
1373         rej_msg->reason = cpu_to_be16(reason);
1374         if (ari && ari_length) {
1375                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1376                 memcpy(rej_msg->ari, ari, ari_length);
1377         }
1378
1379         if (private_data && private_data_len)
1380                 memcpy(rej_msg->private_data, private_data, private_data_len);
1381 }
1382
1383 static void cm_dup_req_handler(struct cm_work *work,
1384                                struct cm_id_private *cm_id_priv)
1385 {
1386         struct ib_mad_send_buf *msg = NULL;
1387         int ret;
1388
1389         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1390                         counter[CM_REQ_COUNTER]);
1391
1392         /* Quick state check to discard duplicate REQs. */
1393         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1394                 return;
1395
1396         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1397         if (ret)
1398                 return;
1399
1400         spin_lock_irq(&cm_id_priv->lock);
1401         switch (cm_id_priv->id.state) {
1402         case IB_CM_MRA_REQ_SENT:
1403                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1404                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1405                               cm_id_priv->private_data,
1406                               cm_id_priv->private_data_len);
1407                 break;
1408         case IB_CM_TIMEWAIT:
1409                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1410                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1411                 break;
1412         default:
1413                 goto unlock;
1414         }
1415         spin_unlock_irq(&cm_id_priv->lock);
1416
1417         ret = ib_post_send_mad(msg, NULL);
1418         if (ret)
1419                 goto free;
1420         return;
1421
1422 unlock: spin_unlock_irq(&cm_id_priv->lock);
1423 free:   cm_free_msg(msg);
1424 }
1425
1426 static struct cm_id_private * cm_match_req(struct cm_work *work,
1427                                            struct cm_id_private *cm_id_priv)
1428 {
1429         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1430         struct cm_timewait_info *timewait_info;
1431         struct cm_req_msg *req_msg;
1432
1433         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1434
1435         /* Check for possible duplicate REQ. */
1436         spin_lock_irq(&cm.lock);
1437         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1438         if (timewait_info) {
1439                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1440                                            timewait_info->work.remote_id);
1441                 spin_unlock_irq(&cm.lock);
1442                 if (cur_cm_id_priv) {
1443                         cm_dup_req_handler(work, cur_cm_id_priv);
1444                         cm_deref_id(cur_cm_id_priv);
1445                 }
1446                 return NULL;
1447         }
1448
1449         /* Check for stale connections. */
1450         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1451         if (timewait_info) {
1452                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1453                 spin_unlock_irq(&cm.lock);
1454                 cm_issue_rej(work->port, work->mad_recv_wc,
1455                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1456                              NULL, 0);
1457                 return NULL;
1458         }
1459
1460         /* Find matching listen request. */
1461         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1462                                            req_msg->service_id,
1463                                            req_msg->private_data);
1464         if (!listen_cm_id_priv) {
1465                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1466                 spin_unlock_irq(&cm.lock);
1467                 cm_issue_rej(work->port, work->mad_recv_wc,
1468                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1469                              NULL, 0);
1470                 goto out;
1471         }
1472         atomic_inc(&listen_cm_id_priv->refcount);
1473         atomic_inc(&cm_id_priv->refcount);
1474         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1475         atomic_inc(&cm_id_priv->work_count);
1476         spin_unlock_irq(&cm.lock);
1477 out:
1478         return listen_cm_id_priv;
1479 }
1480
1481 /*
1482  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1483  * we need to override the LID/SL data in the REQ with the LID information
1484  * in the work completion.
1485  */
1486 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1487 {
1488         if (!cm_req_get_primary_subnet_local(req_msg)) {
1489                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1490                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1491                         cm_req_set_primary_sl(req_msg, wc->sl);
1492                 }
1493
1494                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1495                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1496         }
1497
1498         if (!cm_req_get_alt_subnet_local(req_msg)) {
1499                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1500                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1501                         cm_req_set_alt_sl(req_msg, wc->sl);
1502                 }
1503
1504                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1505                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1506         }
1507 }
1508
1509 static int cm_req_handler(struct cm_work *work)
1510 {
1511         struct ib_cm_id *cm_id;
1512         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1513         struct cm_req_msg *req_msg;
1514         int ret;
1515
1516         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1517
1518         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1519         if (IS_ERR(cm_id))
1520                 return PTR_ERR(cm_id);
1521
1522         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1523         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1524         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1525                                 work->mad_recv_wc->recv_buf.grh,
1526                                 &cm_id_priv->av);
1527         cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1528                                                             id.local_id);
1529         if (IS_ERR(cm_id_priv->timewait_info)) {
1530                 ret = PTR_ERR(cm_id_priv->timewait_info);
1531                 goto destroy;
1532         }
1533         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1534         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1535         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1536
1537         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1538         if (!listen_cm_id_priv) {
1539                 ret = -EINVAL;
1540                 kfree(cm_id_priv->timewait_info);
1541                 goto destroy;
1542         }
1543
1544         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1545         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1546         cm_id_priv->id.service_id = req_msg->service_id;
1547         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1548
1549         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1550         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1551         ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1552         if (ret) {
1553                 ib_get_cached_gid(work->port->cm_dev->ib_device,
1554                                   work->port->port_num, 0, &work->path[0].sgid);
1555                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1556                                &work->path[0].sgid, sizeof work->path[0].sgid,
1557                                NULL, 0);
1558                 goto rejected;
1559         }
1560         if (req_msg->alt_local_lid) {
1561                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1562                 if (ret) {
1563                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1564                                        &work->path[0].sgid,
1565                                        sizeof work->path[0].sgid, NULL, 0);
1566                         goto rejected;
1567                 }
1568         }
1569         cm_id_priv->tid = req_msg->hdr.tid;
1570         cm_id_priv->timeout_ms = cm_convert_to_ms(
1571                                         cm_req_get_local_resp_timeout(req_msg));
1572         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1573         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1574         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1575         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1576         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1577         cm_id_priv->pkey = req_msg->pkey;
1578         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1579         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1580         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1581         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1582
1583         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1584         cm_process_work(cm_id_priv, work);
1585         cm_deref_id(listen_cm_id_priv);
1586         return 0;
1587
1588 rejected:
1589         atomic_dec(&cm_id_priv->refcount);
1590         cm_deref_id(listen_cm_id_priv);
1591 destroy:
1592         ib_destroy_cm_id(cm_id);
1593         return ret;
1594 }
1595
1596 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1597                           struct cm_id_private *cm_id_priv,
1598                           struct ib_cm_rep_param *param)
1599 {
1600         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1601         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1602         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1603         cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1604         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1605         rep_msg->resp_resources = param->responder_resources;
1606         rep_msg->initiator_depth = param->initiator_depth;
1607         cm_rep_set_target_ack_delay(rep_msg,
1608                                     cm_id_priv->av.port->cm_dev->ack_delay);
1609         cm_rep_set_failover(rep_msg, param->failover_accepted);
1610         cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1611         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1612         cm_rep_set_srq(rep_msg, param->srq);
1613         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1614
1615         if (param->private_data && param->private_data_len)
1616                 memcpy(rep_msg->private_data, param->private_data,
1617                        param->private_data_len);
1618 }
1619
1620 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1621                    struct ib_cm_rep_param *param)
1622 {
1623         struct cm_id_private *cm_id_priv;
1624         struct ib_mad_send_buf *msg;
1625         struct cm_rep_msg *rep_msg;
1626         unsigned long flags;
1627         int ret;
1628
1629         if (param->private_data &&
1630             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1631                 return -EINVAL;
1632
1633         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1634         spin_lock_irqsave(&cm_id_priv->lock, flags);
1635         if (cm_id->state != IB_CM_REQ_RCVD &&
1636             cm_id->state != IB_CM_MRA_REQ_SENT) {
1637                 ret = -EINVAL;
1638                 goto out;
1639         }
1640
1641         ret = cm_alloc_msg(cm_id_priv, &msg);
1642         if (ret)
1643                 goto out;
1644
1645         rep_msg = (struct cm_rep_msg *) msg->mad;
1646         cm_format_rep(rep_msg, cm_id_priv, param);
1647         msg->timeout_ms = cm_id_priv->timeout_ms;
1648         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1649
1650         ret = ib_post_send_mad(msg, NULL);
1651         if (ret) {
1652                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1653                 cm_free_msg(msg);
1654                 return ret;
1655         }
1656
1657         cm_id->state = IB_CM_REP_SENT;
1658         cm_id_priv->msg = msg;
1659         cm_id_priv->initiator_depth = param->initiator_depth;
1660         cm_id_priv->responder_resources = param->responder_resources;
1661         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1662         cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
1663
1664 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1665         return ret;
1666 }
1667 EXPORT_SYMBOL(ib_send_cm_rep);
1668
1669 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1670                           struct cm_id_private *cm_id_priv,
1671                           const void *private_data,
1672                           u8 private_data_len)
1673 {
1674         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1675         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1676         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1677
1678         if (private_data && private_data_len)
1679                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1680 }
1681
1682 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1683                    const void *private_data,
1684                    u8 private_data_len)
1685 {
1686         struct cm_id_private *cm_id_priv;
1687         struct ib_mad_send_buf *msg;
1688         unsigned long flags;
1689         void *data;
1690         int ret;
1691
1692         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1693                 return -EINVAL;
1694
1695         data = cm_copy_private_data(private_data, private_data_len);
1696         if (IS_ERR(data))
1697                 return PTR_ERR(data);
1698
1699         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1700         spin_lock_irqsave(&cm_id_priv->lock, flags);
1701         if (cm_id->state != IB_CM_REP_RCVD &&
1702             cm_id->state != IB_CM_MRA_REP_SENT) {
1703                 ret = -EINVAL;
1704                 goto error;
1705         }
1706
1707         ret = cm_alloc_msg(cm_id_priv, &msg);
1708         if (ret)
1709                 goto error;
1710
1711         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1712                       private_data, private_data_len);
1713
1714         ret = ib_post_send_mad(msg, NULL);
1715         if (ret) {
1716                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1717                 cm_free_msg(msg);
1718                 kfree(data);
1719                 return ret;
1720         }
1721
1722         cm_id->state = IB_CM_ESTABLISHED;
1723         cm_set_private_data(cm_id_priv, data, private_data_len);
1724         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1725         return 0;
1726
1727 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1728         kfree(data);
1729         return ret;
1730 }
1731 EXPORT_SYMBOL(ib_send_cm_rtu);
1732
1733 static void cm_format_rep_event(struct cm_work *work)
1734 {
1735         struct cm_rep_msg *rep_msg;
1736         struct ib_cm_rep_event_param *param;
1737
1738         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1739         param = &work->cm_event.param.rep_rcvd;
1740         param->remote_ca_guid = rep_msg->local_ca_guid;
1741         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1742         param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
1743         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1744         param->responder_resources = rep_msg->initiator_depth;
1745         param->initiator_depth = rep_msg->resp_resources;
1746         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1747         param->failover_accepted = cm_rep_get_failover(rep_msg);
1748         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1749         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1750         param->srq = cm_rep_get_srq(rep_msg);
1751         work->cm_event.private_data = &rep_msg->private_data;
1752 }
1753
1754 static void cm_dup_rep_handler(struct cm_work *work)
1755 {
1756         struct cm_id_private *cm_id_priv;
1757         struct cm_rep_msg *rep_msg;
1758         struct ib_mad_send_buf *msg = NULL;
1759         int ret;
1760
1761         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1762         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1763                                    rep_msg->local_comm_id);
1764         if (!cm_id_priv)
1765                 return;
1766
1767         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1768                         counter[CM_REP_COUNTER]);
1769         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1770         if (ret)
1771                 goto deref;
1772
1773         spin_lock_irq(&cm_id_priv->lock);
1774         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1775                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1776                               cm_id_priv->private_data,
1777                               cm_id_priv->private_data_len);
1778         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1779                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1780                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1781                               cm_id_priv->private_data,
1782                               cm_id_priv->private_data_len);
1783         else
1784                 goto unlock;
1785         spin_unlock_irq(&cm_id_priv->lock);
1786
1787         ret = ib_post_send_mad(msg, NULL);
1788         if (ret)
1789                 goto free;
1790         goto deref;
1791
1792 unlock: spin_unlock_irq(&cm_id_priv->lock);
1793 free:   cm_free_msg(msg);
1794 deref:  cm_deref_id(cm_id_priv);
1795 }
1796
1797 static int cm_rep_handler(struct cm_work *work)
1798 {
1799         struct cm_id_private *cm_id_priv;
1800         struct cm_rep_msg *rep_msg;
1801         int ret;
1802
1803         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1804         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
1805         if (!cm_id_priv) {
1806                 cm_dup_rep_handler(work);
1807                 return -EINVAL;
1808         }
1809
1810         cm_format_rep_event(work);
1811
1812         spin_lock_irq(&cm_id_priv->lock);
1813         switch (cm_id_priv->id.state) {
1814         case IB_CM_REQ_SENT:
1815         case IB_CM_MRA_REQ_RCVD:
1816                 break;
1817         default:
1818                 spin_unlock_irq(&cm_id_priv->lock);
1819                 ret = -EINVAL;
1820                 goto error;
1821         }
1822
1823         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1824         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1825         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1826
1827         spin_lock(&cm.lock);
1828         /* Check for duplicate REP. */
1829         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
1830                 spin_unlock(&cm.lock);
1831                 spin_unlock_irq(&cm_id_priv->lock);
1832                 ret = -EINVAL;
1833                 goto error;
1834         }
1835         /* Check for a stale connection. */
1836         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
1837                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
1838                          &cm.remote_id_table);
1839                 cm_id_priv->timewait_info->inserted_remote_id = 0;
1840                 spin_unlock(&cm.lock);
1841                 spin_unlock_irq(&cm_id_priv->lock);
1842                 cm_issue_rej(work->port, work->mad_recv_wc,
1843                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
1844                              NULL, 0);
1845                 ret = -EINVAL;
1846                 goto error;
1847         }
1848         spin_unlock(&cm.lock);
1849
1850         cm_id_priv->id.state = IB_CM_REP_RCVD;
1851         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1852         cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1853         cm_id_priv->initiator_depth = rep_msg->resp_resources;
1854         cm_id_priv->responder_resources = rep_msg->initiator_depth;
1855         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
1856         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1857         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1858         cm_id_priv->av.timeout =
1859                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1860                                        cm_id_priv->av.timeout - 1);
1861         cm_id_priv->alt_av.timeout =
1862                         cm_ack_timeout(cm_id_priv->target_ack_delay,
1863                                        cm_id_priv->alt_av.timeout - 1);
1864
1865         /* todo: handle peer_to_peer */
1866
1867         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1868         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1869         if (!ret)
1870                 list_add_tail(&work->list, &cm_id_priv->work_list);
1871         spin_unlock_irq(&cm_id_priv->lock);
1872
1873         if (ret)
1874                 cm_process_work(cm_id_priv, work);
1875         else
1876                 cm_deref_id(cm_id_priv);
1877         return 0;
1878
1879 error:
1880         cm_deref_id(cm_id_priv);
1881         return ret;
1882 }
1883
1884 static int cm_establish_handler(struct cm_work *work)
1885 {
1886         struct cm_id_private *cm_id_priv;
1887         int ret;
1888
1889         /* See comment in cm_establish about lookup. */
1890         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
1891         if (!cm_id_priv)
1892                 return -EINVAL;
1893
1894         spin_lock_irq(&cm_id_priv->lock);
1895         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
1896                 spin_unlock_irq(&cm_id_priv->lock);
1897                 goto out;
1898         }
1899
1900         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1901         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1902         if (!ret)
1903                 list_add_tail(&work->list, &cm_id_priv->work_list);
1904         spin_unlock_irq(&cm_id_priv->lock);
1905
1906         if (ret)
1907                 cm_process_work(cm_id_priv, work);
1908         else
1909                 cm_deref_id(cm_id_priv);
1910         return 0;
1911 out:
1912         cm_deref_id(cm_id_priv);
1913         return -EINVAL;
1914 }
1915
1916 static int cm_rtu_handler(struct cm_work *work)
1917 {
1918         struct cm_id_private *cm_id_priv;
1919         struct cm_rtu_msg *rtu_msg;
1920         int ret;
1921
1922         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
1923         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
1924                                    rtu_msg->local_comm_id);
1925         if (!cm_id_priv)
1926                 return -EINVAL;
1927
1928         work->cm_event.private_data = &rtu_msg->private_data;
1929
1930         spin_lock_irq(&cm_id_priv->lock);
1931         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
1932             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
1933                 spin_unlock_irq(&cm_id_priv->lock);
1934                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1935                                 counter[CM_RTU_COUNTER]);
1936                 goto out;
1937         }
1938         cm_id_priv->id.state = IB_CM_ESTABLISHED;
1939
1940         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
1941         ret = atomic_inc_and_test(&cm_id_priv->work_count);
1942         if (!ret)
1943                 list_add_tail(&work->list, &cm_id_priv->work_list);
1944         spin_unlock_irq(&cm_id_priv->lock);
1945
1946         if (ret)
1947                 cm_process_work(cm_id_priv, work);
1948         else
1949                 cm_deref_id(cm_id_priv);
1950         return 0;
1951 out:
1952         cm_deref_id(cm_id_priv);
1953         return -EINVAL;
1954 }
1955
1956 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
1957                           struct cm_id_private *cm_id_priv,
1958                           const void *private_data,
1959                           u8 private_data_len)
1960 {
1961         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
1962                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
1963         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
1964         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
1965         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
1966
1967         if (private_data && private_data_len)
1968                 memcpy(dreq_msg->private_data, private_data, private_data_len);
1969 }
1970
1971 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
1972                     const void *private_data,
1973                     u8 private_data_len)
1974 {
1975         struct cm_id_private *cm_id_priv;
1976         struct ib_mad_send_buf *msg;
1977         unsigned long flags;
1978         int ret;
1979
1980         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
1981                 return -EINVAL;
1982
1983         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1984         spin_lock_irqsave(&cm_id_priv->lock, flags);
1985         if (cm_id->state != IB_CM_ESTABLISHED) {
1986                 ret = -EINVAL;
1987                 goto out;
1988         }
1989
1990         ret = cm_alloc_msg(cm_id_priv, &msg);
1991         if (ret) {
1992                 cm_enter_timewait(cm_id_priv);
1993                 goto out;
1994         }
1995
1996         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
1997                        private_data, private_data_len);
1998         msg->timeout_ms = cm_id_priv->timeout_ms;
1999         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2000
2001         ret = ib_post_send_mad(msg, NULL);
2002         if (ret) {
2003                 cm_enter_timewait(cm_id_priv);
2004                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2005                 cm_free_msg(msg);
2006                 return ret;
2007         }
2008
2009         cm_id->state = IB_CM_DREQ_SENT;
2010         cm_id_priv->msg = msg;
2011 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2012         return ret;
2013 }
2014 EXPORT_SYMBOL(ib_send_cm_dreq);
2015
2016 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2017                           struct cm_id_private *cm_id_priv,
2018                           const void *private_data,
2019                           u8 private_data_len)
2020 {
2021         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2022         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2023         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2024
2025         if (private_data && private_data_len)
2026                 memcpy(drep_msg->private_data, private_data, private_data_len);
2027 }
2028
2029 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2030                     const void *private_data,
2031                     u8 private_data_len)
2032 {
2033         struct cm_id_private *cm_id_priv;
2034         struct ib_mad_send_buf *msg;
2035         unsigned long flags;
2036         void *data;
2037         int ret;
2038
2039         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2040                 return -EINVAL;
2041
2042         data = cm_copy_private_data(private_data, private_data_len);
2043         if (IS_ERR(data))
2044                 return PTR_ERR(data);
2045
2046         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2047         spin_lock_irqsave(&cm_id_priv->lock, flags);
2048         if (cm_id->state != IB_CM_DREQ_RCVD) {
2049                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2050                 kfree(data);
2051                 return -EINVAL;
2052         }
2053
2054         cm_set_private_data(cm_id_priv, data, private_data_len);
2055         cm_enter_timewait(cm_id_priv);
2056
2057         ret = cm_alloc_msg(cm_id_priv, &msg);
2058         if (ret)
2059                 goto out;
2060
2061         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2062                        private_data, private_data_len);
2063
2064         ret = ib_post_send_mad(msg, NULL);
2065         if (ret) {
2066                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2067                 cm_free_msg(msg);
2068                 return ret;
2069         }
2070
2071 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2072         return ret;
2073 }
2074 EXPORT_SYMBOL(ib_send_cm_drep);
2075
2076 static int cm_issue_drep(struct cm_port *port,
2077                          struct ib_mad_recv_wc *mad_recv_wc)
2078 {
2079         struct ib_mad_send_buf *msg = NULL;
2080         struct cm_dreq_msg *dreq_msg;
2081         struct cm_drep_msg *drep_msg;
2082         int ret;
2083
2084         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2085         if (ret)
2086                 return ret;
2087
2088         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2089         drep_msg = (struct cm_drep_msg *) msg->mad;
2090
2091         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2092         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2093         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2094
2095         ret = ib_post_send_mad(msg, NULL);
2096         if (ret)
2097                 cm_free_msg(msg);
2098
2099         return ret;
2100 }
2101
2102 static int cm_dreq_handler(struct cm_work *work)
2103 {
2104         struct cm_id_private *cm_id_priv;
2105         struct cm_dreq_msg *dreq_msg;
2106         struct ib_mad_send_buf *msg = NULL;
2107         int ret;
2108
2109         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2110         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2111                                    dreq_msg->local_comm_id);
2112         if (!cm_id_priv) {
2113                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2114                                 counter[CM_DREQ_COUNTER]);
2115                 cm_issue_drep(work->port, work->mad_recv_wc);
2116                 return -EINVAL;
2117         }
2118
2119         work->cm_event.private_data = &dreq_msg->private_data;
2120
2121         spin_lock_irq(&cm_id_priv->lock);
2122         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2123                 goto unlock;
2124
2125         switch (cm_id_priv->id.state) {
2126         case IB_CM_REP_SENT:
2127         case IB_CM_DREQ_SENT:
2128                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2129                 break;
2130         case IB_CM_ESTABLISHED:
2131         case IB_CM_MRA_REP_RCVD:
2132                 break;
2133         case IB_CM_TIMEWAIT:
2134                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2135                                 counter[CM_DREQ_COUNTER]);
2136                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2137                         goto unlock;
2138
2139                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2140                                cm_id_priv->private_data,
2141                                cm_id_priv->private_data_len);
2142                 spin_unlock_irq(&cm_id_priv->lock);
2143
2144                 if (ib_post_send_mad(msg, NULL))
2145                         cm_free_msg(msg);
2146                 goto deref;
2147         case IB_CM_DREQ_RCVD:
2148                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2149                                 counter[CM_DREQ_COUNTER]);
2150                 goto unlock;
2151         default:
2152                 goto unlock;
2153         }
2154         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2155         cm_id_priv->tid = dreq_msg->hdr.tid;
2156         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2157         if (!ret)
2158                 list_add_tail(&work->list, &cm_id_priv->work_list);
2159         spin_unlock_irq(&cm_id_priv->lock);
2160
2161         if (ret)
2162                 cm_process_work(cm_id_priv, work);
2163         else
2164                 cm_deref_id(cm_id_priv);
2165         return 0;
2166
2167 unlock: spin_unlock_irq(&cm_id_priv->lock);
2168 deref:  cm_deref_id(cm_id_priv);
2169         return -EINVAL;
2170 }
2171
2172 static int cm_drep_handler(struct cm_work *work)
2173 {
2174         struct cm_id_private *cm_id_priv;
2175         struct cm_drep_msg *drep_msg;
2176         int ret;
2177
2178         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2179         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2180                                    drep_msg->local_comm_id);
2181         if (!cm_id_priv)
2182                 return -EINVAL;
2183
2184         work->cm_event.private_data = &drep_msg->private_data;
2185
2186         spin_lock_irq(&cm_id_priv->lock);
2187         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2188             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2189                 spin_unlock_irq(&cm_id_priv->lock);
2190                 goto out;
2191         }
2192         cm_enter_timewait(cm_id_priv);
2193
2194         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2195         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2196         if (!ret)
2197                 list_add_tail(&work->list, &cm_id_priv->work_list);
2198         spin_unlock_irq(&cm_id_priv->lock);
2199
2200         if (ret)
2201                 cm_process_work(cm_id_priv, work);
2202         else
2203                 cm_deref_id(cm_id_priv);
2204         return 0;
2205 out:
2206         cm_deref_id(cm_id_priv);
2207         return -EINVAL;
2208 }
2209
2210 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2211                    enum ib_cm_rej_reason reason,
2212                    void *ari,
2213                    u8 ari_length,
2214                    const void *private_data,
2215                    u8 private_data_len)
2216 {
2217         struct cm_id_private *cm_id_priv;
2218         struct ib_mad_send_buf *msg;
2219         unsigned long flags;
2220         int ret;
2221
2222         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2223             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2224                 return -EINVAL;
2225
2226         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2227
2228         spin_lock_irqsave(&cm_id_priv->lock, flags);
2229         switch (cm_id->state) {
2230         case IB_CM_REQ_SENT:
2231         case IB_CM_MRA_REQ_RCVD:
2232         case IB_CM_REQ_RCVD:
2233         case IB_CM_MRA_REQ_SENT:
2234         case IB_CM_REP_RCVD:
2235         case IB_CM_MRA_REP_SENT:
2236                 ret = cm_alloc_msg(cm_id_priv, &msg);
2237                 if (!ret)
2238                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2239                                       cm_id_priv, reason, ari, ari_length,
2240                                       private_data, private_data_len);
2241
2242                 cm_reset_to_idle(cm_id_priv);
2243                 break;
2244         case IB_CM_REP_SENT:
2245         case IB_CM_MRA_REP_RCVD:
2246                 ret = cm_alloc_msg(cm_id_priv, &msg);
2247                 if (!ret)
2248                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2249                                       cm_id_priv, reason, ari, ari_length,
2250                                       private_data, private_data_len);
2251
2252                 cm_enter_timewait(cm_id_priv);
2253                 break;
2254         default:
2255                 ret = -EINVAL;
2256                 goto out;
2257         }
2258
2259         if (ret)
2260                 goto out;
2261
2262         ret = ib_post_send_mad(msg, NULL);
2263         if (ret)
2264                 cm_free_msg(msg);
2265
2266 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2267         return ret;
2268 }
2269 EXPORT_SYMBOL(ib_send_cm_rej);
2270
2271 static void cm_format_rej_event(struct cm_work *work)
2272 {
2273         struct cm_rej_msg *rej_msg;
2274         struct ib_cm_rej_event_param *param;
2275
2276         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2277         param = &work->cm_event.param.rej_rcvd;
2278         param->ari = rej_msg->ari;
2279         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2280         param->reason = __be16_to_cpu(rej_msg->reason);
2281         work->cm_event.private_data = &rej_msg->private_data;
2282 }
2283
2284 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2285 {
2286         struct cm_timewait_info *timewait_info;
2287         struct cm_id_private *cm_id_priv;
2288         __be32 remote_id;
2289
2290         remote_id = rej_msg->local_comm_id;
2291
2292         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2293                 spin_lock_irq(&cm.lock);
2294                 timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
2295                                                   remote_id);
2296                 if (!timewait_info) {
2297                         spin_unlock_irq(&cm.lock);
2298                         return NULL;
2299                 }
2300                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2301                                       (timewait_info->work.local_id ^
2302                                        cm.random_id_operand));
2303                 if (cm_id_priv) {
2304                         if (cm_id_priv->id.remote_id == remote_id)
2305                                 atomic_inc(&cm_id_priv->refcount);
2306                         else
2307                                 cm_id_priv = NULL;
2308                 }
2309                 spin_unlock_irq(&cm.lock);
2310         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2311                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2312         else
2313                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2314
2315         return cm_id_priv;
2316 }
2317
2318 static int cm_rej_handler(struct cm_work *work)
2319 {
2320         struct cm_id_private *cm_id_priv;
2321         struct cm_rej_msg *rej_msg;
2322         int ret;
2323
2324         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2325         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2326         if (!cm_id_priv)
2327                 return -EINVAL;
2328
2329         cm_format_rej_event(work);
2330
2331         spin_lock_irq(&cm_id_priv->lock);
2332         switch (cm_id_priv->id.state) {
2333         case IB_CM_REQ_SENT:
2334         case IB_CM_MRA_REQ_RCVD:
2335         case IB_CM_REP_SENT:
2336         case IB_CM_MRA_REP_RCVD:
2337                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2338                 /* fall through */
2339         case IB_CM_REQ_RCVD:
2340         case IB_CM_MRA_REQ_SENT:
2341                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2342                         cm_enter_timewait(cm_id_priv);
2343                 else
2344                         cm_reset_to_idle(cm_id_priv);
2345                 break;
2346         case IB_CM_DREQ_SENT:
2347                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2348                 /* fall through */
2349         case IB_CM_REP_RCVD:
2350         case IB_CM_MRA_REP_SENT:
2351         case IB_CM_ESTABLISHED:
2352                 cm_enter_timewait(cm_id_priv);
2353                 break;
2354         default:
2355                 spin_unlock_irq(&cm_id_priv->lock);
2356                 ret = -EINVAL;
2357                 goto out;
2358         }
2359
2360         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2361         if (!ret)
2362                 list_add_tail(&work->list, &cm_id_priv->work_list);
2363         spin_unlock_irq(&cm_id_priv->lock);
2364
2365         if (ret)
2366                 cm_process_work(cm_id_priv, work);
2367         else
2368                 cm_deref_id(cm_id_priv);
2369         return 0;
2370 out:
2371         cm_deref_id(cm_id_priv);
2372         return -EINVAL;
2373 }
2374
2375 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2376                    u8 service_timeout,
2377                    const void *private_data,
2378                    u8 private_data_len)
2379 {
2380         struct cm_id_private *cm_id_priv;
2381         struct ib_mad_send_buf *msg;
2382         enum ib_cm_state cm_state;
2383         enum ib_cm_lap_state lap_state;
2384         enum cm_msg_response msg_response;
2385         void *data;
2386         unsigned long flags;
2387         int ret;
2388
2389         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2390                 return -EINVAL;
2391
2392         data = cm_copy_private_data(private_data, private_data_len);
2393         if (IS_ERR(data))
2394                 return PTR_ERR(data);
2395
2396         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2397
2398         spin_lock_irqsave(&cm_id_priv->lock, flags);
2399         switch(cm_id_priv->id.state) {
2400         case IB_CM_REQ_RCVD:
2401                 cm_state = IB_CM_MRA_REQ_SENT;
2402                 lap_state = cm_id->lap_state;
2403                 msg_response = CM_MSG_RESPONSE_REQ;
2404                 break;
2405         case IB_CM_REP_RCVD:
2406                 cm_state = IB_CM_MRA_REP_SENT;
2407                 lap_state = cm_id->lap_state;
2408                 msg_response = CM_MSG_RESPONSE_REP;
2409                 break;
2410         case IB_CM_ESTABLISHED:
2411                 cm_state = cm_id->state;
2412                 lap_state = IB_CM_MRA_LAP_SENT;
2413                 msg_response = CM_MSG_RESPONSE_OTHER;
2414                 break;
2415         default:
2416                 ret = -EINVAL;
2417                 goto error1;
2418         }
2419
2420         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2421                 ret = cm_alloc_msg(cm_id_priv, &msg);
2422                 if (ret)
2423                         goto error1;
2424
2425                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2426                               msg_response, service_timeout,
2427                               private_data, private_data_len);
2428                 ret = ib_post_send_mad(msg, NULL);
2429                 if (ret)
2430                         goto error2;
2431         }
2432
2433         cm_id->state = cm_state;
2434         cm_id->lap_state = lap_state;
2435         cm_id_priv->service_timeout = service_timeout;
2436         cm_set_private_data(cm_id_priv, data, private_data_len);
2437         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2438         return 0;
2439
2440 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2441         kfree(data);
2442         return ret;
2443
2444 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2445         kfree(data);
2446         cm_free_msg(msg);
2447         return ret;
2448 }
2449 EXPORT_SYMBOL(ib_send_cm_mra);
2450
2451 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2452 {
2453         switch (cm_mra_get_msg_mraed(mra_msg)) {
2454         case CM_MSG_RESPONSE_REQ:
2455                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2456         case CM_MSG_RESPONSE_REP:
2457         case CM_MSG_RESPONSE_OTHER:
2458                 return cm_acquire_id(mra_msg->remote_comm_id,
2459                                      mra_msg->local_comm_id);
2460         default:
2461                 return NULL;
2462         }
2463 }
2464
2465 static int cm_mra_handler(struct cm_work *work)
2466 {
2467         struct cm_id_private *cm_id_priv;
2468         struct cm_mra_msg *mra_msg;
2469         int timeout, ret;
2470
2471         mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
2472         cm_id_priv = cm_acquire_mraed_id(mra_msg);
2473         if (!cm_id_priv)
2474                 return -EINVAL;
2475
2476         work->cm_event.private_data = &mra_msg->private_data;
2477         work->cm_event.param.mra_rcvd.service_timeout =
2478                                         cm_mra_get_service_timeout(mra_msg);
2479         timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
2480                   cm_convert_to_ms(cm_id_priv->av.timeout);
2481
2482         spin_lock_irq(&cm_id_priv->lock);
2483         switch (cm_id_priv->id.state) {
2484         case IB_CM_REQ_SENT:
2485                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
2486                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2487                                   cm_id_priv->msg, timeout))
2488                         goto out;
2489                 cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
2490                 break;
2491         case IB_CM_REP_SENT:
2492                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
2493                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2494                                   cm_id_priv->msg, timeout))
2495                         goto out;
2496                 cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
2497                 break;
2498         case IB_CM_ESTABLISHED:
2499                 if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
2500                     cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
2501                     ib_modify_mad(cm_id_priv->av.port->mad_agent,
2502                                   cm_id_priv->msg, timeout)) {
2503                         if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2504                                 atomic_long_inc(&work->port->
2505                                                 counter_group[CM_RECV_DUPLICATES].
2506                                                 counter[CM_MRA_COUNTER]);
2507                         goto out;
2508                 }
2509                 cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
2510                 break;
2511         case IB_CM_MRA_REQ_RCVD:
2512         case IB_CM_MRA_REP_RCVD:
2513                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2514                                 counter[CM_MRA_COUNTER]);
2515                 /* fall through */
2516         default:
2517                 goto out;
2518         }
2519
2520         cm_id_priv->msg->context[1] = (void *) (unsigned long)
2521                                       cm_id_priv->id.state;
2522         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2523         if (!ret)
2524                 list_add_tail(&work->list, &cm_id_priv->work_list);
2525         spin_unlock_irq(&cm_id_priv->lock);
2526
2527         if (ret)
2528                 cm_process_work(cm_id_priv, work);
2529         else
2530                 cm_deref_id(cm_id_priv);
2531         return 0;
2532 out:
2533         spin_unlock_irq(&cm_id_priv->lock);
2534         cm_deref_id(cm_id_priv);
2535         return -EINVAL;
2536 }
2537
2538 static void cm_format_lap(struct cm_lap_msg *lap_msg,
2539                           struct cm_id_private *cm_id_priv,
2540                           struct ib_sa_path_rec *alternate_path,
2541                           const void *private_data,
2542                           u8 private_data_len)
2543 {
2544         cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
2545                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
2546         lap_msg->local_comm_id = cm_id_priv->id.local_id;
2547         lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
2548         cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
2549         /* todo: need remote CM response timeout */
2550         cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
2551         lap_msg->alt_local_lid = alternate_path->slid;
2552         lap_msg->alt_remote_lid = alternate_path->dlid;
2553         lap_msg->alt_local_gid = alternate_path->sgid;
2554         lap_msg->alt_remote_gid = alternate_path->dgid;
2555         cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
2556         cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
2557         lap_msg->alt_hop_limit = alternate_path->hop_limit;
2558         cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
2559         cm_lap_set_sl(lap_msg, alternate_path->sl);
2560         cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
2561         cm_lap_set_local_ack_timeout(lap_msg,
2562                 cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
2563                                alternate_path->packet_life_time));
2564
2565         if (private_data && private_data_len)
2566                 memcpy(lap_msg->private_data, private_data, private_data_len);
2567 }
2568
2569 int ib_send_cm_lap(struct ib_cm_id *cm_id,
2570                    struct ib_sa_path_rec *alternate_path,
2571                    const void *private_data,
2572                    u8 private_data_len)
2573 {
2574         struct cm_id_private *cm_id_priv;
2575         struct ib_mad_send_buf *msg;
2576         unsigned long flags;
2577         int ret;
2578
2579         if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
2580                 return -EINVAL;
2581
2582         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2583         spin_lock_irqsave(&cm_id_priv->lock, flags);
2584         if (cm_id->state != IB_CM_ESTABLISHED ||
2585             (cm_id->lap_state != IB_CM_LAP_UNINIT &&
2586              cm_id->lap_state != IB_CM_LAP_IDLE)) {
2587                 ret = -EINVAL;
2588                 goto out;
2589         }
2590
2591         ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
2592         if (ret)
2593                 goto out;
2594         cm_id_priv->alt_av.timeout =
2595                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2596                                        cm_id_priv->alt_av.timeout - 1);
2597
2598         ret = cm_alloc_msg(cm_id_priv, &msg);
2599         if (ret)
2600                 goto out;
2601
2602         cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
2603                       alternate_path, private_data, private_data_len);
2604         msg->timeout_ms = cm_id_priv->timeout_ms;
2605         msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
2606
2607         ret = ib_post_send_mad(msg, NULL);
2608         if (ret) {
2609                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2610                 cm_free_msg(msg);
2611                 return ret;
2612         }
2613
2614         cm_id->lap_state = IB_CM_LAP_SENT;
2615         cm_id_priv->msg = msg;
2616
2617 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2618         return ret;
2619 }
2620 EXPORT_SYMBOL(ib_send_cm_lap);
2621
2622 static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
2623                                     struct ib_sa_path_rec *path,
2624                                     struct cm_lap_msg *lap_msg)
2625 {
2626         memset(path, 0, sizeof *path);
2627         path->dgid = lap_msg->alt_local_gid;
2628         path->sgid = lap_msg->alt_remote_gid;
2629         path->dlid = lap_msg->alt_local_lid;
2630         path->slid = lap_msg->alt_remote_lid;
2631         path->flow_label = cm_lap_get_flow_label(lap_msg);
2632         path->hop_limit = lap_msg->alt_hop_limit;
2633         path->traffic_class = cm_lap_get_traffic_class(lap_msg);
2634         path->reversible = 1;
2635         path->pkey = cm_id_priv->pkey;
2636         path->sl = cm_lap_get_sl(lap_msg);
2637         path->mtu_selector = IB_SA_EQ;
2638         path->mtu = cm_id_priv->path_mtu;
2639         path->rate_selector = IB_SA_EQ;
2640         path->rate = cm_lap_get_packet_rate(lap_msg);
2641         path->packet_life_time_selector = IB_SA_EQ;
2642         path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
2643         path->packet_life_time -= (path->packet_life_time > 0);
2644 }
2645
2646 static int cm_lap_handler(struct cm_work *work)
2647 {
2648         struct cm_id_private *cm_id_priv;
2649         struct cm_lap_msg *lap_msg;
2650         struct ib_cm_lap_event_param *param;
2651         struct ib_mad_send_buf *msg = NULL;
2652         int ret;
2653
2654         /* todo: verify LAP request and send reject APR if invalid. */
2655         lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
2656         cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
2657                                    lap_msg->local_comm_id);
2658         if (!cm_id_priv)
2659                 return -EINVAL;
2660
2661         param = &work->cm_event.param.lap_rcvd;
2662         param->alternate_path = &work->path[0];
2663         cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
2664         work->cm_event.private_data = &lap_msg->private_data;
2665
2666         spin_lock_irq(&cm_id_priv->lock);
2667         if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
2668                 goto unlock;
2669
2670         switch (cm_id_priv->id.lap_state) {
2671         case IB_CM_LAP_UNINIT:
2672         case IB_CM_LAP_IDLE:
2673                 break;
2674         case IB_CM_MRA_LAP_SENT:
2675                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2676                                 counter[CM_LAP_COUNTER]);
2677                 if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
2678                         goto unlock;
2679
2680                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2681                               CM_MSG_RESPONSE_OTHER,
2682                               cm_id_priv->service_timeout,
2683                               cm_id_priv->private_data,
2684                               cm_id_priv->private_data_len);
2685                 spin_unlock_irq(&cm_id_priv->lock);
2686
2687                 if (ib_post_send_mad(msg, NULL))
2688                         cm_free_msg(msg);
2689                 goto deref;
2690         case IB_CM_LAP_RCVD:
2691                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2692                                 counter[CM_LAP_COUNTER]);
2693                 goto unlock;
2694         default:
2695                 goto unlock;
2696         }
2697
2698         cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
2699         cm_id_priv->tid = lap_msg->hdr.tid;
2700         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2701                                 work->mad_recv_wc->recv_buf.grh,
2702                                 &cm_id_priv->av);
2703         cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
2704         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2705         if (!ret)
2706                 list_add_tail(&work->list, &cm_id_priv->work_list);
2707         spin_unlock_irq(&cm_id_priv->lock);
2708
2709         if (ret)
2710                 cm_process_work(cm_id_priv, work);
2711         else
2712                 cm_deref_id(cm_id_priv);
2713         return 0;
2714
2715 unlock: spin_unlock_irq(&cm_id_priv->lock);
2716 deref:  cm_deref_id(cm_id_priv);
2717         return -EINVAL;
2718 }
2719
2720 static void cm_format_apr(struct cm_apr_msg *apr_msg,
2721                           struct cm_id_private *cm_id_priv,
2722                           enum ib_cm_apr_status status,
2723                           void *info,
2724                           u8 info_length,
2725                           const void *private_data,
2726                           u8 private_data_len)
2727 {
2728         cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
2729         apr_msg->local_comm_id = cm_id_priv->id.local_id;
2730         apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
2731         apr_msg->ap_status = (u8) status;
2732
2733         if (info && info_length) {
2734                 apr_msg->info_length = info_length;
2735                 memcpy(apr_msg->info, info, info_length);
2736         }
2737
2738         if (private_data && private_data_len)
2739                 memcpy(apr_msg->private_data, private_data, private_data_len);
2740 }
2741
2742 int ib_send_cm_apr(struct ib_cm_id *cm_id,
2743                    enum ib_cm_apr_status status,
2744                    void *info,
2745                    u8 info_length,
2746                    const void *private_data,
2747                    u8 private_data_len)
2748 {
2749         struct cm_id_private *cm_id_priv;
2750         struct ib_mad_send_buf *msg;
2751         unsigned long flags;
2752         int ret;
2753
2754         if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
2755             (info && info_length > IB_CM_APR_INFO_LENGTH))
2756                 return -EINVAL;
2757
2758         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2759         spin_lock_irqsave(&cm_id_priv->lock, flags);
2760         if (cm_id->state != IB_CM_ESTABLISHED ||
2761             (cm_id->lap_state != IB_CM_LAP_RCVD &&
2762              cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
2763                 ret = -EINVAL;
2764                 goto out;
2765         }
2766
2767         ret = cm_alloc_msg(cm_id_priv, &msg);
2768         if (ret)
2769                 goto out;
2770
2771         cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
2772                       info, info_length, private_data, private_data_len);
2773         ret = ib_post_send_mad(msg, NULL);
2774         if (ret) {
2775                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2776                 cm_free_msg(msg);
2777                 return ret;
2778         }
2779
2780         cm_id->lap_state = IB_CM_LAP_IDLE;
2781 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2782         return ret;
2783 }
2784 EXPORT_SYMBOL(ib_send_cm_apr);
2785
2786 static int cm_apr_handler(struct cm_work *work)
2787 {
2788         struct cm_id_private *cm_id_priv;
2789         struct cm_apr_msg *apr_msg;
2790         int ret;
2791
2792         apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
2793         cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
2794                                    apr_msg->local_comm_id);
2795         if (!cm_id_priv)
2796                 return -EINVAL; /* Unmatched reply. */
2797
2798         work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
2799         work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
2800         work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
2801         work->cm_event.private_data = &apr_msg->private_data;
2802
2803         spin_lock_irq(&cm_id_priv->lock);
2804         if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
2805             (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
2806              cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
2807                 spin_unlock_irq(&cm_id_priv->lock);
2808                 goto out;
2809         }
2810         cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
2811         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2812         cm_id_priv->msg = NULL;
2813
2814         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2815         if (!ret)
2816                 list_add_tail(&work->list, &cm_id_priv->work_list);
2817         spin_unlock_irq(&cm_id_priv->lock);
2818
2819         if (ret)
2820                 cm_process_work(cm_id_priv, work);
2821         else
2822                 cm_deref_id(cm_id_priv);
2823         return 0;
2824 out:
2825         cm_deref_id(cm_id_priv);
2826         return -EINVAL;
2827 }
2828
2829 static int cm_timewait_handler(struct cm_work *work)
2830 {
2831         struct cm_timewait_info *timewait_info;
2832         struct cm_id_private *cm_id_priv;
2833         int ret;
2834
2835         timewait_info = (struct cm_timewait_info *)work;
2836         spin_lock_irq(&cm.lock);
2837         list_del(&timewait_info->list);
2838         spin_unlock_irq(&cm.lock);
2839
2840         cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
2841                                    timewait_info->work.remote_id);
2842         if (!cm_id_priv)
2843                 return -EINVAL;
2844
2845         spin_lock_irq(&cm_id_priv->lock);
2846         if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
2847             cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
2848                 spin_unlock_irq(&cm_id_priv->lock);
2849                 goto out;
2850         }
2851         cm_id_priv->id.state = IB_CM_IDLE;
2852         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2853         if (!ret)
2854                 list_add_tail(&work->list, &cm_id_priv->work_list);
2855         spin_unlock_irq(&cm_id_priv->lock);
2856
2857         if (ret)
2858                 cm_process_work(cm_id_priv, work);
2859         else
2860                 cm_deref_id(cm_id_priv);
2861         return 0;
2862 out:
2863         cm_deref_id(cm_id_priv);
2864         return -EINVAL;
2865 }
2866
2867 static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
2868                                struct cm_id_private *cm_id_priv,
2869                                struct ib_cm_sidr_req_param *param)
2870 {
2871         cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
2872                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
2873         sidr_req_msg->request_id = cm_id_priv->id.local_id;
2874         sidr_req_msg->pkey = param->path->pkey;
2875         sidr_req_msg->service_id = param->service_id;
2876
2877         if (param->private_data && param->private_data_len)
2878                 memcpy(sidr_req_msg->private_data, param->private_data,
2879                        param->private_data_len);
2880 }
2881
2882 int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
2883                         struct ib_cm_sidr_req_param *param)
2884 {
2885         struct cm_id_private *cm_id_priv;
2886         struct ib_mad_send_buf *msg;
2887         unsigned long flags;
2888         int ret;
2889
2890         if (!param->path || (param->private_data &&
2891              param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
2892                 return -EINVAL;
2893
2894         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2895         ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
2896         if (ret)
2897                 goto out;
2898
2899         cm_id->service_id = param->service_id;
2900         cm_id->service_mask = ~cpu_to_be64(0);
2901         cm_id_priv->timeout_ms = param->timeout_ms;
2902         cm_id_priv->max_cm_retries = param->max_cm_retries;
2903         ret = cm_alloc_msg(cm_id_priv, &msg);
2904         if (ret)
2905                 goto out;
2906
2907         cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
2908                            param);
2909         msg->timeout_ms = cm_id_priv->timeout_ms;
2910         msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
2911
2912         spin_lock_irqsave(&cm_id_priv->lock, flags);
2913         if (cm_id->state == IB_CM_IDLE)
2914                 ret = ib_post_send_mad(msg, NULL);
2915         else
2916                 ret = -EINVAL;
2917
2918         if (ret) {
2919                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2920                 cm_free_msg(msg);
2921                 goto out;
2922         }
2923         cm_id->state = IB_CM_SIDR_REQ_SENT;
2924         cm_id_priv->msg = msg;
2925         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2926 out:
2927         return ret;
2928 }
2929 EXPORT_SYMBOL(ib_send_cm_sidr_req);
2930
2931 static void cm_format_sidr_req_event(struct cm_work *work,
2932                                      struct ib_cm_id *listen_id)
2933 {
2934         struct cm_sidr_req_msg *sidr_req_msg;
2935         struct ib_cm_sidr_req_event_param *param;
2936
2937         sidr_req_msg = (struct cm_sidr_req_msg *)
2938                                 work->mad_recv_wc->recv_buf.mad;
2939         param = &work->cm_event.param.sidr_req_rcvd;
2940         param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
2941         param->listen_id = listen_id;
2942         param->port = work->port->port_num;
2943         work->cm_event.private_data = &sidr_req_msg->private_data;
2944 }
2945
2946 static int cm_sidr_req_handler(struct cm_work *work)
2947 {
2948         struct ib_cm_id *cm_id;
2949         struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
2950         struct cm_sidr_req_msg *sidr_req_msg;
2951         struct ib_wc *wc;
2952
2953         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
2954         if (IS_ERR(cm_id))
2955                 return PTR_ERR(cm_id);
2956         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2957
2958         /* Record SGID/SLID and request ID for lookup. */
2959         sidr_req_msg = (struct cm_sidr_req_msg *)
2960                                 work->mad_recv_wc->recv_buf.mad;
2961         wc = work->mad_recv_wc->wc;
2962         cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
2963         cm_id_priv->av.dgid.global.interface_id = 0;
2964         cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
2965                                 work->mad_recv_wc->recv_buf.grh,
2966                                 &cm_id_priv->av);
2967         cm_id_priv->id.remote_id = sidr_req_msg->request_id;
2968         cm_id_priv->tid = sidr_req_msg->hdr.tid;
2969         atomic_inc(&cm_id_priv->work_count);
2970
2971         spin_lock_irq(&cm.lock);
2972         cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
2973         if (cur_cm_id_priv) {
2974                 spin_unlock_irq(&cm.lock);
2975                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2976                                 counter[CM_SIDR_REQ_COUNTER]);
2977                 goto out; /* Duplicate message. */
2978         }
2979         cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
2980         cur_cm_id_priv = cm_find_listen(cm_id->device,
2981                                         sidr_req_msg->service_id,
2982                                         sidr_req_msg->private_data);
2983         if (!cur_cm_id_priv) {
2984                 spin_unlock_irq(&cm.lock);
2985                 cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
2986                 goto out; /* No match. */
2987         }
2988         atomic_inc(&cur_cm_id_priv->refcount);
2989         spin_unlock_irq(&cm.lock);
2990
2991         cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
2992         cm_id_priv->id.context = cur_cm_id_priv->id.context;
2993         cm_id_priv->id.service_id = sidr_req_msg->service_id;
2994         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
2995
2996         cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
2997         cm_process_work(cm_id_priv, work);
2998         cm_deref_id(cur_cm_id_priv);
2999         return 0;
3000 out:
3001         ib_destroy_cm_id(&cm_id_priv->id);
3002         return -EINVAL;
3003 }
3004
3005 static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
3006                                struct cm_id_private *cm_id_priv,
3007                                struct ib_cm_sidr_rep_param *param)
3008 {
3009         cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
3010                           cm_id_priv->tid);
3011         sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
3012         sidr_rep_msg->status = param->status;
3013         cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
3014         sidr_rep_msg->service_id = cm_id_priv->id.service_id;
3015         sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
3016
3017         if (param->info && param->info_length)
3018                 memcpy(sidr_rep_msg->info, param->info, param->info_length);
3019
3020         if (param->private_data && param->private_data_len)
3021                 memcpy(sidr_rep_msg->private_data, param->private_data,
3022                        param->private_data_len);
3023 }
3024
3025 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
3026                         struct ib_cm_sidr_rep_param *param)
3027 {
3028         struct cm_id_private *cm_id_priv;
3029         struct ib_mad_send_buf *msg;
3030         unsigned long flags;
3031         int ret;
3032
3033         if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
3034             (param->private_data &&
3035              param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
3036                 return -EINVAL;
3037
3038         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3039         spin_lock_irqsave(&cm_id_priv->lock, flags);
3040         if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
3041                 ret = -EINVAL;
3042                 goto error;
3043         }
3044
3045         ret = cm_alloc_msg(cm_id_priv, &msg);
3046         if (ret)
3047                 goto error;
3048
3049         cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
3050                            param);
3051         ret = ib_post_send_mad(msg, NULL);
3052         if (ret) {
3053                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3054                 cm_free_msg(msg);
3055                 return ret;
3056         }
3057         cm_id->state = IB_CM_IDLE;
3058         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3059
3060         spin_lock_irqsave(&cm.lock, flags);
3061         rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
3062         spin_unlock_irqrestore(&cm.lock, flags);
3063         return 0;
3064
3065 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3066         return ret;
3067 }
3068 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3069
3070 static void cm_format_sidr_rep_event(struct cm_work *work)
3071 {
3072         struct cm_sidr_rep_msg *sidr_rep_msg;
3073         struct ib_cm_sidr_rep_event_param *param;
3074
3075         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3076                                 work->mad_recv_wc->recv_buf.mad;
3077         param = &work->cm_event.param.sidr_rep_rcvd;
3078         param->status = sidr_rep_msg->status;
3079         param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
3080         param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3081         param->info = &sidr_rep_msg->info;
3082         param->info_len = sidr_rep_msg->info_length;
3083         work->cm_event.private_data = &sidr_rep_msg->private_data;
3084 }
3085
3086 static int cm_sidr_rep_handler(struct cm_work *work)
3087 {
3088         struct cm_sidr_rep_msg *sidr_rep_msg;
3089         struct cm_id_private *cm_id_priv;
3090
3091         sidr_rep_msg = (struct cm_sidr_rep_msg *)
3092                                 work->mad_recv_wc->recv_buf.mad;
3093         cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
3094         if (!cm_id_priv)
3095                 return -EINVAL; /* Unmatched reply. */
3096
3097         spin_lock_irq(&cm_id_priv->lock);
3098         if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
3099                 spin_unlock_irq(&cm_id_priv->lock);
3100                 goto out;
3101         }
3102         cm_id_priv->id.state = IB_CM_IDLE;
3103         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3104         spin_unlock_irq(&cm_id_priv->lock);
3105
3106         cm_format_sidr_rep_event(work);
3107         cm_process_work(cm_id_priv, work);
3108         return 0;
3109 out:
3110         cm_deref_id(cm_id_priv);
3111         return -EINVAL;
3112 }
3113
3114 static void cm_process_send_error(struct ib_mad_send_buf *msg,
3115                                   enum ib_wc_status wc_status)
3116 {
3117         struct cm_id_private *cm_id_priv;
3118         struct ib_cm_event cm_event;
3119         enum ib_cm_state state;
3120         int ret;
3121
3122         memset(&cm_event, 0, sizeof cm_event);
3123         cm_id_priv = msg->context[0];
3124
3125         /* Discard old sends or ones without a response. */
3126         spin_lock_irq(&cm_id_priv->lock);
3127         state = (enum ib_cm_state) (unsigned long) msg->context[1];
3128         if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
3129                 goto discard;
3130
3131         switch (state) {
3132         case IB_CM_REQ_SENT:
3133         case IB_CM_MRA_REQ_RCVD:
3134                 cm_reset_to_idle(cm_id_priv);
3135                 cm_event.event = IB_CM_REQ_ERROR;
3136                 break;
3137         case IB_CM_REP_SENT:
3138         case IB_CM_MRA_REP_RCVD:
3139                 cm_reset_to_idle(cm_id_priv);
3140                 cm_event.event = IB_CM_REP_ERROR;
3141                 break;
3142         case IB_CM_DREQ_SENT:
3143                 cm_enter_timewait(cm_id_priv);
3144                 cm_event.event = IB_CM_DREQ_ERROR;
3145                 break;
3146         case IB_CM_SIDR_REQ_SENT:
3147                 cm_id_priv->id.state = IB_CM_IDLE;
3148                 cm_event.event = IB_CM_SIDR_REQ_ERROR;
3149                 break;
3150         default:
3151                 goto discard;
3152         }
3153         spin_unlock_irq(&cm_id_priv->lock);
3154         cm_event.param.send_status = wc_status;
3155
3156         /* No other events can occur on the cm_id at this point. */
3157         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
3158         cm_free_msg(msg);
3159         if (ret)
3160                 ib_destroy_cm_id(&cm_id_priv->id);
3161         return;
3162 discard:
3163         spin_unlock_irq(&cm_id_priv->lock);
3164         cm_free_msg(msg);
3165 }
3166
3167 static void cm_send_handler(struct ib_mad_agent *mad_agent,
3168                             struct ib_mad_send_wc *mad_send_wc)
3169 {
3170         struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
3171         struct cm_port *port;
3172         u16 attr_index;
3173
3174         port = mad_agent->context;
3175         attr_index = be16_to_cpu(((struct ib_mad_hdr *)
3176                                   msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
3177
3178         /*
3179          * If the send was in response to a received message (context[0] is not
3180          * set to a cm_id), and is not a REJ, then it is a send that was
3181          * manually retried.
3182          */
3183         if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
3184                 msg->retries = 1;
3185
3186         atomic_long_add(1 + msg->retries,
3187                         &port->counter_group[CM_XMIT].counter[attr_index]);
3188         if (msg->retries)
3189                 atomic_long_add(msg->retries,
3190                                 &port->counter_group[CM_XMIT_RETRIES].
3191                                 counter[attr_index]);
3192
3193         switch (mad_send_wc->status) {
3194         case IB_WC_SUCCESS:
3195         case IB_WC_WR_FLUSH_ERR:
3196                 cm_free_msg(msg);
3197                 break;
3198         default:
3199                 if (msg->context[0] && msg->context[1])
3200                         cm_process_send_error(msg, mad_send_wc->status);
3201                 else
3202                         cm_free_msg(msg);
3203                 break;
3204         }
3205 }
3206
3207 static void cm_work_handler(struct work_struct *_work)
3208 {
3209         struct cm_work *work = container_of(_work, struct cm_work, work.work);
3210         int ret;
3211
3212         switch (work->cm_event.event) {
3213         case IB_CM_REQ_RECEIVED:
3214                 ret = cm_req_handler(work);
3215                 break;
3216         case IB_CM_MRA_RECEIVED:
3217                 ret = cm_mra_handler(work);
3218                 break;
3219         case IB_CM_REJ_RECEIVED:
3220                 ret = cm_rej_handler(work);
3221                 break;
3222         case IB_CM_REP_RECEIVED:
3223                 ret = cm_rep_handler(work);
3224                 break;
3225         case IB_CM_RTU_RECEIVED:
3226                 ret = cm_rtu_handler(work);
3227                 break;
3228         case IB_CM_USER_ESTABLISHED:
3229                 ret = cm_establish_handler(work);
3230                 break;
3231         case IB_CM_DREQ_RECEIVED:
3232                 ret = cm_dreq_handler(work);
3233                 break;
3234         case IB_CM_DREP_RECEIVED:
3235                 ret = cm_drep_handler(work);
3236                 break;
3237         case IB_CM_SIDR_REQ_RECEIVED:
3238                 ret = cm_sidr_req_handler(work);
3239                 break;
3240         case IB_CM_SIDR_REP_RECEIVED:
3241                 ret = cm_sidr_rep_handler(work);
3242                 break;
3243         case IB_CM_LAP_RECEIVED:
3244                 ret = cm_lap_handler(work);
3245                 break;
3246         case IB_CM_APR_RECEIVED:
3247                 ret = cm_apr_handler(work);
3248                 break;
3249         case IB_CM_TIMEWAIT_EXIT:
3250                 ret = cm_timewait_handler(work);
3251                 break;
3252         default:
3253                 ret = -EINVAL;
3254                 break;
3255         }
3256         if (ret)
3257                 cm_free_work(work);
3258 }
3259
3260 static int cm_establish(struct ib_cm_id *cm_id)
3261 {
3262         struct cm_id_private *cm_id_priv;
3263         struct cm_work *work;
3264         unsigned long flags;
3265         int ret = 0;
3266
3267         work = kmalloc(sizeof *work, GFP_ATOMIC);
3268         if (!work)
3269                 return -ENOMEM;
3270
3271         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3272         spin_lock_irqsave(&cm_id_priv->lock, flags);
3273         switch (cm_id->state)
3274         {
3275         case IB_CM_REP_SENT:
3276         case IB_CM_MRA_REP_RCVD:
3277                 cm_id->state = IB_CM_ESTABLISHED;
3278                 break;
3279         case IB_CM_ESTABLISHED:
3280                 ret = -EISCONN;
3281                 break;
3282         default:
3283                 ret = -EINVAL;
3284                 break;
3285         }
3286         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3287
3288         if (ret) {
3289                 kfree(work);
3290                 goto out;
3291         }
3292
3293         /*
3294          * The CM worker thread may try to destroy the cm_id before it
3295          * can execute this work item.  To prevent potential deadlock,
3296          * we need to find the cm_id once we're in the context of the
3297          * worker thread, rather than holding a reference on it.
3298          */
3299         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3300         work->local_id = cm_id->local_id;
3301         work->remote_id = cm_id->remote_id;
3302         work->mad_recv_wc = NULL;
3303         work->cm_event.event = IB_CM_USER_ESTABLISHED;
3304         queue_delayed_work(cm.wq, &work->work, 0);
3305 out:
3306         return ret;
3307 }
3308
3309 static int cm_migrate(struct ib_cm_id *cm_id)
3310 {
3311         struct cm_id_private *cm_id_priv;
3312         unsigned long flags;
3313         int ret = 0;
3314
3315         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3316         spin_lock_irqsave(&cm_id_priv->lock, flags);
3317         if (cm_id->state == IB_CM_ESTABLISHED &&
3318             (cm_id->lap_state == IB_CM_LAP_UNINIT ||
3319              cm_id->lap_state == IB_CM_LAP_IDLE)) {
3320                 cm_id->lap_state = IB_CM_LAP_IDLE;
3321                 cm_id_priv->av = cm_id_priv->alt_av;
3322         } else
3323                 ret = -EINVAL;
3324         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3325
3326         return ret;
3327 }
3328
3329 int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
3330 {
3331         int ret;
3332
3333         switch (event) {
3334         case IB_EVENT_COMM_EST:
3335                 ret = cm_establish(cm_id);
3336                 break;
3337         case IB_EVENT_PATH_MIG:
3338                 ret = cm_migrate(cm_id);
3339                 break;
3340         default:
3341                 ret = -EINVAL;
3342         }
3343         return ret;
3344 }
3345 EXPORT_SYMBOL(ib_cm_notify);
3346
3347 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
3348                             struct ib_mad_recv_wc *mad_recv_wc)
3349 {
3350         struct cm_port *port = mad_agent->context;
3351         struct cm_work *work;
3352         enum ib_cm_event_type event;
3353         u16 attr_id;
3354         int paths = 0;
3355
3356         switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
3357         case CM_REQ_ATTR_ID:
3358                 paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
3359                                                     alt_local_lid != 0);
3360                 event = IB_CM_REQ_RECEIVED;
3361                 break;
3362         case CM_MRA_ATTR_ID:
3363                 event = IB_CM_MRA_RECEIVED;
3364                 break;
3365         case CM_REJ_ATTR_ID:
3366                 event = IB_CM_REJ_RECEIVED;
3367                 break;
3368         case CM_REP_ATTR_ID:
3369                 event = IB_CM_REP_RECEIVED;
3370                 break;
3371         case CM_RTU_ATTR_ID:
3372                 event = IB_CM_RTU_RECEIVED;
3373                 break;
3374         case CM_DREQ_ATTR_ID:
3375                 event = IB_CM_DREQ_RECEIVED;
3376                 break;
3377         case CM_DREP_ATTR_ID:
3378                 event = IB_CM_DREP_RECEIVED;
3379                 break;
3380         case CM_SIDR_REQ_ATTR_ID:
3381                 event = IB_CM_SIDR_REQ_RECEIVED;
3382                 break;
3383         case CM_SIDR_REP_ATTR_ID:
3384                 event = IB_CM_SIDR_REP_RECEIVED;
3385                 break;
3386         case CM_LAP_ATTR_ID:
3387                 paths = 1;
3388                 event = IB_CM_LAP_RECEIVED;
3389                 break;
3390         case CM_APR_ATTR_ID:
3391                 event = IB_CM_APR_RECEIVED;
3392                 break;
3393         default:
3394                 ib_free_recv_mad(mad_recv_wc);
3395                 return;
3396         }
3397
3398         attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
3399         atomic_long_inc(&port->counter_group[CM_RECV].
3400                         counter[attr_id - CM_ATTR_ID_OFFSET]);
3401
3402         work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
3403                        GFP_KERNEL);
3404         if (!work) {
3405                 ib_free_recv_mad(mad_recv_wc);
3406                 return;
3407         }
3408
3409         INIT_DELAYED_WORK(&work->work, cm_work_handler);
3410         work->cm_event.event = event;
3411         work->mad_recv_wc = mad_recv_wc;
3412         work->port = port;
3413         queue_delayed_work(cm.wq, &work->work, 0);
3414 }
3415
3416 static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3417                                 struct ib_qp_attr *qp_attr,
3418                                 int *qp_attr_mask)
3419 {
3420         unsigned long flags;
3421         int ret;
3422
3423         spin_lock_irqsave(&cm_id_priv->lock, flags);
3424         switch (cm_id_priv->id.state) {
3425         case IB_CM_REQ_SENT:
3426         case IB_CM_MRA_REQ_RCVD:
3427         case IB_CM_REQ_RCVD:
3428         case IB_CM_MRA_REQ_SENT:
3429         case IB_CM_REP_RCVD:
3430         case IB_CM_MRA_REP_SENT:
3431         case IB_CM_REP_SENT:
3432         case IB_CM_MRA_REP_RCVD:
3433         case IB_CM_ESTABLISHED:
3434                 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
3435                                 IB_QP_PKEY_INDEX | IB_QP_PORT;
3436                 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
3437                 if (cm_id_priv->responder_resources)
3438                         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3439                                                     IB_ACCESS_REMOTE_ATOMIC;
3440                 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3441                 qp_attr->port_num = cm_id_priv->av.port->port_num;
3442                 ret = 0;
3443                 break;
3444         default:
3445                 ret = -EINVAL;
3446                 break;
3447         }
3448         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3449         return ret;
3450 }
3451
3452 static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3453                                struct ib_qp_attr *qp_attr,
3454                                int *qp_attr_mask)
3455 {
3456         unsigned long flags;
3457         int ret;
3458
3459         spin_lock_irqsave(&cm_id_priv->lock, flags);
3460         switch (cm_id_priv->id.state) {
3461         case IB_CM_REQ_RCVD:
3462         case IB_CM_MRA_REQ_SENT:
3463         case IB_CM_REP_RCVD:
3464         case IB_CM_MRA_REP_SENT:
3465         case IB_CM_REP_SENT:
3466         case IB_CM_MRA_REP_RCVD:
3467         case IB_CM_ESTABLISHED:
3468                 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3469                                 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3470                 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3471                 qp_attr->path_mtu = cm_id_priv->path_mtu;
3472                 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3473                 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3474                 if (cm_id_priv->qp_type == IB_QPT_RC) {
3475                         *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3476                                          IB_QP_MIN_RNR_TIMER;
3477                         qp_attr->max_dest_rd_atomic =
3478                                         cm_id_priv->responder_resources;
3479                         qp_attr->min_rnr_timer = 0;
3480                 }
3481                 if (cm_id_priv->alt_av.ah_attr.dlid) {
3482                         *qp_attr_mask |= IB_QP_ALT_PATH;
3483                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3484                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3485                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3486                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3487                 }
3488                 ret = 0;
3489                 break;
3490         default:
3491                 ret = -EINVAL;
3492                 break;
3493         }
3494         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3495         return ret;
3496 }
3497
3498 static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3499                                struct ib_qp_attr *qp_attr,
3500                                int *qp_attr_mask)
3501 {
3502         unsigned long flags;
3503         int ret;
3504
3505         spin_lock_irqsave(&cm_id_priv->lock, flags);
3506         switch (cm_id_priv->id.state) {
3507         /* Allow transition to RTS before sending REP */
3508         case IB_CM_REQ_RCVD:
3509         case IB_CM_MRA_REQ_SENT:
3510
3511         case IB_CM_REP_RCVD:
3512         case IB_CM_MRA_REP_SENT:
3513         case IB_CM_REP_SENT:
3514         case IB_CM_MRA_REP_RCVD:
3515         case IB_CM_ESTABLISHED:
3516                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3517                         *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3518                         qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3519                         if (cm_id_priv->qp_type == IB_QPT_RC) {
3520                                 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3521                                                  IB_QP_RNR_RETRY |
3522                                                  IB_QP_MAX_QP_RD_ATOMIC;
3523                                 qp_attr->timeout = cm_id_priv->av.timeout;
3524                                 qp_attr->retry_cnt = cm_id_priv->retry_count;
3525                                 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3526                                 qp_attr->max_rd_atomic =
3527                                         cm_id_priv->initiator_depth;
3528                         }
3529                         if (cm_id_priv->alt_av.ah_attr.dlid) {
3530                                 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
3531                                 qp_attr->path_mig_state = IB_MIG_REARM;
3532                         }
3533                 } else {
3534                         *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
3535                         qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
3536                         qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
3537                         qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
3538                         qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
3539                         qp_attr->path_mig_state = IB_MIG_REARM;
3540                 }
3541                 ret = 0;
3542                 break;
3543         default:
3544                 ret = -EINVAL;
3545                 break;
3546         }
3547         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3548         return ret;
3549 }
3550
3551 int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
3552                        struct ib_qp_attr *qp_attr,
3553                        int *qp_attr_mask)
3554 {
3555         struct cm_id_private *cm_id_priv;
3556         int ret;
3557
3558         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3559         switch (qp_attr->qp_state) {
3560         case IB_QPS_INIT:
3561                 ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
3562                 break;
3563         case IB_QPS_RTR:
3564                 ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
3565                 break;
3566         case IB_QPS_RTS:
3567                 ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
3568                 break;
3569         default:
3570                 ret = -EINVAL;
3571                 break;
3572         }
3573         return ret;
3574 }
3575 EXPORT_SYMBOL(ib_cm_init_qp_attr);
3576
3577 static void cm_get_ack_delay(struct cm_device *cm_dev)
3578 {
3579         struct ib_device_attr attr;
3580
3581         if (ib_query_device(cm_dev->ib_device, &attr))
3582                 cm_dev->ack_delay = 0; /* acks will rely on packet life time */
3583         else
3584                 cm_dev->ack_delay = attr.local_ca_ack_delay;
3585 }
3586
3587 static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
3588                                char *buf)
3589 {
3590         struct cm_counter_group *group;
3591         struct cm_counter_attribute *cm_attr;
3592
3593         group = container_of(obj, struct cm_counter_group, obj);
3594         cm_attr = container_of(attr, struct cm_counter_attribute, attr);
3595
3596         return sprintf(buf, "%ld\n",
3597                        atomic_long_read(&group->counter[cm_attr->index]));
3598 }
3599
3600 static struct sysfs_ops cm_counter_ops = {
3601         .show = cm_show_counter
3602 };
3603
3604 static struct kobj_type cm_counter_obj_type = {
3605         .sysfs_ops = &cm_counter_ops,
3606         .default_attrs = cm_counter_default_attrs
3607 };
3608
3609 static void cm_release_port_obj(struct kobject *obj)
3610 {
3611         struct cm_port *cm_port;
3612
3613         cm_port = container_of(obj, struct cm_port, port_obj);
3614         kfree(cm_port);
3615 }
3616
3617 static struct kobj_type cm_port_obj_type = {
3618         .release = cm_release_port_obj
3619 };
3620
3621 struct class cm_class = {
3622         .name    = "infiniband_cm",
3623 };
3624 EXPORT_SYMBOL(cm_class);
3625
3626 static int cm_create_port_fs(struct cm_port *port)
3627 {
3628         int i, ret;
3629
3630         ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
3631                                    &port->cm_dev->device->kobj,
3632                                    "%d", port->port_num);
3633         if (ret) {
3634                 kfree(port);
3635                 return ret;
3636         }
3637
3638         for (i = 0; i < CM_COUNTER_GROUPS; i++) {
3639                 ret = kobject_init_and_add(&port->counter_group[i].obj,
3640                                            &cm_counter_obj_type,
3641                                            &port->port_obj,
3642                                            "%s", counter_group_names[i]);
3643                 if (ret)
3644                         goto error;
3645         }
3646
3647         return 0;
3648
3649 error:
3650         while (i--)
3651                 kobject_put(&port->counter_group[i].obj);
3652         kobject_put(&port->port_obj);
3653         return ret;
3654
3655 }
3656
3657 static void cm_remove_port_fs(struct cm_port *port)
3658 {
3659         int i;
3660
3661         for (i = 0; i < CM_COUNTER_GROUPS; i++)
3662                 kobject_put(&port->counter_group[i].obj);
3663
3664         kobject_put(&port->port_obj);
3665 }
3666
3667 static void cm_add_one(struct ib_device *ib_device)
3668 {
3669         struct cm_device *cm_dev;
3670         struct cm_port *port;
3671         struct ib_mad_reg_req reg_req = {
3672                 .mgmt_class = IB_MGMT_CLASS_CM,
3673                 .mgmt_class_version = IB_CM_CLASS_VERSION
3674         };
3675         struct ib_port_modify port_modify = {
3676                 .set_port_cap_mask = IB_PORT_CM_SUP
3677         };
3678         unsigned long flags;
3679         int ret;
3680         u8 i;
3681
3682         if (rdma_node_get_transport(ib_device->node_type) != RDMA_TRANSPORT_IB)
3683                 return;
3684
3685         cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
3686                          ib_device->phys_port_cnt, GFP_KERNEL);
3687         if (!cm_dev)
3688                 return;
3689
3690         cm_dev->ib_device = ib_device;
3691         cm_get_ack_delay(cm_dev);
3692
3693         cm_dev->device = device_create(&cm_class, &ib_device->dev,
3694                                        MKDEV(0, 0), NULL,
3695                                        "%s", ib_device->name);
3696         if (!cm_dev->device) {
3697                 kfree(cm_dev);
3698                 return;
3699         }
3700
3701         set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
3702         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3703                 port = kzalloc(sizeof *port, GFP_KERNEL);
3704                 if (!port)
3705                         goto error1;
3706
3707                 cm_dev->port[i-1] = port;
3708                 port->cm_dev = cm_dev;
3709                 port->port_num = i;
3710
3711                 ret = cm_create_port_fs(port);
3712                 if (ret)
3713                         goto error1;
3714
3715                 port->mad_agent = ib_register_mad_agent(ib_device, i,
3716                                                         IB_QPT_GSI,
3717                                                         &reg_req,
3718                                                         0,
3719                                                         cm_send_handler,
3720                                                         cm_recv_handler,
3721                                                         port);
3722                 if (IS_ERR(port->mad_agent))
3723                         goto error2;
3724
3725                 ret = ib_modify_port(ib_device, i, 0, &port_modify);
3726                 if (ret)
3727                         goto error3;
3728         }
3729         ib_set_client_data(ib_device, &cm_client, cm_dev);
3730
3731         write_lock_irqsave(&cm.device_lock, flags);
3732         list_add_tail(&cm_dev->list, &cm.device_list);
3733         write_unlock_irqrestore(&cm.device_lock, flags);
3734         return;
3735
3736 error3:
3737         ib_unregister_mad_agent(port->mad_agent);
3738 error2:
3739         cm_remove_port_fs(port);
3740 error1:
3741         port_modify.set_port_cap_mask = 0;
3742         port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
3743         while (--i) {
3744                 port = cm_dev->port[i-1];
3745                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3746                 ib_unregister_mad_agent(port->mad_agent);
3747                 cm_remove_port_fs(port);
3748         }
3749         device_unregister(cm_dev->device);
3750         kfree(cm_dev);
3751 }
3752
3753 static void cm_remove_one(struct ib_device *ib_device)
3754 {
3755         struct cm_device *cm_dev;
3756         struct cm_port *port;
3757         struct ib_port_modify port_modify = {
3758                 .clr_port_cap_mask = IB_PORT_CM_SUP
3759         };
3760         unsigned long flags;
3761         int i;
3762
3763         cm_dev = ib_get_client_data(ib_device, &cm_client);
3764         if (!cm_dev)
3765                 return;
3766
3767         write_lock_irqsave(&cm.device_lock, flags);
3768         list_del(&cm_dev->list);
3769         write_unlock_irqrestore(&cm.device_lock, flags);
3770
3771         for (i = 1; i <= ib_device->phys_port_cnt; i++) {
3772                 port = cm_dev->port[i-1];
3773                 ib_modify_port(ib_device, port->port_num, 0, &port_modify);
3774                 ib_unregister_mad_agent(port->mad_agent);
3775                 flush_workqueue(cm.wq);
3776                 cm_remove_port_fs(port);
3777         }
3778         device_unregister(cm_dev->device);
3779         kfree(cm_dev);
3780 }
3781
3782 static int __init ib_cm_init(void)
3783 {
3784         int ret;
3785
3786         memset(&cm, 0, sizeof cm);
3787         INIT_LIST_HEAD(&cm.device_list);
3788         rwlock_init(&cm.device_lock);
3789         spin_lock_init(&cm.lock);
3790         cm.listen_service_table = RB_ROOT;
3791         cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
3792         cm.remote_id_table = RB_ROOT;
3793         cm.remote_qp_table = RB_ROOT;
3794         cm.remote_sidr_table = RB_ROOT;
3795         idr_init(&cm.local_id_table);
3796         get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3797         idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3798         INIT_LIST_HEAD(&cm.timewait_list);
3799
3800         ret = class_register(&cm_class);
3801         if (ret)
3802                 return -ENOMEM;
3803
3804         cm.wq = create_workqueue("ib_cm");
3805         if (!cm.wq) {
3806                 ret = -ENOMEM;
3807                 goto error1;
3808         }
3809
3810         ret = ib_register_client(&cm_client);
3811         if (ret)
3812                 goto error2;
3813
3814         return 0;
3815 error2:
3816         destroy_workqueue(cm.wq);
3817 error1:
3818         class_unregister(&cm_class);
3819         return ret;
3820 }
3821
3822 static void __exit ib_cm_cleanup(void)
3823 {
3824         struct cm_timewait_info *timewait_info, *tmp;
3825
3826         spin_lock_irq(&cm.lock);
3827         list_for_each_entry(timewait_info, &cm.timewait_list, list)
3828                 cancel_delayed_work(&timewait_info->work.work);
3829         spin_unlock_irq(&cm.lock);
3830
3831         ib_unregister_client(&cm_client);
3832         destroy_workqueue(cm.wq);
3833
3834         list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
3835                 list_del(&timewait_info->list);
3836                 kfree(timewait_info);
3837         }
3838
3839         class_unregister(&cm_class);
3840         idr_destroy(&cm.local_id_table);
3841 }
3842
3843 module_init(ib_cm_init);
3844 module_exit(ib_cm_cleanup);
3845