sgi-xpc: prevent false heartbeat failures
[linux-2.6] / drivers / misc / sgi-xp / xpc_uv.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9 /*
10  * Cross Partition Communication (XPC) uv-based functions.
11  *
12  *     Architecture specific implementation of common functions.
13  *
14  */
15
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/interrupt.h>
19 #include <linux/delay.h>
20 #include <linux/device.h>
21 #include <linux/err.h>
22 #include <asm/uv/uv_hub.h>
23 #if defined CONFIG_X86_64
24 #include <asm/uv/bios.h>
25 #include <asm/uv/uv_irq.h>
26 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
27 #include <asm/sn/intr.h>
28 #include <asm/sn/sn_sal.h>
29 #endif
30 #include "../sgi-gru/gru.h"
31 #include "../sgi-gru/grukservices.h"
32 #include "xpc.h"
33
34 #if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
35 struct uv_IO_APIC_route_entry {
36         __u64   vector          :  8,
37                 delivery_mode   :  3,
38                 dest_mode       :  1,
39                 delivery_status :  1,
40                 polarity        :  1,
41                 __reserved_1    :  1,
42                 trigger         :  1,
43                 mask            :  1,
44                 __reserved_2    : 15,
45                 dest            : 32;
46 };
47 #endif
48
/* Local heartbeat structure; pointed at by xpc_setup_rsvd_page_sn_uv(). */
static struct xpc_heartbeat_uv *xpc_heartbeat_uv;

/*
 * Activate mq: each message is one GRU cache line and the queue holds
 * four messages per possible partition.
 */
#define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
#define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
					 XPC_ACTIVATE_MSG_SIZE_UV)
#define XPC_ACTIVATE_IRQ_NAME		"xpc_activate"

/*
 * Notify mq: each message is two GRU cache lines and the queue holds
 * four messages per possible partition.
 */
#define XPC_NOTIFY_MSG_SIZE_UV		(2 * GRU_CACHE_LINE_BYTES)
#define XPC_NOTIFY_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
					 XPC_NOTIFY_MSG_SIZE_UV)
#define XPC_NOTIFY_IRQ_NAME		"xpc_notify"

/* The two GRU message queues used by this file. */
static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
63
64 static int
65 xpc_setup_partitions_sn_uv(void)
66 {
67         short partid;
68         struct xpc_partition_uv *part_uv;
69
70         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
71                 part_uv = &xpc_partitions[partid].sn.uv;
72
73                 mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
74                 spin_lock_init(&part_uv->flags_lock);
75                 part_uv->remote_act_state = XPC_P_AS_INACTIVE;
76         }
77         return 0;
78 }
79
80 static void
81 xpc_teardown_partitions_sn_uv(void)
82 {
83         short partid;
84         struct xpc_partition_uv *part_uv;
85         unsigned long irq_flags;
86
87         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
88                 part_uv = &xpc_partitions[partid].sn.uv;
89
90                 if (part_uv->cached_activate_gru_mq_desc != NULL) {
91                         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
92                         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
93                         part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
94                         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
95                         kfree(part_uv->cached_activate_gru_mq_desc);
96                         part_uv->cached_activate_gru_mq_desc = NULL;
97                         mutex_unlock(&part_uv->
98                                      cached_activate_gru_mq_desc_mutex);
99                 }
100         }
101 }
102
103 static int
104 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
105 {
106         int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
107
108 #if defined CONFIG_X86_64
109         mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
110         if (mq->irq < 0) {
111                 dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
112                         -mq->irq);
113                 return mq->irq;
114         }
115
116         mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
117
118 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
119         if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
120                 mq->irq = SGI_XPC_ACTIVATE;
121         else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
122                 mq->irq = SGI_XPC_NOTIFY;
123         else
124                 return -EINVAL;
125
126         mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
127         uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
128 #else
129         #error not a supported configuration
130 #endif
131
132         return 0;
133 }
134
135 static void
136 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
137 {
138 #if defined CONFIG_X86_64
139         uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
140
141 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
142         int mmr_pnode;
143         unsigned long mmr_value;
144
145         mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
146         mmr_value = 1UL << 16;
147
148         uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
149 #else
150         #error not a supported configuration
151 #endif
152 }
153
154 static int
155 xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
156 {
157         int ret;
158
159 #if defined CONFIG_X86_64
160         ret = uv_bios_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
161                                          mq->order, &mq->mmr_offset);
162         if (ret < 0) {
163                 dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, "
164                         "ret=%d\n", ret);
165                 return ret;
166         }
167 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
168         ret = sn_mq_watchlist_alloc(mq->mmr_blade, (void *)uv_gpa(mq->address),
169                                     mq->order, &mq->mmr_offset);
170         if (ret < 0) {
171                 dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
172                         ret);
173                 return -EBUSY;
174         }
175 #else
176         #error not a supported configuration
177 #endif
178
179         mq->watchlist_num = ret;
180         return 0;
181 }
182
183 static void
184 xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq)
185 {
186         int ret;
187
188 #if defined CONFIG_X86_64
189         ret = uv_bios_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
190         BUG_ON(ret != BIOS_STATUS_SUCCESS);
191 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
192         ret = sn_mq_watchlist_free(mq->mmr_blade, mq->watchlist_num);
193         BUG_ON(ret != SALRET_OK);
194 #else
195         #error not a supported configuration
196 #endif
197 }
198
199 static struct xpc_gru_mq_uv *
200 xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
201                      irq_handler_t irq_handler)
202 {
203         enum xp_retval xp_ret;
204         int ret;
205         int nid;
206         int pg_order;
207         struct page *page;
208         struct xpc_gru_mq_uv *mq;
209         struct uv_IO_APIC_route_entry *mmr_value;
210
211         mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
212         if (mq == NULL) {
213                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
214                         "a xpc_gru_mq_uv structure\n");
215                 ret = -ENOMEM;
216                 goto out_0;
217         }
218
219         mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
220                                   GFP_KERNEL);
221         if (mq->gru_mq_desc == NULL) {
222                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
223                         "a gru_message_queue_desc structure\n");
224                 ret = -ENOMEM;
225                 goto out_1;
226         }
227
228         pg_order = get_order(mq_size);
229         mq->order = pg_order + PAGE_SHIFT;
230         mq_size = 1UL << mq->order;
231
232         mq->mmr_blade = uv_cpu_to_blade_id(cpu);
233
234         nid = cpu_to_node(cpu);
235         page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
236                                 pg_order);
237         if (page == NULL) {
238                 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
239                         "bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
240                 ret = -ENOMEM;
241                 goto out_2;
242         }
243         mq->address = page_address(page);
244
245         /* enable generation of irq when GRU mq operation occurs to this mq */
246         ret = xpc_gru_mq_watchlist_alloc_uv(mq);
247         if (ret != 0)
248                 goto out_3;
249
250         ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name);
251         if (ret != 0)
252                 goto out_4;
253
254         ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
255         if (ret != 0) {
256                 dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
257                         mq->irq, -ret);
258                 goto out_5;
259         }
260
261         mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
262         ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
263                                        nid, mmr_value->vector, mmr_value->dest);
264         if (ret != 0) {
265                 dev_err(xpc_part, "gru_create_message_queue() returned "
266                         "error=%d\n", ret);
267                 ret = -EINVAL;
268                 goto out_6;
269         }
270
271         /* allow other partitions to access this GRU mq */
272         xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
273         if (xp_ret != xpSuccess) {
274                 ret = -EACCES;
275                 goto out_6;
276         }
277
278         return mq;
279
280         /* something went wrong */
281 out_6:
282         free_irq(mq->irq, NULL);
283 out_5:
284         xpc_release_gru_mq_irq_uv(mq);
285 out_4:
286         xpc_gru_mq_watchlist_free_uv(mq);
287 out_3:
288         free_pages((unsigned long)mq->address, pg_order);
289 out_2:
290         kfree(mq->gru_mq_desc);
291 out_1:
292         kfree(mq);
293 out_0:
294         return ERR_PTR(ret);
295 }
296
297 static void
298 xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
299 {
300         unsigned int mq_size;
301         int pg_order;
302         int ret;
303
304         /* disallow other partitions to access GRU mq */
305         mq_size = 1UL << mq->order;
306         ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size);
307         BUG_ON(ret != xpSuccess);
308
309         /* unregister irq handler and release mq irq/vector mapping */
310         free_irq(mq->irq, NULL);
311         xpc_release_gru_mq_irq_uv(mq);
312
313         /* disable generation of irq when GRU mq op occurs to this mq */
314         xpc_gru_mq_watchlist_free_uv(mq);
315
316         pg_order = mq->order - PAGE_SHIFT;
317         free_pages((unsigned long)mq->address, pg_order);
318
319         kfree(mq);
320 }
321
322 static enum xp_retval
323 xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
324                  size_t msg_size)
325 {
326         enum xp_retval xp_ret;
327         int ret;
328
329         while (1) {
330                 ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
331                 if (ret == MQE_OK) {
332                         xp_ret = xpSuccess;
333                         break;
334                 }
335
336                 if (ret == MQE_QUEUE_FULL) {
337                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
338                                 "error=MQE_QUEUE_FULL\n");
339                         /* !!! handle QLimit reached; delay & try again */
340                         /* ??? Do we add a limit to the number of retries? */
341                         (void)msleep_interruptible(10);
342                 } else if (ret == MQE_CONGESTION) {
343                         dev_dbg(xpc_chan, "gru_send_message_gpa() returned "
344                                 "error=MQE_CONGESTION\n");
345                         /* !!! handle LB Overflow; simply try again */
346                         /* ??? Do we add a limit to the number of retries? */
347                 } else {
348                         /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */
349                         dev_err(xpc_chan, "gru_send_message_gpa() returned "
350                                 "error=%d\n", ret);
351                         xp_ret = xpGruSendMqError;
352                         break;
353                 }
354         }
355         return xp_ret;
356 }
357
358 static void
359 xpc_process_activate_IRQ_rcvd_uv(void)
360 {
361         unsigned long irq_flags;
362         short partid;
363         struct xpc_partition *part;
364         u8 act_state_req;
365
366         DBUG_ON(xpc_activate_IRQ_rcvd == 0);
367
368         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
369         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
370                 part = &xpc_partitions[partid];
371
372                 if (part->sn.uv.act_state_req == 0)
373                         continue;
374
375                 xpc_activate_IRQ_rcvd--;
376                 BUG_ON(xpc_activate_IRQ_rcvd < 0);
377
378                 act_state_req = part->sn.uv.act_state_req;
379                 part->sn.uv.act_state_req = 0;
380                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
381
382                 if (act_state_req == XPC_P_ASR_ACTIVATE_UV) {
383                         if (part->act_state == XPC_P_AS_INACTIVE)
384                                 xpc_activate_partition(part);
385                         else if (part->act_state == XPC_P_AS_DEACTIVATING)
386                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
387
388                 } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) {
389                         if (part->act_state == XPC_P_AS_INACTIVE)
390                                 xpc_activate_partition(part);
391                         else
392                                 XPC_DEACTIVATE_PARTITION(part, xpReactivating);
393
394                 } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) {
395                         XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason);
396
397                 } else {
398                         BUG();
399                 }
400
401                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
402                 if (xpc_activate_IRQ_rcvd == 0)
403                         break;
404         }
405         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
406
407 }
408
409 static void
410 xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
411                               struct xpc_activate_mq_msghdr_uv *msg_hdr,
412                               int *wakeup_hb_checker)
413 {
414         unsigned long irq_flags;
415         struct xpc_partition_uv *part_uv = &part->sn.uv;
416         struct xpc_openclose_args *args;
417
418         part_uv->remote_act_state = msg_hdr->act_state;
419
420         switch (msg_hdr->type) {
421         case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV:
422                 /* syncing of remote_act_state was just done above */
423                 break;
424
425         case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
426                 struct xpc_activate_mq_msg_activate_req_uv *msg;
427
428                 /*
429                  * ??? Do we deal here with ts_jiffies being different
430                  * ??? if act_state != XPC_P_AS_INACTIVE instead of
431                  * ??? below?
432                  */
433                 msg = container_of(msg_hdr, struct
434                                    xpc_activate_mq_msg_activate_req_uv, hdr);
435
436                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
437                 if (part_uv->act_state_req == 0)
438                         xpc_activate_IRQ_rcvd++;
439                 part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
440                 part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
441                 part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
442                 part_uv->heartbeat_gpa = msg->heartbeat_gpa;
443
444                 if (msg->activate_gru_mq_desc_gpa !=
445                     part_uv->activate_gru_mq_desc_gpa) {
446                         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
447                         part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
448                         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
449                         part_uv->activate_gru_mq_desc_gpa =
450                             msg->activate_gru_mq_desc_gpa;
451                 }
452                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
453
454                 (*wakeup_hb_checker)++;
455                 break;
456         }
457         case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: {
458                 struct xpc_activate_mq_msg_deactivate_req_uv *msg;
459
460                 msg = container_of(msg_hdr, struct
461                                    xpc_activate_mq_msg_deactivate_req_uv, hdr);
462
463                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
464                 if (part_uv->act_state_req == 0)
465                         xpc_activate_IRQ_rcvd++;
466                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
467                 part_uv->reason = msg->reason;
468                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
469
470                 (*wakeup_hb_checker)++;
471                 return;
472         }
473         case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: {
474                 struct xpc_activate_mq_msg_chctl_closerequest_uv *msg;
475
476                 msg = container_of(msg_hdr, struct
477                                    xpc_activate_mq_msg_chctl_closerequest_uv,
478                                    hdr);
479                 args = &part->remote_openclose_args[msg->ch_number];
480                 args->reason = msg->reason;
481
482                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
483                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST;
484                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
485
486                 xpc_wakeup_channel_mgr(part);
487                 break;
488         }
489         case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: {
490                 struct xpc_activate_mq_msg_chctl_closereply_uv *msg;
491
492                 msg = container_of(msg_hdr, struct
493                                    xpc_activate_mq_msg_chctl_closereply_uv,
494                                    hdr);
495
496                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
497                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY;
498                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
499
500                 xpc_wakeup_channel_mgr(part);
501                 break;
502         }
503         case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: {
504                 struct xpc_activate_mq_msg_chctl_openrequest_uv *msg;
505
506                 msg = container_of(msg_hdr, struct
507                                    xpc_activate_mq_msg_chctl_openrequest_uv,
508                                    hdr);
509                 args = &part->remote_openclose_args[msg->ch_number];
510                 args->entry_size = msg->entry_size;
511                 args->local_nentries = msg->local_nentries;
512
513                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
514                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST;
515                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
516
517                 xpc_wakeup_channel_mgr(part);
518                 break;
519         }
520         case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: {
521                 struct xpc_activate_mq_msg_chctl_openreply_uv *msg;
522
523                 msg = container_of(msg_hdr, struct
524                                    xpc_activate_mq_msg_chctl_openreply_uv, hdr);
525                 args = &part->remote_openclose_args[msg->ch_number];
526                 args->remote_nentries = msg->remote_nentries;
527                 args->local_nentries = msg->local_nentries;
528                 args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
529
530                 spin_lock_irqsave(&part->chctl_lock, irq_flags);
531                 part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
532                 spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
533
534                 xpc_wakeup_channel_mgr(part);
535                 break;
536         }
537         case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV:
538                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
539                 part_uv->flags |= XPC_P_ENGAGED_UV;
540                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
541                 break;
542
543         case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV:
544                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
545                 part_uv->flags &= ~XPC_P_ENGAGED_UV;
546                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
547                 break;
548
549         default:
550                 dev_err(xpc_part, "received unknown activate_mq msg type=%d "
551                         "from partition=%d\n", msg_hdr->type, XPC_PARTID(part));
552
553                 /* get hb checker to deactivate from the remote partition */
554                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
555                 if (part_uv->act_state_req == 0)
556                         xpc_activate_IRQ_rcvd++;
557                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
558                 part_uv->reason = xpBadMsgType;
559                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
560
561                 (*wakeup_hb_checker)++;
562                 return;
563         }
564
565         if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies &&
566             part->remote_rp_ts_jiffies != 0) {
567                 /*
568                  * ??? Does what we do here need to be sensitive to
569                  * ??? act_state or remote_act_state?
570                  */
571                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
572                 if (part_uv->act_state_req == 0)
573                         xpc_activate_IRQ_rcvd++;
574                 part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV;
575                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
576
577                 (*wakeup_hb_checker)++;
578         }
579 }
580
581 static irqreturn_t
582 xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
583 {
584         struct xpc_activate_mq_msghdr_uv *msg_hdr;
585         short partid;
586         struct xpc_partition *part;
587         int wakeup_hb_checker = 0;
588         int part_referenced;
589
590         while (1) {
591                 msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
592                 if (msg_hdr == NULL)
593                         break;
594
595                 partid = msg_hdr->partid;
596                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
597                         dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() "
598                                 "received invalid partid=0x%x in message\n",
599                                 partid);
600                 } else {
601                         part = &xpc_partitions[partid];
602
603                         part_referenced = xpc_part_ref(part);
604                         xpc_handle_activate_mq_msg_uv(part, msg_hdr,
605                                                       &wakeup_hb_checker);
606                         if (part_referenced)
607                                 xpc_part_deref(part);
608                 }
609
610                 gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
611         }
612
613         if (wakeup_hb_checker)
614                 wake_up_interruptible(&xpc_activate_IRQ_wq);
615
616         return IRQ_HANDLED;
617 }
618
619 static enum xp_retval
620 xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
621                                 unsigned long gru_mq_desc_gpa)
622 {
623         enum xp_retval ret;
624
625         ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
626                                sizeof(struct gru_message_queue_desc));
627         if (ret == xpSuccess)
628                 gru_mq_desc->mq = NULL;
629
630         return ret;
631 }
632
633 static enum xp_retval
634 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
635                          int msg_type)
636 {
637         struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
638         struct xpc_partition_uv *part_uv = &part->sn.uv;
639         struct gru_message_queue_desc *gru_mq_desc;
640         unsigned long irq_flags;
641         enum xp_retval ret;
642
643         DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
644
645         msg_hdr->type = msg_type;
646         msg_hdr->partid = xp_partition_id;
647         msg_hdr->act_state = part->act_state;
648         msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
649
650         mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
651 again:
652         if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
653                 gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
654                 if (gru_mq_desc == NULL) {
655                         gru_mq_desc = kmalloc(sizeof(struct
656                                               gru_message_queue_desc),
657                                               GFP_KERNEL);
658                         if (gru_mq_desc == NULL) {
659                                 ret = xpNoMemory;
660                                 goto done;
661                         }
662                         part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
663                 }
664
665                 ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
666                                                       part_uv->
667                                                       activate_gru_mq_desc_gpa);
668                 if (ret != xpSuccess)
669                         goto done;
670
671                 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
672                 part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
673                 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
674         }
675
676         /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
677         ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
678                                msg_size);
679         if (ret != xpSuccess) {
680                 smp_rmb();      /* ensure a fresh copy of part_uv->flags */
681                 if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
682                         goto again;
683         }
684 done:
685         mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
686         return ret;
687 }
688
689 static void
690 xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg,
691                               size_t msg_size, int msg_type)
692 {
693         enum xp_retval ret;
694
695         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
696         if (unlikely(ret != xpSuccess))
697                 XPC_DEACTIVATE_PARTITION(part, ret);
698 }
699
700 static void
701 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
702                          void *msg, size_t msg_size, int msg_type)
703 {
704         struct xpc_partition *part = &xpc_partitions[ch->partid];
705         enum xp_retval ret;
706
707         ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
708         if (unlikely(ret != xpSuccess)) {
709                 if (irq_flags != NULL)
710                         spin_unlock_irqrestore(&ch->lock, *irq_flags);
711
712                 XPC_DEACTIVATE_PARTITION(part, ret);
713
714                 if (irq_flags != NULL)
715                         spin_lock_irqsave(&ch->lock, *irq_flags);
716         }
717 }
718
719 static void
720 xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
721 {
722         unsigned long irq_flags;
723         struct xpc_partition_uv *part_uv = &part->sn.uv;
724
725         /*
726          * !!! Make our side think that the remote partition sent an activate
727          * !!! mq message our way by doing what the activate IRQ handler would
728          * !!! do had one really been sent.
729          */
730
731         spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
732         if (part_uv->act_state_req == 0)
733                 xpc_activate_IRQ_rcvd++;
734         part_uv->act_state_req = act_state_req;
735         spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
736
737         wake_up_interruptible(&xpc_activate_IRQ_wq);
738 }
739
740 static enum xp_retval
741 xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
742                                   size_t *len)
743 {
744         s64 status;
745         enum xp_retval ret;
746
747 #if defined CONFIG_X86_64
748         status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa,
749                                           (u64 *)len);
750         if (status == BIOS_STATUS_SUCCESS)
751                 ret = xpSuccess;
752         else if (status == BIOS_STATUS_MORE_PASSES)
753                 ret = xpNeedMoreInfo;
754         else
755                 ret = xpBiosError;
756
757 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
758         status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len);
759         if (status == SALRET_OK)
760                 ret = xpSuccess;
761         else if (status == SALRET_MORE_PASSES)
762                 ret = xpNeedMoreInfo;
763         else
764                 ret = xpSalError;
765
766 #else
767         #error not a supported configuration
768 #endif
769
770         return ret;
771 }
772
773 static int
774 xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
775 {
776         xpc_heartbeat_uv =
777             &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
778         rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
779         rp->sn.uv.activate_gru_mq_desc_gpa =
780             uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
781         return 0;
782 }
783
/* Intentionally empty: this implementation does nothing for @partid. */
static void
xpc_allow_hb_uv(short partid)
{
	/* no-op */
}
788
/* Intentionally empty: this implementation does nothing for @partid. */
static void
xpc_disallow_hb_uv(short partid)
{
	/* no-op */
}
793
/* Intentionally empty: this implementation has nothing to do here. */
static void
xpc_disallow_all_hbs_uv(void)
{
	/* no-op */
}
798
799 static void
800 xpc_increment_heartbeat_uv(void)
801 {
802         xpc_heartbeat_uv->value++;
803 }
804
805 static void
806 xpc_offline_heartbeat_uv(void)
807 {
808         xpc_increment_heartbeat_uv();
809         xpc_heartbeat_uv->offline = 1;
810 }
811
812 static void
813 xpc_online_heartbeat_uv(void)
814 {
815         xpc_increment_heartbeat_uv();
816         xpc_heartbeat_uv->offline = 0;
817 }
818
819 static void
820 xpc_heartbeat_init_uv(void)
821 {
822         xpc_heartbeat_uv->value = 1;
823         xpc_heartbeat_uv->offline = 0;
824 }
825
/*
 * Heartbeat shutdown hook: simply marks the local heartbeat offline.
 */
static void
xpc_heartbeat_exit_uv(void)
{
	xpc_offline_heartbeat_uv();
	return;
}
831
832 static enum xp_retval
833 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
834 {
835         struct xpc_partition_uv *part_uv = &part->sn.uv;
836         enum xp_retval ret;
837
838         ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
839                                part_uv->heartbeat_gpa,
840                                sizeof(struct xpc_heartbeat_uv));
841         if (ret != xpSuccess)
842                 return ret;
843
844         if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
845             !part_uv->cached_heartbeat.offline) {
846
847                 ret = xpNoHeartbeat;
848         } else {
849                 part->last_heartbeat = part_uv->cached_heartbeat.value;
850         }
851         return ret;
852 }
853
854 static void
855 xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
856                                     unsigned long remote_rp_gpa, int nasid)
857 {
858         short partid = remote_rp->SAL_partid;
859         struct xpc_partition *part = &xpc_partitions[partid];
860         struct xpc_activate_mq_msg_activate_req_uv msg;
861
862         part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
863         part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
864         part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
865         part->sn.uv.activate_gru_mq_desc_gpa =
866             remote_rp->sn.uv.activate_gru_mq_desc_gpa;
867
868         /*
869          * ??? Is it a good idea to make this conditional on what is
870          * ??? potentially stale state information?
871          */
872         if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
873                 msg.rp_gpa = uv_gpa(xpc_rsvd_page);
874                 msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
875                 msg.activate_gru_mq_desc_gpa =
876                     xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
877                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
878                                            XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
879         }
880
881         if (part->act_state == XPC_P_AS_INACTIVE)
882                 xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
883 }
884
885 static void
886 xpc_request_partition_reactivation_uv(struct xpc_partition *part)
887 {
888         xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV);
889 }
890
891 static void
892 xpc_request_partition_deactivation_uv(struct xpc_partition *part)
893 {
894         struct xpc_activate_mq_msg_deactivate_req_uv msg;
895
896         /*
897          * ??? Is it a good idea to make this conditional on what is
898          * ??? potentially stale state information?
899          */
900         if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING &&
901             part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) {
902
903                 msg.reason = part->reason;
904                 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
905                                          XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV);
906         }
907 }
908
/*
 * uv implementation of the xpc_cancel_partition_deactivation_request hook;
 * intentionally a no-op.
 */
static void
xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
915
916 static void
917 xpc_init_fifo_uv(struct xpc_fifo_head_uv *head)
918 {
919         head->first = NULL;
920         head->last = NULL;
921         spin_lock_init(&head->lock);
922         head->n_entries = 0;
923 }
924
925 static void *
926 xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
927 {
928         unsigned long irq_flags;
929         struct xpc_fifo_entry_uv *first;
930
931         spin_lock_irqsave(&head->lock, irq_flags);
932         first = head->first;
933         if (head->first != NULL) {
934                 head->first = first->next;
935                 if (head->first == NULL)
936                         head->last = NULL;
937         }
938         head->n_entries--;
939         BUG_ON(head->n_entries < 0);
940         spin_unlock_irqrestore(&head->lock, irq_flags);
941         first->next = NULL;
942         return first;
943 }
944
945 static void
946 xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
947                       struct xpc_fifo_entry_uv *last)
948 {
949         unsigned long irq_flags;
950
951         last->next = NULL;
952         spin_lock_irqsave(&head->lock, irq_flags);
953         if (head->last != NULL)
954                 head->last->next = last;
955         else
956                 head->first = last;
957         head->last = last;
958         head->n_entries++;
959         spin_unlock_irqrestore(&head->lock, irq_flags);
960 }
961
962 static int
963 xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head)
964 {
965         return head->n_entries;
966 }
967
968 /*
969  * Setup the channel structures that are uv specific.
970  */
971 static enum xp_retval
972 xpc_setup_ch_structures_sn_uv(struct xpc_partition *part)
973 {
974         struct xpc_channel_uv *ch_uv;
975         int ch_number;
976
977         for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
978                 ch_uv = &part->channels[ch_number].sn.uv;
979
980                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
981                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
982         }
983
984         return xpSuccess;
985 }
986
/*
 * Tear down the uv-specific channel structures. There is nothing to undo
 * on uv, so this is intentionally a no-op.
 */
static void
xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part)
{
	/* nothing needs to be done */
}
996
997 static enum xp_retval
998 xpc_make_first_contact_uv(struct xpc_partition *part)
999 {
1000         struct xpc_activate_mq_msg_uv msg;
1001
1002         /*
1003          * We send a sync msg to get the remote partition's remote_act_state
1004          * updated to our current act_state which at this point should
1005          * be XPC_P_AS_ACTIVATING.
1006          */
1007         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1008                                       XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV);
1009
1010         while (part->sn.uv.remote_act_state != XPC_P_AS_ACTIVATING) {
1011
1012                 dev_dbg(xpc_part, "waiting to make first contact with "
1013                         "partition %d\n", XPC_PARTID(part));
1014
1015                 /* wait a 1/4 of a second or so */
1016                 (void)msleep_interruptible(250);
1017
1018                 if (part->act_state == XPC_P_AS_DEACTIVATING)
1019                         return part->reason;
1020         }
1021
1022         return xpSuccess;
1023 }
1024
1025 static u64
1026 xpc_get_chctl_all_flags_uv(struct xpc_partition *part)
1027 {
1028         unsigned long irq_flags;
1029         union xpc_channel_ctl_flags chctl;
1030
1031         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1032         chctl = part->chctl;
1033         if (chctl.all_flags != 0)
1034                 part->chctl.all_flags = 0;
1035
1036         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1037         return chctl.all_flags;
1038 }
1039
1040 static enum xp_retval
1041 xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch)
1042 {
1043         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1044         struct xpc_send_msg_slot_uv *msg_slot;
1045         unsigned long irq_flags;
1046         int nentries;
1047         int entry;
1048         size_t nbytes;
1049
1050         for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1051                 nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv);
1052                 ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1053                 if (ch_uv->send_msg_slots == NULL)
1054                         continue;
1055
1056                 for (entry = 0; entry < nentries; entry++) {
1057                         msg_slot = &ch_uv->send_msg_slots[entry];
1058
1059                         msg_slot->msg_slot_number = entry;
1060                         xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list,
1061                                               &msg_slot->next);
1062                 }
1063
1064                 spin_lock_irqsave(&ch->lock, irq_flags);
1065                 if (nentries < ch->local_nentries)
1066                         ch->local_nentries = nentries;
1067                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1068                 return xpSuccess;
1069         }
1070
1071         return xpNoMemory;
1072 }
1073
1074 static enum xp_retval
1075 xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch)
1076 {
1077         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1078         struct xpc_notify_mq_msg_uv *msg_slot;
1079         unsigned long irq_flags;
1080         int nentries;
1081         int entry;
1082         size_t nbytes;
1083
1084         for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1085                 nbytes = nentries * ch->entry_size;
1086                 ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL);
1087                 if (ch_uv->recv_msg_slots == NULL)
1088                         continue;
1089
1090                 for (entry = 0; entry < nentries; entry++) {
1091                         msg_slot = ch_uv->recv_msg_slots +
1092                             entry * ch->entry_size;
1093
1094                         msg_slot->hdr.msg_slot_number = entry;
1095                 }
1096
1097                 spin_lock_irqsave(&ch->lock, irq_flags);
1098                 if (nentries < ch->remote_nentries)
1099                         ch->remote_nentries = nentries;
1100                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1101                 return xpSuccess;
1102         }
1103
1104         return xpNoMemory;
1105 }
1106
1107 /*
1108  * Allocate msg_slots associated with the channel.
1109  */
1110 static enum xp_retval
1111 xpc_setup_msg_structures_uv(struct xpc_channel *ch)
1112 {
1113         static enum xp_retval ret;
1114         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1115
1116         DBUG_ON(ch->flags & XPC_C_SETUP);
1117
1118         ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
1119                                                    gru_message_queue_desc),
1120                                                    GFP_KERNEL);
1121         if (ch_uv->cached_notify_gru_mq_desc == NULL)
1122                 return xpNoMemory;
1123
1124         ret = xpc_allocate_send_msg_slot_uv(ch);
1125         if (ret == xpSuccess) {
1126
1127                 ret = xpc_allocate_recv_msg_slot_uv(ch);
1128                 if (ret != xpSuccess) {
1129                         kfree(ch_uv->send_msg_slots);
1130                         xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1131                 }
1132         }
1133         return ret;
1134 }
1135
1136 /*
1137  * Free up msg_slots and clear other stuff that were setup for the specified
1138  * channel.
1139  */
1140 static void
1141 xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
1142 {
1143         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1144
1145         DBUG_ON(!spin_is_locked(&ch->lock));
1146
1147         kfree(ch_uv->cached_notify_gru_mq_desc);
1148         ch_uv->cached_notify_gru_mq_desc = NULL;
1149
1150         if (ch->flags & XPC_C_SETUP) {
1151                 xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
1152                 kfree(ch_uv->send_msg_slots);
1153                 xpc_init_fifo_uv(&ch_uv->recv_msg_list);
1154                 kfree(ch_uv->recv_msg_slots);
1155         }
1156 }
1157
1158 static void
1159 xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1160 {
1161         struct xpc_activate_mq_msg_chctl_closerequest_uv msg;
1162
1163         msg.ch_number = ch->number;
1164         msg.reason = ch->reason;
1165         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1166                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV);
1167 }
1168
1169 static void
1170 xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1171 {
1172         struct xpc_activate_mq_msg_chctl_closereply_uv msg;
1173
1174         msg.ch_number = ch->number;
1175         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1176                                     XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV);
1177 }
1178
1179 static void
1180 xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1181 {
1182         struct xpc_activate_mq_msg_chctl_openrequest_uv msg;
1183
1184         msg.ch_number = ch->number;
1185         msg.entry_size = ch->entry_size;
1186         msg.local_nentries = ch->local_nentries;
1187         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1188                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV);
1189 }
1190
1191 static void
1192 xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
1193 {
1194         struct xpc_activate_mq_msg_chctl_openreply_uv msg;
1195
1196         msg.ch_number = ch->number;
1197         msg.local_nentries = ch->local_nentries;
1198         msg.remote_nentries = ch->remote_nentries;
1199         msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
1200         xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
1201                                     XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
1202 }
1203
1204 static void
1205 xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
1206 {
1207         unsigned long irq_flags;
1208
1209         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1210         part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST;
1211         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
1212
1213         xpc_wakeup_channel_mgr(part);
1214 }
1215
1216 static enum xp_retval
1217 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
1218                                unsigned long gru_mq_desc_gpa)
1219 {
1220         struct xpc_channel_uv *ch_uv = &ch->sn.uv;
1221
1222         DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
1223         return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
1224                                                gru_mq_desc_gpa);
1225 }
1226
1227 static void
1228 xpc_indicate_partition_engaged_uv(struct xpc_partition *part)
1229 {
1230         struct xpc_activate_mq_msg_uv msg;
1231
1232         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1233                                       XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV);
1234 }
1235
1236 static void
1237 xpc_indicate_partition_disengaged_uv(struct xpc_partition *part)
1238 {
1239         struct xpc_activate_mq_msg_uv msg;
1240
1241         xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
1242                                       XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV);
1243 }
1244
1245 static void
1246 xpc_assume_partition_disengaged_uv(short partid)
1247 {
1248         struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv;
1249         unsigned long irq_flags;
1250
1251         spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
1252         part_uv->flags &= ~XPC_P_ENGAGED_UV;
1253         spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
1254 }
1255
1256 static int
1257 xpc_partition_engaged_uv(short partid)
1258 {
1259         return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0;
1260 }
1261
1262 static int
1263 xpc_any_partition_engaged_uv(void)
1264 {
1265         struct xpc_partition_uv *part_uv;
1266         short partid;
1267
1268         for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
1269                 part_uv = &xpc_partitions[partid].sn.uv;
1270                 if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0)
1271                         return 1;
1272         }
1273         return 0;
1274 }
1275
1276 static enum xp_retval
1277 xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags,
1278                          struct xpc_send_msg_slot_uv **address_of_msg_slot)
1279 {
1280         enum xp_retval ret;
1281         struct xpc_send_msg_slot_uv *msg_slot;
1282         struct xpc_fifo_entry_uv *entry;
1283
1284         while (1) {
1285                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list);
1286                 if (entry != NULL)
1287                         break;
1288
1289                 if (flags & XPC_NOWAIT)
1290                         return xpNoWait;
1291
1292                 ret = xpc_allocate_msg_wait(ch);
1293                 if (ret != xpInterrupted && ret != xpTimeout)
1294                         return ret;
1295         }
1296
1297         msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next);
1298         *address_of_msg_slot = msg_slot;
1299         return xpSuccess;
1300 }
1301
1302 static void
1303 xpc_free_msg_slot_uv(struct xpc_channel *ch,
1304                      struct xpc_send_msg_slot_uv *msg_slot)
1305 {
1306         xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next);
1307
1308         /* wakeup anyone waiting for a free msg slot */
1309         if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1310                 wake_up(&ch->msg_allocate_wq);
1311 }
1312
1313 static void
1314 xpc_notify_sender_uv(struct xpc_channel *ch,
1315                      struct xpc_send_msg_slot_uv *msg_slot,
1316                      enum xp_retval reason)
1317 {
1318         xpc_notify_func func = msg_slot->func;
1319
1320         if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) {
1321
1322                 atomic_dec(&ch->n_to_notify);
1323
1324                 dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p "
1325                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1326                         msg_slot->msg_slot_number, ch->partid, ch->number);
1327
1328                 func(reason, ch->partid, ch->number, msg_slot->key);
1329
1330                 dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p "
1331                         "msg_slot_number=%d partid=%d channel=%d\n", msg_slot,
1332                         msg_slot->msg_slot_number, ch->partid, ch->number);
1333         }
1334 }
1335
1336 static void
1337 xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch,
1338                             struct xpc_notify_mq_msg_uv *msg)
1339 {
1340         struct xpc_send_msg_slot_uv *msg_slot;
1341         int entry = msg->hdr.msg_slot_number % ch->local_nentries;
1342
1343         msg_slot = &ch->sn.uv.send_msg_slots[entry];
1344
1345         BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number);
1346         msg_slot->msg_slot_number += ch->local_nentries;
1347
1348         if (msg_slot->func != NULL)
1349                 xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered);
1350
1351         xpc_free_msg_slot_uv(ch, msg_slot);
1352 }
1353
1354 static void
1355 xpc_handle_notify_mq_msg_uv(struct xpc_partition *part,
1356                             struct xpc_notify_mq_msg_uv *msg)
1357 {
1358         struct xpc_partition_uv *part_uv = &part->sn.uv;
1359         struct xpc_channel *ch;
1360         struct xpc_channel_uv *ch_uv;
1361         struct xpc_notify_mq_msg_uv *msg_slot;
1362         unsigned long irq_flags;
1363         int ch_number = msg->hdr.ch_number;
1364
1365         if (unlikely(ch_number >= part->nchannels)) {
1366                 dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid "
1367                         "channel number=0x%x in message from partid=%d\n",
1368                         ch_number, XPC_PARTID(part));
1369
1370                 /* get hb checker to deactivate from the remote partition */
1371                 spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1372                 if (part_uv->act_state_req == 0)
1373                         xpc_activate_IRQ_rcvd++;
1374                 part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV;
1375                 part_uv->reason = xpBadChannelNumber;
1376                 spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
1377
1378                 wake_up_interruptible(&xpc_activate_IRQ_wq);
1379                 return;
1380         }
1381
1382         ch = &part->channels[ch_number];
1383         xpc_msgqueue_ref(ch);
1384
1385         if (!(ch->flags & XPC_C_CONNECTED)) {
1386                 xpc_msgqueue_deref(ch);
1387                 return;
1388         }
1389
1390         /* see if we're really dealing with an ACK for a previously sent msg */
1391         if (msg->hdr.size == 0) {
1392                 xpc_handle_notify_mq_ack_uv(ch, msg);
1393                 xpc_msgqueue_deref(ch);
1394                 return;
1395         }
1396
1397         /* we're dealing with a normal message sent via the notify_mq */
1398         ch_uv = &ch->sn.uv;
1399
1400         msg_slot = ch_uv->recv_msg_slots +
1401             (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size;
1402
1403         BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number);
1404         BUG_ON(msg_slot->hdr.size != 0);
1405
1406         memcpy(msg_slot, msg, msg->hdr.size);
1407
1408         xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next);
1409
1410         if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
1411                 /*
1412                  * If there is an existing idle kthread get it to deliver
1413                  * the payload, otherwise we'll have to get the channel mgr
1414                  * for this partition to create a kthread to do the delivery.
1415                  */
1416                 if (atomic_read(&ch->kthreads_idle) > 0)
1417                         wake_up_nr(&ch->idle_wq, 1);
1418                 else
1419                         xpc_send_chctl_local_msgrequest_uv(part, ch->number);
1420         }
1421         xpc_msgqueue_deref(ch);
1422 }
1423
1424 static irqreturn_t
1425 xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
1426 {
1427         struct xpc_notify_mq_msg_uv *msg;
1428         short partid;
1429         struct xpc_partition *part;
1430
1431         while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
1432                NULL) {
1433
1434                 partid = msg->hdr.partid;
1435                 if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
1436                         dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received "
1437                                 "invalid partid=0x%x in message\n", partid);
1438                 } else {
1439                         part = &xpc_partitions[partid];
1440
1441                         if (xpc_part_ref(part)) {
1442                                 xpc_handle_notify_mq_msg_uv(part, msg);
1443                                 xpc_part_deref(part);
1444                         }
1445                 }
1446
1447                 gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
1448         }
1449
1450         return IRQ_HANDLED;
1451 }
1452
1453 static int
1454 xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch)
1455 {
1456         return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list);
1457 }
1458
1459 static void
1460 xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number)
1461 {
1462         struct xpc_channel *ch = &part->channels[ch_number];
1463         int ndeliverable_payloads;
1464
1465         xpc_msgqueue_ref(ch);
1466
1467         ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch);
1468
1469         if (ndeliverable_payloads > 0 &&
1470             (ch->flags & XPC_C_CONNECTED) &&
1471             (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) {
1472
1473                 xpc_activate_kthreads(ch, ndeliverable_payloads);
1474         }
1475
1476         xpc_msgqueue_deref(ch);
1477 }
1478
1479 static enum xp_retval
1480 xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
1481                     u16 payload_size, u8 notify_type, xpc_notify_func func,
1482                     void *key)
1483 {
1484         enum xp_retval ret = xpSuccess;
1485         struct xpc_send_msg_slot_uv *msg_slot = NULL;
1486         struct xpc_notify_mq_msg_uv *msg;
1487         u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV];
1488         size_t msg_size;
1489
1490         DBUG_ON(notify_type != XPC_N_CALL);
1491
1492         msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size;
1493         if (msg_size > ch->entry_size)
1494                 return xpPayloadTooBig;
1495
1496         xpc_msgqueue_ref(ch);
1497
1498         if (ch->flags & XPC_C_DISCONNECTING) {
1499                 ret = ch->reason;
1500                 goto out_1;
1501         }
1502         if (!(ch->flags & XPC_C_CONNECTED)) {
1503                 ret = xpNotConnected;
1504                 goto out_1;
1505         }
1506
1507         ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot);
1508         if (ret != xpSuccess)
1509                 goto out_1;
1510
1511         if (func != NULL) {
1512                 atomic_inc(&ch->n_to_notify);
1513
1514                 msg_slot->key = key;
1515                 smp_wmb(); /* a non-NULL func must hit memory after the key */
1516                 msg_slot->func = func;
1517
1518                 if (ch->flags & XPC_C_DISCONNECTING) {
1519                         ret = ch->reason;
1520                         goto out_2;
1521                 }
1522         }
1523
1524         msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer;
1525         msg->hdr.partid = xp_partition_id;
1526         msg->hdr.ch_number = ch->number;
1527         msg->hdr.size = msg_size;
1528         msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
1529         memcpy(&msg->payload, payload, payload_size);
1530
1531         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1532                                msg_size);
1533         if (ret == xpSuccess)
1534                 goto out_1;
1535
1536         XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1537 out_2:
1538         if (func != NULL) {
1539                 /*
1540                  * Try to NULL the msg_slot's func field. If we fail, then
1541                  * xpc_notify_senders_of_disconnect_uv() beat us to it, in which
1542                  * case we need to pretend we succeeded to send the message
1543                  * since the user will get a callout for the disconnect error
1544                  * by xpc_notify_senders_of_disconnect_uv(), and to also get an
1545                  * error returned here will confuse them. Additionally, since
1546                  * in this case the channel is being disconnected we don't need
1547                  * to put the the msg_slot back on the free list.
1548                  */
1549                 if (cmpxchg(&msg_slot->func, func, NULL) != func) {
1550                         ret = xpSuccess;
1551                         goto out_1;
1552                 }
1553
1554                 msg_slot->key = NULL;
1555                 atomic_dec(&ch->n_to_notify);
1556         }
1557         xpc_free_msg_slot_uv(ch, msg_slot);
1558 out_1:
1559         xpc_msgqueue_deref(ch);
1560         return ret;
1561 }
1562
1563 /*
1564  * Tell the callers of xpc_send_notify() that the status of their payloads
1565  * is unknown because the channel is now disconnecting.
1566  *
1567  * We don't worry about putting these msg_slots on the free list since the
1568  * msg_slots themselves are about to be kfree'd.
1569  */
1570 static void
1571 xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch)
1572 {
1573         struct xpc_send_msg_slot_uv *msg_slot;
1574         int entry;
1575
1576         DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING));
1577
1578         for (entry = 0; entry < ch->local_nentries; entry++) {
1579
1580                 if (atomic_read(&ch->n_to_notify) == 0)
1581                         break;
1582
1583                 msg_slot = &ch->sn.uv.send_msg_slots[entry];
1584                 if (msg_slot->func != NULL)
1585                         xpc_notify_sender_uv(ch, msg_slot, ch->reason);
1586         }
1587 }
1588
1589 /*
1590  * Get the next deliverable message's payload.
1591  */
1592 static void *
1593 xpc_get_deliverable_payload_uv(struct xpc_channel *ch)
1594 {
1595         struct xpc_fifo_entry_uv *entry;
1596         struct xpc_notify_mq_msg_uv *msg;
1597         void *payload = NULL;
1598
1599         if (!(ch->flags & XPC_C_DISCONNECTING)) {
1600                 entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list);
1601                 if (entry != NULL) {
1602                         msg = container_of(entry, struct xpc_notify_mq_msg_uv,
1603                                            hdr.u.next);
1604                         payload = &msg->payload;
1605                 }
1606         }
1607         return payload;
1608 }
1609
1610 static void
1611 xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
1612 {
1613         struct xpc_notify_mq_msg_uv *msg;
1614         enum xp_retval ret;
1615
1616         msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload);
1617
1618         /* return an ACK to the sender of this message */
1619
1620         msg->hdr.partid = xp_partition_id;
1621         msg->hdr.size = 0;      /* size of zero indicates this is an ACK */
1622
1623         ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
1624                                sizeof(struct xpc_notify_mq_msghdr_uv));
1625         if (ret != xpSuccess)
1626                 XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
1627
1628         msg->hdr.msg_slot_number += ch->remote_nentries;
1629 }
1630
1631 int
1632 xpc_init_uv(void)
1633 {
1634         xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
1635         xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv;
1636         xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
1637         xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
1638         xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
1639
1640         xpc_allow_hb = xpc_allow_hb_uv;
1641         xpc_disallow_hb = xpc_disallow_hb_uv;
1642         xpc_disallow_all_hbs = xpc_disallow_all_hbs_uv;
1643         xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
1644         xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
1645         xpc_online_heartbeat = xpc_online_heartbeat_uv;
1646         xpc_heartbeat_init = xpc_heartbeat_init_uv;
1647         xpc_heartbeat_exit = xpc_heartbeat_exit_uv;
1648         xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv;
1649
1650         xpc_request_partition_activation = xpc_request_partition_activation_uv;
1651         xpc_request_partition_reactivation =
1652             xpc_request_partition_reactivation_uv;
1653         xpc_request_partition_deactivation =
1654             xpc_request_partition_deactivation_uv;
1655         xpc_cancel_partition_deactivation_request =
1656             xpc_cancel_partition_deactivation_request_uv;
1657
1658         xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv;
1659         xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv;
1660
1661         xpc_make_first_contact = xpc_make_first_contact_uv;
1662
1663         xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv;
1664         xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv;
1665         xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv;
1666         xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv;
1667         xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv;
1668
1669         xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv;
1670
1671         xpc_setup_msg_structures = xpc_setup_msg_structures_uv;
1672         xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv;
1673
1674         xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv;
1675         xpc_indicate_partition_disengaged =
1676             xpc_indicate_partition_disengaged_uv;
1677         xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv;
1678         xpc_partition_engaged = xpc_partition_engaged_uv;
1679         xpc_any_partition_engaged = xpc_any_partition_engaged_uv;
1680
1681         xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv;
1682         xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv;
1683         xpc_send_payload = xpc_send_payload_uv;
1684         xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv;
1685         xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv;
1686         xpc_received_payload = xpc_received_payload_uv;
1687
1688         if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
1689                 dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n",
1690                         XPC_MSG_HDR_MAX_SIZE);
1691                 return -E2BIG;
1692         }
1693
1694         xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
1695                                                   XPC_ACTIVATE_IRQ_NAME,
1696                                                   xpc_handle_activate_IRQ_uv);
1697         if (IS_ERR(xpc_activate_mq_uv))
1698                 return PTR_ERR(xpc_activate_mq_uv);
1699
1700         xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
1701                                                 XPC_NOTIFY_IRQ_NAME,
1702                                                 xpc_handle_notify_IRQ_uv);
1703         if (IS_ERR(xpc_notify_mq_uv)) {
1704                 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1705                 return PTR_ERR(xpc_notify_mq_uv);
1706         }
1707
1708         return 0;
1709 }
1710
/*
 * Destroy the two GRU message queues referenced by the globals
 * xpc_notify_mq_uv and xpc_activate_mq_uv, i.e. the queues that
 * xpc_init_uv() obtains from xpc_create_gru_mq_uv().
 *
 * Takes no arguments and returns nothing.
 *
 * NOTE(review): neither pointer is tested here (no IS_ERR()/NULL check), so
 * this presumably must only be called after xpc_init_uv() has returned 0 --
 * confirm against the caller.
 */
1711 void
1712 xpc_exit_uv(void)
1713 {
             /* torn down in the reverse of the order xpc_init_uv() creates them */
1714         xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
1715         xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
1716 }