sgi-xp: isolate allocation of XPC's msgqueues to sn2 only
drivers/misc/sgi-xp/xpc_sn2.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) sn2-based functions.
 *
 *     Architecture specific implementation of common functions.
 *
 */

#include <linux/kernel.h>
#include <linux/delay.h>
#include <asm/uncached.h>
#include <asm/sn/sn_sal.h>
#include "xpc.h"

static struct xpc_vars_sn2 *xpc_vars;   /* >>> Add _sn2 suffix? */
static struct xpc_vars_part_sn2 *xpc_vars_part; /* >>> Add _sn2 suffix? */

/* SH_IPI_ACCESS shub register value on startup */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;

/*
 * Change protections to allow IPI operations.
 */
static void
xpc_allow_IPI_ops_sn2(void)
{
        int node;
        int nasid;

        /* >>> The following should get moved into SAL. */
        if (is_shub2()) {
                xpc_sh2_IPI_access0 =
                    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
                xpc_sh2_IPI_access1 =
                    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
                xpc_sh2_IPI_access2 =
                    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
                xpc_sh2_IPI_access3 =
                    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));

                for_each_online_node(node) {
                        nasid = cnodeid_to_nasid(node);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
                              -1UL);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
                              -1UL);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
                              -1UL);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
                              -1UL);
                }
        } else {
                xpc_sh1_IPI_access =
                    (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));

                for_each_online_node(node) {
                        nasid = cnodeid_to_nasid(node);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
                              -1UL);
                }
        }
}

/*
 * Restrict protections to disallow IPI operations.
 */
static void
xpc_disallow_IPI_ops_sn2(void)
{
        int node;
        int nasid;

        /* >>> The following should get moved into SAL. */
        if (is_shub2()) {
                for_each_online_node(node) {
                        nasid = cnodeid_to_nasid(node);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
                              xpc_sh2_IPI_access0);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
                              xpc_sh2_IPI_access1);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
                              xpc_sh2_IPI_access2);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
                              xpc_sh2_IPI_access3);
                }
        } else {
                for_each_online_node(node) {
                        nasid = cnodeid_to_nasid(node);
                        HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
                              xpc_sh1_IPI_access);
                }
        }
}

/*
 * The following set of functions are used for the sending and receiving of
 * IRQs (also known as IPIs). There are two flavors of IRQs, one that is
 * associated with partition activity (SGI_XPC_ACTIVATE) and the other that
 * is associated with channel activity (SGI_XPC_NOTIFY).
 */

static u64
xpc_receive_IRQ_amo_sn2(struct amo *amo)
{
        return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR);
}

static enum xp_retval
xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid,
                 int vector)
{
        int ret = 0;
        unsigned long irq_flags;

        local_irq_save(irq_flags);

        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag);
        sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);

        /*
         * We must always use the nofault function regardless of whether we
         * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
         * didn't, we'd never know that the other partition is down and would
         * keep sending IRQs and amos to it until the heartbeat times out.
         */
        ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
                                                     xp_nofault_PIOR_target));

        local_irq_restore(irq_flags);

        return ((ret == 0) ? xpSuccess : xpPioReadError);
}

static struct amo *
xpc_init_IRQ_amo_sn2(int index)
{
        struct amo *amo = xpc_vars->amos_page + index;

        (void)xpc_receive_IRQ_amo_sn2(amo);     /* clear amo variable */
        return amo;
}

/*
 * Functions associated with SGI_XPC_ACTIVATE IRQ.
 */

/*
 * Notify the heartbeat check thread that an activate IRQ has been received.
 */
static irqreturn_t
xpc_handle_activate_IRQ_sn2(int irq, void *dev_id)
{
        atomic_inc(&xpc_activate_IRQ_rcvd);
        wake_up_interruptible(&xpc_activate_IRQ_wq);
        return IRQ_HANDLED;
}
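
/*
 * Illustrative sketch, not part of the original file: the handler above is
 * registered elsewhere with an ordinary request_irq() against the fixed
 * SGI_XPC_ACTIVATE vector. The function name and the "xpc hb" devname used
 * here are assumptions for illustration only.
 */
static int __maybe_unused
xpc_register_activate_IRQ_example_sn2(void)
{
        int ret;

        ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2,
                          0, "xpc hb", NULL);
        if (ret != 0)
                dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
                        "errno=%d\n", -ret);
        return ret;
}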

/*
 * Flag the appropriate amo variable and send an IRQ to the specified node.
 */
static void
xpc_send_activate_IRQ_sn2(u64 amos_page_pa, int from_nasid, int to_nasid,
                          int to_phys_cpuid)
{
        int w_index = XPC_NASID_W_INDEX(from_nasid);
        int b_index = XPC_NASID_B_INDEX(from_nasid);
        struct amo *amos = (struct amo *)__va(amos_page_pa +
                                              (XPC_ACTIVATE_IRQ_AMOS *
                                              sizeof(struct amo)));

        (void)xpc_send_IRQ_sn2(&amos[w_index], (1UL << b_index), to_nasid,
                               to_phys_cpuid, SGI_XPC_ACTIVATE);
}
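
/*
 * Illustrative sketch, not part of the original file: on sn2 nasids are
 * even, so the activate amo bookkeeping can pack two nasids per bit. The
 * macro definitions assumed here (from xpc.h) are:
 *
 *      XPC_NASID_W_INDEX(_n)      == ((_n) / 64) / 2
 *      XPC_NASID_B_INDEX(_n)      == ((_n) / 2) & (64 - 1)
 *      XPC_NASID_FROM_W_B(_w, _b) == ((_w) * 64 + (_b)) * 2
 */
static void __maybe_unused
xpc_nasid_encoding_example_sn2(void)
{
        int nasid = 130;                        /* hypothetical remote nasid */
        int w = XPC_NASID_W_INDEX(nasid);       /* word 1 of the amo array */
        int b = XPC_NASID_B_INDEX(nasid);       /* bit 1 within that word */

        /* the (word, bit) pair round-trips back to the original nasid */
        BUG_ON(XPC_NASID_FROM_W_B(w, b) != nasid);
}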

static void
xpc_send_local_activate_IRQ_sn2(int from_nasid)
{
        int w_index = XPC_NASID_W_INDEX(from_nasid);
        int b_index = XPC_NASID_B_INDEX(from_nasid);
        struct amo *amos = (struct amo *)__va(xpc_vars->amos_page_pa +
                                              (XPC_ACTIVATE_IRQ_AMOS *
                                              sizeof(struct amo)));

        /* fake the sending and receipt of an activate IRQ from remote nasid */
        FETCHOP_STORE_OP(TO_AMO((u64)&amos[w_index].variable), FETCHOP_OR,
                         (1UL << b_index));
        atomic_inc(&xpc_activate_IRQ_rcvd);
        wake_up_interruptible(&xpc_activate_IRQ_wq);
}

/*
 * Functions associated with SGI_XPC_NOTIFY IRQ.
 */

/*
 * Check to see if any chctl flags were sent from the specified partition.
 */
static void
xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part)
{
        union xpc_channel_ctl_flags chctl;
        unsigned long irq_flags;

        chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2.
                                                  local_chctl_amo_va);
        if (chctl.all_flags == 0)
                return;

        spin_lock_irqsave(&part->chctl_lock, irq_flags);
        part->chctl.all_flags |= chctl.all_flags;
        spin_unlock_irqrestore(&part->chctl_lock, irq_flags);

        dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags="
                "0x%lx\n", XPC_PARTID(part), chctl.all_flags);

        xpc_wakeup_channel_mgr(part);
}

/*
 * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
 * than one partition, we use an amo structure per partition to indicate
 * whether a partition has sent an IRQ or not.  If it has, then wake up the
 * associated kthread to handle it.
 *
 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC
 * running on other partitions.
 *
 * Noteworthy Arguments:
 *
 *      irq - Interrupt ReQuest number. NOT USED.
 *
 *      dev_id - partid of IRQ's potential sender.
 */
static irqreturn_t
xpc_handle_notify_IRQ_sn2(int irq, void *dev_id)
{
        short partid = (short)(u64)dev_id;
        struct xpc_partition *part = &xpc_partitions[partid];

        DBUG_ON(partid < 0 || partid >= xp_max_npartitions);

        if (xpc_part_ref(part)) {
                xpc_check_for_sent_chctl_flags_sn2(part);

                xpc_part_deref(part);
        }
        return IRQ_HANDLED;
}

/*
 * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor
 * because the write to their associated amo variable completed after the IRQ
 * was received.
 */
static void
xpc_check_for_dropped_notify_IRQ_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;

        if (xpc_part_ref(part)) {
                xpc_check_for_sent_chctl_flags_sn2(part);

                part_sn2->dropped_notify_IRQ_timer.expires = jiffies +
                    XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
                add_timer(&part_sn2->dropped_notify_IRQ_timer);
                xpc_part_deref(part);
        }
}

/*
 * Send a notify IRQ to the remote partition that is associated with the
 * specified channel.
 */
static void
xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
                        char *chctl_flag_string, unsigned long *irq_flags)
{
        struct xpc_partition *part = &xpc_partitions[ch->partid];
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        union xpc_channel_ctl_flags chctl = { 0 };
        enum xp_retval ret;

        if (likely(part->act_state != XPC_P_DEACTIVATING)) {
                chctl.flags[ch->number] = chctl_flag;
                ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va,
                                       chctl.all_flags,
                                       part_sn2->notify_IRQ_nasid,
                                       part_sn2->notify_IRQ_phys_cpuid,
                                       SGI_XPC_NOTIFY);
                dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
                        chctl_flag_string, ch->partid, ch->number, ret);
                if (unlikely(ret != xpSuccess)) {
                        if (irq_flags != NULL)
                                spin_unlock_irqrestore(&ch->lock, *irq_flags);
                        XPC_DEACTIVATE_PARTITION(part, ret);
                        if (irq_flags != NULL)
                                spin_lock_irqsave(&ch->lock, *irq_flags);
                }
        }
}

#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \
                xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f)
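
/*
 * For reference (added comment): the #_ipi_f stringification above means a
 * call such as XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags)
 * expands to xpc_send_notify_IRQ_sn2(ch, XPC_CHCTL_OPENREQUEST,
 * "XPC_CHCTL_OPENREQUEST", irq_flags), handing dev_dbg() a readable name
 * for the flag at no runtime cost.
 */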

/*
 * Make it look like the remote partition, which is associated with the
 * specified channel, sent us a notify IRQ. This faked IRQ will be handled
 * by xpc_check_for_dropped_notify_IRQ_sn2().
 */
static void
xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag,
                              char *chctl_flag_string)
{
        struct xpc_partition *part = &xpc_partitions[ch->partid];
        union xpc_channel_ctl_flags chctl = { 0 };

        chctl.flags[ch->number] = chctl_flag;
        FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va->
                                variable), FETCHOP_OR, chctl.all_flags);
        dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
                chctl_flag_string, ch->partid, ch->number);
}

#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \
                xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f)

static void
xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch,
                                unsigned long *irq_flags)
{
        struct xpc_openclose_args *args = ch->local_openclose_args;

        args->reason = ch->reason;
        XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags);
}

static void
xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
{
        XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags);
}

static void
xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
{
        struct xpc_openclose_args *args = ch->local_openclose_args;

        args->msg_size = ch->msg_size;
        args->local_nentries = ch->local_nentries;
        XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags);
}

static void
xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags)
{
        struct xpc_openclose_args *args = ch->local_openclose_args;

        args->remote_nentries = ch->remote_nentries;
        args->local_nentries = ch->local_nentries;
        args->local_msgqueue_pa = __pa(ch->local_msgqueue);
        XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags);
}

static void
xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch)
{
        XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL);
}

static void
xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
{
        XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
}

/*
 * This next set of functions are used to keep track of when a partition is
 * potentially engaged in accessing memory belonging to another partition.
 */

static void
xpc_indicate_partition_engaged_sn2(struct xpc_partition *part)
{
        unsigned long irq_flags;
        struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
                                             (XPC_ENGAGED_PARTITIONS_AMO *
                                             sizeof(struct amo)));

        local_irq_save(irq_flags);

        /* set bit corresponding to our partid in remote partition's amo */
        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
                         (1UL << sn_partition_id));
        /*
         * We must always use the nofault function regardless of whether we
         * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
         * didn't, we'd never know that the other partition is down and would
         * keep sending IRQs and amos to it until the heartbeat times out.
         */
        (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
                                                               variable),
                                                     xp_nofault_PIOR_target));

        local_irq_restore(irq_flags);
}

static void
xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        unsigned long irq_flags;
        struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
                                             (XPC_ENGAGED_PARTITIONS_AMO *
                                             sizeof(struct amo)));

        local_irq_save(irq_flags);

        /* clear bit corresponding to our partid in remote partition's amo */
        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
                         ~(1UL << sn_partition_id));
        /*
         * We must always use the nofault function regardless of whether we
         * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
         * didn't, we'd never know that the other partition is down and would
         * keep sending IRQs and amos to it until the heartbeat times out.
         */
        (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
                                                               variable),
                                                     xp_nofault_PIOR_target));

        local_irq_restore(irq_flags);

        /*
         * Send activate IRQ to get other side to see that we've cleared our
         * bit in their engaged partitions amo.
         */
        xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
                                  cnodeid_to_nasid(0),
                                  part_sn2->activate_IRQ_nasid,
                                  part_sn2->activate_IRQ_phys_cpuid);
}

static int
xpc_partition_engaged_sn2(short partid)
{
        struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;

        /* our partition's amo variable ANDed with partid mask */
        return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
                (1UL << partid)) != 0;
}

static int
xpc_any_partition_engaged_sn2(void)
{
        struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;

        /* our partition's amo variable */
        return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0;
}

static void
xpc_assume_partition_disengaged_sn2(short partid)
{
        struct amo *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;

        /* clear bit(s) based on partid mask in our partition's amo */
        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
                         ~(1UL << partid));
}
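
/*
 * Illustrative sketch, not part of the original file: the engaged
 * partitions amo above lets shutdown paths poll until every remote
 * partition has cleared its bit for us. The helper name and the one-second
 * polling cadence here are assumptions for illustration.
 */
static void __maybe_unused
xpc_wait_for_disengage_example_sn2(void)
{
        while (xpc_any_partition_engaged_sn2()) {
                /* some remote partition may still reference our memory */
                (void)msleep_interruptible(1000);
        }
}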

/* original protection values for each node */
static u64 xpc_prot_vec_sn2[MAX_NUMNODES];

/*
 * Change protections to allow amo operations on non-Shub 1.1 systems.
 */
static enum xp_retval
xpc_allow_amo_ops_sn2(struct amo *amos_page)
{
        u64 nasid_array = 0;
        int ret;

        /*
         * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST
         * collides with memory operations. On those systems we call
         * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead.
         */
        if (!enable_shub_wars_1_1()) {
                ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE,
                                           SN_MEMPROT_ACCESS_CLASS_1,
                                           &nasid_array);
                if (ret != 0)
                        return xpSalError;
        }
        return xpSuccess;
}

/*
 * Change protections to allow amo operations on Shub 1.1 systems.
 */
static void
xpc_allow_amo_ops_shub_wars_1_1_sn2(void)
{
        int node;
        int nasid;

        if (!enable_shub_wars_1_1())
                return;

        for_each_online_node(node) {
                nasid = cnodeid_to_nasid(node);
                /* save current protection values */
                xpc_prot_vec_sn2[node] =
                    (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid,
                                                  SH1_MD_DQLP_MMR_DIR_PRIVEC0));
                /* open up everything */
                HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
                                             SH1_MD_DQLP_MMR_DIR_PRIVEC0),
                      -1UL);
                HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
                                             SH1_MD_DQRP_MMR_DIR_PRIVEC0),
                      -1UL);
        }
}

static enum xp_retval
xpc_rsvd_page_init_sn2(struct xpc_rsvd_page *rp)
{
        struct amo *amos_page;
        int i;
        int ret;

        xpc_vars = XPC_RP_VARS(rp);

        rp->sn.vars_pa = __pa(xpc_vars);

        /* vars_part array follows immediately after vars */
        xpc_vars_part = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) +
                                                     XPC_RP_VARS_SIZE);

        /*
         * Before clearing xpc_vars, see if a page of amos had been previously
         * allocated. If not we'll need to allocate one and set permissions
         * so that cross-partition amos are allowed.
         *
         * The allocated amo page needs MCA reporting to remain disabled after
         * XPC has unloaded.  To make this work, we keep a copy of the pointer
         * to this page (i.e., amos_page) in the struct xpc_vars structure,
         * which is pointed to by the reserved page, and re-use that saved copy
         * on subsequent loads of XPC. This amo page is never freed, and its
         * memory protections are never restricted.
         */
        amos_page = xpc_vars->amos_page;
        if (amos_page == NULL) {
                amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1));
                if (amos_page == NULL) {
                        dev_err(xpc_part, "can't allocate page of amos\n");
                        return xpNoMemory;
                }

                /*
                 * Open up amo-R/W to cpu.  This is done on Shub 1.1 systems
                 * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called.
                 */
                ret = xpc_allow_amo_ops_sn2(amos_page);
                if (ret != xpSuccess) {
                        dev_err(xpc_part, "can't allow amo operations\n");
                        uncached_free_page(__IA64_UNCACHED_OFFSET |
                                           TO_PHYS((u64)amos_page), 1);
                        return ret;
                }
        }

        /* clear xpc_vars */
        memset(xpc_vars, 0, sizeof(struct xpc_vars_sn2));

        xpc_vars->version = XPC_V_VERSION;
        xpc_vars->activate_IRQ_nasid = cpuid_to_nasid(0);
        xpc_vars->activate_IRQ_phys_cpuid = cpu_physical_id(0);
        xpc_vars->vars_part_pa = __pa(xpc_vars_part);
        xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
        xpc_vars->amos_page = amos_page;        /* save for next load of XPC */

        /* clear xpc_vars_part */
        memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part_sn2) *
               xp_max_npartitions);

        /* initialize the activate IRQ related amo variables */
        for (i = 0; i < xp_nasid_mask_words; i++)
                (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS + i);

        /* initialize the engaged remote partitions related amo variables */
        (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO);
        (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO);

        return xpSuccess;
}
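
/*
 * Illustrative sketch, not part of the original file: the reserved page
 * layout set up above assumes that struct xpc_vars_sn2 fits within the
 * XPC_RP_VARS_SIZE reservation (the vars_part array starts right after it)
 * and that two vars_part entries share a cacheline, which the pull logic
 * later in this file also asserts. The helper name is for illustration.
 */
static void __maybe_unused
xpc_rsvd_page_layout_checks_example_sn2(void)
{
        BUILD_BUG_ON(sizeof(struct xpc_vars_sn2) > XPC_RP_VARS_SIZE);
        BUILD_BUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);
}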

static void
xpc_increment_heartbeat_sn2(void)
{
        xpc_vars->heartbeat++;
}

static void
xpc_offline_heartbeat_sn2(void)
{
        xpc_increment_heartbeat_sn2();
        xpc_vars->heartbeat_offline = 1;
}

static void
xpc_online_heartbeat_sn2(void)
{
        xpc_increment_heartbeat_sn2();
        xpc_vars->heartbeat_offline = 0;
}

static void
xpc_heartbeat_init_sn2(void)
{
        DBUG_ON(xpc_vars == NULL);

        bitmap_zero(xpc_vars->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2);
        xpc_heartbeating_to_mask = &xpc_vars->heartbeating_to_mask[0];
        xpc_online_heartbeat_sn2();
}

static void
xpc_heartbeat_exit_sn2(void)
{
        xpc_offline_heartbeat_sn2();
}

/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active.  If not, the partition is deactivated.
 */
static void
xpc_check_remote_hb_sn2(void)
{
        struct xpc_vars_sn2 *remote_vars;
        struct xpc_partition *part;
        short partid;
        enum xp_retval ret;

        remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;

        for (partid = 0; partid < xp_max_npartitions; partid++) {

                if (xpc_exiting)
                        break;

                if (partid == sn_partition_id)
                        continue;

                part = &xpc_partitions[partid];

                if (part->act_state == XPC_P_INACTIVE ||
                    part->act_state == XPC_P_DEACTIVATING) {
                        continue;
                }

                /* pull the remote_hb cache line */
                ret = xp_remote_memcpy(remote_vars,
                                       (void *)part->sn.sn2.remote_vars_pa,
                                       XPC_RP_VARS_SIZE);
                if (ret != xpSuccess) {
                        XPC_DEACTIVATE_PARTITION(part, ret);
                        continue;
                }

                dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
                        " = %ld, heartbeat_offline = %ld, HB_mask[0] = 0x%lx\n",
                        partid, remote_vars->heartbeat, part->last_heartbeat,
                        remote_vars->heartbeat_offline,
                        remote_vars->heartbeating_to_mask[0]);

                if (((remote_vars->heartbeat == part->last_heartbeat) &&
                     (remote_vars->heartbeat_offline == 0)) ||
                    !xpc_hb_allowed(sn_partition_id,
                                    &remote_vars->heartbeating_to_mask)) {

                        XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
                        continue;
                }

                part->last_heartbeat = remote_vars->heartbeat;
        }
}
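
/*
 * Illustrative sketch, not part of the original file: a heartbeat checker
 * thread (xpc_hb_checker() in xpc_main.c) drives the scan above at a
 * configurable interval. The loop shape and the xpc_hb_interval variable
 * name used here are assumptions for illustration.
 */
static void __maybe_unused
xpc_hb_checker_loop_example_sn2(void)
{
        while (!xpc_exiting) {
                xpc_increment_heartbeat_sn2();  /* prove we're still alive */
                xpc_check_remote_hb_sn2();      /* and that the others are */
                (void)msleep_interruptible(xpc_hb_interval * 1000);
        }
}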

/*
 * Get a copy of the remote partition's XPC variables from the reserved page.
 *
 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
 * assumed to be of size XPC_RP_VARS_SIZE.
 */
static enum xp_retval
xpc_get_remote_vars_sn2(u64 remote_vars_pa, struct xpc_vars_sn2 *remote_vars)
{
        enum xp_retval ret;

        if (remote_vars_pa == 0)
                return xpVarsNotSet;

        /* pull over the cross partition variables */
        ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
                               XPC_RP_VARS_SIZE);
        if (ret != xpSuccess)
                return ret;

        if (XPC_VERSION_MAJOR(remote_vars->version) !=
            XPC_VERSION_MAJOR(XPC_V_VERSION)) {
                return xpBadVersion;
        }

        return xpSuccess;
}

static void
xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp,
                                     u64 remote_rp_pa, int nasid)
{
        xpc_send_local_activate_IRQ_sn2(nasid);
}

static void
xpc_request_partition_reactivation_sn2(struct xpc_partition *part)
{
        xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid);
}

static void
xpc_request_partition_deactivation_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        unsigned long irq_flags;
        struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa +
                                             (XPC_DEACTIVATE_REQUEST_AMO *
                                             sizeof(struct amo)));

        local_irq_save(irq_flags);

        /* set bit corresponding to our partid in remote partition's amo */
        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR,
                         (1UL << sn_partition_id));
        /*
         * We must always use the nofault function regardless of whether we
         * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
         * didn't, we'd never know that the other partition is down and would
         * keep sending IRQs and amos to it until the heartbeat times out.
         */
        (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
                                                               variable),
                                                     xp_nofault_PIOR_target));

        local_irq_restore(irq_flags);

        /*
         * Send activate IRQ to get other side to see that we've set our
         * bit in their deactivate request amo.
         */
        xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
                                  cnodeid_to_nasid(0),
                                  part_sn2->activate_IRQ_nasid,
                                  part_sn2->activate_IRQ_phys_cpuid);
}

static void
xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part)
{
        unsigned long irq_flags;
        struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa +
                                             (XPC_DEACTIVATE_REQUEST_AMO *
                                             sizeof(struct amo)));

        local_irq_save(irq_flags);

        /* clear bit corresponding to our partid in remote partition's amo */
        FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND,
                         ~(1UL << sn_partition_id));
        /*
         * We must always use the nofault function regardless of whether we
         * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
         * didn't, we'd never know that the other partition is down and would
         * keep sending IRQs and amos to it until the heartbeat times out.
         */
        (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->
                                                               variable),
                                                     xp_nofault_PIOR_target));

        local_irq_restore(irq_flags);
}

static int
xpc_partition_deactivation_requested_sn2(short partid)
{
        struct amo *amo = xpc_vars->amos_page + XPC_DEACTIVATE_REQUEST_AMO;

        /* our partition's amo variable ANDed with partid mask */
        return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) &
                (1UL << partid)) != 0;
}

/*
 * Update the remote partition's info.
 */
static void
xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version,
                              unsigned long *remote_rp_stamp, u64 remote_rp_pa,
                              u64 remote_vars_pa,
                              struct xpc_vars_sn2 *remote_vars)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;

        part->remote_rp_version = remote_rp_version;
        dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
                part->remote_rp_version);

        part->remote_rp_stamp = *remote_rp_stamp;
        dev_dbg(xpc_part, "  remote_rp_stamp = 0x%016lx\n",
                part->remote_rp_stamp);

        part->remote_rp_pa = remote_rp_pa;
        dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);

        part_sn2->remote_vars_pa = remote_vars_pa;
        dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
                part_sn2->remote_vars_pa);

        part->last_heartbeat = remote_vars->heartbeat;
        dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
                part->last_heartbeat);

        part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa;
        dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
                part_sn2->remote_vars_part_pa);

        part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid;
        dev_dbg(xpc_part, "  activate_IRQ_nasid = 0x%x\n",
                part_sn2->activate_IRQ_nasid);

        part_sn2->activate_IRQ_phys_cpuid =
            remote_vars->activate_IRQ_phys_cpuid;
        dev_dbg(xpc_part, "  activate_IRQ_phys_cpuid = 0x%x\n",
                part_sn2->activate_IRQ_phys_cpuid);

        part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa;
        dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
                part_sn2->remote_amos_page_pa);

        part_sn2->remote_vars_version = remote_vars->version;
        dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
                part_sn2->remote_vars_version);
}

/*
 * Prior code has determined the nasid which generated an activate IRQ.
 * Inspect that nasid to determine if its partition needs to be activated
 * or deactivated.
 *
 * A partition is considered "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat.  A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 */
static void
xpc_identify_activate_IRQ_req_sn2(int nasid)
{
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars_sn2 *remote_vars;
        u64 remote_rp_pa;
        u64 remote_vars_pa;
        int remote_rp_version;
        int reactivate = 0;
        unsigned long remote_rp_stamp = 0;
        short partid;
        struct xpc_partition *part;
        struct xpc_partition_sn2 *part_sn2;
        enum xp_retval ret;

        /* pull over the reserved page structure */

        remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;

        ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
        if (ret != xpSuccess) {
                dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
                         "which sent interrupt, reason=%d\n", nasid, ret);
                return;
        }

        remote_vars_pa = remote_rp->sn.vars_pa;
        remote_rp_version = remote_rp->version;
        remote_rp_stamp = remote_rp->stamp;

        partid = remote_rp->SAL_partid;
        part = &xpc_partitions[partid];
        part_sn2 = &part->sn.sn2;

        /* pull over the cross partition variables */

        remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer;

        ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars);
        if (ret != xpSuccess) {
                dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
                         "which sent interrupt, reason=%d\n", nasid, ret);

                XPC_DEACTIVATE_PARTITION(part, ret);
                return;
        }

        part->activate_IRQ_rcvd++;

        dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
                "%ld:0x%lx\n", (int)nasid, (int)partid, part->activate_IRQ_rcvd,
                remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]);

        if (xpc_partition_disengaged(part) &&
            part->act_state == XPC_P_INACTIVE) {

                xpc_update_partition_info_sn2(part, remote_rp_version,
                                              &remote_rp_stamp, remote_rp_pa,
                                              remote_vars_pa, remote_vars);

                if (xpc_partition_deactivation_requested_sn2(partid)) {
                        /*
                         * Other side is waiting on us to deactivate even though
                         * we already have.
                         */
                        return;
                }

                xpc_activate_partition(part);
                return;
        }

        DBUG_ON(part->remote_rp_version == 0);
        DBUG_ON(part_sn2->remote_vars_version == 0);

        if (remote_rp_stamp != part->remote_rp_stamp) {

                /* the other side rebooted */

                DBUG_ON(xpc_partition_engaged_sn2(partid));
                DBUG_ON(xpc_partition_deactivation_requested_sn2(partid));

                xpc_update_partition_info_sn2(part, remote_rp_version,
                                              &remote_rp_stamp, remote_rp_pa,
                                              remote_vars_pa, remote_vars);
                reactivate = 1;
        }

        if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) {
                /* still waiting on other side to disengage from us */
                return;
        }

        if (reactivate)
                XPC_DEACTIVATE_PARTITION(part, xpReactivating);
        else if (xpc_partition_deactivation_requested_sn2(partid))
                XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
}

/*
 * Loop through the activation amo variables and process any bits
 * which are set.  Each bit indicates a nasid sending a partition
 * activation or deactivation request.
 *
 * Return #of IRQs detected.
 */
int
xpc_identify_activate_IRQ_sender_sn2(void)
{
        int word, bit;
        u64 nasid_mask;
        u64 nasid;              /* remote nasid */
        int n_IRQs_detected = 0;
        struct amo *act_amos;

        act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;

        /* scan through act amo variable looking for non-zero entries */
        for (word = 0; word < xp_nasid_mask_words; word++) {

                if (xpc_exiting)
                        break;

                nasid_mask = xpc_receive_IRQ_amo_sn2(&act_amos[word]);
                if (nasid_mask == 0) {
                        /* no IRQs from nasids in this variable */
                        continue;
                }

                dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", word,
                        nasid_mask);

                /*
                 * If this nasid has been added to the machine since
                 * our partition was reset, this will retain the
                 * remote nasid in our reserved pages machine mask.
                 * This is used in the event of module reload.
                 */
                xpc_mach_nasids[word] |= nasid_mask;

                /* locate the nasid(s) which sent interrupts */

                for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
                        if (nasid_mask & (1UL << bit)) {
                                n_IRQs_detected++;
                                nasid = XPC_NASID_FROM_W_B(word, bit);
                                dev_dbg(xpc_part, "interrupt from nasid %ld\n",
                                        nasid);
                                xpc_identify_activate_IRQ_req_sn2(nasid);
                        }
                }
        }
        return n_IRQs_detected;
}

static void
xpc_process_activate_IRQ_rcvd_sn2(int n_IRQs_expected)
{
        int n_IRQs_detected;

        n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2();
        if (n_IRQs_detected < n_IRQs_expected) {
                /* retry once to help avoid missing amo */
                (void)xpc_identify_activate_IRQ_sender_sn2();
        }
}

/*
 * Guarantee that the kzalloc'd memory is cacheline aligned.
 */
static void *
xpc_kzalloc_cacheline_aligned_sn2(size_t size, gfp_t flags, void **base)
{
        /* see if kzalloc will give us cacheline aligned memory by default */
        *base = kzalloc(size, flags);
        if (*base == NULL)
                return NULL;

        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
                return *base;

        kfree(*base);

        /* nope, we'll have to do it ourselves */
        *base = kzalloc(size + L1_CACHE_BYTES, flags);
        if (*base == NULL)
                return NULL;

        return (void *)L1_CACHE_ALIGN((u64)*base);
}
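
/*
 * Illustrative sketch, not part of the original file: callers keep the raw
 * pointer returned through *base for kfree() and use the aligned return
 * value for everything else, as the GET/PUT allocations below do. The
 * local names and the 128-byte size here are for illustration only.
 */
static void __maybe_unused
xpc_kzalloc_cacheline_aligned_example_sn2(void)
{
        void *buf_base;
        void *buf = xpc_kzalloc_cacheline_aligned_sn2(128, GFP_KERNEL,
                                                      &buf_base);

        if (buf != NULL) {
                /* ... use the cacheline aligned buf ... */
                kfree(buf_base);        /* free via the base, not buf */
        }
}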

/*
 * Setup the infrastructure necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static enum xp_retval
xpc_setup_infrastructure_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        enum xp_retval retval;
        int ret;
        int cpuid;
        int ch_number;
        struct xpc_channel *ch;
        struct timer_list *timer;
        short partid = XPC_PARTID(part);

        /*
         * Allocate all of the channel structures as a contiguous chunk of
         * memory.
         */
        DBUG_ON(part->channels != NULL);
        part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
                                 GFP_KERNEL);
        if (part->channels == NULL) {
                dev_err(xpc_chan, "can't get memory for channels\n");
                return xpNoMemory;
        }

        /* allocate all the required GET/PUT values */

        part_sn2->local_GPs =
            xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
                                              &part_sn2->local_GPs_base);
        if (part_sn2->local_GPs == NULL) {
                dev_err(xpc_chan, "can't get memory for local get/put "
                        "values\n");
                retval = xpNoMemory;
                goto out_1;
        }

        part_sn2->remote_GPs =
            xpc_kzalloc_cacheline_aligned_sn2(XPC_GP_SIZE, GFP_KERNEL,
                                              &part_sn2->remote_GPs_base);
        if (part_sn2->remote_GPs == NULL) {
                dev_err(xpc_chan, "can't get memory for remote get/put "
                        "values\n");
                retval = xpNoMemory;
                goto out_2;
        }

        part_sn2->remote_GPs_pa = 0;

        /* allocate all the required open and close args */

        part->local_openclose_args =
            xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
                                              GFP_KERNEL,
                                              &part->local_openclose_args_base);
        if (part->local_openclose_args == NULL) {
                dev_err(xpc_chan, "can't get memory for local connect args\n");
                retval = xpNoMemory;
                goto out_3;
        }

        part->remote_openclose_args =
            xpc_kzalloc_cacheline_aligned_sn2(XPC_OPENCLOSE_ARGS_SIZE,
                                              GFP_KERNEL,
                                             &part->remote_openclose_args_base);
        if (part->remote_openclose_args == NULL) {
                dev_err(xpc_chan, "can't get memory for remote connect args\n");
                retval = xpNoMemory;
                goto out_4;
        }

        part_sn2->remote_openclose_args_pa = 0;

        part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid);
        part->chctl.all_flags = 0;
        spin_lock_init(&part->chctl_lock);

        part_sn2->notify_IRQ_nasid = 0;
        part_sn2->notify_IRQ_phys_cpuid = 0;
        part_sn2->remote_chctl_amo_va = NULL;

        atomic_set(&part->channel_mgr_requests, 1);
        init_waitqueue_head(&part->channel_mgr_wq);

        sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid);
        ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2,
                          IRQF_SHARED, part_sn2->notify_IRQ_owner,
                          (void *)(u64)partid);
        if (ret != 0) {
                dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
                        "errno=%d\n", -ret);
                retval = xpLackOfResources;
                goto out_5;
        }

        /* Setup a timer to check for dropped notify IRQs */
        timer = &part_sn2->dropped_notify_IRQ_timer;
        init_timer(timer);
        timer->function =
            (void (*)(unsigned long))xpc_check_for_dropped_notify_IRQ_sn2;
        timer->data = (unsigned long)part;
        timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL;
        add_timer(timer);

        part->nchannels = XPC_MAX_NCHANNELS;

        atomic_set(&part->nchannels_active, 0);
        atomic_set(&part->nchannels_engaged, 0);

        for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
                ch = &part->channels[ch_number];

                ch->partid = partid;
                ch->number = ch_number;
                ch->flags = XPC_C_DISCONNECTED;

                ch->sn.sn2.local_GP = &part_sn2->local_GPs[ch_number];
                ch->local_openclose_args =
                    &part->local_openclose_args[ch_number];

                atomic_set(&ch->kthreads_assigned, 0);
                atomic_set(&ch->kthreads_idle, 0);
                atomic_set(&ch->kthreads_active, 0);

                atomic_set(&ch->references, 0);
                atomic_set(&ch->n_to_notify, 0);

                spin_lock_init(&ch->lock);
                mutex_init(&ch->sn.sn2.msg_to_pull_mutex);
                init_completion(&ch->wdisconnect_wait);

                atomic_set(&ch->n_on_msg_allocate_wq, 0);
                init_waitqueue_head(&ch->msg_allocate_wq);
                init_waitqueue_head(&ch->idle_wq);
        }

        /*
         * With the setting of the partition setup_state to XPC_P_SETUP, we're
         * declaring that this partition is ready to go.
         */
        part->setup_state = XPC_P_SETUP;

        /*
         * Setup the per partition specific variables required by the
         * remote partition to establish channel connections with us.
         *
         * The setting of the magic # indicates that these per partition
         * specific variables are ready to be used.
         */
        xpc_vars_part[partid].GPs_pa = __pa(part_sn2->local_GPs);
        xpc_vars_part[partid].openclose_args_pa =
            __pa(part->local_openclose_args);
        xpc_vars_part[partid].chctl_amo_pa = __pa(part_sn2->local_chctl_amo_va);
        cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
        xpc_vars_part[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid);
        xpc_vars_part[partid].notify_IRQ_phys_cpuid = cpu_physical_id(cpuid);
        xpc_vars_part[partid].nchannels = part->nchannels;
        xpc_vars_part[partid].magic = XPC_VP_MAGIC1;

        return xpSuccess;

        /* setup of infrastructure failed */
out_5:
        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
out_4:
        kfree(part->local_openclose_args_base);
        part->local_openclose_args = NULL;
out_3:
        kfree(part_sn2->remote_GPs_base);
        part_sn2->remote_GPs = NULL;
out_2:
        kfree(part_sn2->local_GPs_base);
        part_sn2->local_GPs = NULL;
out_1:
        kfree(part->channels);
        part->channels = NULL;
        return retval;
}

/*
 * Teardown the infrastructure necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static void
xpc_teardown_infrastructure_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        short partid = XPC_PARTID(part);

        /*
         * We start off by making this partition inaccessible to local
         * processes by marking it as no longer setup. Then we make it
         * inaccessible to remote processes by clearing the XPC per partition
         * specific variable's magic # (which indicates that these variables
         * are no longer valid) and by ignoring all XPC notify IRQs sent to
         * this partition.
         */

        DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
        DBUG_ON(atomic_read(&part->nchannels_active) != 0);
        DBUG_ON(part->setup_state != XPC_P_SETUP);
        part->setup_state = XPC_P_WTEARDOWN;

        xpc_vars_part[partid].magic = 0;

        free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid);

        /*
         * Before proceeding with the teardown we have to wait until all
         * existing references cease.
         */
        wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));

        /* now we can begin tearing down the infrastructure */

        part->setup_state = XPC_P_TORNDOWN;

        /* in case we've still got outstanding timers registered... */
        del_timer_sync(&part_sn2->dropped_notify_IRQ_timer);

        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
        kfree(part->local_openclose_args_base);
        part->local_openclose_args = NULL;
        kfree(part_sn2->remote_GPs_base);
        part_sn2->remote_GPs = NULL;
        kfree(part_sn2->local_GPs_base);
        part_sn2->local_GPs = NULL;
        kfree(part->channels);
        part->channels = NULL;
        part_sn2->local_chctl_amo_va = NULL;
}

/*
 * Create a wrapper that hides the underlying mechanism for pulling a cacheline
 * (or multiple cachelines) from a remote partition.
 *
 * src must be a cacheline aligned physical address on the remote partition.
 * dst must be a cacheline aligned virtual address on this partition.
 * cnt must be cacheline sized
 */
/* >>> Replace this function by call to xp_remote_memcpy() or bte_copy()? */
static enum xp_retval
xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst,
                               const void *src, size_t cnt)
{
        enum xp_retval ret;

        DBUG_ON((u64)src != L1_CACHE_ALIGN((u64)src));
        DBUG_ON((u64)dst != L1_CACHE_ALIGN((u64)dst));
        DBUG_ON(cnt != L1_CACHE_ALIGN(cnt));

        if (part->act_state == XPC_P_DEACTIVATING)
                return part->reason;

        ret = xp_remote_memcpy(dst, src, cnt);
        if (ret != xpSuccess) {
                dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed,"
                        " ret=%d\n", XPC_PARTID(part), ret);
        }
        return ret;
}
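
/*
 * Illustrative sketch, not part of the original file: later message-path
 * code (not shown in this excerpt) refreshes its copy of the remote GET/PUT
 * values through the wrapper above roughly as follows; treat the exact call
 * site and the helper name as assumptions.
 */
static enum xp_retval __maybe_unused
xpc_pull_remote_GPs_example_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;

        /* both addresses are cacheline aligned by construction above */
        return xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
                                              (void *)part_sn2->remote_GPs_pa,
                                              XPC_GP_SIZE);
}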

/*
 * Pull the remote per partition specific variables from the specified
 * partition.
 */
static enum xp_retval
xpc_pull_remote_vars_part_sn2(struct xpc_partition *part)
{
        struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
        u8 buffer[L1_CACHE_BYTES * 2];
        struct xpc_vars_part_sn2 *pulled_entry_cacheline =
            (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer);
        struct xpc_vars_part_sn2 *pulled_entry;
        u64 remote_entry_cacheline_pa, remote_entry_pa;
        short partid = XPC_PARTID(part);
        enum xp_retval ret;

        /* pull the cacheline that contains the variables we're interested in */

        DBUG_ON(part_sn2->remote_vars_part_pa !=
                L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa));
        DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2);

        remote_entry_pa = part_sn2->remote_vars_part_pa +
            sn_partition_id * sizeof(struct xpc_vars_part_sn2);

        remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1));

        pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline
                                                    + (remote_entry_pa &
                                                    (L1_CACHE_BYTES - 1)));

        ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline,
                                             (void *)remote_entry_cacheline_pa,
                                             L1_CACHE_BYTES);
        if (ret != xpSuccess) {
                dev_dbg(xpc_chan, "failed to pull XPC vars_part from "
                        "partition %d, ret=%d\n", partid, ret);
                return ret;
        }

        /* see if they've been set up yet */

        if (pulled_entry->magic != XPC_VP_MAGIC1 &&
            pulled_entry->magic != XPC_VP_MAGIC2) {

                if (pulled_entry->magic != 0) {
                        dev_dbg(xpc_chan, "partition %d's XPC vars_part for "
                                "partition %d has bad magic value (=0x%lx)\n",
                                partid, sn_partition_id, pulled_entry->magic);
                        return xpBadMagic;
                }

                /* they've not been initialized yet */
                return xpRetry;
        }

        if (xpc_vars_part[partid].magic == XPC_VP_MAGIC1) {

                /* validate the variables */

                if (pulled_entry->GPs_pa == 0 ||
                    pulled_entry->openclose_args_pa == 0 ||
                    pulled_entry->chctl_amo_pa == 0) {

                        dev_err(xpc_chan, "partition %d's XPC vars_part for "
                                "partition %d are not valid\n", partid,
                                sn_partition_id);
                        return xpInvalidAddress;
                }

                /* the variables we imported look to be valid */

                part_sn2->remote_GPs_pa = pulled_entry->GPs_pa;
                part_sn2->remote_openclose_args_pa =
                    pulled_entry->openclose_args_pa;
                part_sn2->remote_chctl_amo_va =
                    (struct amo *)__va(pulled_entry->chctl_amo_pa);
                part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid;
                part_sn2->notify_IRQ_phys_cpuid =
                    pulled_entry->notify_IRQ_phys_cpuid;

                if (part->nchannels > pulled_entry->nchannels)
                        part->nchannels = pulled_entry->nchannels;

                /* let the other side know that we've pulled their variables */

                xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
        }

        if (pulled_entry->magic == XPC_VP_MAGIC1)
                return xpRetry;

        return xpSuccess;
}
1436
1437 /*
1438  * Establish first contact with the remote partition. This involves pulling
1439  * the XPC per partition variables from the remote partition and waiting for
1440  * the remote partition to pull ours.
1441  */
1442 static enum xp_retval
1443 xpc_make_first_contact_sn2(struct xpc_partition *part)
1444 {
1445         struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1446         enum xp_retval ret;
1447
1448         /*
1449          * Register the remote partition's amos with SAL so it can handle
1450          * and cleanup errors within that address range should the remote
1451          * partition go down. We don't unregister this range because it is
1452          * difficult to tell when outstanding writes to the remote partition
1453          * are finished and thus when it is safe to unregister. This should
1454          * not result in wasted space in the SAL xp_addr_region table because
1455          * we should get the same page for remote_amos_page_pa after module
1456          * reloads and system reboots.
1457          */
1458         if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa,
1459                                        PAGE_SIZE, 1) < 0) {
1460                 dev_warn(xpc_part, "xpc_activating(%d) failed to register "
1461                          "xp_addr region\n", XPC_PARTID(part));
1462
1463                 ret = xpPhysAddrRegFailed;
1464                 XPC_DEACTIVATE_PARTITION(part, ret);
1465                 return ret;
1466         }
1467
1468         /*
1469          * Send activate IRQ to get other side to activate if they've not
1470          * already begun to do so.
1471          */
1472         xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa,
1473                                   cnodeid_to_nasid(0),
1474                                   part_sn2->activate_IRQ_nasid,
1475                                   part_sn2->activate_IRQ_phys_cpuid);
1476
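        /*
         * Poll until the remote side's vars_part entry can be pulled.
         * xpRetry means the remote partition hasn't finished setting its
         * entry up yet, so back off for roughly a quarter of a second and
         * try again; any other failure deactivates the partition.
         */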
1477         while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) {
1478                 if (ret != xpRetry) {
1479                         XPC_DEACTIVATE_PARTITION(part, ret);
1480                         return ret;
1481                 }
1482
1483                 dev_dbg(xpc_part, "waiting to make first contact with "
1484                         "partition %d\n", XPC_PARTID(part));
1485
1486                 /* wait a quarter of a second or so */
1487                 (void)msleep_interruptible(250);
1488
1489                 if (part->act_state == XPC_P_DEACTIVATING)
1490                         return part->reason;
1491         }
1492
1493         return xpSuccess;
1494 }
1495
1496 /*
1497  * Get the chctl flags and pull the openclose args and/or remote GPs as needed.
1498  */
1499 static u64
1500 xpc_get_chctl_all_flags_sn2(struct xpc_partition *part)
1501 {
1502         struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2;
1503         unsigned long irq_flags;
1504         union xpc_channel_ctl_flags chctl;
1505         enum xp_retval ret;
1506
1507         /*
1508          * See if there are any chctl flags to be handled.
1509          */
1510
1511         spin_lock_irqsave(&part->chctl_lock, irq_flags);
1512         chctl = part->chctl;
1513         if (chctl.all_flags != 0)
1514                 part->chctl.all_flags = 0;
1515
1516         spin_unlock_irqrestore(&part->chctl_lock, irq_flags);
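        /*
         * The snapshot-and-clear above is done under chctl_lock so that
         * flags set concurrently by the IRQ paths aren't lost; from here on
         * we work from our private copy in 'chctl'.
         */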
1517
1518         if (xpc_any_openclose_chctl_flags_set(&chctl)) {
1519                 ret = xpc_pull_remote_cachelines_sn2(part, part->
1520                                                      remote_openclose_args,
1521                                                      (void *)part_sn2->
1522                                                      remote_openclose_args_pa,
1523                                                      XPC_OPENCLOSE_ARGS_SIZE);
1524                 if (ret != xpSuccess) {
1525                         XPC_DEACTIVATE_PARTITION(part, ret);
1526
1527                         dev_dbg(xpc_chan, "failed to pull openclose args from "
1528                                 "partition %d, ret=%d\n", XPC_PARTID(part),
1529                                 ret);
1530
1531                         /* don't bother processing chctl flags anymore */
1532                         chctl.all_flags = 0;
1533                 }
1534         }
1535
1536         if (xpc_any_msg_chctl_flags_set(&chctl)) {
1537                 ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs,
1538                                                 (void *)part_sn2->remote_GPs_pa,
1539                                                      XPC_GP_SIZE);
1540                 if (ret != xpSuccess) {
1541                         XPC_DEACTIVATE_PARTITION(part, ret);
1542
1543                         dev_dbg(xpc_chan, "failed to pull GPs from partition "
1544                                 "%d, ret=%d\n", XPC_PARTID(part), ret);
1545
1546                         /* don't bother processing chctl flags anymore */
1547                         chctl.all_flags = 0;
1548                 }
1549         }
1550
1551         return chctl.all_flags;
1552 }
1553
1554 /*
1555  * Allocate the local message queue and the notify queue.
1556  */
1557 static enum xp_retval
1558 xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch)
1559 {
1560         unsigned long irq_flags;
1561         int nentries;
1562         size_t nbytes;
1563
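        /*
         * If a queue of the requested size can't be allocated, retry with
         * progressively fewer entries, trading queue depth for a successful
         * setup; the final size is recorded back into ch->local_nentries
         * under ch->lock.
         */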
1564         for (nentries = ch->local_nentries; nentries > 0; nentries--) {
1565
1566                 nbytes = nentries * ch->msg_size;
1567                 ch->local_msgqueue =
1568                     xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
1569                                                       &ch->local_msgqueue_base);
1570                 if (ch->local_msgqueue == NULL)
1571                         continue;
1572
1573                 nbytes = nentries * sizeof(struct xpc_notify);
1574                 ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
1575                 if (ch->notify_queue == NULL) {
1576                         kfree(ch->local_msgqueue_base);
1577                         ch->local_msgqueue = NULL;
1578                         continue;
1579                 }
1580
1581                 spin_lock_irqsave(&ch->lock, irq_flags);
1582                 if (nentries < ch->local_nentries) {
1583                         dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, "
1584                                 "partid=%d, channel=%d\n", nentries,
1585                                 ch->local_nentries, ch->partid, ch->number);
1586
1587                         ch->local_nentries = nentries;
1588                 }
1589                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1590                 return xpSuccess;
1591         }
1592
1593         dev_dbg(xpc_chan, "can't get memory for local message queue and notify "
1594                 "queue, partid=%d, channel=%d\n", ch->partid, ch->number);
1595         return xpNoMemory;
1596 }
1597
1598 /*
1599  * Allocate the cached remote message queue.
1600  */
1601 static enum xp_retval
1602 xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch)
1603 {
1604         unsigned long irq_flags;
1605         int nentries;
1606         size_t nbytes;
1607
1608         DBUG_ON(ch->remote_nentries <= 0);
1609
1610         for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
1611
1612                 nbytes = nentries * ch->msg_size;
1613                 ch->remote_msgqueue =
1614                     xpc_kzalloc_cacheline_aligned_sn2(nbytes, GFP_KERNEL,
1615                                                      &ch->remote_msgqueue_base);
1616                 if (ch->remote_msgqueue == NULL)
1617                         continue;
1618
1619                 spin_lock_irqsave(&ch->lock, irq_flags);
1620                 if (nentries < ch->remote_nentries) {
1621                         dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, "
1622                                 "partid=%d, channel=%d\n", nentries,
1623                                 ch->remote_nentries, ch->partid, ch->number);
1624
1625                         ch->remote_nentries = nentries;
1626                 }
1627                 spin_unlock_irqrestore(&ch->lock, irq_flags);
1628                 return xpSuccess;
1629         }
1630
1631         dev_dbg(xpc_chan, "can't get memory for cached remote message queue, "
1632                 "partid=%d, channel=%d\n", ch->partid, ch->number);
1633         return xpNoMemory;
1634 }
1635
1636 /*
1637  * Allocate message queues and other stuff associated with a channel.
1638  *
1639  * Note: Assumes all of the channel sizes are filled in.
1640  */
1641 static enum xp_retval
1642 xpc_allocate_msgqueues_sn2(struct xpc_channel *ch)
1643 {
1644         enum xp_retval ret;
1645
1646         DBUG_ON(ch->flags & XPC_C_SETUP);
1647
1648         ret = xpc_allocate_local_msgqueue_sn2(ch);
1649         if (ret == xpSuccess) {
1650
1651                 ret = xpc_allocate_remote_msgqueue_sn2(ch);
1652                 if (ret != xpSuccess) {
1653                         kfree(ch->local_msgqueue_base);
1654                         ch->local_msgqueue = NULL;
1655                         kfree(ch->notify_queue);
1656                         ch->notify_queue = NULL;
1657                 }
1658         }
1659         return ret;
1660 }
1661
1662 /*
1663  * Free up message queues and other stuff that were allocated for the specified
1664  * channel.
1665  *
1666  * Note: ch->reason and ch->reason_line are left set for debugging purposes;
1667  * they're cleared when XPC_C_DISCONNECTED is cleared.
1668  */
1669 static void
1670 xpc_free_msgqueues_sn2(struct xpc_channel *ch)
1671 {
1672         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1673
1674         DBUG_ON(!spin_is_locked(&ch->lock));
1675         DBUG_ON(atomic_read(&ch->n_to_notify) != 0);
1676
1677         ch->remote_msgqueue_pa = 0;
1678         ch->func = NULL;
1679         ch->key = NULL;
1680         ch->msg_size = 0;
1681         ch->local_nentries = 0;
1682         ch->remote_nentries = 0;
1683         ch->kthreads_assigned_limit = 0;
1684         ch->kthreads_idle_limit = 0;
1685
1686         ch_sn2->local_GP->get = 0;
1687         ch_sn2->local_GP->put = 0;
1688         ch_sn2->remote_GP.get = 0;
1689         ch_sn2->remote_GP.put = 0;
1690         ch_sn2->w_local_GP.get = 0;
1691         ch_sn2->w_local_GP.put = 0;
1692         ch_sn2->w_remote_GP.get = 0;
1693         ch_sn2->w_remote_GP.put = 0;
1694         ch_sn2->next_msg_to_pull = 0;
1695
1696         if (ch->flags & XPC_C_SETUP) {
1697                 dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n",
1698                         ch->flags, ch->partid, ch->number);
1699
1700                 kfree(ch->local_msgqueue_base);
1701                 ch->local_msgqueue = NULL;
1702                 kfree(ch->remote_msgqueue_base);
1703                 ch->remote_msgqueue = NULL;
1704                 kfree(ch->notify_queue);
1705                 ch->notify_queue = NULL;
1706         }
1707 }
1708
1709 /*
1710  * Notify those who wanted to be notified upon delivery of their message.
1711  */
1712 static void
1713 xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put)
1714 {
1715         struct xpc_notify *notify;
1716         u8 notify_type;
1717         s64 get = ch->sn.sn2.w_remote_GP.get - 1;
1718
1719         while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
1720
1721                 notify = &ch->notify_queue[get % ch->local_nentries];
1722
1723                 /*
1724                  * See if the notify entry indicates it was associated with
1725                  * a message whose sender wants to be notified. It is possible
1726                  * that it is, but someone else is doing or has done the
1727                  * notification.
1728                  */
1729                 notify_type = notify->type;
1730                 if (notify_type == 0 ||
1731                     cmpxchg(&notify->type, notify_type, 0) != notify_type) {
1732                         continue;
1733                 }
1734
1735                 DBUG_ON(notify_type != XPC_N_CALL);
1736
1737                 atomic_dec(&ch->n_to_notify);
1738
1739                 if (notify->func != NULL) {
1740                         dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
1741                                 "msg_number=%ld, partid=%d, channel=%d\n",
1742                                 (void *)notify, get, ch->partid, ch->number);
1743
1744                         notify->func(reason, ch->partid, ch->number,
1745                                      notify->key);
1746
1747                         dev_dbg(xpc_chan, "notify->func() returned, "
1748                                 "notify=0x%p, msg_number=%ld, partid=%d, "
1749                                 "channel=%d\n", (void *)notify, get,
1750                                 ch->partid, ch->number);
1751                 }
1752         }
1753 }
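/*
 * The notify entries above are claimed lock-free: whichever path first
 * cmpxchg()es notify->type from its current value down to 0 owns the
 * callback, so each sender is notified exactly once even when this routine
 * races with xpc_disconnect_channel(). A minimal sketch of the idiom
 * (illustrative only; 'entry' and 'run_callback' are hypothetical):
 *
 *	u8 type = entry->type;
 *
 *	if (type != 0 && cmpxchg(&entry->type, type, 0) == type)
 *		run_callback(entry);	(we won the race)
 */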
1754
1755 static void
1756 xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch)
1757 {
1758         xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put);
1759 }
1760
1761 /*
1762  * Clear some of the msg flags in the local message queue.
1763  */
1764 static inline void
1765 xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch)
1766 {
1767         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1768         struct xpc_msg *msg;
1769         s64 get;
1770
1771         get = ch_sn2->w_remote_GP.get;
1772         do {
1773                 msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
1774                                          (get % ch->local_nentries) *
1775                                          ch->msg_size);
1776                 msg->flags = 0;
1777         } while (++get < ch_sn2->remote_GP.get);
1778 }
1779
1780 /*
1781  * Clear some of the msg flags in the remote message queue.
1782  */
1783 static inline void
1784 xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
1785 {
1786         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1787         struct xpc_msg *msg;
1788         s64 put;
1789
1790         put = ch_sn2->w_remote_GP.put;
1791         do {
1792                 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
1793                                          (put % ch->remote_nentries) *
1794                                          ch->msg_size);
1795                 msg->flags = 0;
1796         } while (++put < ch_sn2->remote_GP.put);
1797 }
1798
1799 static void
1800 xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number)
1801 {
1802         struct xpc_channel *ch = &part->channels[ch_number];
1803         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1804         int nmsgs_sent;
1805
1806         ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number];
1807
1808         /* See what, if anything, has changed for each connected channel */
1809
1810         xpc_msgqueue_ref(ch);
1811
1812         if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get &&
1813             ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) {
1814                 /* nothing changed since GPs were last pulled */
1815                 xpc_msgqueue_deref(ch);
1816                 return;
1817         }
1818
1819         if (!(ch->flags & XPC_C_CONNECTED)) {
1820                 xpc_msgqueue_deref(ch);
1821                 return;
1822         }
1823
1824         /*
1825          * First check to see if messages recently sent by us have been
1826          * received by the other side. (The remote GET value will have
1827          * changed since we last looked at it.)
1828          */
1829
1830         if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) {
1831
1832                 /*
1833                  * We need to notify any senders that want to be notified
1834                  * that their sent messages have been received by their
1835                  * intended recipients. We need to do this before updating
1836                  * w_remote_GP.get so that we don't allocate the same message
1837                  * queue entries prematurely (see xpc_allocate_msg()).
1838                  */
1839                 if (atomic_read(&ch->n_to_notify) > 0) {
1840                         /*
1841                          * Notify senders that messages sent have been
1842                          * received and delivered by the other side.
1843                          */
1844                         xpc_notify_senders_sn2(ch, xpMsgDelivered,
1845                                                ch_sn2->remote_GP.get);
1846                 }
1847
1848                 /*
1849                  * Clear msg->flags in previously sent messages, so that
1850                  * they're ready for xpc_allocate_msg().
1851                  */
1852                 xpc_clear_local_msgqueue_flags_sn2(ch);
1853
1854                 ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get;
1855
1856                 dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
1857                         "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid,
1858                         ch->number);
1859
1860                 /*
1861                  * If anyone was waiting for message queue entries to become
1862                  * available, wake them up.
1863                  */
1864                 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0)
1865                         wake_up(&ch->msg_allocate_wq);
1866         }
1867
1868         /*
1869          * Now check for newly sent messages by the other side. (The remote
1870          * PUT value will have changed since we last looked at it.)
1871          */
1872
1873         if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) {
1874                 /*
1875                  * Clear msg->flags in previously received messages, so that
1876                  * they're ready for xpc_get_deliverable_msg().
1877                  */
1878                 xpc_clear_remote_msgqueue_flags_sn2(ch);
1879
1880                 ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put;
1881
1882                 dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
1883                         "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid,
1884                         ch->number);
1885
1886                 nmsgs_sent = ch_sn2->w_remote_GP.put - ch_sn2->w_local_GP.get;
1887                 if (nmsgs_sent > 0) {
1888                         dev_dbg(xpc_chan, "msgs waiting to be copied and "
1889                                 "delivered=%d, partid=%d, channel=%d\n",
1890                                 nmsgs_sent, ch->partid, ch->number);
1891
1892                         if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)
1893                                 xpc_activate_kthreads(ch, nmsgs_sent);
1894                 }
1895         }
1896
1897         xpc_msgqueue_deref(ch);
1898 }
1899
1900 static struct xpc_msg *
1901 xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get)
1902 {
1903         struct xpc_partition *part = &xpc_partitions[ch->partid];
1904         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1905         struct xpc_msg *remote_msg, *msg;
1906         u32 msg_index, nmsgs;
1907         u64 msg_offset;
1908         enum xp_retval ret;
1909
1910         if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) {
1911                 /* we were interrupted by a signal */
1912                 return NULL;
1913         }
1914
1915         while (get >= ch_sn2->next_msg_to_pull) {
1916
1917                 /* pull as many messages as are ready and able to be pulled */
1918
1919                 msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries;
1920
1921                 DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put);
1922                 nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull;
1923                 if (msg_index + nmsgs > ch->remote_nentries) {
1924                         /* ignore the ones that wrap the msg queue for now */
1925                         nmsgs = ch->remote_nentries - msg_index;
1926                 }
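                /*
                 * A wrapped batch is pulled in two passes: this iteration
                 * stops at the end of the message queue, and the next pass
                 * of the loop starts over at msg_index 0.
                 */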
1927
1928                 msg_offset = msg_index * ch->msg_size;
1929                 msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
1930                 remote_msg = (struct xpc_msg *)(ch->remote_msgqueue_pa +
1931                                                 msg_offset);
1932
1933                 ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg,
1934                                                      nmsgs * ch->msg_size);
1935                 if (ret != xpSuccess) {
1936
1937                         dev_dbg(xpc_chan, "failed to pull %d msgs starting with"
1938                                 " msg %ld from partition %d, channel=%d, "
1939                                 "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull,
1940                                 ch->partid, ch->number, ret);
1941
1942                         XPC_DEACTIVATE_PARTITION(part, ret);
1943
1944                         mutex_unlock(&ch_sn2->msg_to_pull_mutex);
1945                         return NULL;
1946                 }
1947
1948                 ch_sn2->next_msg_to_pull += nmsgs;
1949         }
1950
1951         mutex_unlock(&ch_sn2->msg_to_pull_mutex);
1952
1953         /* return the message we were looking for */
1954         msg_offset = (get % ch->remote_nentries) * ch->msg_size;
1955         msg = (struct xpc_msg *)((u64)ch->remote_msgqueue + msg_offset);
1956
1957         return msg;
1958 }
1959
1960 static int
1961 xpc_n_of_deliverable_msgs_sn2(struct xpc_channel *ch)
1962 {
1963         return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get;
1964 }
1965
1966 /*
1967  * Get a message to be delivered.
1968  */
1969 static struct xpc_msg *
1970 xpc_get_deliverable_msg_sn2(struct xpc_channel *ch)
1971 {
1972         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
1973         struct xpc_msg *msg = NULL;
1974         s64 get;
1975
1976         do {
1977                 if (ch->flags & XPC_C_DISCONNECTING)
1978                         break;
1979
1980                 get = ch_sn2->w_local_GP.get;
1981                 rmb();  /* guarantee that .get loads before .put */
1982                 if (get == ch_sn2->w_remote_GP.put)
1983                         break;
1984
1985                 /* There are messages waiting to be pulled and delivered.
1986                  * We need to try to secure one for ourselves. We'll do this
1987                  * by trying to increment w_local_GP.get and hope that no one
1988                  * else beats us to it. If they do, we'll simply have
1989                  * to try again for the next one.
1990                  */
1991
1992                 if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) {
1993                         /* we got the entry referenced by get */
1994
1995                         dev_dbg(xpc_chan, "w_local_GP.get changed to %ld, "
1996                                 "partid=%d, channel=%d\n", get + 1,
1997                                 ch->partid, ch->number);
1998
1999                         /* pull the message from the remote partition */
2000
2001                         msg = xpc_pull_remote_msg_sn2(ch, get);
2002
2003                         DBUG_ON(msg != NULL && msg->number != get);
2004                         DBUG_ON(msg != NULL && (msg->flags & XPC_M_DONE));
2005                         DBUG_ON(msg != NULL && !(msg->flags & XPC_M_READY));
2006
2007                         break;
2008                 }
2009
2010         } while (1);
2011
2012         return msg;
2013 }
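/*
 * Both the consumer above and the producer (xpc_allocate_msg_sn2) reserve
 * queue slots the same lock-free way: read the cursor, then attempt
 * cmpxchg(cursor, old, old + 1); only the caller whose cmpxchg returns the
 * old value owns slot 'old'. A sketch of the idiom (hypothetical names;
 * the availability checks made by the real code are omitted):
 *
 *	s64 slot;
 *
 *	do {
 *		slot = q->cursor;
 *	} while (cmpxchg(&q->cursor, slot, slot + 1) != slot);
 *	(slot is now exclusively ours)
 */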
2014
2015 /*
2016  * Now we actually send the messages that are ready to be sent by advancing
2017  * the local message queue's Put value and then sending a chctl msgrequest to the
2018  * recipient partition.
2019  */
2020 static void
2021 xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put)
2022 {
2023         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2024         struct xpc_msg *msg;
2025         s64 put = initial_put + 1;
2026         int send_msgrequest = 0;
2027
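        /*
         * Advance put past every contiguous message already marked
         * XPC_M_READY; we must stop at the first gap so the other side
         * never sees a Put value that covers an unready slot.
         */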
2028         while (1) {
2029
2030                 while (1) {
2031                         if (put == ch_sn2->w_local_GP.put)
2032                                 break;
2033
2034                         msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
2035                                                  (put % ch->local_nentries) *
2036                                                  ch->msg_size);
2037
2038                         if (!(msg->flags & XPC_M_READY))
2039                                 break;
2040
2041                         put++;
2042                 }
2043
2044                 if (put == initial_put) {
2045                         /* nothing's changed */
2046                         break;
2047                 }
2048
2049                 if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) !=
2050                     initial_put) {
2051                         /* someone else beat us to it */
2052                         DBUG_ON(ch_sn2->local_GP->put < initial_put);
2053                         break;
2054                 }
2055
2056                 /* we just set the new value of local_GP->put */
2057
2058                 dev_dbg(xpc_chan, "local_GP->put changed to %ld, partid=%d, "
2059                         "channel=%d\n", put, ch->partid, ch->number);
2060
2061                 send_msgrequest = 1;
2062
2063                 /*
2064                  * We need to ensure that the message referenced by
2065                  * local_GP->put is not XPC_M_READY or that local_GP->put
2066                  * equals w_local_GP.put, so we'll go have a look.
2067                  */
2068                 initial_put = put;
2069         }
2070
2071         if (send_msgrequest)
2072                 xpc_send_chctl_msgrequest_sn2(ch);
2073 }
2074
2075 /*
2076  * Allocate an entry for a message from the message queue associated with the
2077  * specified channel.
2078  */
2079 static enum xp_retval
2080 xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags,
2081                      struct xpc_msg **address_of_msg)
2082 {
2083         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2084         struct xpc_msg *msg;
2085         enum xp_retval ret;
2086         s64 put;
2087
2088         /*
2089          * Get the next available message entry from the local message queue.
2090          * If none are available, we'll make sure that we grab the latest
2091          * GP values.
2092          */
2093         ret = xpTimeout;
2094
2095         while (1) {
2096
2097                 put = ch_sn2->w_local_GP.put;
2098                 rmb();  /* guarantee that .put loads before .get */
2099                 if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) {
2100
2101                         /* There are available message entries. We need to try
2102                          * to secure one for ourselves. We'll do this by trying
2103                          * to increment w_local_GP.put as long as someone else
2104                          * doesn't beat us to it. If they do, we'll have to
2105                          * try again.
2106                          */
2107                         if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) ==
2108                             put) {
2109                                 /* we got the entry referenced by put */
2110                                 break;
2111                         }
2112                         continue;       /* try again */
2113                 }
2114
2115                 /*
2116                  * There aren't any available msg entries at this time.
2117                  *
2118                  * In waiting for a message entry to become available,
2119                  * we set a timeout in case the other side is not sending
2120                  * completion interrupts. This lets us fake a notify IRQ
2121                  * that will cause the notify IRQ handler to fetch the latest
2122                  * GP values as if an interrupt was sent by the other side.
2123                  */
2124                 if (ret == xpTimeout)
2125                         xpc_send_chctl_local_msgrequest_sn2(ch);
2126
2127                 if (flags & XPC_NOWAIT)
2128                         return xpNoWait;
2129
2130                 ret = xpc_allocate_msg_wait(ch);
2131                 if (ret != xpInterrupted && ret != xpTimeout)
2132                         return ret;
2133         }
2134
2135         /* get the message's address and initialize it */
2136         msg = (struct xpc_msg *)((u64)ch->local_msgqueue +
2137                                  (put % ch->local_nentries) * ch->msg_size);
2138
2139         DBUG_ON(msg->flags != 0);
2140         msg->number = put;
2141
2142         dev_dbg(xpc_chan, "w_local_GP.put changed to %ld; msg=0x%p, "
2143                 "msg_number=%ld, partid=%d, channel=%d\n", put + 1,
2144                 (void *)msg, msg->number, ch->partid, ch->number);
2145
2146         *address_of_msg = msg;
2147         return xpSuccess;
2148 }
2149
2150 /*
2151  * Common code that does the actual sending of the message by advancing the
2152  * local message queue's Put value and sending a chctl msgrequest to the
2153  * partition the message is being sent to.
2154  */
2155 static enum xp_retval
2156 xpc_send_msg_sn2(struct xpc_channel *ch, u32 flags, void *payload,
2157                  u16 payload_size, u8 notify_type, xpc_notify_func func,
2158                  void *key)
2159 {
2160         enum xp_retval ret = xpSuccess;
2161         struct xpc_msg *msg = msg;
2162         struct xpc_notify *notify = notify;
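        /*
         * The self-initializations above ('msg = msg', 'notify = notify')
         * only silence gcc's "may be used uninitialized" warnings; both
         * variables are always assigned before use.
         */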
2163         s64 msg_number;
2164         s64 put;
2165
2166         DBUG_ON(notify_type == XPC_N_CALL && func == NULL);
2167
2168         if (XPC_MSG_SIZE(payload_size) > ch->msg_size)
2169                 return xpPayloadTooBig;
2170
2171         xpc_msgqueue_ref(ch);
2172
2173         if (ch->flags & XPC_C_DISCONNECTING) {
2174                 ret = ch->reason;
2175                 goto out_1;
2176         }
2177         if (!(ch->flags & XPC_C_CONNECTED)) {
2178                 ret = xpNotConnected;
2179                 goto out_1;
2180         }
2181
2182         ret = xpc_allocate_msg_sn2(ch, flags, &msg);
2183         if (ret != xpSuccess)
2184                 goto out_1;
2185
2186         msg_number = msg->number;
2187
2188         if (notify_type != 0) {
2189                 /*
2190                  * Tell the remote side to send an ACK interrupt when the
2191                  * message has been delivered.
2192                  */
2193                 msg->flags |= XPC_M_INTERRUPT;
2194
2195                 atomic_inc(&ch->n_to_notify);
2196
2197                 notify = &ch->notify_queue[msg_number % ch->local_nentries];
2198                 notify->func = func;
2199                 notify->key = key;
2200                 notify->type = notify_type;
2201
2202                 /* >>> is a mb() needed here? */
2203
2204                 if (ch->flags & XPC_C_DISCONNECTING) {
2205                         /*
2206                          * An error occurred between our last error check and
2207                          * this one. We will try to clear the type field from
2208                          * the notify entry. If we succeed then
2209                          * xpc_disconnect_channel() didn't already process
2210                          * the notify entry.
2211                          */
2212                         if (cmpxchg(&notify->type, notify_type, 0) ==
2213                             notify_type) {
2214                                 atomic_dec(&ch->n_to_notify);
2215                                 ret = ch->reason;
2216                         }
2217                         goto out_1;
2218                 }
2219         }
2220
2221         memcpy(&msg->payload, payload, payload_size);
2222
2223         msg->flags |= XPC_M_READY;
2224
2225         /*
2226          * The preceding store of msg->flags must occur before the following
2227          * load of local_GP->put.
2228          */
2229         mb();
2230
2231         /* see if the message is next in line to be sent, if so send it */
2232
2233         put = ch->sn.sn2.local_GP->put;
2234         if (put == msg_number)
2235                 xpc_send_msgs_sn2(ch, put);
2236
2237 out_1:
2238         xpc_msgqueue_deref(ch);
2239         return ret;
2240 }
2241
2242 /*
2243  * Now we actually acknowledge the messages that have been delivered and ack'd
2244  * by advancing the cached remote message queue's Get value and, if requested,
2245  * sending a chctl msgrequest to the message sender's partition.
2246  */
2247 static void
2248 xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags)
2249 {
2250         struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
2251         struct xpc_msg *msg;
2252         s64 get = initial_get + 1;
2253         int send_msgrequest = 0;
2254
2255         while (1) {
2256
2257                 while (1) {
2258                         if (get == ch_sn2->w_local_GP.get)
2259                                 break;
2260
2261                         msg = (struct xpc_msg *)((u64)ch->remote_msgqueue +
2262                                                  (get % ch->remote_nentries) *
2263                                                  ch->msg_size);
2264
2265                         if (!(msg->flags & XPC_M_DONE))
2266                                 break;
2267
2268                         msg_flags |= msg->flags;
2269                         get++;
2270                 }
2271
2272                 if (get == initial_get) {
2273                         /* nothing's changed */
2274                         break;
2275                 }
2276
2277                 if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) !=
2278                     initial_get) {
2279                         /* someone else beat us to it */
2280                         DBUG_ON(ch_sn2->local_GP->get <= initial_get);
2281                         break;
2282                 }
2283
2284                 /* we just set the new value of local_GP->get */
2285
2286                 dev_dbg(xpc_chan, "local_GP->get changed to %ld, partid=%d, "
2287                         "channel=%d\n", get, ch->partid, ch->number);
2288
2289                 send_msgrequest = (msg_flags & XPC_M_INTERRUPT);
2290
2291                 /*
2292                  * We need to ensure that the message referenced by
2293                  * local_GP->get is not XPC_M_DONE or that local_GP->get
2294                  * equals w_local_GP.get, so we'll go have a look.
2295                  */
2296                 initial_get = get;
2297         }
2298
2299         if (send_msgrequest)
2300                 xpc_send_chctl_msgrequest_sn2(ch);
2301 }
2302
2303 static void
2304 xpc_received_msg_sn2(struct xpc_channel *ch, struct xpc_msg *msg)
2305 {
2306         s64 get;
2307         s64 msg_number = msg->number;
2308
2309         dev_dbg(xpc_chan, "msg=0x%p, msg_number=%ld, partid=%d, channel=%d\n",
2310                 (void *)msg, msg_number, ch->partid, ch->number);
2311
2312         DBUG_ON((((u64)msg - (u64)ch->remote_msgqueue) / ch->msg_size) !=
2313                 msg_number % ch->remote_nentries);
2314         DBUG_ON(msg->flags & XPC_M_DONE);
2315
2316         msg->flags |= XPC_M_DONE;
2317
2318         /*
2319          * The preceding store of msg->flags must occur before the following
2320          * load of local_GP->get.
2321          */
2322         mb();
2323
2324         /*
2325          * See if this message is next in line to be acknowledged as having
2326          * been delivered.
2327          */
2328         get = ch->sn.sn2.local_GP->get;
2329         if (get == msg_number)
2330                 xpc_acknowledge_msgs_sn2(ch, get, msg->flags);
2331 }
2332
2333 int
2334 xpc_init_sn2(void)
2335 {
2336         int ret;
2337
2338         xpc_rsvd_page_init = xpc_rsvd_page_init_sn2;
2339         xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
2340         xpc_offline_heartbeat = xpc_offline_heartbeat_sn2;
2341         xpc_online_heartbeat = xpc_online_heartbeat_sn2;
2342         xpc_heartbeat_init = xpc_heartbeat_init_sn2;
2343         xpc_heartbeat_exit = xpc_heartbeat_exit_sn2;
2344         xpc_check_remote_hb = xpc_check_remote_hb_sn2;
2345
2346         xpc_request_partition_activation = xpc_request_partition_activation_sn2;
2347         xpc_request_partition_reactivation =
2348             xpc_request_partition_reactivation_sn2;
2349         xpc_request_partition_deactivation =
2350             xpc_request_partition_deactivation_sn2;
2351         xpc_cancel_partition_deactivation_request =
2352             xpc_cancel_partition_deactivation_request_sn2;
2353
2354         xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2;
2355         xpc_setup_infrastructure = xpc_setup_infrastructure_sn2;
2356         xpc_teardown_infrastructure = xpc_teardown_infrastructure_sn2;
2357         xpc_make_first_contact = xpc_make_first_contact_sn2;
2358         xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2;
2359         xpc_allocate_msgqueues = xpc_allocate_msgqueues_sn2;
2360         xpc_free_msgqueues = xpc_free_msgqueues_sn2;
2361         xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2;
2362         xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2;
2363         xpc_n_of_deliverable_msgs = xpc_n_of_deliverable_msgs_sn2;
2364         xpc_get_deliverable_msg = xpc_get_deliverable_msg_sn2;
2365
2366         xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2;
2367         xpc_partition_engaged = xpc_partition_engaged_sn2;
2368         xpc_any_partition_engaged = xpc_any_partition_engaged_sn2;
2369         xpc_indicate_partition_disengaged =
2370             xpc_indicate_partition_disengaged_sn2;
2371         xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2;
2372
2373         xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2;
2374         xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2;
2375         xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2;
2376         xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2;
2377
2378         xpc_send_msg = xpc_send_msg_sn2;
2379         xpc_received_msg = xpc_received_msg_sn2;
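        /*
         * The assignments above populate XPC's architecture dispatch table:
         * the common XPC code calls through these global function pointers,
         * so only this init routine needs to know about the sn2-specific
         * implementations.
         */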
2380
2381         /* open up protections for IPI and [potentially] amo operations */
2382         xpc_allow_IPI_ops_sn2();
2383         xpc_allow_amo_ops_shub_wars_1_1_sn2();
2384
2385         /*
2386          * This is safe to do before the xpc_hb_checker thread has started
2387          * because the handler releases a wait queue.  If an interrupt is
2388          * received before the thread is waiting, it will not go to sleep,
2389          * but rather immediately process the interrupt.
2390          */
2391         ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0,
2392                           "xpc hb", NULL);
2393         if (ret != 0) {
2394                 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
2395                         "errno=%d\n", -ret);
2396                 xpc_disallow_IPI_ops_sn2();
2397         }
2398         return ret;
2399 }
2400
2401 void
2402 xpc_exit_sn2(void)
2403 {
2404         free_irq(SGI_XPC_ACTIVATE, NULL);
2405         xpc_disallow_IPI_ops_sn2();
2406 }