Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux...
[linux-2.6] / drivers / misc / sgi-xp / xpc_partition.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9 /*
10  * Cross Partition Communication (XPC) partition support.
11  *
12  *      This is the part of XPC that detects the presence/absence of
13  *      other partitions. It provides a heartbeat and monitors the
14  *      heartbeats of other partitions.
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/sysctl.h>
20 #include <linux/cache.h>
21 #include <linux/mmzone.h>
22 #include <linux/nodemask.h>
23 #include <asm/uncached.h>
24 #include <asm/sn/bte.h>
25 #include <asm/sn/intr.h>
26 #include <asm/sn/sn_sal.h>
27 #include <asm/sn/nodepda.h>
28 #include <asm/sn/addrs.h>
29 #include "xpc.h"
30
31 /* XPC is exiting flag */
32 int xpc_exiting;
33
34 /* SH_IPI_ACCESS shub register value on startup */
35 static u64 xpc_sh1_IPI_access;
36 static u64 xpc_sh2_IPI_access0;
37 static u64 xpc_sh2_IPI_access1;
38 static u64 xpc_sh2_IPI_access2;
39 static u64 xpc_sh2_IPI_access3;
40
41 /* original protection values for each node */
42 u64 xpc_prot_vec[MAX_NUMNODES];
43
44 /* this partition's reserved page pointers */
45 struct xpc_rsvd_page *xpc_rsvd_page;
46 static u64 *xpc_part_nasids;
47 static u64 *xpc_mach_nasids;
48 struct xpc_vars *xpc_vars;
49 struct xpc_vars_part *xpc_vars_part;
50
51 static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
52 static int xp_nasid_mask_words; /* actual size in words of nasid mask */
53
54 /*
55  * For performance reasons, each entry of xpc_partitions[] is cacheline
56  * aligned. And xpc_partitions[] is padded with an additional entry at the
57  * end so that the last legitimate entry doesn't share its cacheline with
58  * another variable.
59  */
60 struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
61
62 /*
63  * Generic buffer used to store a local copy of portions of a remote
64  * partition's reserved page (either its header and part_nasids mask,
65  * or its vars).
66  */
67 char *xpc_remote_copy_buffer;
68 void *xpc_remote_copy_buffer_base;
69
70 /*
71  * Guarantee that the kmalloc'd memory is cacheline aligned.
72  */
73 void *
74 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
75 {
76         /* see if kmalloc will give us cachline aligned memory by default */
77         *base = kmalloc(size, flags);
78         if (*base == NULL)
79                 return NULL;
80
81         if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
82                 return *base;
83
84         kfree(*base);
85
86         /* nope, we'll have to do it ourselves */
87         *base = kmalloc(size + L1_CACHE_BYTES, flags);
88         if (*base == NULL)
89                 return NULL;
90
91         return (void *)L1_CACHE_ALIGN((u64)*base);
92 }
93
94 /*
95  * Given a nasid, get the physical address of the  partition's reserved page
96  * for that nasid. This function returns 0 on any error.
97  */
98 static u64
99 xpc_get_rsvd_page_pa(int nasid)
100 {
101         bte_result_t bte_res;
102         s64 status;
103         u64 cookie = 0;
104         u64 rp_pa = nasid;      /* seed with nasid */
105         u64 len = 0;
106         u64 buf = buf;
107         u64 buf_len = 0;
108         void *buf_base = NULL;
109
110         while (1) {
111
112                 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
113                                                        &len);
114
115                 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
116                         "0x%016lx, address=0x%016lx, len=0x%016lx\n",
117                         status, cookie, rp_pa, len);
118
119                 if (status != SALRET_MORE_PASSES)
120                         break;
121
122                 if (L1_CACHE_ALIGN(len) > buf_len) {
123                         kfree(buf_base);
124                         buf_len = L1_CACHE_ALIGN(len);
125                         buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
126                                                                  GFP_KERNEL,
127                                                                  &buf_base);
128                         if (buf_base == NULL) {
129                                 dev_err(xpc_part, "unable to kmalloc "
130                                         "len=0x%016lx\n", buf_len);
131                                 status = SALRET_ERROR;
132                                 break;
133                         }
134                 }
135
136                 bte_res = xp_bte_copy(rp_pa, buf, buf_len,
137                                       (BTE_NOTIFY | BTE_WACQUIRE), NULL);
138                 if (bte_res != BTE_SUCCESS) {
139                         dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
140                         status = SALRET_ERROR;
141                         break;
142                 }
143         }
144
145         kfree(buf_base);
146
147         if (status != SALRET_OK)
148                 rp_pa = 0;
149
150         dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
151         return rp_pa;
152 }
153
154 /*
155  * Fill the partition reserved page with the information needed by
156  * other partitions to discover we are alive and establish initial
157  * communications.
158  */
159 struct xpc_rsvd_page *
160 xpc_rsvd_page_init(void)
161 {
162         struct xpc_rsvd_page *rp;
163         AMO_t *amos_page;
164         u64 rp_pa, nasid_array = 0;
165         int i, ret;
166
167         /* get the local reserved page's address */
168
169         preempt_disable();
170         rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
171         preempt_enable();
172         if (rp_pa == 0) {
173                 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
174                 return NULL;
175         }
176         rp = (struct xpc_rsvd_page *)__va(rp_pa);
177
178         if (rp->partid != sn_partition_id) {
179                 dev_err(xpc_part, "the reserved page's partid of %d should be "
180                         "%d\n", rp->partid, sn_partition_id);
181                 return NULL;
182         }
183
184         rp->version = XPC_RP_VERSION;
185
186         /* establish the actual sizes of the nasid masks */
187         if (rp->SAL_version == 1) {
188                 /* SAL_version 1 didn't set the nasids_size field */
189                 rp->nasids_size = 128;
190         }
191         xp_nasid_mask_bytes = rp->nasids_size;
192         xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
193
194         /* setup the pointers to the various items in the reserved page */
195         xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
196         xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
197         xpc_vars = XPC_RP_VARS(rp);
198         xpc_vars_part = XPC_RP_VARS_PART(rp);
199
200         /*
201          * Before clearing xpc_vars, see if a page of AMOs had been previously
202          * allocated. If not we'll need to allocate one and set permissions
203          * so that cross-partition AMOs are allowed.
204          *
205          * The allocated AMO page needs MCA reporting to remain disabled after
206          * XPC has unloaded.  To make this work, we keep a copy of the pointer
207          * to this page (i.e., amos_page) in the struct xpc_vars structure,
208          * which is pointed to by the reserved page, and re-use that saved copy
209          * on subsequent loads of XPC. This AMO page is never freed, and its
210          * memory protections are never restricted.
211          */
212         amos_page = xpc_vars->amos_page;
213         if (amos_page == NULL) {
214                 amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
215                 if (amos_page == NULL) {
216                         dev_err(xpc_part, "can't allocate page of AMOs\n");
217                         return NULL;
218                 }
219
220                 /*
221                  * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
222                  * when xpc_allow_IPI_ops() is called via xpc_hb_init().
223                  */
224                 if (!enable_shub_wars_1_1()) {
225                         ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
226                                                    PAGE_SIZE,
227                                                    SN_MEMPROT_ACCESS_CLASS_1,
228                                                    &nasid_array);
229                         if (ret != 0) {
230                                 dev_err(xpc_part, "can't change memory "
231                                         "protections\n");
232                                 uncached_free_page(__IA64_UNCACHED_OFFSET |
233                                                    TO_PHYS((u64)amos_page), 1);
234                                 return NULL;
235                         }
236                 }
237         } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
238                 /*
239                  * EFI's XPBOOT can also set amos_page in the reserved page,
240                  * but it happens to leave it as an uncached physical address
241                  * and we need it to be an uncached virtual, so we'll have to
242                  * convert it.
243                  */
244                 if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
245                         dev_err(xpc_part, "previously used amos_page address "
246                                 "is bad = 0x%p\n", (void *)amos_page);
247                         return NULL;
248                 }
249                 amos_page = (AMO_t *)TO_AMO((u64)amos_page);
250         }
251
252         /* clear xpc_vars */
253         memset(xpc_vars, 0, sizeof(struct xpc_vars));
254
255         xpc_vars->version = XPC_V_VERSION;
256         xpc_vars->act_nasid = cpuid_to_nasid(0);
257         xpc_vars->act_phys_cpuid = cpu_physical_id(0);
258         xpc_vars->vars_part_pa = __pa(xpc_vars_part);
259         xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
260         xpc_vars->amos_page = amos_page;        /* save for next load of XPC */
261
262         /* clear xpc_vars_part */
263         memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
264                XP_MAX_PARTITIONS);
265
266         /* initialize the activate IRQ related AMO variables */
267         for (i = 0; i < xp_nasid_mask_words; i++)
268                 (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
269
270         /* initialize the engaged remote partitions related AMO variables */
271         (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
272         (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
273
274         /* timestamp of when reserved page was setup by XPC */
275         rp->stamp = CURRENT_TIME;
276
277         /*
278          * This signifies to the remote partition that our reserved
279          * page is initialized.
280          */
281         rp->vars_pa = __pa(xpc_vars);
282
283         return rp;
284 }
285
286 /*
287  * Change protections to allow IPI operations (and AMO operations on
288  * Shub 1.1 systems).
289  */
290 void
291 xpc_allow_IPI_ops(void)
292 {
293         int node;
294         int nasid;
295
296         /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
297
298         if (is_shub2()) {
299                 xpc_sh2_IPI_access0 =
300                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
301                 xpc_sh2_IPI_access1 =
302                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
303                 xpc_sh2_IPI_access2 =
304                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
305                 xpc_sh2_IPI_access3 =
306                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
307
308                 for_each_online_node(node) {
309                         nasid = cnodeid_to_nasid(node);
310                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
311                               -1UL);
312                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
313                               -1UL);
314                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
315                               -1UL);
316                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
317                               -1UL);
318                 }
319
320         } else {
321                 xpc_sh1_IPI_access =
322                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
323
324                 for_each_online_node(node) {
325                         nasid = cnodeid_to_nasid(node);
326                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
327                               -1UL);
328
329                         /*
330                          * Since the BIST collides with memory operations on
331                          * SHUB 1.1 sn_change_memprotect() cannot be used.
332                          */
333                         if (enable_shub_wars_1_1()) {
334                                 /* open up everything */
335                                 xpc_prot_vec[node] = (u64)HUB_L((u64 *)
336                                                                 GLOBAL_MMR_ADDR
337                                                                 (nasid,
338                                                   SH1_MD_DQLP_MMR_DIR_PRIVEC0));
339                                 HUB_S((u64 *)
340                                       GLOBAL_MMR_ADDR(nasid,
341                                                    SH1_MD_DQLP_MMR_DIR_PRIVEC0),
342                                       -1UL);
343                                 HUB_S((u64 *)
344                                       GLOBAL_MMR_ADDR(nasid,
345                                                    SH1_MD_DQRP_MMR_DIR_PRIVEC0),
346                                       -1UL);
347                         }
348                 }
349         }
350 }
351
352 /*
353  * Restrict protections to disallow IPI operations (and AMO operations on
354  * Shub 1.1 systems).
355  */
356 void
357 xpc_restrict_IPI_ops(void)
358 {
359         int node;
360         int nasid;
361
362         /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
363
364         if (is_shub2()) {
365
366                 for_each_online_node(node) {
367                         nasid = cnodeid_to_nasid(node);
368                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
369                               xpc_sh2_IPI_access0);
370                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
371                               xpc_sh2_IPI_access1);
372                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
373                               xpc_sh2_IPI_access2);
374                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
375                               xpc_sh2_IPI_access3);
376                 }
377
378         } else {
379
380                 for_each_online_node(node) {
381                         nasid = cnodeid_to_nasid(node);
382                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
383                               xpc_sh1_IPI_access);
384
385                         if (enable_shub_wars_1_1()) {
386                                 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
387                                                    SH1_MD_DQLP_MMR_DIR_PRIVEC0),
388                                       xpc_prot_vec[node]);
389                                 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
390                                                    SH1_MD_DQRP_MMR_DIR_PRIVEC0),
391                                       xpc_prot_vec[node]);
392                         }
393                 }
394         }
395 }
396
397 /*
398  * At periodic intervals, scan through all active partitions and ensure
399  * their heartbeat is still active.  If not, the partition is deactivated.
400  */
401 void
402 xpc_check_remote_hb(void)
403 {
404         struct xpc_vars *remote_vars;
405         struct xpc_partition *part;
406         partid_t partid;
407         bte_result_t bres;
408
409         remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
410
411         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
412
413                 if (xpc_exiting)
414                         break;
415
416                 if (partid == sn_partition_id)
417                         continue;
418
419                 part = &xpc_partitions[partid];
420
421                 if (part->act_state == XPC_P_INACTIVE ||
422                     part->act_state == XPC_P_DEACTIVATING) {
423                         continue;
424                 }
425
426                 /* pull the remote_hb cache line */
427                 bres = xp_bte_copy(part->remote_vars_pa,
428                                    (u64)remote_vars,
429                                    XPC_RP_VARS_SIZE,
430                                    (BTE_NOTIFY | BTE_WACQUIRE), NULL);
431                 if (bres != BTE_SUCCESS) {
432                         XPC_DEACTIVATE_PARTITION(part,
433                                                  xpc_map_bte_errors(bres));
434                         continue;
435                 }
436
437                 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
438                         " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
439                         partid, remote_vars->heartbeat, part->last_heartbeat,
440                         remote_vars->heartbeat_offline,
441                         remote_vars->heartbeating_to_mask);
442
443                 if (((remote_vars->heartbeat == part->last_heartbeat) &&
444                      (remote_vars->heartbeat_offline == 0)) ||
445                     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
446
447                         XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
448                         continue;
449                 }
450
451                 part->last_heartbeat = remote_vars->heartbeat;
452         }
453 }
454
455 /*
456  * Get a copy of a portion of the remote partition's rsvd page.
457  *
458  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
459  * is large enough to contain a copy of their reserved page header and
460  * part_nasids mask.
461  */
462 static enum xpc_retval
463 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
464                   struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
465 {
466         int bres, i;
467
468         /* get the reserved page's physical address */
469
470         *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
471         if (*remote_rp_pa == 0)
472                 return xpcNoRsvdPageAddr;
473
474         /* pull over the reserved page header and part_nasids mask */
475         bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
476                            XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
477                            (BTE_NOTIFY | BTE_WACQUIRE), NULL);
478         if (bres != BTE_SUCCESS)
479                 return xpc_map_bte_errors(bres);
480
481         if (discovered_nasids != NULL) {
482                 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
483
484                 for (i = 0; i < xp_nasid_mask_words; i++)
485                         discovered_nasids[i] |= remote_part_nasids[i];
486         }
487
488         /* check that the partid is for another partition */
489
490         if (remote_rp->partid < 1 ||
491             remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
492                 return xpcInvalidPartid;
493         }
494
495         if (remote_rp->partid == sn_partition_id)
496                 return xpcLocalPartid;
497
498         if (XPC_VERSION_MAJOR(remote_rp->version) !=
499             XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
500                 return xpcBadVersion;
501         }
502
503         return xpcSuccess;
504 }
505
506 /*
507  * Get a copy of the remote partition's XPC variables from the reserved page.
508  *
509  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
510  * assumed to be of size XPC_RP_VARS_SIZE.
511  */
512 static enum xpc_retval
513 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
514 {
515         int bres;
516
517         if (remote_vars_pa == 0)
518                 return xpcVarsNotSet;
519
520         /* pull over the cross partition variables */
521         bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
522                            (BTE_NOTIFY | BTE_WACQUIRE), NULL);
523         if (bres != BTE_SUCCESS)
524                 return xpc_map_bte_errors(bres);
525
526         if (XPC_VERSION_MAJOR(remote_vars->version) !=
527             XPC_VERSION_MAJOR(XPC_V_VERSION)) {
528                 return xpcBadVersion;
529         }
530
531         return xpcSuccess;
532 }
533
534 /*
535  * Update the remote partition's info.
536  */
537 static void
538 xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
539                           struct timespec *remote_rp_stamp, u64 remote_rp_pa,
540                           u64 remote_vars_pa, struct xpc_vars *remote_vars)
541 {
542         part->remote_rp_version = remote_rp_version;
543         dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
544                 part->remote_rp_version);
545
546         part->remote_rp_stamp = *remote_rp_stamp;
547         dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
548                 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
549
550         part->remote_rp_pa = remote_rp_pa;
551         dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
552
553         part->remote_vars_pa = remote_vars_pa;
554         dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
555                 part->remote_vars_pa);
556
557         part->last_heartbeat = remote_vars->heartbeat;
558         dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
559                 part->last_heartbeat);
560
561         part->remote_vars_part_pa = remote_vars->vars_part_pa;
562         dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
563                 part->remote_vars_part_pa);
564
565         part->remote_act_nasid = remote_vars->act_nasid;
566         dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
567                 part->remote_act_nasid);
568
569         part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
570         dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
571                 part->remote_act_phys_cpuid);
572
573         part->remote_amos_page_pa = remote_vars->amos_page_pa;
574         dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
575                 part->remote_amos_page_pa);
576
577         part->remote_vars_version = remote_vars->version;
578         dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
579                 part->remote_vars_version);
580 }
581
582 /*
583  * Prior code has determined the nasid which generated an IPI.  Inspect
584  * that nasid to determine if its partition needs to be activated or
585  * deactivated.
586  *
587  * A partition is consider "awaiting activation" if our partition
588  * flags indicate it is not active and it has a heartbeat.  A
589  * partition is considered "awaiting deactivation" if our partition
590  * flags indicate it is active but it has no heartbeat or it is not
591  * sending its heartbeat to us.
592  *
593  * To determine the heartbeat, the remote nasid must have a properly
594  * initialized reserved page.
595  */
596 static void
597 xpc_identify_act_IRQ_req(int nasid)
598 {
599         struct xpc_rsvd_page *remote_rp;
600         struct xpc_vars *remote_vars;
601         u64 remote_rp_pa;
602         u64 remote_vars_pa;
603         int remote_rp_version;
604         int reactivate = 0;
605         int stamp_diff;
606         struct timespec remote_rp_stamp = { 0, 0 };
607         partid_t partid;
608         struct xpc_partition *part;
609         enum xpc_retval ret;
610
611         /* pull over the reserved page structure */
612
613         remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
614
615         ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
616         if (ret != xpcSuccess) {
617                 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
618                          "which sent interrupt, reason=%d\n", nasid, ret);
619                 return;
620         }
621
622         remote_vars_pa = remote_rp->vars_pa;
623         remote_rp_version = remote_rp->version;
624         if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
625                 remote_rp_stamp = remote_rp->stamp;
626
627         partid = remote_rp->partid;
628         part = &xpc_partitions[partid];
629
630         /* pull over the cross partition variables */
631
632         remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
633
634         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
635         if (ret != xpcSuccess) {
636
637                 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
638                          "which sent interrupt, reason=%d\n", nasid, ret);
639
640                 XPC_DEACTIVATE_PARTITION(part, ret);
641                 return;
642         }
643
644         part->act_IRQ_rcvd++;
645
646         dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
647                 "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
648                 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
649
650         if (xpc_partition_disengaged(part) &&
651             part->act_state == XPC_P_INACTIVE) {
652
653                 xpc_update_partition_info(part, remote_rp_version,
654                                           &remote_rp_stamp, remote_rp_pa,
655                                           remote_vars_pa, remote_vars);
656
657                 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
658                         if (xpc_partition_disengage_requested(1UL << partid)) {
659                                 /*
660                                  * Other side is waiting on us to disengage,
661                                  * even though we already have.
662                                  */
663                                 return;
664                         }
665                 } else {
666                         /* other side doesn't support disengage requests */
667                         xpc_clear_partition_disengage_request(1UL << partid);
668                 }
669
670                 xpc_activate_partition(part);
671                 return;
672         }
673
674         DBUG_ON(part->remote_rp_version == 0);
675         DBUG_ON(part->remote_vars_version == 0);
676
677         if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
678                 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
679                                                        remote_vars_version));
680
681                 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
682                         DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
683                                                                version));
684                         /* see if the other side rebooted */
685                         if (part->remote_amos_page_pa ==
686                             remote_vars->amos_page_pa &&
687                             xpc_hb_allowed(sn_partition_id, remote_vars)) {
688                                 /* doesn't look that way, so ignore the IPI */
689                                 return;
690                         }
691                 }
692
693                 /*
694                  * Other side rebooted and previous XPC didn't support the
695                  * disengage request, so we don't need to do anything special.
696                  */
697
698                 xpc_update_partition_info(part, remote_rp_version,
699                                           &remote_rp_stamp, remote_rp_pa,
700                                           remote_vars_pa, remote_vars);
701                 part->reactivate_nasid = nasid;
702                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
703                 return;
704         }
705
706         DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
707
708         if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
709                 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
710
711                 /*
712                  * Other side rebooted and previous XPC did support the
713                  * disengage request, but the new one doesn't.
714                  */
715
716                 xpc_clear_partition_engaged(1UL << partid);
717                 xpc_clear_partition_disengage_request(1UL << partid);
718
719                 xpc_update_partition_info(part, remote_rp_version,
720                                           &remote_rp_stamp, remote_rp_pa,
721                                           remote_vars_pa, remote_vars);
722                 reactivate = 1;
723
724         } else {
725                 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
726
727                 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
728                                                 &remote_rp_stamp);
729                 if (stamp_diff != 0) {
730                         DBUG_ON(stamp_diff >= 0);
731
732                         /*
733                          * Other side rebooted and the previous XPC did support
734                          * the disengage request, as does the new one.
735                          */
736
737                         DBUG_ON(xpc_partition_engaged(1UL << partid));
738                         DBUG_ON(xpc_partition_disengage_requested(1UL <<
739                                                                   partid));
740
741                         xpc_update_partition_info(part, remote_rp_version,
742                                                   &remote_rp_stamp,
743                                                   remote_rp_pa, remote_vars_pa,
744                                                   remote_vars);
745                         reactivate = 1;
746                 }
747         }
748
749         if (part->disengage_request_timeout > 0 &&
750             !xpc_partition_disengaged(part)) {
751                 /* still waiting on other side to disengage from us */
752                 return;
753         }
754
755         if (reactivate) {
756                 part->reactivate_nasid = nasid;
757                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
758
759         } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
760                    xpc_partition_disengage_requested(1UL << partid)) {
761                 XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
762         }
763 }
764
765 /*
766  * Loop through the activation AMO variables and process any bits
767  * which are set.  Each bit indicates a nasid sending a partition
768  * activation or deactivation request.
769  *
770  * Return #of IRQs detected.
771  */
772 int
773 xpc_identify_act_IRQ_sender(void)
774 {
775         int word, bit;
776         u64 nasid_mask;
777         u64 nasid;              /* remote nasid */
778         int n_IRQs_detected = 0;
779         AMO_t *act_amos;
780
781         act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
782
783         /* scan through act AMO variable looking for non-zero entries */
784         for (word = 0; word < xp_nasid_mask_words; word++) {
785
786                 if (xpc_exiting)
787                         break;
788
789                 nasid_mask = xpc_IPI_receive(&act_amos[word]);
790                 if (nasid_mask == 0) {
791                         /* no IRQs from nasids in this variable */
792                         continue;
793                 }
794
795                 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
796                         nasid_mask);
797
798                 /*
799                  * If this nasid has been added to the machine since
800                  * our partition was reset, this will retain the
801                  * remote nasid in our reserved pages machine mask.
802                  * This is used in the event of module reload.
803                  */
804                 xpc_mach_nasids[word] |= nasid_mask;
805
806                 /* locate the nasid(s) which sent interrupts */
807
808                 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
809                         if (nasid_mask & (1UL << bit)) {
810                                 n_IRQs_detected++;
811                                 nasid = XPC_NASID_FROM_W_B(word, bit);
812                                 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
813                                         nasid);
814                                 xpc_identify_act_IRQ_req(nasid);
815                         }
816                 }
817         }
818         return n_IRQs_detected;
819 }
820
821 /*
822  * See if the other side has responded to a partition disengage request
823  * from us.
824  */
825 int
826 xpc_partition_disengaged(struct xpc_partition *part)
827 {
828         partid_t partid = XPC_PARTID(part);
829         int disengaged;
830
831         disengaged = (xpc_partition_engaged(1UL << partid) == 0);
832         if (part->disengage_request_timeout) {
833                 if (!disengaged) {
834                         if (time_before(jiffies,
835                             part->disengage_request_timeout)) {
836                                 /* timelimit hasn't been reached yet */
837                                 return 0;
838                         }
839
840                         /*
841                          * Other side hasn't responded to our disengage
842                          * request in a timely fashion, so assume it's dead.
843                          */
844
845                         dev_info(xpc_part, "disengage from remote partition %d "
846                                  "timed out\n", partid);
847                         xpc_disengage_request_timedout = 1;
848                         xpc_clear_partition_engaged(1UL << partid);
849                         disengaged = 1;
850                 }
851                 part->disengage_request_timeout = 0;
852
853                 /* cancel the timer function, provided it's not us */
854                 if (!in_interrupt()) {
855                         del_singleshot_timer_sync(&part->
856                                                   disengage_request_timer);
857                 }
858
859                 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
860                         part->act_state != XPC_P_INACTIVE);
861                 if (part->act_state != XPC_P_INACTIVE)
862                         xpc_wakeup_channel_mgr(part);
863
864                 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
865                         xpc_cancel_partition_disengage_request(part);
866         }
867         return disengaged;
868 }
869
870 /*
871  * Mark specified partition as active.
872  */
873 enum xpc_retval
874 xpc_mark_partition_active(struct xpc_partition *part)
875 {
876         unsigned long irq_flags;
877         enum xpc_retval ret;
878
879         dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
880
881         spin_lock_irqsave(&part->act_lock, irq_flags);
882         if (part->act_state == XPC_P_ACTIVATING) {
883                 part->act_state = XPC_P_ACTIVE;
884                 ret = xpcSuccess;
885         } else {
886                 DBUG_ON(part->reason == xpcSuccess);
887                 ret = part->reason;
888         }
889         spin_unlock_irqrestore(&part->act_lock, irq_flags);
890
891         return ret;
892 }
893
894 /*
895  * Notify XPC that the partition is down.
896  */
897 void
898 xpc_deactivate_partition(const int line, struct xpc_partition *part,
899                          enum xpc_retval reason)
900 {
901         unsigned long irq_flags;
902
903         spin_lock_irqsave(&part->act_lock, irq_flags);
904
905         if (part->act_state == XPC_P_INACTIVE) {
906                 XPC_SET_REASON(part, reason, line);
907                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
908                 if (reason == xpcReactivating) {
909                         /* we interrupt ourselves to reactivate partition */
910                         xpc_IPI_send_reactivate(part);
911                 }
912                 return;
913         }
914         if (part->act_state == XPC_P_DEACTIVATING) {
915                 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
916                     reason == xpcReactivating) {
917                         XPC_SET_REASON(part, reason, line);
918                 }
919                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
920                 return;
921         }
922
923         part->act_state = XPC_P_DEACTIVATING;
924         XPC_SET_REASON(part, reason, line);
925
926         spin_unlock_irqrestore(&part->act_lock, irq_flags);
927
928         if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
929                 xpc_request_partition_disengage(part);
930                 xpc_IPI_send_disengage(part);
931
932                 /* set a timelimit on the disengage request */
933                 part->disengage_request_timeout = jiffies +
934                     (xpc_disengage_request_timelimit * HZ);
935                 part->disengage_request_timer.expires =
936                     part->disengage_request_timeout;
937                 add_timer(&part->disengage_request_timer);
938         }
939
940         dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
941                 XPC_PARTID(part), reason);
942
943         xpc_partition_going_down(part, reason);
944 }
945
946 /*
947  * Mark specified partition as inactive.
948  */
949 void
950 xpc_mark_partition_inactive(struct xpc_partition *part)
951 {
952         unsigned long irq_flags;
953
954         dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
955                 XPC_PARTID(part));
956
957         spin_lock_irqsave(&part->act_lock, irq_flags);
958         part->act_state = XPC_P_INACTIVE;
959         spin_unlock_irqrestore(&part->act_lock, irq_flags);
960         part->remote_rp_pa = 0;
961 }
962
963 /*
964  * SAL has provided a partition and machine mask.  The partition mask
965  * contains a bit for each even nasid in our partition.  The machine
966  * mask contains a bit for each even nasid in the entire machine.
967  *
968  * Using those two bit arrays, we can determine which nasids are
969  * known in the machine.  Each should also have a reserved page
970  * initialized if they are available for partitioning.
971  */
972 void
973 xpc_discovery(void)
974 {
975         void *remote_rp_base;
976         struct xpc_rsvd_page *remote_rp;
977         struct xpc_vars *remote_vars;
978         u64 remote_rp_pa;
979         u64 remote_vars_pa;
980         int region;
981         int region_size;
982         int max_regions;
983         int nasid;
984         struct xpc_rsvd_page *rp;
985         partid_t partid;
986         struct xpc_partition *part;
987         u64 *discovered_nasids;
988         enum xpc_retval ret;
989
990         remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
991                                                   xp_nasid_mask_bytes,
992                                                   GFP_KERNEL, &remote_rp_base);
993         if (remote_rp == NULL)
994                 return;
995
996         remote_vars = (struct xpc_vars *)remote_rp;
997
998         discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
999                                     GFP_KERNEL);
1000         if (discovered_nasids == NULL) {
1001                 kfree(remote_rp_base);
1002                 return;
1003         }
1004
1005         rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
1006
1007         /*
1008          * The term 'region' in this context refers to the minimum number of
1009          * nodes that can comprise an access protection grouping. The access
1010          * protection is in regards to memory, IOI and IPI.
1011          */
1012         max_regions = 64;
1013         region_size = sn_region_size;
1014
1015         switch (region_size) {
1016         case 128:
1017                 max_regions *= 2;
1018         case 64:
1019                 max_regions *= 2;
1020         case 32:
1021                 max_regions *= 2;
1022                 region_size = 16;
1023                 DBUG_ON(!is_shub2());
1024         }
1025
1026         for (region = 0; region < max_regions; region++) {
1027
1028                 if (xpc_exiting)
1029                         break;
1030
1031                 dev_dbg(xpc_part, "searching region %d\n", region);
1032
1033                 for (nasid = (region * region_size * 2);
1034                      nasid < ((region + 1) * region_size * 2); nasid += 2) {
1035
1036                         if (xpc_exiting)
1037                                 break;
1038
1039                         dev_dbg(xpc_part, "checking nasid %d\n", nasid);
1040
1041                         if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
1042                                 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
1043                                         "part of the local partition; skipping "
1044                                         "region\n", nasid);
1045                                 break;
1046                         }
1047
1048                         if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
1049                                 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
1050                                         "not on Numa-Link network at reset\n",
1051                                         nasid);
1052                                 continue;
1053                         }
1054
1055                         if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
1056                                 dev_dbg(xpc_part, "Nasid %d is part of a "
1057                                         "partition which was previously "
1058                                         "discovered\n", nasid);
1059                                 continue;
1060                         }
1061
1062                         /* pull over the reserved page structure */
1063
1064                         ret = xpc_get_remote_rp(nasid, discovered_nasids,
1065                                                 remote_rp, &remote_rp_pa);
1066                         if (ret != xpcSuccess) {
1067                                 dev_dbg(xpc_part, "unable to get reserved page "
1068                                         "from nasid %d, reason=%d\n", nasid,
1069                                         ret);
1070
1071                                 if (ret == xpcLocalPartid)
1072                                         break;
1073
1074                                 continue;
1075                         }
1076
1077                         remote_vars_pa = remote_rp->vars_pa;
1078
1079                         partid = remote_rp->partid;
1080                         part = &xpc_partitions[partid];
1081
1082                         /* pull over the cross partition variables */
1083
1084                         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1085                         if (ret != xpcSuccess) {
1086                                 dev_dbg(xpc_part, "unable to get XPC variables "
1087                                         "from nasid %d, reason=%d\n", nasid,
1088                                         ret);
1089
1090                                 XPC_DEACTIVATE_PARTITION(part, ret);
1091                                 continue;
1092                         }
1093
1094                         if (part->act_state != XPC_P_INACTIVE) {
1095                                 dev_dbg(xpc_part, "partition %d on nasid %d is "
1096                                         "already activating\n", partid, nasid);
1097                                 break;
1098                         }
1099
1100                         /*
1101                          * Register the remote partition's AMOs with SAL so it
1102                          * can handle and cleanup errors within that address
1103                          * range should the remote partition go down. We don't
1104                          * unregister this range because it is difficult to
1105                          * tell when outstanding writes to the remote partition
1106                          * are finished and thus when it is thus safe to
1107                          * unregister. This should not result in wasted space
1108                          * in the SAL xp_addr_region table because we should
1109                          * get the same page for remote_act_amos_pa after
1110                          * module reloads and system reboots.
1111                          */
1112                         if (sn_register_xp_addr_region
1113                             (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
1114                                 dev_dbg(xpc_part,
1115                                         "partition %d failed to "
1116                                         "register xp_addr region 0x%016lx\n",
1117                                         partid, remote_vars->amos_page_pa);
1118
1119                                 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
1120                                                __LINE__);
1121                                 break;
1122                         }
1123
1124                         /*
1125                          * The remote nasid is valid and available.
1126                          * Send an interrupt to that nasid to notify
1127                          * it that we are ready to begin activation.
1128                          */
1129                         dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1130                                 "nasid %d, phys_cpuid 0x%x\n",
1131                                 remote_vars->amos_page_pa,
1132                                 remote_vars->act_nasid,
1133                                 remote_vars->act_phys_cpuid);
1134
1135                         if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1136                                                            version)) {
1137                                 part->remote_amos_page_pa =
1138                                     remote_vars->amos_page_pa;
1139                                 xpc_mark_partition_disengaged(part);
1140                                 xpc_cancel_partition_disengage_request(part);
1141                         }
1142                         xpc_IPI_send_activate(remote_vars);
1143                 }
1144         }
1145
1146         kfree(discovered_nasids);
1147         kfree(remote_rp_base);
1148 }
1149
1150 /*
1151  * Given a partid, get the nasids owned by that partition from the
1152  * remote partition's reserved page.
1153  */
1154 enum xpc_retval
1155 xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
1156 {
1157         struct xpc_partition *part;
1158         u64 part_nasid_pa;
1159         int bte_res;
1160
1161         part = &xpc_partitions[partid];
1162         if (part->remote_rp_pa == 0)
1163                 return xpcPartitionDown;
1164
1165         memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
1166
1167         part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1168
1169         bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
1170                               xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
1171                               NULL);
1172
1173         return xpc_map_bte_errors(bte_res);
1174 }