[PATCH] s390: improved machine check handling
drivers/s390/s390mach.c
/*
 *  drivers/s390/s390mach.c
 *   S/390 machine check handler
 *
 *  S390 version
 *    Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
 *    Author(s): Ingo Adlung (adlung@de.ibm.com)
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/workqueue.h>

#include <asm/lowcore.h>

#include "s390mach.h"

#define DBG printk
// #define DBG(args,...) do {} while (0);

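/*
 * The CRW collector thread sleeps on this semaphore. It is initialized
 * locked (see machine_check_init) and signalled from s390_handle_mcck()
 * whenever a channel report is pending.
 */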
static struct semaphore m_sem;

extern int css_process_crw(int);
extern int chsc_process_crw(void);
extern int chp_process_crw(int, int);
extern void css_reiterate_subchannels(void);

extern struct workqueue_struct *slow_path_wq;
extern struct work_struct slow_path_work;

static NORET_TYPE void
s390_handle_damage(char *msg)
{
#ifdef CONFIG_SMP
        smp_send_stop();
#endif
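        /* Stop this CPU in a disabled wait; the return address identifies the caller. */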
        disabled_wait((unsigned long) __builtin_return_address(0));
        for (;;);
}

/*
 * Retrieve CRWs and call function to handle event.
 *
 * Note: we currently process CRWs for I/O and chsc subchannels only.
 */
static int
s390_collect_crw_info(void *param)
{
        struct crw crw;
        int ccode, ret, slow;
        struct semaphore *sem;

        sem = (struct semaphore *)param;
        /* Set a nice name. */
        daemonize("kmcheck");
repeat:
        down_interruptible(sem);
        slow = 0;
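        /*
         * Drain all pending channel report words; a nonzero condition
         * code from stcrw means no further CRWs are available.
         */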
        while (1) {
                ccode = stcrw(&crw);
                if (ccode != 0)
                        break;
                DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
                    "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
                    crw.slct, crw.oflw, crw.chn, crw.rsc, crw.anc,
                    crw.erc, crw.rsid);
                /* Check for overflows. */
                if (crw.oflw) {
                        pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
                        css_reiterate_subchannels();
                        slow = 1;
                        continue;
                }
                switch (crw.rsc) {
                case CRW_RSC_SCH:
                        pr_debug("source is subchannel %04X\n", crw.rsid);
                        ret = css_process_crw(crw.rsid);
                        if (ret == -EAGAIN)
                                slow = 1;
                        break;
                case CRW_RSC_MONITOR:
                        pr_debug("source is monitoring facility\n");
                        break;
                case CRW_RSC_CPATH:
                        pr_debug("source is channel path %02X\n", crw.rsid);
                        switch (crw.erc) {
                        case CRW_ERC_IPARM: /* Path has become available. */
                                ret = chp_process_crw(crw.rsid, 1);
                                break;
                        case CRW_ERC_PERRI: /* Path has gone away. */
                        case CRW_ERC_PERRN:
                                ret = chp_process_crw(crw.rsid, 0);
                                break;
                        default:
                                pr_debug("Don't know how to handle erc=%x\n",
                                         crw.erc);
                                ret = 0;
                        }
                        if (ret == -EAGAIN)
                                slow = 1;
                        break;
                case CRW_RSC_CONFIG:
                        pr_debug("source is configuration-alert facility\n");
                        break;
                case CRW_RSC_CSS:
                        pr_debug("source is channel subsystem\n");
                        ret = chsc_process_crw();
                        if (ret == -EAGAIN)
                                slow = 1;
                        break;
                default:
                        pr_debug("unknown source\n");
                        break;
                }
        }
        if (slow)
                queue_work(slow_path_wq, &slow_path_work);
        goto repeat;
        return 0;
}

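/*
 * Per-CPU accumulator for machine check findings; filled in by
 * s390_do_machine_check() and evaluated and cleared by s390_handle_mcck().
 */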
struct mcck_struct {
        int kill_task;
        int channel_report;
        int warning;
        unsigned long long mcck_code;
};

static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);

/*
 * Main machine check handler function. Will be called with interrupts enabled
 * or disabled and machine checks enabled or disabled.
 */
void
s390_handle_mcck(void)
{
        unsigned long flags;
        struct mcck_struct mcck;

        /*
         * Disable machine checks and get the current state of accumulated
         * machine checks. Afterwards delete the old state and enable machine
         * checks again.
         */
        local_irq_save(flags);
        local_mcck_disable();
        mcck = __get_cpu_var(cpu_mcck);
        memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
        clear_thread_flag(TIF_MCCK_PENDING);
        local_mcck_enable();
        local_irq_restore(flags);

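        /* A channel report is pending: wake the kmcheck thread to collect the CRWs. */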
        if (mcck.channel_report)
                up(&m_sem);

#ifdef CONFIG_MACHCHK_WARNING
/*
 * The warning may remain for a prolonged period on the bare iron.
 * (actually until the machine is powered off, or the problem is gone)
 * So we just stop listening for the WARNING MCH and avoid continuously
 * being interrupted.  One caveat is however that we must do this per
 * processor and cannot use the smp version of ctl_clear_bit().
 * On VM we only get one interrupt per virtually presented machine check.
 * Though one suffices, we may get one interrupt per (virtual) processor.
 */
        if (mcck.warning) {     /* WARNING pending? */
                static int mchchk_wng_posted = 0;
                /*
                 * Use the single-cpu clear; we cannot use the smp variant here.
                 */
                __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
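                /* Notify init (pid 1) with SIGPWR, but only once. */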
                if (xchg(&mchchk_wng_posted, 1) == 0)
                        kill_proc(1, SIGPWR, 1);
        }
#endif

        if (mcck.kill_task) {
                local_irq_enable();
                printk(KERN_EMERG "mcck: Terminating task because of machine "
                       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
                printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
                       current->comm, current->pid);
                do_exit(SIGSEGV);
        }
}

/*
 * returns 0 if all registers could be validated
 * returns 1 otherwise
 */
static int
s390_revalidate_registers(struct mci *mci)
{
        int kill_task;
        u64 tmpclock;
        u64 zero;
        void *fpt_save_area, *fpt_creg_save_area;

        kill_task = 0;
        zero = 0;
        /* General purpose registers */
        if (!mci->gr)
                /*
                 * General purpose registers couldn't be restored and have
                 * unknown contents. Process needs to be terminated.
                 */
                kill_task = 1;

        /* Revalidate floating point registers */
        if (!mci->fp)
                /*
                 * Floating point registers can't be restored and
                 * therefore the process needs to be terminated.
                 */
                kill_task = 1;

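        /*
         * Without the IEEE facility, ESA/390 only provides the four
         * floating point registers 0, 2, 4 and 6; reload them from the
         * lowcore save area.
         */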
#ifndef __s390x__
        asm volatile("ld 0,0(%0)\n"
                     "ld 2,8(%0)\n"
                     "ld 4,16(%0)\n"
                     "ld 6,24(%0)"
                     : : "a" (&S390_lowcore.floating_pt_save_area));
#endif

        if (MACHINE_HAS_IEEE) {
#ifdef __s390x__
                fpt_save_area = &S390_lowcore.floating_pt_save_area;
                fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
#else
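                /*
                 * On 31 bit the IEEE registers live in the extended save
                 * area; the floating point control register follows the
                 * sixteen 8-byte register slots (offset 128).
                 */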
                fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
                fpt_creg_save_area = fpt_save_area + 128;
#endif
                /* Floating point control register */
                if (!mci->fc) {
                        /*
                         * Floating point control register can't be restored.
                         * Task will be terminated.
                         */
                        asm volatile ("lfpc 0(%0)" : : "a" (&zero));
                        kill_task = 1;

                }
                else
                        asm volatile (
                                "lfpc 0(%0)"
                                : : "a" (fpt_creg_save_area));

                asm volatile("ld  0,0(%0)\n"
                             "ld  1,8(%0)\n"
                             "ld  2,16(%0)\n"
                             "ld  3,24(%0)\n"
                             "ld  4,32(%0)\n"
                             "ld  5,40(%0)\n"
                             "ld  6,48(%0)\n"
                             "ld  7,56(%0)\n"
                             "ld  8,64(%0)\n"
                             "ld  9,72(%0)\n"
                             "ld 10,80(%0)\n"
                             "ld 11,88(%0)\n"
                             "ld 12,96(%0)\n"
                             "ld 13,104(%0)\n"
                             "ld 14,112(%0)\n"
                             "ld 15,120(%0)\n"
                             : : "a" (fpt_save_area));
        }

        /* Revalidate access registers */
        asm volatile("lam 0,15,0(%0)"
                     : : "a" (&S390_lowcore.access_regs_save_area));
        if (!mci->ar)
                /*
                 * Access registers have unknown contents.
                 * Terminating task.
                 */
                kill_task = 1;

        /* Revalidate control registers */
        if (!mci->cr)
                /*
                 * Control registers have unknown contents.
                 * Can't recover and therefore stopping machine.
                 */
                s390_handle_damage("invalid control registers.");
        else
#ifdef __s390x__
                asm volatile("lctlg 0,15,0(%0)"
                             : : "a" (&S390_lowcore.cregs_save_area));
#else
                asm volatile("lctl 0,15,0(%0)"
                             : : "a" (&S390_lowcore.cregs_save_area));
#endif

        /*
         * We don't even try to revalidate the TOD register, since we simply
         * can't write something sensible into that register.
         */

#ifdef __s390x__
        /*
         * See if we can revalidate the TOD programmable register with its
         * old contents (which should be zero); otherwise set it to zero.
         */
        if (!mci->pr)
                asm volatile("sr 0,0\n"
                             "sckpf"
                             : : : "0", "cc");
        else
                asm volatile(
                        "l 0,0(%0)\n"
                        "sckpf"
                        : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
#endif

        /* Revalidate clock comparator register */
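        /* Store the current TOD clock value and reload the clock comparator from it. */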
        asm volatile ("stck 0(%1)\n"
                      "sckc 0(%1)"
                      : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");

        /* Check if old PSW is valid */
        if (!mci->wp)
                /*
                 * Can't tell if we come from user or kernel mode
                 * -> stopping machine.
                 */
                s390_handle_damage("old psw invalid.");

        if (!mci->ms || !mci->pm || !mci->ia)
                kill_task = 1;

        return kill_task;
}

/*
 * Machine check handler.
 */
void
s390_do_machine_check(struct pt_regs *regs)
{
        struct mci *mci;
        struct mcck_struct *mcck;
        int umode;

        mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
        mcck = &__get_cpu_var(cpu_mcck);
        umode = user_mode(regs);

        if (mci->sd)
                /* System damage -> stopping machine */
                s390_handle_damage("received system damage machine check.");

        if (mci->pd) {
                if (mci->b) {
                        /* Processing backup -> verify if we can survive this */
                        u64 z_mcic, o_mcic, t_mcic;
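                        /*
                         * z_mcic: MCIC bits that must be zero, o_mcic: bits
                         * that must be one for the machine check to be
                         * survivable.
                         */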
#ifdef __s390x__
                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
                                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
                                  1ULL<<16);
#else
                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
                                  1ULL<<29);
                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
                                  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
#endif
                        t_mcic = *(u64 *)mci;

                        if (((t_mcic & z_mcic) != 0) ||
                            ((t_mcic & o_mcic) != o_mcic)) {
                                s390_handle_damage("processing backup machine "
                                                   "check with damage.");
                        }
                        if (!umode)
                                s390_handle_damage("processing backup machine "
                                                   "check in kernel mode.");
                        mcck->kill_task = 1;
                        mcck->mcck_code = *(unsigned long long *) mci;
                }
                else {
                        /* Processing damage -> stopping machine */
                        s390_handle_damage("received instruction processing "
                                           "damage machine check.");
                }
        }
        if (s390_revalidate_registers(mci)) {
                if (umode) {
                        /*
                         * Couldn't restore all register contents while in
                         * user mode -> mark task for termination.
                         */
                        mcck->kill_task = 1;
                        mcck->mcck_code = *(unsigned long long *) mci;
                        set_thread_flag(TIF_MCCK_PENDING);
                }
                else
                        /*
                         * Couldn't restore all register contents while in
                         * kernel mode -> stopping machine.
                         */
                        s390_handle_damage("unable to revalidate registers.");
        }

        if (mci->se)
                /* Storage error uncorrected */
                s390_handle_damage("received storage error uncorrected "
                                   "machine check.");

        if (mci->ke)
                /* Storage key-error uncorrected */
                s390_handle_damage("received storage key-error uncorrected "
                                   "machine check.");

        if (mci->ds && mci->fa)
                /* Storage degradation */
                s390_handle_damage("received storage degradation machine "
                                   "check.");

        if (mci->cp) {
                /* Channel report word pending */
                mcck->channel_report = 1;
                set_thread_flag(TIF_MCCK_PENDING);
        }

        if (mci->w) {
                /* Warning pending */
                mcck->warning = 1;
                set_thread_flag(TIF_MCCK_PENDING);
        }
}

/*
 * machine_check_init
 *
 * initialize machine check handling
 */
static int
machine_check_init(void)
{
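        /* Leave m_sem locked; the kmcheck thread blocks until a CRW is signalled. */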
        init_MUTEX_LOCKED(&m_sem);
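        /* Control register 14 holds the machine check subclass masks. */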
        ctl_clear_bit(14, 25);  /* disable external damage MCH */
        ctl_set_bit(14, 27);    /* enable system recovery MCH */
#ifdef CONFIG_MACHCHK_WARNING
        ctl_set_bit(14, 24);    /* enable warning MCH */
#endif
        return 0;
}

/*
 * Initialize the machine check handler really early to be able to
 * catch all machine checks that happen during boot
 */
arch_initcall(machine_check_init);

/*
 * Machine checks for the channel subsystem must be enabled
 * after the channel subsystem is initialized
 */
static int __init
machine_check_crw_init(void)
{
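        /* Start the kmcheck thread before enabling channel report machine checks. */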
        kernel_thread(s390_collect_crw_info, &m_sem, CLONE_FS|CLONE_FILES);
        ctl_set_bit(14, 28);    /* enable channel report MCH */
        return 0;
}

device_initcall(machine_check_crw_init);