Merge branch 'rfc' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/perfcounte...
[linux-2.6] / drivers / s390 / s390mach.c
1 /*
2  *  drivers/s390/s390mach.c
3  *   S/390 machine check handler
4  *
5  *    Copyright IBM Corp. 2000,2008
6  *    Author(s): Ingo Adlung (adlung@de.ibm.com)
7  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
8  *               Cornelia Huck <cornelia.huck@de.ibm.com>
9  */
10
11 #include <linux/init.h>
12 #include <linux/sched.h>
13 #include <linux/errno.h>
14 #include <linux/workqueue.h>
15 #include <linux/time.h>
16 #include <linux/device.h>
17 #include <linux/kthread.h>
18 #include <asm/etr.h>
19 #include <asm/lowcore.h>
20 #include <asm/cio.h>
21 #include <asm/cpu.h>
22 #include "s390mach.h"
23
/*
 * Signalled with up() by s390_handle_mcck() when a channel report is
 * pending; the "kmcheck" thread (s390_collect_crw_info) sleeps on it.
 * Set up in the locked state by machine_check_init().
 */
24 static struct semaphore m_sem;
25
26 static NORET_TYPE void
27 s390_handle_damage(char *msg)
28 {
29 #ifdef CONFIG_SMP
30         smp_send_stop();
31 #endif
32         disabled_wait((unsigned long) __builtin_return_address(0));
33         for(;;);
34 }
35
36 static crw_handler_t crw_handlers[NR_RSCS];
37
38 /**
39  * s390_register_crw_handler() - register a channel report word handler
40  * @rsc: reporting source code to handle
41  * @handler: handler to be registered
42  *
43  * Returns %0 on success and a negative error value otherwise.
44  */
45 int s390_register_crw_handler(int rsc, crw_handler_t handler)
46 {
47         if ((rsc < 0) || (rsc >= NR_RSCS))
48                 return -EINVAL;
49         if (!cmpxchg(&crw_handlers[rsc], NULL, handler))
50                 return 0;
51         return -EBUSY;
52 }
53
54 /**
55  * s390_unregister_crw_handler() - unregister a channel report word handler
56  * @rsc: reporting source code to handle
57  */
58 void s390_unregister_crw_handler(int rsc)
59 {
60         if ((rsc < 0) || (rsc >= NR_RSCS))
61                 return;
62         xchg(&crw_handlers[rsc], NULL);
63         synchronize_sched();
64 }
65
66 /*
67  * Retrieve CRWs and call function to handle event.
68  */
69 static int s390_collect_crw_info(void *param)
70 {
71         struct crw crw[2];
72         int ccode;
73         struct semaphore *sem;
74         unsigned int chain;
75         int ignore;
76
77         sem = (struct semaphore *)param;
78 repeat:
79         ignore = down_interruptible(sem);
80         chain = 0;
81         while (1) {
82                 if (unlikely(chain > 1)) {
83                         struct crw tmp_crw;
84
85                         printk(KERN_WARNING"%s: Code does not support more "
86                                "than two chained crws; please report to "
87                                "linux390@de.ibm.com!\n", __func__);
88                         ccode = stcrw(&tmp_crw);
89                         printk(KERN_WARNING"%s: crw reports slct=%d, oflw=%d, "
90                                "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
91                                __func__, tmp_crw.slct, tmp_crw.oflw,
92                                tmp_crw.chn, tmp_crw.rsc, tmp_crw.anc,
93                                tmp_crw.erc, tmp_crw.rsid);
94                         printk(KERN_WARNING"%s: This was crw number %x in the "
95                                "chain\n", __func__, chain);
96                         if (ccode != 0)
97                                 break;
98                         chain = tmp_crw.chn ? chain + 1 : 0;
99                         continue;
100                 }
101                 ccode = stcrw(&crw[chain]);
102                 if (ccode != 0)
103                         break;
104                 printk(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
105                        "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
106                        crw[chain].slct, crw[chain].oflw, crw[chain].chn,
107                        crw[chain].rsc, crw[chain].anc, crw[chain].erc,
108                        crw[chain].rsid);
109                 /* Check for overflows. */
110                 if (crw[chain].oflw) {
111                         int i;
112
113                         pr_debug("%s: crw overflow detected!\n", __func__);
114                         for (i = 0; i < NR_RSCS; i++) {
115                                 if (crw_handlers[i])
116                                         crw_handlers[i](NULL, NULL, 1);
117                         }
118                         chain = 0;
119                         continue;
120                 }
121                 if (crw[0].chn && !chain) {
122                         chain++;
123                         continue;
124                 }
125                 if (crw_handlers[crw[chain].rsc])
126                         crw_handlers[crw[chain].rsc](&crw[0],
127                                                      chain ? &crw[1] : NULL,
128                                                      0);
129                 /* chain is always 0 or 1 here. */
130                 chain = crw[chain].chn ? chain + 1 : 0;
131         }
132         goto repeat;
133         return 0;
134 }
135
/*
 * Work recorded by s390_do_machine_check() for later processing in
 * s390_handle_mcck(), which copies and then zeroes the per-cpu instance.
 */
136 struct mcck_struct {
137         int kill_task;              /* registers not revalidated in user mode */
138         int channel_report;         /* channel report word pending (mci->cp) */
139         int warning;                /* warning pending (mci->w) */
140         unsigned long long mcck_code; /* raw interruption code, saved with kill_task */
141 };
142
/* Per-cpu accumulator of pending machine check work. */
143 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
144
145 /*
146  * Main machine check handler function. Will be called with interrupts enabled
147  * or disabled and machine checks enabled or disabled.
148  */
149 void
150 s390_handle_mcck(void)
151 {
152         unsigned long flags;
153         struct mcck_struct mcck;
154
155         /*
156          * Disable machine checks and get the current state of accumulated
157          * machine checks. Afterwards delete the old state and enable machine
158          * checks again.
159          */
160         local_irq_save(flags);
161         local_mcck_disable();
162         mcck = __get_cpu_var(cpu_mcck);
163         memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
164         clear_thread_flag(TIF_MCCK_PENDING);
165         local_mcck_enable();
166         local_irq_restore(flags);
167
168         if (mcck.channel_report)
169                 up(&m_sem);
170
171 #ifdef CONFIG_MACHCHK_WARNING
172 /*
173  * The warning may remain for a prolonged period on the bare iron.
174  * (actually till the machine is powered off, or until the problem is gone)
175  * So we just stop listening for the WARNING MCH and prevent continuously
176  * being interrupted.  One caveat is however, that we must do this per
177  * processor and cannot use the smp version of ctl_clear_bit().
178  * On VM we only get one interrupt per virtally presented machinecheck.
179  * Though one suffices, we may get one interrupt per (virtual) processor.
180  */
181         if (mcck.warning) {     /* WARNING pending ? */
182                 static int mchchk_wng_posted = 0;
183                 /*
184                  * Use single machine clear, as we cannot handle smp right now
185                  */
186                 __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
187                 if (xchg(&mchchk_wng_posted, 1) == 0)
188                         kill_cad_pid(SIGPWR, 1);
189         }
190 #endif
191
192         if (mcck.kill_task) {
193                 local_irq_enable();
194                 printk(KERN_EMERG "mcck: Terminating task because of machine "
195                        "malfunction (code 0x%016llx).\n", mcck.mcck_code);
196                 printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
197                        current->comm, current->pid);
198                 do_exit(SIGSEGV);
199         }
200 }
201 EXPORT_SYMBOL_GPL(s390_handle_mcck);
202
/*
 * Reload the CPU register sets after a machine check, using the bits in
 * @mci to decide whether the saved contents can be trusted. Floating
 * point, access and control registers are reloaded from the lowcore save
 * areas; the clock comparator is set from the current clock value.
 * Invalid control registers or an invalid old PSW end in
 * s390_handle_damage(); any other invalid register set only makes the
 * function return 1 so the caller can terminate the task.
 */
203 /*
204  * returns 0 if all registers could be validated
205  * returns 1 otherwise
206  */
207 static int
208 s390_revalidate_registers(struct mci *mci)
209 {
210         int kill_task;
211         u64 tmpclock;
212         u64 zero;
213         void *fpt_save_area, *fpt_creg_save_area;
214
215         kill_task = 0;
            /* zero is the source operand for lfpc when the saved FPC is invalid */
216         zero = 0;
217         /* General purpose registers */
218         if (!mci->gr)
219                 /*
220                  * General purpose registers couldn't be restored and have
221                  * unknown contents. Process needs to be terminated.
222                  */
223                 kill_task = 1;
224
225         /* Revalidate floating point registers */
226         if (!mci->fp)
227                 /*
228                  * Floating point registers can't be restored and
229                  * therefore the process needs to be terminated.
230                  */
231                 kill_task = 1;
232
            /* 31-bit build: load FP registers 0, 2, 4 and 6 from the lowcore save area. */
233 #ifndef CONFIG_64BIT
234         asm volatile(
235                 "       ld      0,0(%0)\n"
236                 "       ld      2,8(%0)\n"
237                 "       ld      4,16(%0)\n"
238                 "       ld      6,24(%0)"
239                 : : "a" (&S390_lowcore.floating_pt_save_area));
240 #endif
241
242         if (MACHINE_HAS_IEEE) {
                    /*
                     * 64-bit: both save areas live in the lowcore. 31-bit: they
                     * are found through extended_save_area_addr, with the FP
                     * control register stored 128 bytes after the FP registers.
                     */
243 #ifdef CONFIG_64BIT
244                 fpt_save_area = &S390_lowcore.floating_pt_save_area;
245                 fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
246 #else
247                 fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
248                 fpt_creg_save_area = fpt_save_area+128;
249 #endif
250                 /* Floating point control register */
251                 if (!mci->fc) {
252                         /*
253                          * Floating point control register can't be restored.
254                          * Task will be terminated.
255                          */
256                         asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
257                         kill_task = 1;
258
259                 } else
260                         asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
261
                    /* Load all 16 floating point registers from fpt_save_area. */
262                 asm volatile(
263                         "       ld      0,0(%0)\n"
264                         "       ld      1,8(%0)\n"
265                         "       ld      2,16(%0)\n"
266                         "       ld      3,24(%0)\n"
267                         "       ld      4,32(%0)\n"
268                         "       ld      5,40(%0)\n"
269                         "       ld      6,48(%0)\n"
270                         "       ld      7,56(%0)\n"
271                         "       ld      8,64(%0)\n"
272                         "       ld      9,72(%0)\n"
273                         "       ld      10,80(%0)\n"
274                         "       ld      11,88(%0)\n"
275                         "       ld      12,96(%0)\n"
276                         "       ld      13,104(%0)\n"
277                         "       ld      14,112(%0)\n"
278                         "       ld      15,120(%0)\n"
279                         : : "a" (fpt_save_area));
280         }
281
            /*
             * The access registers are reloaded unconditionally; an invalid
             * save area only marks the task for termination afterwards.
             */
282         /* Revalidate access registers */
283         asm volatile(
284                 "       lam     0,15,0(%0)"
285                 : : "a" (&S390_lowcore.access_regs_save_area));
286         if (!mci->ar)
287                 /*
288                  * Access registers have unknown contents.
289                  * Terminating task.
290                  */
291                 kill_task = 1;
292
293         /* Revalidate control registers */
294         if (!mci->cr)
295                 /*
296                  * Control registers have unknown contents.
297                  * Can't recover and therefore stopping machine.
298                  */
299                 s390_handle_damage("invalid control registers.");
300         else
301 #ifdef CONFIG_64BIT
302                 asm volatile(
303                         "       lctlg   0,15,0(%0)"
304                         : : "a" (&S390_lowcore.cregs_save_area));
305 #else
306                 asm volatile(
307                         "       lctl    0,15,0(%0)"
308                         : : "a" (&S390_lowcore.cregs_save_area));
309 #endif
310
311         /*
312          * We don't even try to revalidate the TOD register, since we simply
313          * can't write something sensible into that register.
314          */
315
316 #ifdef CONFIG_64BIT
317         /*
318          * See if we can revalidate the TOD programmable register with its
319          * old contents (should be zero) otherwise set it to zero.
320          */
321         if (!mci->pr)
322                 asm volatile(
323                         "       sr      0,0\n"
324                         "       sckpf"
325                         : : : "0", "cc");
326         else
327                 asm volatile(
328                         "       l       0,0(%0)\n"
329                         "       sckpf"
330                         : : "a" (&S390_lowcore.tod_progreg_save_area)
331                         : "0", "cc");
332 #endif
333
            /*
             * tmpclock receives the current clock value (stck) and is then
             * loaded into the clock comparator (sckc).
             */
334         /* Revalidate clock comparator register */
335         asm volatile(
336                 "       stck    0(%1)\n"
337                 "       sckc    0(%1)"
338                 : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
339
340         /* Check if old PSW is valid */
341         if (!mci->wp)
342                 /*
343                  * Can't tell if we come from user or kernel mode
344                  * -> stopping machine.
345                  */
346                 s390_handle_damage("old psw invalid.");
347
            /*
             * NOTE(review): ms/pm/ia are presumably the validity bits for the
             * remaining old-PSW fields - confirm against struct mci.
             */
348         if (!mci->ms || !mci->pm || !mci->ia)
349                 kill_task = 1;
350
351         return kill_task;
352 }
353
354 #define MAX_IPD_COUNT   29
355 #define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */
356
357 /*
358  * machine check handler.
359  */
360 void
361 s390_do_machine_check(struct pt_regs *regs)
362 {
363         static DEFINE_SPINLOCK(ipd_lock);
364         static unsigned long long last_ipd;
365         static int ipd_count;
366         unsigned long long tmp;
367         struct mci *mci;
368         struct mcck_struct *mcck;
369         int umode;
370
371         lockdep_off();
372
373         s390_idle_check();
374
375         mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
376         mcck = &__get_cpu_var(cpu_mcck);
377         umode = user_mode(regs);
378
379         if (mci->sd)
380                 /* System damage -> stopping machine */
381                 s390_handle_damage("received system damage machine check.");
382
383         if (mci->pd) {
384                 if (mci->b) {
385                         /* Processing backup -> verify if we can survive this */
386                         u64 z_mcic, o_mcic, t_mcic;
387 #ifdef CONFIG_64BIT
388                         z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
389                         o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
390                                   1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
391                                   1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
392                                   1ULL<<16);
393 #else
394                         z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
395                                   1ULL<<29);
396                         o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
397                                   1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
398                                   1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
399 #endif
400                         t_mcic = *(u64 *)mci;
401
402                         if (((t_mcic & z_mcic) != 0) ||
403                             ((t_mcic & o_mcic) != o_mcic)) {
404                                 s390_handle_damage("processing backup machine "
405                                                    "check with damage.");
406                         }
407
408                         /*
409                          * Nullifying exigent condition, therefore we might
410                          * retry this instruction.
411                          */
412
413                         spin_lock(&ipd_lock);
414
415                         tmp = get_clock();
416
417                         if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
418                                 ipd_count++;
419                         else
420                                 ipd_count = 1;
421
422                         last_ipd = tmp;
423
424                         if (ipd_count == MAX_IPD_COUNT)
425                                 s390_handle_damage("too many ipd retries.");
426
427                         spin_unlock(&ipd_lock);
428                 }
429                 else {
430                         /* Processing damage -> stopping machine */
431                         s390_handle_damage("received instruction processing "
432                                            "damage machine check.");
433                 }
434         }
435         if (s390_revalidate_registers(mci)) {
436                 if (umode) {
437                         /*
438                          * Couldn't restore all register contents while in
439                          * user mode -> mark task for termination.
440                          */
441                         mcck->kill_task = 1;
442                         mcck->mcck_code = *(unsigned long long *) mci;
443                         set_thread_flag(TIF_MCCK_PENDING);
444                 }
445                 else
446                         /*
447                          * Couldn't restore all register contents while in
448                          * kernel mode -> stopping machine.
449                          */
450                         s390_handle_damage("unable to revalidate registers.");
451         }
452
453         if (mci->cd) {
454                 /* Timing facility damage */
455                 s390_handle_damage("TOD clock damaged");
456         }
457
458         if (mci->ed && mci->ec) {
459                 /* External damage */
460                 if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
461                         etr_sync_check();
462                 if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
463                         etr_switch_to_local();
464                 if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
465                         stp_sync_check();
466                 if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
467                         stp_island_check();
468         }
469
470         if (mci->se)
471                 /* Storage error uncorrected */
472                 s390_handle_damage("received storage error uncorrected "
473                                    "machine check.");
474
475         if (mci->ke)
476                 /* Storage key-error uncorrected */
477                 s390_handle_damage("received storage key-error uncorrected "
478                                    "machine check.");
479
480         if (mci->ds && mci->fa)
481                 /* Storage degradation */
482                 s390_handle_damage("received storage degradation machine "
483                                    "check.");
484
485         if (mci->cp) {
486                 /* Channel report word pending */
487                 mcck->channel_report = 1;
488                 set_thread_flag(TIF_MCCK_PENDING);
489         }
490
491         if (mci->w) {
492                 /* Warning pending */
493                 mcck->warning = 1;
494                 set_thread_flag(TIF_MCCK_PENDING);
495         }
496         lockdep_on();
497 }
498
499 /*
500  * s390_init_machine_check
501  *
502  * initialize machine check handling
503  */
504 static int
505 machine_check_init(void)
506 {
507         init_MUTEX_LOCKED(&m_sem);
508         ctl_set_bit(14, 25);    /* enable external damage MCH */
509         ctl_set_bit(14, 27);    /* enable system recovery MCH */
510 #ifdef CONFIG_MACHCHK_WARNING
511         ctl_set_bit(14, 24);    /* enable warning MCH */
512 #endif
513         return 0;
514 }
515
516 /*
517  * Initialize the machine check handler really early to be able to
518  * catch all machine checks that happen during boot
519  */
520 arch_initcall(machine_check_init);
521
522 /*
523  * Machine checks for the channel subsystem must be enabled
524  * after the channel subsystem is initialized
525  */
526 static int __init
527 machine_check_crw_init (void)
528 {
529         struct task_struct *task;
530
531         task = kthread_run(s390_collect_crw_info, &m_sem, "kmcheck");
532         if (IS_ERR(task))
533                 return PTR_ERR(task);
534         ctl_set_bit(14, 28);    /* enable channel report MCH */
535         return 0;
536 }
537
538 device_initcall (machine_check_crw_init);