/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"
int unknown_nmi_panic;
int nmi_watchdog_enabled;
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evtsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/ event selection may be reserved for
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
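/* CPUs that still owe an NMI backtrace: set in __trigger_all_cpu_backtrace()
 * and cleared one CPU at a time from nmi_watchdog_tick(). */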
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}
void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}
int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}
void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
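/*
 * Typical use of the reservation API by another perfctr user (an
 * illustrative sketch, not a caller that exists in this file):
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	... program the counter ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 *
 * The setup_*_watchdog() routines below follow exactly this pattern.
 */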
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}
static int endflag __initdata = 0;
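/* endflag is polled by the nmi_cpu_busy() loops below and is set once
 * check_nmi_watchdog() has sampled the NMI counts, releasing the busy CPUs. */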
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (endflag == 0)
		mb();
}
#endif
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So, we can only program the counter with 31 bit values and
	 * 32nd bit should be 1, for 33.. to be 1.
	 * Find the appropriate nmi_hz
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}
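/*
 * Example of the adjustment above: on a 3 GHz CPU (cpu_khz == 3000000) a
 * requested nmi_hz of 1 would need a period of 3,000,000,000 cycles, which
 * does not fit in 31 bits (max 0x7fffffff = 2,147,483,647).  The code above
 * therefore raises nmi_hz to 3000000000/0x7fffffff + 1 = 2, giving a
 * programmable period of 1,500,000,000 cycles.
 */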
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Probably safe on most older systems too, but let's be careful.
	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
	   which hangs the system. Disable watchdog for all thinkpads */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
		!dmi_name_in_vendors("ThinkPad"))
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	local_irq_enable();
	mdelay((10*1000)/nmi_hz); // wait 10 ticks

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu, prev_nmi_count[cpu], nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;

		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}
void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);
#endif	/* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
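/* The watchdog counters are programmed with a negative value (0 - count),
 * so that they overflow, and raise the performance-counter interrupt that
 * the LVTPC entry delivers as an NMI, after (cpu_khz * 1000) / nmi_hz
 * clock cycles, i.e. roughly nmi_hz times per second. */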
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}
static void write_watchdog_counter32(unsigned int perfctr_msr,
			const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
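/* K7 PerfEvtSel bits used below: ENABLE turns the counter on, INT requests
 * an interrupt on overflow (delivered as an NMI through LVTPC), and OS/USR
 * select counting in kernel and user mode.  Event 0x76 counts cycles while
 * the processor is not halted. */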
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << 63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
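/* The architectural-perfmon watchdog counts the architectural "UnHalted Core
 * Cycles" event.  CPUID leaf 0xA reports (via a zero bit in EBX) whether the
 * event is implemented, and eax.split.bit_width gives the counter width used
 * to derive wd->check_bit below. */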
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  /* unused */
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
void setup_apic_nmi_watchdog (void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;
				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;
				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */
static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
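/* Per-CPU bookkeeping for the lockup check in nmi_watchdog_tick():
 * last_irq_sums[] remembers the local APIC timer interrupt count seen at the
 * previous watchdog NMI, and alert_counter[] counts consecutive NMIs during
 * which that count did not change; after 5*nmi_hz such ticks (about five
 * seconds) die_nmi() is called. */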
void touch_nmi_watchdog (void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Just reset the alert counters, (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu (cpu)
			alert_counter[cpu] = 0;
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
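/* Returns 1 if the NMI was handled here (another die-chain user claimed it,
 * a watchdog perfctr overflowed, or we are in NMI_IO_APIC mode), 0 otherwise
 * so the caller can treat it as an unknown NMI. */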
__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			} else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				   wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M need to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variant.
				 * ArchPerfom/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}
#ifdef CONFIG_SYSCTL
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}
/*
 * proc handler for /proc/sys/kernel/nmi
 */
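/* Writing to the file toggles the lapic NMI watchdog, e.g. (illustrative
 * shell usage, not part of this file):
 *	echo 1 > /proc/sys/kernel/nmi	# try to enable the NMI watchdog
 *	echo 0 > /proc/sys/kernel/nmi	# disable it again
 */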
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}

#endif	/* CONFIG_SYSCTL */
void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);