/*
 * linux/arch/i386/nmi.c
 *
 * NMI watchdog support on APIC systems
 *
 * Started by Ingo Molnar <mingo@redhat.com>
 *
 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
 */
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evtsel_nmi_owner tracks the ownership of the event selection
 *   - different performance counters / event selection registers may be
 *     reserved by different subsystems; this reservation system just
 *     tries to coordinate things a little
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
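
/*
 * Minimal usage sketch, assuming a caller such as oprofile wants the first
 * K7 counter (the MSR names are just an example pair; anything handled by
 * nmi_perfctr_msr_to_bit()/nmi_evntsel_msr_to_bit() below works the same):
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	// ... program the counter/event select via wrmsr() ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 */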
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}
/* checks the availability of a bit (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}
int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}

/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test, make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	volatile int *endflag = data;
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat fewer cycles. */
	while (*endflag == 0)
		mb();
}
static int __init check_nmi_watchdog(void)
{
	volatile int endflag = 0;
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Actually it should be safe for most systems before 2004 too except
	   for some IBM systems that corrupt registers when NMI happens
	   during SMM. Unfortunately we don't have more exact information
	   on these and use this coarse check. */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	mdelay((10*1000)/nmi_hz);	// wait 10 ticks

	for_each_possible_cpu(cpu) {
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		/*
		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
		 * are writable, with the higher bits sign-extended from bit 31.
		 * So we can only program the counter with 31-bit values; bit 31
		 * must be set for bits 32..63 to sign-extend to 1.
		 * Find an appropriate nmi_hz.
		 */
		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
		    ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
			u64 count = (u64)cpu_khz * 1000;
			do_div(count, 0x7fffffffUL);
			nmi_hz = count + 1;
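			/*
			 * Worked example (hypothetical 3 GHz part, cpu_khz ==
			 * 3000000): cpu_khz * 1000 == 3,000,000,000, which is
			 * larger than 0x7fffffff (2,147,483,647), so count
			 * becomes 1 and nmi_hz becomes 2; the per-tick value
			 * of 1,500,000,000 then fits in the writable 31 bits.
			 */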
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);
	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
		return 0;	/* no lapic support */
	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}
void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	}
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.cls	= &nmi_sysclass,
};

static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but because this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}
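
/*
 * The counter is programmed with the negative of (cpu_khz * 1000) / nmi_hz,
 * so it overflows after roughly 1/nmi_hz seconds' worth of cycles, and the
 * overflow raises the watchdog NMI (LVTPC is set to NMI delivery below).
 * Rough worked example (hypothetical 2 GHz part, cpu_khz == 2000000,
 * nmi_hz == 1000): count == 2,000,000, the MSR is written with -2,000,000,
 * and the counter wraps after about a millisecond.
 */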
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	//unused
	wd->check_bit = 1ULL<<63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	//unused
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
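/*
 * Put differently (an interpretation of the SDM recipe cited above, not
 * wording from this file): CRU_ESCR0 counts an arbitrary always-available
 * event, and IQ_CCCR0 is put in compare mode with a complemented threshold
 * of 15, so the comparison holds on every cycle and IQ_COUNTER0 effectively
 * increments once per clock tick, which is exactly what the watchdog needs.
 */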
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
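		/*
		 * The low bit of the initial APIC ID distinguishes the two
		 * hyperthread siblings, so it is used below to give each
		 * sibling its own counter/CCCR pair.
		 */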
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 *  and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx, unused;
	union cpuid10_eax eax;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	//unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx, unused;
	union cpuid10_eax eax;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
void setup_apic_nmi_watchdog (void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active, neither should the other cpus be */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;
				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;
				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
/*
 * The best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * As these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * Since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
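
/*
 * In short: nmi_watchdog_tick() below samples apic_timer_irqs on each
 * watchdog NMI; if the count has not moved for 5*nmi_hz consecutive NMIs
 * (roughly five seconds), the per-CPU alert counter trips and die_nmi()
 * reports the lockup.
 */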
void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters (other CPUs might be
	 * spinning on locks we hold):
	 */
	for_each_possible_cpu(i)
		alert_counter[i] = 0;

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
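
/*
 * Minimal usage sketch (hypothetical caller, not from this file): code that
 * legitimately keeps a CPU busy for a long time pets the watchdog from
 * inside its loop so the lockup detector above does not fire:
 *
 *	while (!controller_ready(dev)) {	// hypothetical helper
 *		touch_nmi_watchdog();
 *		udelay(100);
 *	}
 */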
extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflowed perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6-based Pentium M needs to re-unmask
				 * the apic vector, but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
			}
			/* start the cycle over again */
			write_watchdog_counter(wd->perfctr_msr, NULL);
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
	return 0;
}

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}

/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);