/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Fixes:
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"
int unknown_nmi_panic;
int nmi_watchdog_enabled;

/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evntsel_nmi_owner tracks the ownership of the event selection
 * - different performance counters/event selections may be reserved for
 *   different subsystems; this reservation system just tries to coordinate
 *   things a little.
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
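
/*
 * For illustration (assuming the usual msr-index.h values, which are not
 * spelled out in this file): MSR_P4_BSU_ESCR0 is 0x3a0 and MSR_P4_CRU_ESCR5
 * is 0x3e1, so the largest offset the reservation code below can produce is
 * 0x3e1 - 0x3a0 = 0x41 = 65, and 66 bits are enough to cover every
 * counter/ESCR on the supported CPUs.
 */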
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}
/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}
int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
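
/*
 * Usage sketch for the reservation helpers above (illustrative only; the
 * caller shown here is hypothetical, not something in the tree).  A
 * subsystem that wants a counter for itself is expected to pair the calls
 * like this so it cannot trample on a counter the NMI watchdog has already
 * programmed:
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return -EBUSY;
 *	}
 *	... program MSR_K7_EVNTSEL0 / MSR_K7_PERFCTR0 ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 *
 * Note that the owner bitmaps are per-CPU, so a reservation only covers the
 * CPU the caller is currently running on.
 */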
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	volatile int *endflag = data;
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (*endflag == 0)
		mb();
}
static int __init check_nmi_watchdog(void)
{
	volatile int endflag = 0;
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Actually it should be safe for most systems before 2004 too except
	   for some IBM systems that corrupt registers when NMI happens
	   during SMM. Unfortunately we don't have more exact information
	   on these and use this coarse check. */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;

	mdelay((10*1000)/nmi_hz); // wait 10 ticks
	for_each_possible_cpu(cpu) {
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu,
				prev_nmi_count[cpu],
				nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");
	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		/*
		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
		 * are writable, with higher bits sign extending from bit 31.
		 * So we can only program the counter with a 31-bit value, and
		 * bit 31 must be set so that bits 32..63 sign-extend to 1.
		 * Find the appropriate nmi_hz.
		 */
		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
		    ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
			u64 count = (u64)cpu_khz * 1000;
			do_div(count, 0x7fffffffUL);
			nmi_hz = count + 1;
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
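
/*
 * Worked example of the nmi_hz clamp above, with made-up numbers: on a
 * 3.2 GHz CPU, (u64)cpu_khz * 1000 is 3,200,000,000, which exceeds
 * 0x7fffffff (2,147,483,647).  do_div() then yields 1, so nmi_hz becomes 2
 * and write_watchdog_counter() programs -(3,200,000,000 / 2) =
 * -1,600,000,000.  Its magnitude is below 2^31, so the value sign-extends
 * correctly from bit 31 and the counter overflows about twice a second.
 */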
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;
	/*
	 * If any other x86 CPU has a local APIC, then
	 * please test the NMI stuff there and send me the
	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
	 */
	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
		return 0;  /* no lapic support */
	nmi_watchdog = nmi;
	return 1;
}

__setup("nmi_watchdog=", setup_nmi_watchdog);
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}
void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */

static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}

static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work.  -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);

#endif	/* CONFIG_PM */
/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}
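
/*
 * The arithmetic above, spelled out with made-up numbers: with
 * cpu_khz == 2000000 (a 2 GHz CPU) and nmi_hz == 1000, count becomes
 * 2,000,000,000 / 1000 = 2,000,000.  Writing -2,000,000 makes the counter
 * overflow after two million event ticks, i.e. roughly every millisecond
 * on a busy CPU, and each overflow raises the watchdog NMI.
 */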
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL<<39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;  //unused
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
void setup_apic_nmi_watchdog (void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;
				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;
				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
void touch_nmi_watchdog (void)
{
	int i;

	/*
	 * Just reset the alert counters, (other CPUs might be
	 * spinning on locks we hold):
	 */
	for_each_possible_cpu(i)
		alert_counter[i] = 0;

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
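
/*
 * Usage sketch (illustrative; poll_device_done() is a hypothetical helper,
 * not something defined in the tree): code that legitimately spins for a
 * long time with interrupts off is expected to poke the watchdog from
 * inside its loop so the stuck-CPU check in nmi_watchdog_tick() below does
 * not fire:
 *
 *	while (!poll_device_done()) {
 *		cpu_relax();
 *		touch_nmi_watchdog();
 *	}
 */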
extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{

	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (wd->enabled) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
			}
			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M need to re-unmask
				 * the apic vector but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
			}
			/* start the cycle over again */
			write_watchdog_counter(wd->perfctr_msr, NULL);
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}
int do_nmi_callback(struct pt_regs * regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}
/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);