2 * (c) 2005 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
7 * Written by Jacob Shin - AMD, Inc.
9 * Support : jacob.shin@amd.com
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
12 * MC4_MISC0 exists per physical processor.
16 #include <linux/cpu.h>
17 #include <linux/errno.h>
18 #include <linux/init.h>
19 #include <linux/interrupt.h>
20 #include <linux/kobject.h>
21 #include <linux/notifier.h>
22 #include <linux/sched.h>
23 #include <linux/smp.h>
24 #include <linux/sysdev.h>
25 #include <linux/sysfs.h>
29 #include <asm/percpu.h>
/* printk prefix and version string for this driver */
32 #define PFX "mce_threshold: "
33 #define VERSION "version 1.00.9"
/*
 * Bit fields of the high 32 bits of MC4_MISC (as read by rdmsr).
 * The 12-bit error counter is programmed as (THRESHOLD_MAX - limit) so
 * that it overflows after 'limit' errors — see threshold_restart_bank()
 * and show_error_count(), which undo this bias when reporting.
 */
35 #define THRESHOLD_MAX 0xFFF /* largest programmable threshold (12 bits) */
36 #define INT_TYPE_APIC 0x00020000 /* IntType field value: APIC interrupt */
37 #define MASK_VALID_HI 0x80000000 /* register valid (MSR bit 63) */
38 #define MASK_LVTOFF_HI 0x00F00000 /* LVT offset, shifted out by 20 in mce_amd_feature_init() */
39 #define MASK_COUNT_EN_HI 0x00080000 /* counter enable */
40 #define MASK_INT_TYPE_HI 0x00060000 /* interrupt type field */
41 #define MASK_OVERFLOW_HI 0x00010000 /* counter overflow status */
42 #define MASK_ERR_COUNT_HI 0x00000FFF /* error counter value */
43 #define MASK_OVERFLOW 0x0001000000000000L /* overflow bit in the full 64-bit MSR (rdmsrl view) */
/*
 * Per-bank software state, exported through sysfs as one kobject per bank.
 * NOTE(review): field list elided in this listing; members referenced
 * elsewhere in the file are cpu, bank, interrupt_enable, threshold_limit
 * and kobj — confirm against the full source.
 */
45 struct threshold_bank {
/* defaults applied at boot by mce_amd_feature_init(): interrupt delivery
 * disabled, threshold at its maximum value */
53 static struct threshold_bank threshold_defaults = {
54 .interrupt_enable = 0,
55 .threshold_limit = THRESHOLD_MAX,
/* marks MC banks that are shared between the cores of one physical CPU;
 * for those, only the first core owns the sysfs files (others symlink) */
59 static unsigned char shared_bank[NR_BANKS] = {
64 static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
70 /* must be called with correct cpu affinity */
/*
 * Reprogram one bank's MC_MISC MSR from the software state in @b.
 * @reset:     nonzero forces the error count and overflow bit to be cleared
 * @old_limit: previous threshold_limit, used to rebias the live count when
 *             only the limit changed (0 = no limit change)
 * The hardware counter is written as (THRESHOLD_MAX - limit) so it
 * overflows exactly when 'limit' errors have been seen.
 */
71 static void threshold_restart_bank(struct threshold_bank *b,
72 int reset, u16 old_limit)
74 u32 mci_misc_hi, mci_misc_lo;
76 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
78 if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
79 reset = 1; /* limit cannot be lower than err count */
81 if (reset) { /* reset err count and overflow bit */
83 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
84 (THRESHOLD_MAX - b->threshold_limit)
85 } else if (old_limit) { /* change limit w/o reset */
/* keep the number of already-seen errors: rebias the live count by the
 * difference between the old and new limits */
86 int new_count = (mci_misc_hi & THRESHOLD_MAX) +
87 (old_limit - b->threshold_limit);
88 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
89 (new_count & THRESHOLD_MAX);
/* select interrupt delivery: APIC when interrupt_enable is set, none
 * otherwise (the conditional's selector expression is elided above) */
93 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
94 (mci_misc_hi &= ~MASK_INT_TYPE_HI);
/* (re)enable the counter and write the result back */
96 mci_misc_hi |= MASK_COUNT_EN_HI;
97 wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
/*
 * Per-CPU init: probe every MC bank's MISC register, record usable banks
 * in this cpu's bank_map, program the threshold LVT offset, and arm each
 * bank with the boot defaults via threshold_restart_bank().
 */
100 void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
103 u32 mci_misc_lo, mci_misc_hi;
104 unsigned int cpu = smp_processor_id();
106 for (bank = 0; bank < NR_BANKS; ++bank) {
107 rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
109 /* !valid, !counter present, bios locked */
/* NB: '>>' binds tighter than '&', so these test MASK_VALID_HI shifted
 * down by 1 and 2 bit positions — the counter-present and BIOS-locked
 * bits respectively, per the comment above */
110 if (!(mci_misc_hi & MASK_VALID_HI) ||
111 !(mci_misc_hi & MASK_VALID_HI >> 1) ||
112 (mci_misc_hi & MASK_VALID_HI >> 2))
115 per_cpu(bank_map, cpu) |= (1 << bank);
/* shared banks are owned by the first core only; skip on siblings */
118 if (shared_bank[bank] && cpu_core_id[cpu])
/* point the local APIC threshold LVT at the BIOS-assigned offset */
122 setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
123 threshold_defaults.cpu = cpu;
124 threshold_defaults.bank = bank;
125 threshold_restart_bank(&threshold_defaults, 0, 0);
130 * APIC Interrupt Handler
134 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
135 * the interrupt goes off when error_count reaches threshold_limit.
136 * the handler will simply log mcelog w/ software defined bank number.
138 asmlinkage void mce_threshold_interrupt(void)
/* build a synthetic mce record for this cpu */
147 memset(&m, 0, sizeof(m));
149 m.cpu = smp_processor_id();
151 /* assume first bank caused it */
/* scan banks in order; the first one with its overflow bit set is
 * reported (logging tail elided in this listing) */
152 for (bank = 0; bank < NR_BANKS; ++bank) {
153 m.bank = MCE_THRESHOLD_BASE + bank;
154 rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
156 if (m.misc & MASK_OVERFLOW) {
/* sysdev class backing /sys/devices/system/threshold, with one per-cpu
 * sys_device hanging off it */
169 static struct sysdev_class threshold_sysclass = {
170 set_kset_name("threshold"),
173 static DEFINE_PER_CPU(struct sys_device, device_threshold);
/* sysfs attribute wrapper: show/store operate on a threshold_bank */
175 struct threshold_attr {
176 struct attribute attr;
177 ssize_t(*show) (struct threshold_bank *, char *);
178 ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
/* per-cpu array of pointers to each bank's state (NULL if absent) */
181 static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
/*
 * Pin the current task to @cpu so the following rdmsr/wrmsr hit that
 * cpu's MSRs. NOTE(review): the return statement is elided here, but
 * callers use the result as the previous mask — presumably 'oldmask'.
 */
183 static cpumask_t affinity_set(unsigned int cpu)
185 cpumask_t oldmask = current->cpus_allowed;
186 cpumask_t newmask = CPU_MASK_NONE;
187 cpu_set(cpu, newmask);
188 set_cpus_allowed(current, newmask);
/* undo affinity_set(): restore the task's saved cpu mask */
192 static void affinity_restore(cpumask_t oldmask)
194 set_cpus_allowed(current, oldmask);
/* generate a trivial sysfs show_<name>() that prints one bank field in hex */
197 #define SHOW_FIELDS(name) \
198 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
200 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
202 SHOW_FIELDS(interrupt_enable)
203 SHOW_FIELDS(threshold_limit)
/*
 * sysfs store for 'interrupt_enable': any nonzero value enables the APIC
 * threshold interrupt for this bank. The MSR update runs on the owning
 * cpu via the affinity_set()/affinity_restore() bracket.
 */
205 static ssize_t store_interrupt_enable(struct threshold_bank *b,
206 const char *buf, size_t count)
210 unsigned long new = simple_strtoul(buf, &end, 0);
213 b->interrupt_enable = !!new;
215 oldmask = affinity_set(b->cpu);
216 threshold_restart_bank(b, 0, 0);
217 affinity_restore(oldmask);
/*
 * sysfs store for 'threshold_limit': accepts 0..THRESHOLD_MAX and passes
 * the old limit to threshold_restart_bank() so the live error count is
 * rebias-ed rather than reset.
 */
222 static ssize_t store_threshold_limit(struct threshold_bank *b,
223 const char *buf, size_t count)
228 unsigned long new = simple_strtoul(buf, &end, 0);
231 if (new > THRESHOLD_MAX)
235 old = b->threshold_limit;
236 b->threshold_limit = new;
238 oldmask = affinity_set(b->cpu);
239 threshold_restart_bank(b, 0, old);
240 affinity_restore(oldmask);
/*
 * sysfs show for 'error_count': read the hardware counter on the owning
 * cpu and subtract the (THRESHOLD_MAX - limit) bias to recover the number
 * of errors actually seen.
 */
245 static ssize_t show_error_count(struct threshold_bank *b, char *buf)
249 oldmask = affinity_set(b->cpu);
250 rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
251 affinity_restore(oldmask);
252 return sprintf(buf, "%x\n",
253 (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
/* sysfs store for 'error_count': any write resets the count and the
 * overflow bit (reset=1); the written value itself is ignored */
256 static ssize_t store_error_count(struct threshold_bank *b,
257 const char *buf, size_t count)
260 oldmask = affinity_set(b->cpu);
261 threshold_restart_bank(b, 1, 0);
262 affinity_restore(oldmask);
/* build a threshold_attr initializer from name/mode/show/store */
266 #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
267 .attr = {.name = __stringify(_name), .mode = _mode }, \
/* declare a rw (0644) attribute wired to show_<name>/store_<name> */
272 #define ATTR_FIELDS(name) \
273 static struct threshold_attr name = \
274 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
276 ATTR_FIELDS(interrupt_enable);
277 ATTR_FIELDS(threshold_limit);
278 ATTR_FIELDS(error_count);
/* attributes created for every bank kobject; NOTE(review): the
 * error_count entry is elided in this listing — confirm it is listed
 * before the NULL terminator in the full source */
280 static struct attribute *default_attrs[] = {
281 &interrupt_enable.attr,
282 &threshold_limit.attr,
/* recover the containing objects from the raw sysfs pointers */
287 #define to_bank(k) container_of(k,struct threshold_bank,kobj)
288 #define to_attr(a) container_of(a,struct threshold_attr,attr)
/* generic sysfs show: dispatch to the attribute's show op, -EIO if none */
290 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
292 struct threshold_bank *b = to_bank(kobj);
293 struct threshold_attr *a = to_attr(attr);
295 ret = a->show ? a->show(b, buf) : -EIO;
/* generic sysfs store: dispatch to the attribute's store op, -EIO if none */
299 static ssize_t store(struct kobject *kobj, struct attribute *attr,
300 const char *buf, size_t count)
302 struct threshold_bank *b = to_bank(kobj);
303 struct threshold_attr *a = to_attr(attr);
305 ret = a->store ? a->store(b, buf, count) : -EIO;
/* kobject glue: route all bank attribute I/O through show()/store() */
309 static struct sysfs_ops threshold_ops = {
314 static struct kobj_type threshold_ktype = {
315 .sysfs_ops = &threshold_ops,
316 .default_attrs = default_attrs,
319 /* symlinks sibling shared banks to first core. first core owns dir/files. */
/*
 * Create the sysfs representation for (cpu, bank). For a shared bank on a
 * non-first core, only a symlink to the first core's kobject is made and
 * both cpus' threshold_banks[] point at the same struct; otherwise a new
 * threshold_bank is allocated, initialised with the boot defaults and
 * registered as a kobject under this cpu's sysdev.
 */
320 static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
323 struct threshold_bank *b = NULL;
326 if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */
328 unsigned lcpu = first_cpu(cpu_core_map[cpu]);
329 if (cpu_core_id[lcpu])
330 goto out; /* first core not up yet */
/* share the owning core's bank object and link to its directory */
332 b = per_cpu(threshold_banks, lcpu)[bank];
335 sprintf(name, "bank%i", bank);
336 err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
340 per_cpu(threshold_banks, cpu)[bank] = b;
/* owning-core path: allocate and zero fresh state */
345 b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
350 memset(b, 0, sizeof(struct threshold_bank));
354 b->interrupt_enable = 0;
355 b->threshold_limit = THRESHOLD_MAX;
356 kobject_set_name(&b->kobj, "bank%i", bank);
357 b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
358 b->kobj.ktype = &threshold_ktype;
360 err = kobject_register(&b->kobj);
365 per_cpu(threshold_banks, cpu)[bank] = b;
370 /* create dir/files for all valid threshold banks */
/*
 * Register this cpu's sysdev and then create a bank directory (or
 * symlink) for every bank set in its bank_map.
 */
371 static __cpuinit int threshold_create_device(unsigned int cpu)
376 per_cpu(device_threshold, cpu).id = cpu;
377 per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
378 err = sysdev_register(&per_cpu(device_threshold, cpu));
382 for (bank = 0; bank < NR_BANKS; ++bank) {
383 if (!(per_cpu(bank_map, cpu) & 1 << bank))
385 err = threshold_create_bank(cpu, bank);
393 #ifdef CONFIG_HOTPLUG_CPU
395 * let's be hotplug friendly.
396 * in case of multiple core processors, the first core always takes ownership
397 * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
400 /* cpu hotplug call removes all symlinks before first core dies */
/*
 * Tear down (cpu, bank)'s sysfs entry. If the bank is shared and other
 * cpus still hold references (kref > 2: our pointer + sysfs + siblings),
 * only this cpu's symlink is removed; otherwise the kobject is
 * unregistered and the bank state freed.
 */
401 static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
403 struct threshold_bank *b;
406 b = per_cpu(threshold_banks, cpu)[bank];
409 if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
410 sprintf(name, "bank%i", bank);
411 sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
412 per_cpu(threshold_banks, cpu)[bank] = NULL;
/* last user: drop the kobject and free the bank state */
414 kobject_unregister(&b->kobj);
415 kfree(per_cpu(threshold_banks, cpu)[bank]);
/* remove every bank entry for this cpu, then unregister its sysdev */
419 static __cpuinit void threshold_remove_device(unsigned int cpu)
423 for (bank = 0; bank < NR_BANKS; ++bank) {
424 if (!(per_cpu(bank_map, cpu) & 1 << bank))
426 threshold_remove_bank(cpu, bank);
428 sysdev_unregister(&per_cpu(device_threshold, cpu));
431 /* link all existing siblings when first core comes up */
/*
 * Run on the first core of a package: walk the sibling cpus and create
 * symlinks for every shared bank they should see. Non-first cores
 * (cpu_core_id != 0) bail out early.
 */
432 static __cpuinit int threshold_create_symlinks(unsigned int cpu)
435 unsigned int lcpu = 0;
437 if (cpu_core_id[cpu])
439 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
442 for (bank = 0; bank < NR_BANKS; ++bank) {
443 if (!(per_cpu(bank_map, cpu) & 1 << bank))
445 if (!shared_bank[bank])
447 err = threshold_create_bank(lcpu, bank);
453 /* remove all symlinks before first core dies. */
/*
 * Mirror of threshold_create_symlinks(): on the first core of a package,
 * remove the sibling cpus' symlinks for every shared bank.
 */
454 static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
457 unsigned int lcpu = 0;
458 if (cpu_core_id[cpu])
460 for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
463 for (bank = 0; bank < NR_BANKS; ++bank) {
464 if (!(per_cpu(bank_map, cpu) & 1 << bank))
466 if (!shared_bank[bank])
468 threshold_remove_bank(lcpu, bank);
472 #else /* !CONFIG_HOTPLUG_CPU */
/* no-op stubs when cpu hotplug is compiled out */
473 static __cpuinit void threshold_create_symlinks(unsigned int cpu)
476 static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
479 static void threshold_remove_device(unsigned int cpu)
484 /* get notified when a cpu comes on/off */
/*
 * CPU hotplug notifier: create device+symlinks on online, drop symlinks
 * before the first core goes down (recreating them if the down is
 * aborted), and remove the device once the cpu is dead. The case labels
 * are elided in this listing.
 */
485 static int threshold_cpu_callback(struct notifier_block *nfb,
486 unsigned long action, void *hcpu)
488 /* cpu was unsigned int to begin with */
489 unsigned int cpu = (unsigned long)hcpu;
496 threshold_create_device(cpu);
497 threshold_create_symlinks(cpu);
499 case CPU_DOWN_PREPARE:
500 threshold_remove_symlinks(cpu);
502 case CPU_DOWN_FAILED:
503 threshold_create_symlinks(cpu);
506 threshold_remove_device(cpu);
515 static struct notifier_block threshold_cpu_notifier = {
516 .notifier_call = threshold_cpu_callback,
/*
 * Driver init: register the sysdev class, create devices for cpus that
 * came online before the notifier existed, then install the hotplug
 * notifier for later cpus.
 */
519 static __init int threshold_init_device(void)
524 err = sysdev_class_register(&threshold_sysclass);
528 /* to hit CPUs online before the notifier is up */
529 for_each_online_cpu(lcpu) {
530 err = threshold_create_device(lcpu);
534 register_cpu_notifier(&threshold_cpu_notifier);
540 device_initcall(threshold_init_device);