/*
 *  (c) 2005 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *
 *  Support : jacob.shin@amd.com
 *
 *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
 *  MC4_MISC0 exists per physical processor.
 */
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/percpu.h>
#define PFX "mce_threshold: "
#define VERSION "version 1.00.9"

#define NR_BANKS 5		/* MC0 .. MC4 */
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
#define MASK_VALID_HI 0x80000000
#define MASK_LVTOFF_HI 0x00F00000
#define MASK_COUNT_EN_HI 0x00080000
#define MASK_INT_TYPE_HI 0x00060000
#define MASK_OVERFLOW_HI 0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_OVERFLOW 0x0001000000000000L
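/*
 * Bit layout of the MCi_MISC high dword implied by the masks above
 * (full-MSR bit numbers in parentheses): Valid = 31 (63), CntP = 30
 * (62), Locked = 29 (61), LvtOff = 23:20 (55:52), CntEn = 19 (51),
 * IntType = 18:17 (50:49), Ovrflw = 16 (48), ErrCnt = 11:0 (43:32).
 * MASK_OVERFLOW is the same Ovrflw bit seen through the 64-bit
 * rdmsrl() read in the interrupt handler.
 */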
struct threshold_bank {
	unsigned int cpu;
	u8 bank;
	u8 interrupt_enable;
	u16 threshold_limit;
	struct kobject kobj;
};

static struct threshold_bank threshold_defaults = {
	.interrupt_enable = 0,
	.threshold_limit = THRESHOLD_MAX,
};

/* MC4, the DRAM ECC bank, is shared by all cores of a physical processor */
static unsigned char shared_bank[NR_BANKS] = {
	0, 0, 0, 0, 1
};

static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
/* must be called with correct cpu affinity */
static void threshold_restart_bank(struct threshold_bank *b,
				   int reset, u16 old_limit)
{
	u32 mci_misc_hi, mci_misc_lo;

	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);

	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
		reset = 1;	/* limit cannot be lower than err count */

	if (reset) {		/* reset err count and overflow bit */
		mci_misc_hi =
		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
		    (THRESHOLD_MAX - b->threshold_limit);
	} else if (old_limit) {	/* change limit w/o reset */
		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
		    (old_limit - b->threshold_limit);
		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
		    (new_count & THRESHOLD_MAX);
	}

	b->interrupt_enable ?
	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);

	mci_misc_hi |= MASK_COUNT_EN_HI;
	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
}
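/*
 * Worked example of the preload arithmetic above: with
 * threshold_limit = 10, ErrCnt is written as THRESHOLD_MAX - 10 =
 * 0xFF5, so the hardware counter overflows (setting Ovrflw, and
 * raising the APIC interrupt when enabled) after exactly 10 more
 * errors.  Lowering the limit from 10 to 4 without a reset adds the
 * difference (6) to the live count, preserving errors already seen.
 */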
void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
{
	int bank;
	u32 mci_misc_lo, mci_misc_hi;
	unsigned int cpu = smp_processor_id();

	for (bank = 0; bank < NR_BANKS; ++bank) {
		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);

		/* !valid, !counter present, bios locked */
		if (!(mci_misc_hi & MASK_VALID_HI) ||
		    !(mci_misc_hi & (MASK_VALID_HI >> 1)) ||
		    (mci_misc_hi & (MASK_VALID_HI >> 2)))
			continue;

		per_cpu(bank_map, cpu) |= (1 << bank);

		/* only the first core of each node initializes shared banks */
		if (shared_bank[bank] && cpu_core_id[cpu])
			continue;

		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
		threshold_defaults.cpu = cpu;
		threshold_defaults.bank = bank;
		threshold_restart_bank(&threshold_defaults, 0, 0);
	}
}
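/*
 * Note: (mci_misc_hi & MASK_LVTOFF_HI) >> 20 above extracts the 4-bit
 * LvtOff field; setup_threshold_lvt() is expected to point the
 * corresponding APIC LVT entry at THRESHOLD_APIC_VECTOR, which the
 * handler below services.
 */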
/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */
asmlinkage void mce_threshold_interrupt(void)
{
	int bank;
	struct mce m;

	ack_APIC_irq();
	irq_enter();

	memset(&m, 0, sizeof(m));
	rdtscll(m.tsc);
	m.cpu = smp_processor_id();

	/* assume first bank caused it */
	for (bank = 0; bank < NR_BANKS; ++bank) {
		m.bank = MCE_THRESHOLD_BASE + bank;
		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);

		if (m.misc & MASK_OVERFLOW) {
			mce_log(&m);
			goto out;
		}
	}
out:
	irq_exit();
}
/*
 * Sysfs Interface
 */

static struct sysdev_class threshold_sysclass = {
	set_kset_name("threshold"),
};

static DEFINE_PER_CPU(struct sys_device, device_threshold);

struct threshold_attr {
	struct attribute attr;
	ssize_t (*show) (struct threshold_bank *, char *);
	ssize_t (*store) (struct threshold_bank *, const char *, size_t count);
};

static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
/* the bank MSRs are per-cpu, so reads/writes must run on the owning cpu */
static cpumask_t affinity_set(unsigned int cpu)
{
	cpumask_t oldmask = current->cpus_allowed;
	cpumask_t newmask = CPU_MASK_NONE;

	cpu_set(cpu, newmask);
	set_cpus_allowed(current, newmask);
	return oldmask;
}

static void affinity_restore(cpumask_t oldmask)
{
	set_cpus_allowed(current, oldmask);
}
#define SHOW_FIELDS(name)                                                  \
	static ssize_t show_ ## name(struct threshold_bank *b, char *buf)  \
	{                                                                  \
		return sprintf(buf, "%lx\n", (unsigned long) b->name);     \
	}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)
static ssize_t store_interrupt_enable(struct threshold_bank *b,
				      const char *buf, size_t count)
{
	char *end;
	cpumask_t oldmask;
	unsigned long new = simple_strtoul(buf, &end, 0);

	if (end == buf)
		return -EINVAL;
	b->interrupt_enable = !!new;

	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 0, 0);
	affinity_restore(oldmask);

	return end - buf;
}
static ssize_t store_threshold_limit(struct threshold_bank *b,
				     const char *buf, size_t count)
{
	char *end;
	cpumask_t oldmask;
	u16 old;
	unsigned long new = simple_strtoul(buf, &end, 0);

	if (end == buf)
		return -EINVAL;
	if (new > THRESHOLD_MAX)
		new = THRESHOLD_MAX;
	if (new < 1)
		new = 1;
	old = b->threshold_limit;
	b->threshold_limit = new;

	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 0, old);
	affinity_restore(oldmask);

	return end - buf;
}
static ssize_t show_error_count(struct threshold_bank *b, char *buf)
{
	u32 high, low;
	cpumask_t oldmask;

	oldmask = affinity_set(b->cpu);
	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
	affinity_restore(oldmask);
	return sprintf(buf, "%x\n",
		       (high & THRESHOLD_MAX) -
		       (THRESHOLD_MAX - b->threshold_limit));
}
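/*
 * The subtraction above undoes the preload done by
 * threshold_restart_bank(): the raw counter starts at
 * THRESHOLD_MAX - threshold_limit, so removing that bias yields the
 * number of errors seen since the last reset.
 */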
static ssize_t store_error_count(struct threshold_bank *b,
				 const char *buf, size_t count)
{
	cpumask_t oldmask;

	oldmask = affinity_set(b->cpu);
	threshold_restart_bank(b, 1, 0);	/* reset count and overflow bit */
	affinity_restore(oldmask);
	return 1;
}
#define THRESHOLD_ATTR(_name, _mode, _show, _store) {         \
	.attr = {.name = __stringify(_name), .mode = _mode }, \
	.show = _show,                                        \
	.store = _store,                                      \
};

#define ATTR_FIELDS(name) \
	static struct threshold_attr name = \
	THRESHOLD_ATTR(name, 0644, show_ ## name, store_ ## name)

ATTR_FIELDS(interrupt_enable);
ATTR_FIELDS(threshold_limit);
ATTR_FIELDS(error_count);

static struct attribute *default_attrs[] = {
	&interrupt_enable.attr,
	&threshold_limit.attr,
	&error_count.attr,
	NULL
};
#define to_bank(k) container_of(k, struct threshold_bank, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct threshold_bank *b = to_bank(kobj);
	struct threshold_attr *a = to_attr(attr);

	return a->show ? a->show(b, buf) : -EIO;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct threshold_bank *b = to_bank(kobj);
	struct threshold_attr *a = to_attr(attr);

	return a->store ? a->store(b, buf, count) : -EIO;
}

static struct sysfs_ops threshold_ops = {
	.show = show,
	.store = store,
};

static struct kobj_type threshold_ktype = {
	.sysfs_ops = &threshold_ops,
	.default_attrs = default_attrs,
};
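/*
 * Resulting user interface, assuming the usual sysdev layout under
 * /sys/devices/system:
 *
 *	/sys/devices/system/threshold/threshold<cpu>/bank<N>/error_count
 *	/sys/devices/system/threshold/threshold<cpu>/bank<N>/threshold_limit
 *	/sys/devices/system/threshold/threshold<cpu>/bank<N>/interrupt_enable
 *
 * e.g. writing 100 to bank4/threshold_limit and 1 to
 * bank4/interrupt_enable arms a DRAM ECC threshold interrupt after
 * 100 further errors.
 */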
/* symlinks sibling shared banks to first core.  first core owns dir/files. */
static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
{
	int err = 0;
	struct threshold_bank *b = NULL;

	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
		char name[16];
		unsigned lcpu = first_cpu(cpu_core_map[cpu]);

		if (cpu_core_id[lcpu])
			goto out;	/* first core not up yet */
		b = per_cpu(threshold_banks, lcpu)[bank];
		if (!b)
			goto out;
		sprintf(name, "bank%i", bank);
		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
					&b->kobj, name);
		if (!err)
			per_cpu(threshold_banks, cpu)[bank] = b;
		goto out;
	}

	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto out;
	}
	memset(b, 0, sizeof(struct threshold_bank));

	b->cpu = cpu;
	b->bank = bank;
	b->interrupt_enable = 0;
	b->threshold_limit = THRESHOLD_MAX;
	kobject_set_name(&b->kobj, "bank%i", bank);
	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
	b->kobj.ktype = &threshold_ktype;

	err = kobject_register(&b->kobj);
	if (err) {
		kfree(b);
		goto out;
	}
	per_cpu(threshold_banks, cpu)[bank] = b;
out:
	return err;
}
/* create dir/files for all valid threshold banks */
static __cpuinit int threshold_create_device(unsigned int cpu)
{
	int bank;
	int err = 0;

	per_cpu(device_threshold, cpu).id = cpu;
	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
	err = sysdev_register(&per_cpu(device_threshold, cpu));
	if (err)
		goto out;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		err = threshold_create_bank(cpu, bank);
		if (err)
			goto out;
	}
out:
	return err;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * let's be hotplug friendly.
 * in case of multiple core processors, the first core always takes ownership
 * of shared sysfs dir/files, and the rest of the cores will be symlinked to it.
 */
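/*
 * Lifecycle, as wired up in threshold_cpu_callback() below: CPU_ONLINE
 * creates the device and symlinks, CPU_DOWN_PREPARE removes the
 * symlinks, CPU_DOWN_FAILED restores them, and CPU_DEAD removes the
 * device itself.
 */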
/* cpu hotplug call removes all symlinks before first core dies */
static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
{
	struct threshold_bank *b;
	char name[16];

	b = per_cpu(threshold_banks, cpu)[bank];
	if (!b)
		return;
	/* refcount > 2: a sibling's symlink still references the bank kobject */
	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
		sprintf(name, "bank%i", bank);
		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
	} else {
		kobject_unregister(&b->kobj);
		kfree(b);
	}
	/* clear the stale pointer in either case */
	per_cpu(threshold_banks, cpu)[bank] = NULL;
}
static __cpuinit void threshold_remove_device(unsigned int cpu)
{
	int bank;

	for (bank = 0; bank < NR_BANKS; ++bank) {
		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
			continue;
		threshold_remove_bank(cpu, bank);
	}
	sysdev_unregister(&per_cpu(device_threshold, cpu));
}
/* link all existing siblings when first core comes up */
static __cpuinit int threshold_create_symlinks(unsigned int cpu)
{
	int bank, err = 0;
	unsigned int lcpu = 0;

	if (cpu_core_id[cpu])
		return 0;	/* only the first core owns the dir/files */
	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
		if (lcpu == cpu)
			continue;
		for (bank = 0; bank < NR_BANKS; ++bank) {
			if (!(per_cpu(bank_map, cpu) & (1 << bank)))
				continue;
			if (!shared_bank[bank])
				continue;
			err = threshold_create_bank(lcpu, bank);
		}
	}
	return err;
}
/* remove all symlinks before first core dies. */
static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
{
	int bank;
	unsigned int lcpu = 0;

	if (cpu_core_id[cpu])
		return;
	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
		if (lcpu == cpu)
			continue;
		for (bank = 0; bank < NR_BANKS; ++bank) {
			if (!(per_cpu(bank_map, cpu) & (1 << bank)))
				continue;
			if (!shared_bank[bank])
				continue;
			threshold_remove_bank(lcpu, bank);
		}
	}
}
#else /* !CONFIG_HOTPLUG_CPU */
static __cpuinit int threshold_create_symlinks(unsigned int cpu)
{
	return 0;
}
static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
{
}
static void threshold_remove_device(unsigned int cpu)
{
}
#endif
/* get notified when a cpu comes on/off */
static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	/* cpu was unsigned int to begin with */
	unsigned int cpu = (unsigned long)hcpu;

	if (cpu >= NR_CPUS)
		goto out;

	switch (action) {
	case CPU_ONLINE:
		threshold_create_device(cpu);
		threshold_create_symlinks(cpu);
		break;
	case CPU_DOWN_PREPARE:
		threshold_remove_symlinks(cpu);
		break;
	case CPU_DOWN_FAILED:
		threshold_create_symlinks(cpu);
		break;
	case CPU_DEAD:
		threshold_remove_device(cpu);
		break;
	default:
		break;
	}
out:
	return NOTIFY_OK;
}

static struct notifier_block threshold_cpu_notifier = {
	.notifier_call = threshold_cpu_callback,
};
static __init int threshold_init_device(void)
{
	int lcpu = 0;
	int err;

	err = sysdev_class_register(&threshold_sysclass);
	if (err)
		return err;

	/* to hit CPUs online before the notifier is up */
	for_each_online_cpu(lcpu) {
		err = threshold_create_device(lcpu);
		if (err)
			return err;
	}
	register_cpu_notifier(&threshold_cpu_notifier);

	return 0;
}

device_initcall(threshold_init_device);