2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/proc_fs.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/smp.h>
22 #include <linux/init.h>
23 #include <linux/sysctl.h>
24 #include <linux/highmem.h>
25 #include <linux/timer.h>
26 #include <linux/slab.h>
27 #include <linux/jiffies.h>
28 #include <linux/spinlock.h>
29 #include <linux/list.h>
30 #include <linux/sysdev.h>
31 #include <linux/ctype.h>
32 #include <linux/kthread.h>
34 #include <asm/uaccess.h>
40 #define EDAC_MC_VERSION "Ver: 2.0.0 " __DATE__
42 /* For now, disable the EDAC sysfs code. The sysfs interface that EDAC
43 * presents to user space needs more thought, and is likely to change
46 #define DISABLE_EDAC_SYSFS
48 #ifdef CONFIG_EDAC_DEBUG
49 /* Values of 0 to 4 will generate output */
50 int edac_debug_level = 1;
51 EXPORT_SYMBOL(edac_debug_level);
54 /* EDAC Controls, setable by module parameter, and sysfs */
55 static int log_ue = 1;
56 static int log_ce = 1;
57 static int panic_on_ue;
58 static int poll_msec = 1000;
60 static int check_pci_parity = 0; /* default YES check PCI parity */
61 static int panic_on_pci_parity; /* default no panic on PCI Parity */
62 static atomic_t pci_parity_count = ATOMIC_INIT(0);
64 /* lock to memory controller's control array */
65 static DECLARE_MUTEX(mem_ctls_mutex);
66 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
68 static struct task_struct *edac_thread;
70 /* Structure of the whitelist and blacklist arrays */
71 struct edac_pci_device_list {
72 unsigned int vendor; /* Vendor ID */
73 unsigned int device; /* Device ID */
77 #define MAX_LISTED_PCI_DEVICES 32
79 /* List of PCI devices (vendor-id:device-id) that should be skipped */
80 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
81 static int pci_blacklist_count;
83 /* List of PCI devices (vendor-id:device-id) that should be scanned */
84 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
85 static int pci_whitelist_count ;
87 /* START sysfs data and methods */
89 #ifndef DISABLE_EDAC_SYSFS
91 static const char *mem_types[] = {
92 [MEM_EMPTY] = "Empty",
93 [MEM_RESERVED] = "Reserved",
94 [MEM_UNKNOWN] = "Unknown",
98 [MEM_SDR] = "Unbuffered-SDR",
99 [MEM_RDR] = "Registered-SDR",
100 [MEM_DDR] = "Unbuffered-DDR",
101 [MEM_RDDR] = "Registered-DDR",
105 static const char *dev_types[] = {
106 [DEV_UNKNOWN] = "Unknown",
116 static const char *edac_caps[] = {
117 [EDAC_UNKNOWN] = "Unknown",
118 [EDAC_NONE] = "None",
119 [EDAC_RESERVED] = "Reserved",
120 [EDAC_PARITY] = "PARITY",
122 [EDAC_SECDED] = "SECDED",
123 [EDAC_S2ECD2ED] = "S2ECD2ED",
124 [EDAC_S4ECD4ED] = "S4ECD4ED",
125 [EDAC_S8ECD8ED] = "S8ECD8ED",
126 [EDAC_S16ECD16ED] = "S16ECD16ED"
130 /* sysfs object: /sys/devices/system/edac */
131 static struct sysdev_class edac_class = {
132 set_kset_name("edac"),
136 * /sys/devices/system/edac/mc
137 * /sys/devices/system/edac/pci
139 static struct kobject edac_memctrl_kobj;
140 static struct kobject edac_pci_kobj;
143 * /sys/devices/system/edac/mc;
144 * data structures and methods
/* Show method for string attributes: 'ptr' is treated as a NUL-terminated
 * string and copied to 'buffer' followed by a newline.
 * Returns the sprintf() character count.
 */
static ssize_t memctrl_string_show(void *ptr, char *buffer)
{
	const char *text = ptr;

	return sprintf(buffer, "%s\n", text);
}
/* Show method for integer attributes: prints the int that 'ptr' points to
 * in decimal, followed by a newline.
 * Returns the sprintf() character count.
 */
static ssize_t memctrl_int_show(void *ptr, char *buffer)
{
	const int *current_val = ptr;

	return sprintf(buffer, "%d\n", *current_val);
}
160 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
162 int *value = (int*) ptr;
164 if (isdigit(*buffer))
165 *value = simple_strtoul(buffer, NULL, 0);
170 struct memctrl_dev_attribute {
171 struct attribute attr;
173 ssize_t (*show)(void *,char *);
174 ssize_t (*store)(void *, const char *, size_t);
177 /* Set of show/store abstract level functions for memory control object */
179 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
181 struct memctrl_dev_attribute *memctrl_dev;
182 memctrl_dev = (struct memctrl_dev_attribute*)attr;
184 if (memctrl_dev->show)
185 return memctrl_dev->show(memctrl_dev->value, buffer);
190 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
191 const char *buffer, size_t count)
193 struct memctrl_dev_attribute *memctrl_dev;
194 memctrl_dev = (struct memctrl_dev_attribute*)attr;
196 if (memctrl_dev->store)
197 return memctrl_dev->store(memctrl_dev->value, buffer, count);
201 static struct sysfs_ops memctrlfs_ops = {
202 .show = memctrl_dev_show,
203 .store = memctrl_dev_store
206 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
207 struct memctrl_dev_attribute attr_##_name = { \
208 .attr = {.name = __stringify(_name), .mode = _mode }, \
214 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
215 struct memctrl_dev_attribute attr_##_name = { \
216 .attr = {.name = __stringify(_name), .mode = _mode }, \
222 /* memory controller attribute files */
224 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
227 /* memory controller control files */
228 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
229 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
230 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
231 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
234 /* Base Attributes of the memory ECC object */
235 static struct memctrl_dev_attribute *memctrl_attr[] = {
243 /* Main MC kobject release() function */
static void edac_memctrl_master_release(struct kobject *kobj)
{
	/* Nothing is freed here; only a debug trace is emitted. */
	debugf1("%s()\n", __func__);
}
249 static struct kobj_type ktype_memctrl = {
250 .release = edac_memctrl_master_release,
251 .sysfs_ops = &memctrlfs_ops,
252 .default_attrs = (struct attribute **) memctrl_attr,
255 #endif /* DISABLE_EDAC_SYSFS */
257 /* Initialize the main sysfs entries for edac:
258 * /sys/devices/system/edac
265 static int edac_sysfs_memctrl_setup(void)
266 #ifdef DISABLE_EDAC_SYSFS
274 debugf1("%s()\n", __func__);
276 /* create the /sys/devices/system/edac directory */
277 err = sysdev_class_register(&edac_class);
279 /* Init the MC's kobject */
280 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
281 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
282 edac_memctrl_kobj.ktype = &ktype_memctrl;
284 /* generate sysfs "..../edac/mc" */
285 err = kobject_set_name(&edac_memctrl_kobj,"mc");
287 /* FIXME: maybe new sysdev_create_subdir() */
288 err = kobject_register(&edac_memctrl_kobj);
290 debugf1("Failed to register '.../edac/mc'\n");
292 debugf1("Registered '.../edac/mc' kobject\n");
296 debugf1("%s() error=%d\n", __func__, err);
301 #endif /* DISABLE_EDAC_SYSFS */
305 * the '..../edac/mc' kobject followed by '..../edac' itself
307 static void edac_sysfs_memctrl_teardown(void)
309 #ifndef DISABLE_EDAC_SYSFS
310 debugf0("MC: " __FILE__ ": %s()\n", __func__);
312 /* Unregister the MC's kobject */
313 kobject_unregister(&edac_memctrl_kobj);
315 /* Unregister the 'edac' object */
316 sysdev_class_unregister(&edac_class);
317 #endif /* DISABLE_EDAC_SYSFS */
320 #ifndef DISABLE_EDAC_SYSFS
323 * /sys/devices/system/edac/pci;
324 * data structures and methods
327 struct list_control {
328 struct edac_pci_device_list *list;
334 /* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
335 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
337 struct list_control *listctl;
338 struct edac_pci_device_list *list;
344 list = listctl->list;
346 for (i = 0; i < *(listctl->count); i++, list++ ) {
348 len += snprintf(p + len, (PAGE_SIZE-len), ",");
350 len += snprintf(p + len,
353 list->vendor,list->device);
356 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
358 return (ssize_t) len;
363 * Scan string from **s to **e looking for one 'vendor:device' tuple
364 * where each field is a hex value
366 * return 0 if an entry is NOT found
367 * return 1 if an entry is found
368 * fill in *vendor_id and *device_id with values found
370 * In both cases, make sure *s has been moved forward toward *e
372 static int parse_one_device(const char **s,const char **e,
373 unsigned int *vendor_id, unsigned int *device_id)
375 const char *runner, *p;
377 /* if null byte, we are done */
379 (*s)++; /* keep *s moving */
383 /* skip over newlines & whitespace */
384 if ((**s == '\n') || isspace(**s)) {
389 if (!isxdigit(**s)) {
394 /* parse vendor_id */
396 while (runner < *e) {
397 /* scan for vendor:device delimiter */
398 if (*runner == ':') {
399 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
406 if (!isxdigit(*runner)) {
411 /* parse device_id */
413 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
422 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
425 struct list_control *listctl;
426 struct edac_pci_device_list *list;
427 unsigned int vendor_id, device_id;
435 list = listctl->list;
436 index = listctl->count;
439 while (*index < MAX_LISTED_PCI_DEVICES) {
441 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
442 list[ *index ].vendor = vendor_id;
443 list[ *index ].device = device_id;
447 /* check for all data consume */
/* Show method for integer PCI-parity attributes: prints the int that 'ptr'
 * points to in decimal, followed by a newline.
 */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
	const int *current_val = ptr;

	return sprintf(buffer, "%d\n", *current_val);
}
462 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
466 if (isdigit(*buffer))
467 *value = simple_strtoul(buffer,NULL,0);
472 struct edac_pci_dev_attribute {
473 struct attribute attr;
475 ssize_t (*show)(void *,char *);
476 ssize_t (*store)(void *, const char *,size_t);
479 /* Set of show/store abstract level functions for PCI Parity object */
480 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
483 struct edac_pci_dev_attribute *edac_pci_dev;
484 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
486 if (edac_pci_dev->show)
487 return edac_pci_dev->show(edac_pci_dev->value, buffer);
491 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
492 const char *buffer, size_t count)
494 struct edac_pci_dev_attribute *edac_pci_dev;
495 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
497 if (edac_pci_dev->show)
498 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
502 static struct sysfs_ops edac_pci_sysfs_ops = {
503 .show = edac_pci_dev_show,
504 .store = edac_pci_dev_store
508 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
509 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
510 .attr = {.name = __stringify(_name), .mode = _mode }, \
516 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
517 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
518 .attr = {.name = __stringify(_name), .mode = _mode }, \
525 static struct list_control pci_whitelist_control = {
526 .list = pci_whitelist,
527 .count = &pci_whitelist_count
530 static struct list_control pci_blacklist_control = {
531 .list = pci_blacklist,
532 .count = &pci_blacklist_count
535 /* whitelist attribute */
536 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
537 &pci_whitelist_control,
539 edac_pci_list_string_show,
540 edac_pci_list_string_store);
542 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
543 &pci_blacklist_control,
545 edac_pci_list_string_show,
546 edac_pci_list_string_store);
549 /* PCI Parity control files */
550 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
551 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
552 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
554 /* Base attributes of the PCI parity object */
555 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
556 &edac_pci_attr_check_pci_parity,
557 &edac_pci_attr_panic_on_pci_parity,
558 &edac_pci_attr_pci_parity_count,
562 /* No memory to release */
static void edac_pci_release(struct kobject *kobj)
{
	/* Nothing is freed here; only a debug trace is emitted. */
	debugf1("%s()\n", __func__);
}
568 static struct kobj_type ktype_edac_pci = {
569 .release = edac_pci_release,
570 .sysfs_ops = &edac_pci_sysfs_ops,
571 .default_attrs = (struct attribute **) edac_pci_attr,
574 #endif /* DISABLE_EDAC_SYSFS */
577 * edac_sysfs_pci_setup()
580 static int edac_sysfs_pci_setup(void)
581 #ifdef DISABLE_EDAC_SYSFS
589 debugf1("%s()\n", __func__);
591 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
592 edac_pci_kobj.parent = &edac_class.kset.kobj;
593 edac_pci_kobj.ktype = &ktype_edac_pci;
595 err = kobject_set_name(&edac_pci_kobj, "pci");
597 /* Instantiate the pci object */
598 /* FIXME: maybe new sysdev_create_subdir() */
599 err = kobject_register(&edac_pci_kobj);
601 debugf1("Failed to register '.../edac/pci'\n");
603 debugf1("Registered '.../edac/pci' kobject\n");
607 #endif /* DISABLE_EDAC_SYSFS */
609 static void edac_sysfs_pci_teardown(void)
611 #ifndef DISABLE_EDAC_SYSFS
612 debugf0("%s()\n", __func__);
614 kobject_unregister(&edac_pci_kobj);
618 #ifndef DISABLE_EDAC_SYSFS
620 /* EDAC sysfs CSROW data structures and methods */
622 /* Set of more detailed csrow<id> attribute show/store functions */
623 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
627 if (csrow->nr_channels > 0) {
628 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
629 csrow->channels[0].label);
634 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
638 if (csrow->nr_channels > 0) {
639 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
640 csrow->channels[1].label);
645 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
646 const char *data, size_t size)
648 ssize_t max_size = 0;
650 if (csrow->nr_channels > 0) {
651 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
652 strncpy(csrow->channels[0].label, data, max_size);
653 csrow->channels[0].label[max_size] = '\0';
658 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
659 const char *data, size_t size)
661 ssize_t max_size = 0;
663 if (csrow->nr_channels > 1) {
664 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
665 strncpy(csrow->channels[1].label, data, max_size);
666 csrow->channels[1].label[max_size] = '\0';
671 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
673 return sprintf(data,"%u\n", csrow->ue_count);
676 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
678 return sprintf(data,"%u\n", csrow->ce_count);
681 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
685 if (csrow->nr_channels > 0) {
686 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
691 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
695 if (csrow->nr_channels > 1) {
696 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
701 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
703 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
706 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
708 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
711 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
713 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
716 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
718 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
721 struct csrowdev_attribute {
722 struct attribute attr;
723 ssize_t (*show)(struct csrow_info *,char *);
724 ssize_t (*store)(struct csrow_info *, const char *,size_t);
727 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
728 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
730 /* Set of show/store higher level functions for csrow objects */
731 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
734 struct csrow_info *csrow = to_csrow(kobj);
735 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
737 if (csrowdev_attr->show)
738 return csrowdev_attr->show(csrow, buffer);
742 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
743 const char *buffer, size_t count)
745 struct csrow_info *csrow = to_csrow(kobj);
746 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
748 if (csrowdev_attr->store)
749 return csrowdev_attr->store(csrow, buffer, count);
753 static struct sysfs_ops csrowfs_ops = {
754 .show = csrowdev_show,
755 .store = csrowdev_store
758 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
759 struct csrowdev_attribute attr_##_name = { \
760 .attr = {.name = __stringify(_name), .mode = _mode }, \
765 /* csrow<id>/attribute files */
766 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
767 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
768 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
769 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
770 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
771 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
772 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
773 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
775 /* control/attribute files */
776 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
777 csrow_ch0_dimm_label_show,
778 csrow_ch0_dimm_label_store);
779 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
780 csrow_ch1_dimm_label_show,
781 csrow_ch1_dimm_label_store);
784 /* Attributes of the CSROW<id> object */
785 static struct csrowdev_attribute *csrow_attr[] = {
794 &attr_ch0_dimm_label,
795 &attr_ch1_dimm_label,
800 /* No memory to release */
static void edac_csrow_instance_release(struct kobject *kobj)
{
	/* Nothing is freed here; only a debug trace is emitted. */
	debugf1("%s()\n", __func__);
}
806 static struct kobj_type ktype_csrow = {
807 .release = edac_csrow_instance_release,
808 .sysfs_ops = &csrowfs_ops,
809 .default_attrs = (struct attribute **) csrow_attr,
812 /* Create a CSROW object under specifed edac_mc_device */
813 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
814 struct csrow_info *csrow, int index )
818 debugf0("%s()\n", __func__);
820 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
822 /* generate ..../edac/mc/mc<id>/csrow<index> */
824 csrow->kobj.parent = edac_mci_kobj;
825 csrow->kobj.ktype = &ktype_csrow;
827 /* name this instance of csrow<id> */
828 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
830 /* Instanstiate the csrow object */
831 err = kobject_register(&csrow->kobj);
833 debugf0("Failed to register CSROW%d\n",index);
835 debugf0("Registered CSROW%d\n",index);
841 /* sysfs data structures and methods for the MCI kobjects */
843 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
844 const char *data, size_t count )
848 mci->ue_noinfo_count = 0;
849 mci->ce_noinfo_count = 0;
852 for (row = 0; row < mci->nr_csrows; row++) {
853 struct csrow_info *ri = &mci->csrows[row];
857 for (chan = 0; chan < ri->nr_channels; chan++)
858 ri->channels[chan].ce_count = 0;
860 mci->start_time = jiffies;
865 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
867 return sprintf(data,"%d\n", mci->ue_count);
870 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
872 return sprintf(data,"%d\n", mci->ce_count);
875 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
877 return sprintf(data,"%d\n", mci->ce_noinfo_count);
880 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
882 return sprintf(data,"%d\n", mci->ue_noinfo_count);
885 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
887 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
890 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
892 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
895 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
897 return sprintf(data,"%s\n", mci->ctl_name);
900 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
905 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
906 if ((edac_cap >> bit_idx) & 0x1)
907 p += sprintf(p, "%s ", edac_caps[bit_idx]);
913 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
917 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
918 p += sprintf(p, "\n");
923 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
928 p += mci_output_edac_cap(p,mci->edac_cap);
929 p += sprintf(p, "\n");
934 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
939 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
940 if ((mtype_cap >> bit_idx) & 0x1)
941 p += sprintf(p, "%s ", mem_types[bit_idx]);
947 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
951 p += mci_output_mtype_cap(p,mci->mtype_cap);
952 p += sprintf(p, "\n");
957 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
959 int total_pages, csrow_idx;
961 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
963 struct csrow_info *csrow = &mci->csrows[csrow_idx];
965 if (!csrow->nr_pages)
967 total_pages += csrow->nr_pages;
970 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
973 struct mcidev_attribute {
974 struct attribute attr;
975 ssize_t (*show)(struct mem_ctl_info *,char *);
976 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
979 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
980 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
982 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
985 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
986 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
988 if (mcidev_attr->show)
989 return mcidev_attr->show(mem_ctl_info, buffer);
993 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
994 const char *buffer, size_t count)
996 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
997 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
999 if (mcidev_attr->store)
1000 return mcidev_attr->store(mem_ctl_info, buffer, count);
1004 static struct sysfs_ops mci_ops = {
1005 .show = mcidev_show,
1006 .store = mcidev_store
1009 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
1010 struct mcidev_attribute mci_attr_##_name = { \
1011 .attr = {.name = __stringify(_name), .mode = _mode }, \
1017 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
1019 /* Attribute files */
1020 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
1021 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
1022 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
1023 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
1024 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
1025 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
1026 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
1027 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
1028 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1029 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1030 mci_edac_current_capability_show,NULL);
1031 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1032 mci_supported_mem_type_show,NULL);
1035 static struct mcidev_attribute *mci_attr[] = {
1036 &mci_attr_reset_counters,
1037 &mci_attr_module_name,
1039 &mci_attr_edac_capability,
1040 &mci_attr_edac_current_capability,
1041 &mci_attr_supported_mem_type,
1043 &mci_attr_seconds_since_reset,
1044 &mci_attr_ue_noinfo_count,
1045 &mci_attr_ce_noinfo_count,
1053 * Release of a MC controlling instance
1055 static void edac_mci_instance_release(struct kobject *kobj)
1057 struct mem_ctl_info *mci;
1058 mci = container_of(kobj,struct mem_ctl_info,edac_mci_kobj);
1060 debugf0("%s() idx=%d calling kfree\n", __func__, mci->mc_idx);
1065 static struct kobj_type ktype_mci = {
1066 .release = edac_mci_instance_release,
1067 .sysfs_ops = &mci_ops,
1068 .default_attrs = (struct attribute **) mci_attr,
1071 #endif /* DISABLE_EDAC_SYSFS */
1073 #define EDAC_DEVICE_SYMLINK "device"
1076 * Create a new Memory Controller kobject instance,
1077 * mc<id> under the 'mc' directory
1083 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1084 #ifdef DISABLE_EDAC_SYSFS
1092 struct csrow_info *csrow;
1093 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1095 debugf0("%s() idx=%d\n", __func__, mci->mc_idx);
1097 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1099 /* set the name of the mc<id> object */
1100 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1104 /* link to our parent the '..../edac/mc' object */
1105 edac_mci_kobj->parent = &edac_memctrl_kobj;
1106 edac_mci_kobj->ktype = &ktype_mci;
1108 /* register the mc<id> kobject */
1109 err = kobject_register(edac_mci_kobj);
1113 /* create a symlink for the device */
1114 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1115 EDAC_DEVICE_SYMLINK);
1119 /* Make directories for each CSROW object
1120 * under the mc<id> kobject
1122 for (i = 0; i < mci->nr_csrows; i++) {
1124 csrow = &mci->csrows[i];
1126 /* Only expose populated CSROWs */
1127 if (csrow->nr_pages > 0) {
1128 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1134 /* Mark this MCI instance as having sysfs entries */
1135 mci->sysfs_active = MCI_SYSFS_ACTIVE;
1140 /* CSROW error: backout what has already been registered, */
1142 for ( i--; i >= 0; i--) {
1143 if (csrow->nr_pages > 0)
1144 kobject_unregister(&mci->csrows[i].kobj);
1148 kobject_unregister(edac_mci_kobj);
1152 #endif /* DISABLE_EDAC_SYSFS */
1155 * remove a Memory Controller instance
1157 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1159 #ifndef DISABLE_EDAC_SYSFS
1162 debugf0("%s()\n", __func__);
1164 /* remove all csrow kobjects */
1165 for (i = 0; i < mci->nr_csrows; i++) {
1166 if (mci->csrows[i].nr_pages > 0)
1167 kobject_unregister(&mci->csrows[i].kobj);
1170 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1172 kobject_unregister(&mci->edac_mci_kobj);
1173 #endif /* DISABLE_EDAC_SYSFS */
1176 /* END OF sysfs data and methods */
1178 #ifdef CONFIG_EDAC_DEBUG
1180 EXPORT_SYMBOL(edac_mc_dump_channel);
1182 void edac_mc_dump_channel(struct channel_info *chan)
1184 debugf4("\tchannel = %p\n", chan);
1185 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1186 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1187 debugf4("\tchannel->label = '%s'\n", chan->label);
1188 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1192 EXPORT_SYMBOL(edac_mc_dump_csrow);
1194 void edac_mc_dump_csrow(struct csrow_info *csrow)
1196 debugf4("\tcsrow = %p\n", csrow);
1197 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1198 debugf4("\tcsrow->first_page = 0x%lx\n",
1200 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1201 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1202 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1203 debugf4("\tcsrow->nr_channels = %d\n",
1204 csrow->nr_channels);
1205 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1206 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1210 EXPORT_SYMBOL(edac_mc_dump_mci);
1212 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1214 debugf3("\tmci = %p\n", mci);
1215 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1216 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1217 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1218 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1219 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1220 mci->nr_csrows, mci->csrows);
1221 debugf3("\tpdev = %p\n", mci->pdev);
1222 debugf3("\tmod_name:ctl_name = %s:%s\n",
1223 mci->mod_name, mci->ctl_name);
1224 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1228 #endif /* CONFIG_EDAC_DEBUG */
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler can fold the choice of alignment
 * below to a constant.
 *
 * Returns 'ptr' itself when it is already suitably aligned (or when 'size'
 * is no larger than a char), otherwise the next higher aligned address.
 */
static inline char * align_ptr (void *ptr, unsigned size)
{
	unsigned long addr = (unsigned long) ptr;
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *) ptr;

	/* The misalignment is a property of the ADDRESS, not of the item
	 * size: 'size' only selects which alignment is required.
	 */
	r = addr % align;

	if (r == 0)
		return (char *) ptr;

	return (char *) (addr + align - r);
}
1265 EXPORT_SYMBOL(edac_mc_alloc);
1268 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
1269 * @size_pvt: size of private storage needed
1270 * @nr_csrows: Number of CWROWS needed for this MC
1271 * @nr_chans: Number of channels for the MC
1273 * Everything is kmalloc'ed as one big chunk - more efficient.
1274 * Only can be used if all structures have the same lifetime - otherwise
1275 * you have to allocate and initialize your own structures.
1277 * Use edac_mc_free() to free mc structures allocated by this function.
1280 * NULL allocation failed
1281 * struct mem_ctl_info pointer
1283 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1286 struct mem_ctl_info *mci;
1287 struct csrow_info *csi, *csrow;
1288 struct channel_info *chi, *chp, *chan;
1293 /* Figure out the offsets of the various items from the start of an mc
1294 * structure. We want the alignment of each item to be at least as
1295 * stringent as what the compiler would provide if we could simply
1296 * hardcode everything into a single struct.
1298 mci = (struct mem_ctl_info *) 0;
1299 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1300 chi = (struct channel_info *)
1301 align_ptr(&csi[nr_csrows], sizeof(*chi));
1302 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1303 size = ((unsigned long) pvt) + sz_pvt;
1305 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1308 /* Adjust pointers so they point within the memory we just allocated
1309 * rather than an imaginary chunk of memory located at address 0.
1311 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1312 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1313 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1315 memset(mci, 0, size); /* clear all fields */
1318 mci->pvt_info = pvt;
1319 mci->nr_csrows = nr_csrows;
1321 for (row = 0; row < nr_csrows; row++) {
1323 csrow->csrow_idx = row;
1325 csrow->nr_channels = nr_chans;
1326 chp = &chi[row * nr_chans];
1327 csrow->channels = chp;
1329 for (chn = 0; chn < nr_chans; chn++) {
1331 chan->chan_idx = chn;
1332 chan->csrow = csrow;
1340 EXPORT_SYMBOL(edac_mc_free);
1343 * edac_mc_free: Free a previously allocated 'mci' structure
1344 * @mci: pointer to a struct mem_ctl_info structure
1346 * Free up a previously allocated mci structure
1347 * A MCI structure can be in 2 states after being allocated
1348 * by edac_mc_alloc().
1349 * 1) Allocated in a MC driver's probe, but not yet committed
1350 * 2) Allocated and committed, by a call to edac_mc_add_mc()
1351 * edac_mc_add_mc() is the function that adds the sysfs entries
1352 * thus, this free function must determine which state the 'mci'
1353 * structure is in, then either free it directly or
1354 * perform kobject cleanup by calling edac_remove_sysfs_mci_device().
/* Release @mci.  The visible test compares mci->sysfs_active with
 * MCI_SYSFS_ACTIVE and, on a match, hands the structure to
 * edac_remove_sysfs_mci_device().  The direct-free branch described in
 * the comment below is not visible in this excerpt.
 */
1358 void edac_mc_free(struct mem_ctl_info *mci)
1360 /* only if sysfs entries for this mci instance exist
1361 * do we remove them and defer the actual kfree via
1362 * the kobject 'release()' callback.
1364 * Otherwise, do a straight kfree now.
1366 if (mci->sysfs_active == MCI_SYSFS_ACTIVE)
1367 edac_remove_sysfs_mci_device(mci);
1374 EXPORT_SYMBOL(edac_mc_find_mci_by_pdev);
/* Walk the global mc_devices list, converting each node to its
 * containing struct mem_ctl_info through the 'link' member, and compare
 * the entry's pdev pointer with @pdev.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching mci is returned and NULL otherwise - confirm.
 */
1376 struct mem_ctl_info *edac_mc_find_mci_by_pdev(struct pci_dev *pdev)
1378 struct mem_ctl_info *mci;
1379 struct list_head *item;
1381 debugf3("%s()\n", __func__);
1383 list_for_each(item, &mc_devices) {
1384 mci = list_entry(item, struct mem_ctl_info, link);
1386 if (mci->pdev == pdev)
/* Link @mci into the global mc_devices list with list_add_tail_rcu().
 *
 * An empty list takes the new entry at the list head.  Otherwise a
 * lookup by mci->pdev is done first and a hit produces the
 * "already assigned" warning; the list is then scanned for an entry
 * whose mc_idx differs from the running index i, and that entry becomes
 * the insertion point (the list head is used when none is found).
 *
 * edac_mc_add_mc() calls this after taking mem_ctls_mutex.
 * NOTE(review): the declaration/updates of i, the assignment of
 * mci->mc_idx and the return values are not visible in this excerpt.
 */
1393 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1395 struct list_head *item, *insert_before;
1396 struct mem_ctl_info *p;
1399 if (list_empty(&mc_devices)) {
1401 insert_before = &mc_devices;
1403 if (edac_mc_find_mci_by_pdev(mci->pdev)) {
1404 edac_printk(KERN_WARNING, EDAC_MC,
1405 "%s (%s) %s %s already assigned %d\n",
1406 mci->pdev->dev.bus_id,
1407 pci_name(mci->pdev), mci->mod_name,
1408 mci->ctl_name, mci->mc_idx);
1412 insert_before = NULL;
1415 list_for_each(item, &mc_devices) {
1416 p = list_entry(item, struct mem_ctl_info, link);
1418 if (p->mc_idx != i) {
1419 insert_before = item;
1428 if (insert_before == NULL)
1429 insert_before = &mc_devices;
1432 list_add_tail_rcu(&mci->link, insert_before);
/* Callback passed to call_rcu() by del_mc_from_global_list().
 * Recovers the mem_ctl_info that embeds @head (member 'rcu'),
 * re-initialises its list node and signals mci->complete so the
 * waiting remover can continue.
 */
1437 static void complete_mc_list_del (struct rcu_head *head)
1439 struct mem_ctl_info *mci;
1441 mci = container_of(head, struct mem_ctl_info, rcu);
1442 INIT_LIST_HEAD(&mci->link);
1443 complete(&mci->complete);
/* Unlink @mci from the global list with list_del_rcu(), then queue
 * complete_mc_list_del() through call_rcu() and block in
 * wait_for_completion() until that callback has run.
 */
1447 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1449 list_del_rcu(&mci->link);
1450 init_completion(&mci->complete);
1451 call_rcu(&mci->rcu, complete_mc_list_del);
1452 wait_for_completion(&mci->complete);
1456 EXPORT_SYMBOL(edac_mc_add_mc);
1459 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list
1460 * @mci: pointer to the mci structure to be added to the list
1467 /* FIXME - should a warning be printed if no error detection? correction? */
/* Register @mci with the EDAC core.
 *
 * With CONFIG_EDAC_DEBUG the mci is dumped at edac_debug_level >= 3 and
 * every csrow and channel at level >= 4.  Under mem_ctls_mutex the mci
 * is added to the global list, its start_time is stamped with jiffies
 * and its sysfs device is created; a sysfs failure is logged as a
 * warning.  The later del_mc_from_global_list() call followed by a
 * second up() looks like the error-unwind path.
 * NOTE(review): the goto labels and return values are not visible in
 * this excerpt - confirm the exact unwind order against the full file.
 */
1468 int edac_mc_add_mc(struct mem_ctl_info *mci)
1470 debugf0("%s()\n", __func__);
1471 #ifdef CONFIG_EDAC_DEBUG
1472 if (edac_debug_level >= 3)
1473 edac_mc_dump_mci(mci);
1474 if (edac_debug_level >= 4) {
1477 for (i = 0; i < mci->nr_csrows; i++) {
1479 edac_mc_dump_csrow(&mci->csrows[i]);
1480 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1481 edac_mc_dump_channel(&mci->csrows[i].
1486 down(&mem_ctls_mutex);
1488 if (add_mc_to_global_list(mci))
1491 /* set load time so that error rate can be tracked */
1492 mci->start_time = jiffies;
1494 if (edac_create_sysfs_mci_device(mci)) {
1495 edac_mc_printk(mci, KERN_WARNING,
1496 "failed to create sysfs device\n");
1500 /* Report action taken */
1501 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n",
1502 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1504 up(&mem_ctls_mutex);
1508 del_mc_from_global_list(mci);
1511 up(&mem_ctls_mutex);
1516 EXPORT_SYMBOL(edac_mc_del_mc);
1519 * edac_mc_del_mc: Remove the specified mci structure from global list
1520 * @mci: Pointer to struct mem_ctl_info structure
/* Unregister @mci: with mem_ctls_mutex held, remove it from the global
 * list (which waits for the RCU callback, see del_mc_from_global_list())
 * and log the removal at KERN_INFO.
 * NOTE(review): the return value is not visible in this excerpt.
 */
1526 int edac_mc_del_mc(struct mem_ctl_info *mci)
1530 debugf0("MC%d: %s()\n", mci->mc_idx, __func__);
1531 down(&mem_ctls_mutex);
1532 del_mc_from_global_list(mci);
1533 edac_printk(KERN_INFO, EDAC_MC,
1534 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx,
1535 mci->mod_name, mci->ctl_name, pci_name(mci->pdev));
1537 up(&mem_ctls_mutex);
1543 EXPORT_SYMBOL(edac_mc_scrub_block);
/* Scrub a region of one page.
 * @page is used as a page frame number: it is validated with
 * pfn_valid() and converted with pfn_to_page().  The page is mapped
 * with kmap_atomic() (slot KM_BOUNCE_READ), atomic_scrub() is run on
 * the mapping at @offset for 'size' bytes, and the page is unmapped
 * again.  For highmem pages local interrupts are disabled around the
 * map/scrub/unmap sequence.
 */
1545 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1550 unsigned long flags = 0;
1552 debugf3("%s()\n", __func__);
1554 /* ECC error page was not in our memory. Ignore it. */
1555 if(!pfn_valid(page))
1558 /* Find the actual page structure then map it and fix */
1559 pg = pfn_to_page(page);
1561 if (PageHighMem(pg))
1562 local_irq_save(flags);
1564 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1566 /* Perform architecture specific atomic scrub operation */
1567 atomic_scrub(virt_addr + offset, size);
1569 /* Unmap and complete */
1570 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1572 if (PageHighMem(pg))
1573 local_irq_restore(flags);
1577 /* FIXME - should return -1 */
1578 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
/* Search @mci's csrows for the one covering 'page'.
 * A csrow is a candidate when page lies in [first_page, last_page] and
 * (page & page_mask) equals (first_page & page_mask); csrows are also
 * tested for nr_pages == 0 (presumably to skip empty rows - the
 * statement after the test is not visible here).  A KERN_ERR message
 * reports a page that could not be looked up.
 * NOTE(review): the value returned for hit and miss is not visible in
 * this excerpt.
 */
1580 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1583 struct csrow_info *csrows = mci->csrows;
1586 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
1589 for (i = 0; i < mci->nr_csrows; i++) {
1590 struct csrow_info *csrow = &csrows[i];
1592 if (csrow->nr_pages == 0)
1595 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
1596 "mask(0x%lx)\n", mci->mc_idx, __func__,
1597 csrow->first_page, page, csrow->last_page,
1600 if ((page >= csrow->first_page) &&
1601 (page <= csrow->last_page) &&
1602 ((page & csrow->page_mask) ==
1603 (csrow->first_page & csrow->page_mask))) {
1610 edac_mc_printk(mci, KERN_ERR,
1611 "could not look up page error address %lx\n",
1612 (unsigned long) page);
1618 EXPORT_SYMBOL(edac_mc_handle_ce);
1620 /* FIXME - setable log (warning/emerg) levels */
1621 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
/* Record a correctable error (CE) reported by a memory controller driver.
 *
 * @row must be in [0, mci->nr_csrows) and @channel in
 * [0, csrows[row].nr_channels); an out-of-range value is logged as an
 * INTERNAL ERROR and forwarded to edac_mc_handle_ce_no_info().
 * Otherwise the event is logged with its page, offset, grain, syndrome,
 * row, channel and channel label, and the per-csrow and per-channel
 * ce_count counters are incremented.  When mci->scrub_mode has
 * SCRUB_SW_SRC set, the page is translated through the driver's
 * ctl_page_to_phys() hook if one is installed and the affected grain is
 * scrubbed with edac_mc_scrub_block().
 * NOTE(review): any condition guarding the log message, and the
 * fallback operand of the remapped_page expression, are not visible in
 * this excerpt.
 */
1622 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1623 unsigned long page_frame_number,
1624 unsigned long offset_in_page,
1625 unsigned long syndrome, int row, int channel,
1628 unsigned long remapped_page;
1630 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1632 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1633 if (row >= mci->nr_csrows || row < 0) {
1634 /* something is wrong */
1635 edac_mc_printk(mci, KERN_ERR,
1636 "INTERNAL ERROR: row out of range "
1637 "(%d >= %d)\n", row, mci->nr_csrows);
1638 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1641 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1642 /* something is wrong */
1643 edac_mc_printk(mci, KERN_ERR,
1644 "INTERNAL ERROR: channel out of range "
1645 "(%d >= %d)\n", channel,
1646 mci->csrows[row].nr_channels);
1647 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1652 /* FIXME - put in DIMM location */
1653 edac_mc_printk(mci, KERN_WARNING,
1654 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
1655 "0x%lx, row %d, channel %d, label \"%s\": %s\n",
1656 page_frame_number, offset_in_page,
1657 mci->csrows[row].grain, syndrome, row, channel,
1658 mci->csrows[row].channels[channel].label, msg);
1661 mci->csrows[row].ce_count++;
1662 mci->csrows[row].channels[channel].ce_count++;
1664 if (mci->scrub_mode & SCRUB_SW_SRC) {
1666 * Some MC's can remap memory so that it is still available
1667 * at a different address when PCI devices map into memory.
1668 * MC's that can't do this lose the memory where PCI devices
1669 * are mapped. This mapping is MC dependant and so we call
1670 * back into the MC driver for it to map the MC page to
1671 * a physical (CPU) page which can then be mapped to a virtual
1672 * page - which can then be scrubbed.
1674 remapped_page = mci->ctl_page_to_phys ?
1675 mci->ctl_page_to_phys(mci, page_frame_number) :
1678 edac_mc_scrub_block(remapped_page, offset_in_page,
1679 mci->csrows[row].grain);
1684 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
/* Record a correctable error for which no location details exist:
 * logs "CE - no information available" with the caller's message at
 * KERN_WARNING and increments mci->ce_noinfo_count.
 * NOTE(review): any condition guarding the log call is not visible in
 * this excerpt.
 */
1686 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1690 edac_mc_printk(mci, KERN_WARNING,
1691 "CE - no information available: %s\n", msg);
1692 mci->ce_noinfo_count++;
1697 EXPORT_SYMBOL(edac_mc_handle_ue);
/* Record an uncorrectable error (UE) on csrow @row.
 *
 * An out-of-range @row is logged as an INTERNAL ERROR and forwarded to
 * edac_mc_handle_ue_no_info().  Otherwise the labels of the row's
 * channels are concatenated into 'labels', separated by ':' and bounded
 * by EDAC_MC_LABEL_LEN * 4 characters, the event is logged at
 * KERN_EMERG and the csrow's ue_count is incremented.
 * NOTE(review): the updates of pos/len between the snprintf() calls,
 * the conditions guarding the log call, and the call that consumes the
 * second "EDAC MC%d: UE page ..." format string (presumably panic())
 * are not visible in this excerpt - confirm against the full file.
 */
1699 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1700 unsigned long page_frame_number,
1701 unsigned long offset_in_page, int row,
1704 int len = EDAC_MC_LABEL_LEN * 4;
1705 char labels[len + 1];
1710 debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1712 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1713 if (row >= mci->nr_csrows || row < 0) {
1714 /* something is wrong */
1715 edac_mc_printk(mci, KERN_ERR,
1716 "INTERNAL ERROR: row out of range "
1717 "(%d >= %d)\n", row, mci->nr_csrows);
1718 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1722 chars = snprintf(pos, len + 1, "%s",
1723 mci->csrows[row].channels[0].label);
1726 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1728 chars = snprintf(pos, len + 1, ":%s",
1729 mci->csrows[row].channels[chan].label);
1735 edac_mc_printk(mci, KERN_EMERG,
1736 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
1737 "labels \"%s\": %s\n", page_frame_number,
1738 offset_in_page, mci->csrows[row].grain, row, labels,
1743 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1744 " labels \"%s\": %s\n", mci->mc_idx,
1745 page_frame_number, offset_in_page,
1746 mci->csrows[row].grain, row, labels, msg);
1749 mci->csrows[row].ue_count++;
1753 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
/* Record an uncorrectable error for which no location details exist.
 * The visible statements are a panic() naming the controller index, a
 * KERN_WARNING "UE - no information available" log and an increment of
 * mci->ue_noinfo_count.
 * NOTE(review): the conditions guarding the panic() and the log call
 * are not visible in this excerpt (presumably the panic_on_ue and
 * log_ue module parameters) - confirm.
 */
1755 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1759 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1762 edac_mc_printk(mci, KERN_WARNING,
1763 "UE - no information available: %s\n", msg);
1764 mci->ue_noinfo_count++;
/* Read and acknowledge the error bits of a PCI status register.
 * @secondary selects PCI_SEC_STATUS (non-zero) or PCI_STATUS (zero).
 * A raw value of 0xFFFF triggers a second read of the dword at config
 * offset 0, which is compared with 0xFFFFFFFF.  The status is then
 * masked down to the error bits of interest and the masked value is
 * written back to the same register to reset those bits.
 * NOTE(review): the action taken when both reads are all-ones, the full
 * bit mask and the return statement are not visible in this excerpt.
 */
1771 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1776 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1777 pci_read_config_word(dev, where, &status);
1779 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1780 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1783 if (status == 0xFFFF) {
1785 pci_read_config_dword(dev, 0, &sanity);
1786 if (sanity == 0xFFFFFFFF)
1789 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1793 /* reset only the bits we are interested in */
1794 pci_write_config_word(dev, where, status);
1799 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1801 /* Clear any PCI parity errors logged by this device. */
/* Calls get_pci_parity_status() on the primary status register and
 * discards the result; that helper writes the error bits back, which is
 * what clears them.  When the header type (low 7 bits) identifies a
 * bridge, the secondary status register gets the same treatment.
 */
1802 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1806 get_pci_parity_status(dev, 0);
1808 /* read the device TYPE, looking for bridges */
1809 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1811 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1812 get_pci_parity_status(dev, 1);
1816 * PCI Parity polling
/* Poll one PCI device for logged errors.
 * The primary status is fetched with get_pci_parity_status(); each of
 * Signaled System Error, Master Data Parity Error and Detected Parity
 * Error is logged at KERN_CRIT, and the two parity cases also bump the
 * global pci_parity_count.  If the header type (low 7 bits) marks the
 * device as a bridge, the secondary status register is checked the same
 * way with "Bridge"-prefixed messages.
 * NOTE(review): any outer condition wrapping the three status tests is
 * not visible in this excerpt.
 */
1819 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1824 /* read the STATUS register on this device
1826 status = get_pci_parity_status(dev, 0);
1828 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1830 /* check the status reg for errors */
1832 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1833 edac_printk(KERN_CRIT, EDAC_PCI,
1834 "Signaled System Error on %s\n",
1837 if (status & (PCI_STATUS_PARITY)) {
1838 edac_printk(KERN_CRIT, EDAC_PCI,
1839 "Master Data Parity Error on %s\n",
1842 atomic_inc(&pci_parity_count);
1845 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1846 edac_printk(KERN_CRIT, EDAC_PCI,
1847 "Detected Parity Error on %s\n",
1850 atomic_inc(&pci_parity_count);
1854 /* read the device TYPE, looking for bridges */
1855 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1857 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1859 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1860 /* On bridges, need to examine secondary status register */
1861 status = get_pci_parity_status(dev, 1);
1863 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1864 status, dev->dev.bus_id );
1866 /* check the secondary status reg for errors */
1868 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1869 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1870 "Signaled System Error on %s\n",
1873 if (status & (PCI_STATUS_PARITY)) {
1874 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1875 "Master Data Parity Error on "
1876 "%s\n", pci_name(dev));
1878 atomic_inc(&pci_parity_count);
1881 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1882 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1883 "Detected Parity Error on %s\n",
1886 atomic_inc(&pci_parity_count);
1893 * check_dev_on_list: Scan for a PCI device on a white/black list
1894 * @list: an EDAC &edac_pci_device_list white/black list pointer
1895 * @free_index: index of next free entry on the list
1896 * @pci_dev: PCI Device pointer
1898 * see if list contains the device.
1900 * Returns: 0 not found
/* Linear scan of the first @free_index entries of @list for one whose
 * vendor and device IDs both equal those of @dev.  rc starts at 0
 * ("not found").
 * NOTE(review): the statements executed on a match and the return are
 * not visible in this excerpt.
 */
1903 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1904 struct pci_dev *dev)
1907 int rc = 0; /* Assume not found */
1908 unsigned short vendor=dev->vendor;
1909 unsigned short device=dev->device;
1911 /* Scan the list, looking for a vendor/device match
1913 for (i = 0; i < free_index; i++, list++ ) {
1914 if ( (list->vendor == vendor ) &&
1915 (list->device == device )) {
1925 * pci_dev parity list iterator
1926 * Scan the PCI device list for one iteration, looking for SERRORs
1927 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
/* Iterate over every PCI device via pci_get_device(PCI_ANY_ID,
 * PCI_ANY_ID, ...) and filter it through the white/black lists: when
 * pci_whitelist_count > 0 the device is tested against pci_whitelist,
 * otherwise against pci_blacklist (negated test).
 * NOTE(review): the statements that invoke @fn are not visible in this
 * excerpt; presumably @fn(dev) runs for whitelisted devices, or for
 * devices absent from the blacklist - confirm.
 */
1929 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1931 struct pci_dev *dev=NULL;
1933 /* request for kernel access to the next PCI device, if any,
1934 * and while we are looking at it have its reference count
1935 * bumped until we are done with it
1937 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1939 /* if whitelist exists then it has priority, so only scan those
1940 * devices on the whitelist
1942 if (pci_whitelist_count > 0 ) {
1943 if (check_dev_on_list(pci_whitelist,
1944 pci_whitelist_count, dev))
1948 * if no whitelist, then check if this devices is
1951 if (!check_dev_on_list(pci_blacklist,
1952 pci_blacklist_count, dev))
/* One PCI parity polling pass.
 * Tests check_pci_parity first (the early-exit statement itself is not
 * visible here).  pci_parity_count is sampled, all devices are run
 * through edac_pci_dev_parity_test() with local interrupts disabled,
 * and if panic_on_pci_parity is set and the counter changed during the
 * scan the kernel panics.
 */
1958 static void do_pci_parity_check(void)
1960 unsigned long flags;
1963 debugf3("%s()\n", __func__);
1965 if (!check_pci_parity)
1968 before_count = atomic_read(&pci_parity_count);
1970 /* scan all PCI devices looking for a Parity Error on devices and
1973 local_irq_save(flags);
1974 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1975 local_irq_restore(flags);
1977 /* Only if operator has selected panic on PCI Error */
1978 if (panic_on_pci_parity) {
1979 /* If the count is different 'after' from 'before' */
1980 if (before_count != atomic_read(&pci_parity_count))
1981 panic("EDAC: PCI Parity Error");
/* Runs edac_pci_dev_parity_clear() over the devices selected by
 * edac_pci_dev_parity_iterator().  Called once from edac_mc_init().
 */
1986 static inline void clear_pci_parity_errors(void)
1988 /* Clear any PCI bus parity errors that devices initially have logged
1989 * in their registers.
1991 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
/* Replacement definitions of do_pci_parity_check() and
 * clear_pci_parity_errors() for builds without CONFIG_PCI.
 * NOTE(review): their bodies are not visible in this excerpt;
 * presumably both are empty - confirm.
 */
1995 #else /* CONFIG_PCI */
1998 static inline void do_pci_parity_check(void)
2004 static inline void clear_pci_parity_errors(void)
2010 #endif /* CONFIG_PCI */
2013 * Iterate over all MC instances and check for ECC, et al, errors
/* With local interrupts disabled, walk mc_devices and invoke each
 * controller's edac_check() hook when the driver has installed one.
 */
2015 static inline void check_mc_devices (void)
2017 unsigned long flags;
2018 struct list_head *item;
2019 struct mem_ctl_info *mci;
2021 debugf3("%s()\n", __func__);
2023 /* during poll, have interrupts off */
2024 local_irq_save(flags);
2026 list_for_each(item, &mc_devices) {
2027 mci = list_entry(item, struct mem_ctl_info, link);
2029 if (mci->edac_check != NULL)
2030 mci->edac_check(mci);
2033 local_irq_restore(flags);
2038 * Check MC status every poll_msec.
2039 * Check PCI status every poll_msec as well.
2041 * This is where the work gets done for edac.
2043 * SMP safe, doesn't use NMI, and auto-rate-limits.
/* One polling pass.  The visible statements emit a debug trace and run
 * do_pci_parity_check().
 * NOTE(review): the memory-controller half of the pass (presumably a
 * call to check_mc_devices()) is not visible in this excerpt - confirm.
 */
2045 static void do_edac_check(void)
2047 debugf3("%s()\n", __func__);
2049 do_pci_parity_check();
/* Body of the "kedac" kernel thread started in edac_mc_init().
 * Loops until kthread_should_stop() and sleeps interruptibly for
 * (HZ * poll_msec) / 1000 jiffies per iteration.  poll_msec is re-read
 * on every pass.
 * NOTE(review): the per-iteration work (presumably do_edac_check()) and
 * the return value are not visible in this excerpt.
 */
2052 static int edac_kernel_thread(void *arg)
2054 while (!kthread_should_stop()) {
2057 /* goto sleep for the interval */
2058 schedule_timeout_interruptible((HZ * poll_msec) / 1000);
2067 * module initialization entry point
/* Module init: print the version banner, clear PCI parity errors that
 * devices already have logged, create the memory-controller and PCI
 * sysfs entries, then start the "kedac" polling thread.
 * Failure handling visible here: a PCI sysfs failure tears down the MC
 * sysfs entries; a thread-creation failure tears down both and returns
 * PTR_ERR(edac_thread).
 * NOTE(review): the other return statements are not visible in this
 * excerpt.
 */
2069 static int __init edac_mc_init(void)
2071 edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n");
2074 * Harvest and clear any boot/initialization PCI parity errors
2076 * FIXME: This only clears errors logged by devices present at time of
2077 * module initialization. We should also do an initial clear
2078 * of each newly hotplugged device.
2080 clear_pci_parity_errors();
2082 /* Create the MC sysfs entries */
2083 if (edac_sysfs_memctrl_setup()) {
2084 edac_printk(KERN_ERR, EDAC_MC,
2085 "Error initializing sysfs code\n");
2089 /* Create the PCI parity sysfs entries */
2090 if (edac_sysfs_pci_setup()) {
2091 edac_sysfs_memctrl_teardown();
2092 edac_printk(KERN_ERR, EDAC_MC,
2093 "EDAC PCI: Error initializing sysfs code\n");
2097 /* create our kernel thread */
2098 edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac");
2099 if (IS_ERR(edac_thread)) {
2100 /* remove the sysfs entries */
2101 edac_sysfs_memctrl_teardown();
2102 edac_sysfs_pci_teardown();
2103 return PTR_ERR(edac_thread);
2112 * module exit/termination function
/* Module exit: stop the polling thread first, then remove the
 * memory-controller and PCI sysfs entries.
 */
2114 static void __exit edac_mc_exit(void)
2116 debugf0("%s()\n", __func__);
2118 kthread_stop(edac_thread);
2120 /* tear down the sysfs device */
2121 edac_sysfs_memctrl_teardown();
2122 edac_sysfs_pci_teardown();
/* Module registration, metadata and tunables.  Each parameter below is
 * an int registered with mode 0644.
 */
2128 module_init(edac_mc_init);
2129 module_exit(edac_mc_exit);
2131 MODULE_LICENSE("GPL");
2132 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2133 "Based on.work by Dan Hollis et al");
2134 MODULE_DESCRIPTION("Core library routines for MC reporting");
2136 module_param(panic_on_ue, int, 0644);
2137 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2138 module_param(check_pci_parity, int, 0644);
2139 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2140 module_param(panic_on_pci_parity, int, 0644);
2141 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2142 module_param(log_ue, int, 0644);
2143 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2144 module_param(log_ce, int, 0644);
2145 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2146 module_param(poll_msec, int, 0644);
2147 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2148 #ifdef CONFIG_EDAC_DEBUG
2149 module_param(edac_debug_level, int, 0644);
2150 MODULE_PARM_DESC(edac_debug_level, "Debug level");