2 * edac_mc kernel module
3 * (C) 2005 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 * http://www.anime.net/~goemon/linux-ecc/
11 * Modified by Dave Peterson and Doug Thompson
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/proc_fs.h>
19 #include <linux/kernel.h>
20 #include <linux/types.h>
21 #include <linux/smp.h>
22 #include <linux/init.h>
23 #include <linux/sysctl.h>
24 #include <linux/highmem.h>
25 #include <linux/timer.h>
26 #include <linux/slab.h>
27 #include <linux/jiffies.h>
28 #include <linux/spinlock.h>
29 #include <linux/list.h>
30 #include <linux/sysdev.h>
31 #include <linux/ctype.h>
33 #include <asm/uaccess.h>
39 #define EDAC_MC_VERSION "edac_mc Ver: 2.0.0 " __DATE__
41 /* For now, disable the EDAC sysfs code. The sysfs interface that EDAC
42 * presents to user space needs more thought, and is likely to change
45 #define DISABLE_EDAC_SYSFS
47 #ifdef CONFIG_EDAC_DEBUG
48 /* Values of 0 to 4 will generate output */
49 int edac_debug_level = 1;
50 EXPORT_SYMBOL(edac_debug_level);
53 /* EDAC Controls, settable by module parameter, and sysfs */
54 static int log_ue = 1;
55 static int log_ce = 1;
56 static int panic_on_ue;
57 static int poll_msec = 1000;
59 static int check_pci_parity = 0; /* default NO check PCI parity */
60 static int panic_on_pci_parity; /* default no panic on PCI Parity */
61 static atomic_t pci_parity_count = ATOMIC_INIT(0);
63 /* lock to memory controller's control array */
64 static DECLARE_MUTEX(mem_ctls_mutex);
65 static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
67 /* Structure of the whitelist and blacklist arrays */
68 struct edac_pci_device_list {
69 unsigned int vendor; /* Vendor ID */
70 unsigned int device; /* Device ID */
74 #define MAX_LISTED_PCI_DEVICES 32
76 /* List of PCI devices (vendor-id:device-id) that should be skipped */
77 static struct edac_pci_device_list pci_blacklist[MAX_LISTED_PCI_DEVICES];
78 static int pci_blacklist_count;
80 /* List of PCI devices (vendor-id:device-id) that should be scanned */
81 static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
82 static int pci_whitelist_count ;
84 /* START sysfs data and methods */
86 #ifndef DISABLE_EDAC_SYSFS
88 static const char *mem_types[] = {
89 [MEM_EMPTY] = "Empty",
90 [MEM_RESERVED] = "Reserved",
91 [MEM_UNKNOWN] = "Unknown",
95 [MEM_SDR] = "Unbuffered-SDR",
96 [MEM_RDR] = "Registered-SDR",
97 [MEM_DDR] = "Unbuffered-DDR",
98 [MEM_RDDR] = "Registered-DDR",
102 static const char *dev_types[] = {
103 [DEV_UNKNOWN] = "Unknown",
113 static const char *edac_caps[] = {
114 [EDAC_UNKNOWN] = "Unknown",
115 [EDAC_NONE] = "None",
116 [EDAC_RESERVED] = "Reserved",
117 [EDAC_PARITY] = "PARITY",
119 [EDAC_SECDED] = "SECDED",
120 [EDAC_S2ECD2ED] = "S2ECD2ED",
121 [EDAC_S4ECD4ED] = "S4ECD4ED",
122 [EDAC_S8ECD8ED] = "S8ECD8ED",
123 [EDAC_S16ECD16ED] = "S16ECD16ED"
127 /* sysfs object: /sys/devices/system/edac */
128 static struct sysdev_class edac_class = {
129 set_kset_name("edac"),
133 * /sys/devices/system/edac/mc
134 * /sys/devices/system/edac/pci
136 static struct kobject edac_memctrl_kobj;
137 static struct kobject edac_pci_kobj;
140 * /sys/devices/system/edac/mc;
141 * data structures and methods
144 static ssize_t memctrl_string_show(void *ptr, char *buffer)
146 char *value = (char*) ptr;
147 return sprintf(buffer, "%s\n", value);
151 static ssize_t memctrl_int_show(void *ptr, char *buffer)
153 int *value = (int*) ptr;
154 return sprintf(buffer, "%d\n", *value);
157 static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
159 int *value = (int*) ptr;
161 if (isdigit(*buffer))
162 *value = simple_strtoul(buffer, NULL, 0);
167 struct memctrl_dev_attribute {
168 struct attribute attr;
170 ssize_t (*show)(void *,char *);
171 ssize_t (*store)(void *, const char *, size_t);
174 /* Set of show/store abstract level functions for memory control object */
176 memctrl_dev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
178 struct memctrl_dev_attribute *memctrl_dev;
179 memctrl_dev = (struct memctrl_dev_attribute*)attr;
181 if (memctrl_dev->show)
182 return memctrl_dev->show(memctrl_dev->value, buffer);
187 memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
188 const char *buffer, size_t count)
190 struct memctrl_dev_attribute *memctrl_dev;
191 memctrl_dev = (struct memctrl_dev_attribute*)attr;
193 if (memctrl_dev->store)
194 return memctrl_dev->store(memctrl_dev->value, buffer, count);
198 static struct sysfs_ops memctrlfs_ops = {
199 .show = memctrl_dev_show,
200 .store = memctrl_dev_store
203 #define MEMCTRL_ATTR(_name,_mode,_show,_store) \
204 struct memctrl_dev_attribute attr_##_name = { \
205 .attr = {.name = __stringify(_name), .mode = _mode }, \
211 #define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \
212 struct memctrl_dev_attribute attr_##_name = { \
213 .attr = {.name = __stringify(_name), .mode = _mode }, \
219 /* cwrow<id> attribute f*/
221 MEMCTRL_STRING_ATTR(mc_version,EDAC_MC_VERSION,S_IRUGO,memctrl_string_show,NULL);
224 /* csrow<id> control files */
225 MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
226 MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
227 MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
228 MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store);
231 /* Base Attributes of the memory ECC object */
232 static struct memctrl_dev_attribute *memctrl_attr[] = {
240 /* Main MC kobject release() function */
241 static void edac_memctrl_master_release(struct kobject *kobj)
243 debugf1("EDAC MC: " __FILE__ ": %s()\n", __func__);
246 static struct kobj_type ktype_memctrl = {
247 .release = edac_memctrl_master_release,
248 .sysfs_ops = &memctrlfs_ops,
249 .default_attrs = (struct attribute **) memctrl_attr,
252 #endif /* DISABLE_EDAC_SYSFS */
254 /* Initialize the main sysfs entries for edac:
255 * /sys/devices/system/edac
262 static int edac_sysfs_memctrl_setup(void)
263 #ifdef DISABLE_EDAC_SYSFS
271 debugf1("MC: " __FILE__ ": %s()\n", __func__);
273 /* create the /sys/devices/system/edac directory */
274 err = sysdev_class_register(&edac_class);
276 /* Init the MC's kobject */
277 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
278 kobject_init(&edac_memctrl_kobj);
280 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
281 edac_memctrl_kobj.ktype = &ktype_memctrl;
283 /* generate sysfs "..../edac/mc" */
284 err = kobject_set_name(&edac_memctrl_kobj,"mc");
286 /* FIXME: maybe new sysdev_create_subdir() */
287 err = kobject_register(&edac_memctrl_kobj);
289 debugf1("Failed to register '.../edac/mc'\n");
291 debugf1("Registered '.../edac/mc' kobject\n");
295 debugf1(KERN_WARNING "__FILE__ %s() error=%d\n", __func__,err);
300 #endif /* DISABLE_EDAC_SYSFS */
304 * the '..../edac/mc' kobject followed by '..../edac' itself
306 static void edac_sysfs_memctrl_teardown(void)
308 #ifndef DISABLE_EDAC_SYSFS
309 debugf0("MC: " __FILE__ ": %s()\n", __func__);
311 /* Unregister the MC's kobject */
312 kobject_unregister(&edac_memctrl_kobj);
314 /* release the master edac mc kobject */
315 kobject_put(&edac_memctrl_kobj);
317 /* Unregister the 'edac' object */
318 sysdev_class_unregister(&edac_class);
319 #endif /* DISABLE_EDAC_SYSFS */
322 #ifndef DISABLE_EDAC_SYSFS
325 * /sys/devices/system/edac/pci;
326 * data structures and methods
329 struct list_control {
330 struct edac_pci_device_list *list;
336 /* Output the list as: vendor_id:device_id<,vendor_id:device_id> */
337 static ssize_t edac_pci_list_string_show(void *ptr, char *buffer)
339 struct list_control *listctl;
340 struct edac_pci_device_list *list;
346 list = listctl->list;
348 for (i = 0; i < *(listctl->count); i++, list++ ) {
350 len += snprintf(p + len, (PAGE_SIZE-len), ",");
352 len += snprintf(p + len,
355 list->vendor,list->device);
358 len += snprintf(p + len,(PAGE_SIZE-len), "\n");
360 return (ssize_t) len;
365 * Scan string from **s to **e looking for one 'vendor:device' tuple
366 * where each field is a hex value
368 * return 0 if an entry is NOT found
369 * return 1 if an entry is found
370 * fill in *vendor_id and *device_id with values found
372 * In both cases, make sure *s has been moved forward toward *e
374 static int parse_one_device(const char **s,const char **e,
375 unsigned int *vendor_id, unsigned int *device_id)
377 const char *runner, *p;
379 /* if null byte, we are done */
381 (*s)++; /* keep *s moving */
385 /* skip over newlines & whitespace */
386 if ((**s == '\n') || isspace(**s)) {
391 if (!isxdigit(**s)) {
396 /* parse vendor_id */
398 while (runner < *e) {
399 /* scan for vendor:device delimiter */
400 if (*runner == ':') {
401 *vendor_id = simple_strtol((char*) *s, (char**) &p, 16);
408 if (!isxdigit(*runner)) {
413 /* parse device_id */
415 *device_id = simple_strtol((char*)runner, (char**)&p, 16);
424 static ssize_t edac_pci_list_string_store(void *ptr, const char *buffer,
427 struct list_control *listctl;
428 struct edac_pci_device_list *list;
429 unsigned int vendor_id, device_id;
437 list = listctl->list;
438 index = listctl->count;
441 while (*index < MAX_LISTED_PCI_DEVICES) {
443 if (parse_one_device(&s,&e,&vendor_id,&device_id)) {
444 list[ *index ].vendor = vendor_id;
445 list[ *index ].device = device_id;
449 /* check for all data consumed */
458 static ssize_t edac_pci_int_show(void *ptr, char *buffer)
461 return sprintf(buffer,"%d\n",*value);
464 static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count)
468 if (isdigit(*buffer))
469 *value = simple_strtoul(buffer,NULL,0);
474 struct edac_pci_dev_attribute {
475 struct attribute attr;
477 ssize_t (*show)(void *,char *);
478 ssize_t (*store)(void *, const char *,size_t);
481 /* Set of show/store abstract level functions for PCI Parity object */
482 static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr,
485 struct edac_pci_dev_attribute *edac_pci_dev;
486 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
488 if (edac_pci_dev->show)
489 return edac_pci_dev->show(edac_pci_dev->value, buffer);
493 static ssize_t edac_pci_dev_store(struct kobject *kobj, struct attribute *attr,
494 const char *buffer, size_t count)
496 struct edac_pci_dev_attribute *edac_pci_dev;
497 edac_pci_dev= (struct edac_pci_dev_attribute*)attr;
499 if (edac_pci_dev->store) /* guard the callback that is actually invoked; read-only attributes have store == NULL */
500 return edac_pci_dev->store(edac_pci_dev->value, buffer, count);
504 static struct sysfs_ops edac_pci_sysfs_ops = {
505 .show = edac_pci_dev_show,
506 .store = edac_pci_dev_store
510 #define EDAC_PCI_ATTR(_name,_mode,_show,_store) \
511 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
512 .attr = {.name = __stringify(_name), .mode = _mode }, \
518 #define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \
519 struct edac_pci_dev_attribute edac_pci_attr_##_name = { \
520 .attr = {.name = __stringify(_name), .mode = _mode }, \
527 static struct list_control pci_whitelist_control = {
528 .list = pci_whitelist,
529 .count = &pci_whitelist_count
532 static struct list_control pci_blacklist_control = {
533 .list = pci_blacklist,
534 .count = &pci_blacklist_count
537 /* whitelist attribute */
538 EDAC_PCI_STRING_ATTR(pci_parity_whitelist,
539 &pci_whitelist_control,
541 edac_pci_list_string_show,
542 edac_pci_list_string_store);
544 EDAC_PCI_STRING_ATTR(pci_parity_blacklist,
545 &pci_blacklist_control,
547 edac_pci_list_string_show,
548 edac_pci_list_string_store);
551 /* PCI Parity control files */
552 EDAC_PCI_ATTR(check_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
553 EDAC_PCI_ATTR(panic_on_pci_parity,S_IRUGO|S_IWUSR,edac_pci_int_show,edac_pci_int_store);
554 EDAC_PCI_ATTR(pci_parity_count,S_IRUGO,edac_pci_int_show,NULL);
556 /* Base Attributes of the EDAC PCI parity object */
557 static struct edac_pci_dev_attribute *edac_pci_attr[] = {
558 &edac_pci_attr_check_pci_parity,
559 &edac_pci_attr_panic_on_pci_parity,
560 &edac_pci_attr_pci_parity_count,
564 /* No memory to release */
565 static void edac_pci_release(struct kobject *kobj)
567 debugf1("EDAC PCI: " __FILE__ ": %s()\n", __func__);
570 static struct kobj_type ktype_edac_pci = {
571 .release = edac_pci_release,
572 .sysfs_ops = &edac_pci_sysfs_ops,
573 .default_attrs = (struct attribute **) edac_pci_attr,
576 #endif /* DISABLE_EDAC_SYSFS */
579 * edac_sysfs_pci_setup()
582 static int edac_sysfs_pci_setup(void)
583 #ifdef DISABLE_EDAC_SYSFS
591 debugf1("MC: " __FILE__ ": %s()\n", __func__);
593 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj));
595 kobject_init(&edac_pci_kobj);
596 edac_pci_kobj.parent = &edac_class.kset.kobj;
597 edac_pci_kobj.ktype = &ktype_edac_pci;
599 err = kobject_set_name(&edac_pci_kobj, "pci");
601 /* Instantiate the pci object */
602 /* FIXME: maybe new sysdev_create_subdir() */
603 err = kobject_register(&edac_pci_kobj);
605 debugf1("Failed to register '.../edac/pci'\n");
607 debugf1("Registered '.../edac/pci' kobject\n");
611 #endif /* DISABLE_EDAC_SYSFS */
613 static void edac_sysfs_pci_teardown(void)
615 #ifndef DISABLE_EDAC_SYSFS
616 debugf0("MC: " __FILE__ ": %s()\n", __func__);
618 kobject_unregister(&edac_pci_kobj);
619 kobject_put(&edac_pci_kobj);
623 #ifndef DISABLE_EDAC_SYSFS
625 /* EDAC sysfs CSROW data structures and methods */
627 /* Set of more detailed csrow<id> attribute show/store functions */
628 static ssize_t csrow_ch0_dimm_label_show(struct csrow_info *csrow, char *data)
632 if (csrow->nr_channels > 0) {
633 size = snprintf(data, EDAC_MC_LABEL_LEN,"%s\n",
634 csrow->channels[0].label);
639 static ssize_t csrow_ch1_dimm_label_show(struct csrow_info *csrow, char *data)
643 if (csrow->nr_channels > 1) { /* channels[1] only exists when there are at least two channels */
644 size = snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
645 csrow->channels[1].label);
650 static ssize_t csrow_ch0_dimm_label_store(struct csrow_info *csrow,
651 const char *data, size_t size)
653 ssize_t max_size = 0;
655 if (csrow->nr_channels > 0) {
656 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
657 strncpy(csrow->channels[0].label, data, max_size);
658 csrow->channels[0].label[max_size] = '\0';
663 static ssize_t csrow_ch1_dimm_label_store(struct csrow_info *csrow,
664 const char *data, size_t size)
666 ssize_t max_size = 0;
668 if (csrow->nr_channels > 1) {
669 max_size = min((ssize_t)size,(ssize_t)EDAC_MC_LABEL_LEN-1);
670 strncpy(csrow->channels[1].label, data, max_size);
671 csrow->channels[1].label[max_size] = '\0';
676 static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data)
678 return sprintf(data,"%u\n", csrow->ue_count);
681 static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data)
683 return sprintf(data,"%u\n", csrow->ce_count);
686 static ssize_t csrow_ch0_ce_count_show(struct csrow_info *csrow, char *data)
690 if (csrow->nr_channels > 0) {
691 size = sprintf(data,"%u\n", csrow->channels[0].ce_count);
696 static ssize_t csrow_ch1_ce_count_show(struct csrow_info *csrow, char *data)
700 if (csrow->nr_channels > 1) {
701 size = sprintf(data,"%u\n", csrow->channels[1].ce_count);
706 static ssize_t csrow_size_show(struct csrow_info *csrow, char *data)
708 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages));
711 static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data)
713 return sprintf(data,"%s\n", mem_types[csrow->mtype]);
716 static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data)
718 return sprintf(data,"%s\n", dev_types[csrow->dtype]);
721 static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data)
723 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]);
726 struct csrowdev_attribute {
727 struct attribute attr;
728 ssize_t (*show)(struct csrow_info *,char *);
729 ssize_t (*store)(struct csrow_info *, const char *,size_t);
732 #define to_csrow(k) container_of(k, struct csrow_info, kobj)
733 #define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr)
735 /* Set of show/store higher level functions for csrow objects */
736 static ssize_t csrowdev_show(struct kobject *kobj, struct attribute *attr,
739 struct csrow_info *csrow = to_csrow(kobj);
740 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr);
742 if (csrowdev_attr->show)
743 return csrowdev_attr->show(csrow, buffer);
747 static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr,
748 const char *buffer, size_t count)
750 struct csrow_info *csrow = to_csrow(kobj);
751 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr);
753 if (csrowdev_attr->store)
754 return csrowdev_attr->store(csrow, buffer, count);
758 static struct sysfs_ops csrowfs_ops = {
759 .show = csrowdev_show,
760 .store = csrowdev_store
763 #define CSROWDEV_ATTR(_name,_mode,_show,_store) \
764 struct csrowdev_attribute attr_##_name = { \
765 .attr = {.name = __stringify(_name), .mode = _mode }, \
770 /* csrow<id>/attribute files */
771 CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL);
772 CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL);
773 CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL);
774 CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL);
775 CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL);
776 CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL);
777 CSROWDEV_ATTR(ch0_ce_count,S_IRUGO,csrow_ch0_ce_count_show,NULL);
778 CSROWDEV_ATTR(ch1_ce_count,S_IRUGO,csrow_ch1_ce_count_show,NULL);
780 /* control/attribute files */
781 CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR,
782 csrow_ch0_dimm_label_show,
783 csrow_ch0_dimm_label_store);
784 CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR,
785 csrow_ch1_dimm_label_show,
786 csrow_ch1_dimm_label_store);
789 /* Attributes of the CSROW<id> object */
790 static struct csrowdev_attribute *csrow_attr[] = {
799 &attr_ch0_dimm_label,
800 &attr_ch1_dimm_label,
805 /* No memory to release */
806 static void edac_csrow_instance_release(struct kobject *kobj)
808 debugf1("EDAC MC: " __FILE__ ": %s()\n", __func__);
811 static struct kobj_type ktype_csrow = {
812 .release = edac_csrow_instance_release,
813 .sysfs_ops = &csrowfs_ops,
814 .default_attrs = (struct attribute **) csrow_attr,
817 /* Create a CSROW object under specified edac_mc_device */
818 static int edac_create_csrow_object(struct kobject *edac_mci_kobj,
819 struct csrow_info *csrow, int index )
823 debugf0("MC: " __FILE__ ": %s()\n", __func__);
825 memset(&csrow->kobj, 0, sizeof(csrow->kobj));
827 /* generate ..../edac/mc/mc<id>/csrow<index> */
829 kobject_init(&csrow->kobj);
830 csrow->kobj.parent = edac_mci_kobj;
831 csrow->kobj.ktype = &ktype_csrow;
833 /* name this instance of csrow<id> */
834 err = kobject_set_name(&csrow->kobj,"csrow%d",index);
836 /* Instantiate the csrow object */
837 err = kobject_register(&csrow->kobj);
839 debugf0("Failed to register CSROW%d\n",index);
841 debugf0("Registered CSROW%d\n",index);
847 /* sysfs data structures and methods for the MCI kobjects */
849 static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
850 const char *data, size_t count )
854 mci->ue_noinfo_count = 0;
855 mci->ce_noinfo_count = 0;
858 for (row = 0; row < mci->nr_csrows; row++) {
859 struct csrow_info *ri = &mci->csrows[row];
863 for (chan = 0; chan < ri->nr_channels; chan++)
864 ri->channels[chan].ce_count = 0;
866 mci->start_time = jiffies;
871 static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data)
873 return sprintf(data,"%d\n", mci->ue_count);
876 static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data)
878 return sprintf(data,"%d\n", mci->ce_count);
881 static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data)
883 return sprintf(data,"%d\n", mci->ce_noinfo_count);
886 static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data)
888 return sprintf(data,"%d\n", mci->ue_noinfo_count);
891 static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data)
893 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ);
896 static ssize_t mci_mod_name_show(struct mem_ctl_info *mci, char *data)
898 return sprintf(data,"%s %s\n", mci->mod_name, mci->mod_ver);
901 static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data)
903 return sprintf(data,"%s\n", mci->ctl_name);
906 static int mci_output_edac_cap(char *buf, unsigned long edac_cap)
911 for (bit_idx = 0; bit_idx < 8 * sizeof(edac_cap); bit_idx++) {
912 if ((edac_cap >> bit_idx) & 0x1)
913 p += sprintf(p, "%s ", edac_caps[bit_idx]);
919 static ssize_t mci_edac_capability_show(struct mem_ctl_info *mci, char *data)
923 p += mci_output_edac_cap(p,mci->edac_ctl_cap);
924 p += sprintf(p, "\n");
929 static ssize_t mci_edac_current_capability_show(struct mem_ctl_info *mci,
934 p += mci_output_edac_cap(p,mci->edac_cap);
935 p += sprintf(p, "\n");
940 static int mci_output_mtype_cap(char *buf, unsigned long mtype_cap)
945 for (bit_idx = 0; bit_idx < 8 * sizeof(mtype_cap); bit_idx++) {
946 if ((mtype_cap >> bit_idx) & 0x1)
947 p += sprintf(p, "%s ", mem_types[bit_idx]);
953 static ssize_t mci_supported_mem_type_show(struct mem_ctl_info *mci, char *data)
957 p += mci_output_mtype_cap(p,mci->mtype_cap);
958 p += sprintf(p, "\n");
963 static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data)
965 int total_pages, csrow_idx;
967 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows;
969 struct csrow_info *csrow = &mci->csrows[csrow_idx];
971 if (!csrow->nr_pages)
973 total_pages += csrow->nr_pages;
976 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages));
979 struct mcidev_attribute {
980 struct attribute attr;
981 ssize_t (*show)(struct mem_ctl_info *,char *);
982 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
985 #define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj)
986 #define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr)
988 static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr,
991 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
992 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
994 if (mcidev_attr->show)
995 return mcidev_attr->show(mem_ctl_info, buffer);
999 static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr,
1000 const char *buffer, size_t count)
1002 struct mem_ctl_info *mem_ctl_info = to_mci(kobj);
1003 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr);
1005 if (mcidev_attr->store)
1006 return mcidev_attr->store(mem_ctl_info, buffer, count);
1010 static struct sysfs_ops mci_ops = {
1011 .show = mcidev_show,
1012 .store = mcidev_store
1015 #define MCIDEV_ATTR(_name,_mode,_show,_store) \
1016 struct mcidev_attribute mci_attr_##_name = { \
1017 .attr = {.name = __stringify(_name), .mode = _mode }, \
1023 MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store);
1025 /* Attribute files */
1026 MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL);
1027 MCIDEV_ATTR(module_name,S_IRUGO,mci_mod_name_show,NULL);
1028 MCIDEV_ATTR(edac_capability,S_IRUGO,mci_edac_capability_show,NULL);
1029 MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL);
1030 MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL);
1031 MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL);
1032 MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL);
1033 MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL);
1034 MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL);
1035 MCIDEV_ATTR(edac_current_capability,S_IRUGO,
1036 mci_edac_current_capability_show,NULL);
1037 MCIDEV_ATTR(supported_mem_type,S_IRUGO,
1038 mci_supported_mem_type_show,NULL);
1041 static struct mcidev_attribute *mci_attr[] = {
1042 &mci_attr_reset_counters,
1043 &mci_attr_module_name,
1045 &mci_attr_edac_capability,
1046 &mci_attr_edac_current_capability,
1047 &mci_attr_supported_mem_type,
1049 &mci_attr_seconds_since_reset,
1050 &mci_attr_ue_noinfo_count,
1051 &mci_attr_ce_noinfo_count,
1059 * Release of a MC controlling instance
1061 static void edac_mci_instance_release(struct kobject *kobj)
1063 struct mem_ctl_info *mci;
1064 mci = container_of(kobj,struct mem_ctl_info,edac_mci_kobj);
1066 debugf0("MC: " __FILE__ ": %s() idx=%d calling kfree\n",
1067 __func__, mci->mc_idx);
1072 static struct kobj_type ktype_mci = {
1073 .release = edac_mci_instance_release,
1074 .sysfs_ops = &mci_ops,
1075 .default_attrs = (struct attribute **) mci_attr,
1078 #endif /* DISABLE_EDAC_SYSFS */
1080 #define EDAC_DEVICE_SYMLINK "device"
1083 * Create a new Memory Controller kobject instance,
1084 * mc<id> under the 'mc' directory
1090 static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1091 #ifdef DISABLE_EDAC_SYSFS
1099 struct csrow_info *csrow;
1100 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj;
1102 debugf0("MC: " __FILE__ ": %s() idx=%d\n", __func__, mci->mc_idx);
1104 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj));
1105 kobject_init(edac_mci_kobj);
1107 /* set the name of the mc<id> object */
1108 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx);
1112 /* link to our parent the '..../edac/mc' object */
1113 edac_mci_kobj->parent = &edac_memctrl_kobj;
1114 edac_mci_kobj->ktype = &ktype_mci;
1116 /* register the mc<id> kobject */
1117 err = kobject_register(edac_mci_kobj);
1121 /* create a symlink for the device */
1122 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj,
1123 EDAC_DEVICE_SYMLINK);
1125 kobject_unregister(edac_mci_kobj);
1129 /* Make directories for each CSROW object
1130 * under the mc<id> kobject
1132 for (i = 0; i < mci->nr_csrows; i++) {
1134 csrow = &mci->csrows[i];
1136 /* Only expose populated CSROWs */
1137 if (csrow->nr_pages > 0) {
1138 err = edac_create_csrow_object(edac_mci_kobj,csrow,i);
1144 /* Mark this MCI instance as having sysfs entries */
1145 mci->sysfs_active = MCI_SYSFS_ACTIVE;
1150 /* CSROW error: backout what has already been registered, */
1152 for ( i--; i >= 0; i--) {
1153 if (mci->csrows[i].nr_pages > 0) { /* test row i itself; 'csrow' still points at the row that failed */
1154 kobject_unregister(&mci->csrows[i].kobj);
1155 kobject_put(&mci->csrows[i].kobj);
1159 kobject_unregister(edac_mci_kobj);
1160 kobject_put(edac_mci_kobj);
1164 #endif /* DISABLE_EDAC_SYSFS */
1167 * remove a Memory Controller instance
1169 static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
1171 #ifndef DISABLE_EDAC_SYSFS
1174 debugf0("MC: " __FILE__ ": %s()\n", __func__);
1176 /* remove all csrow kobjects */
1177 for (i = 0; i < mci->nr_csrows; i++) {
1178 if (mci->csrows[i].nr_pages > 0) {
1179 kobject_unregister(&mci->csrows[i].kobj);
1180 kobject_put(&mci->csrows[i].kobj);
1184 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK);
1186 kobject_unregister(&mci->edac_mci_kobj);
1187 kobject_put(&mci->edac_mci_kobj);
1188 #endif /* DISABLE_EDAC_SYSFS */
1191 /* END OF sysfs data and methods */
1193 #ifdef CONFIG_EDAC_DEBUG
1195 EXPORT_SYMBOL(edac_mc_dump_channel);
1197 void edac_mc_dump_channel(struct channel_info *chan)
1199 debugf4("\tchannel = %p\n", chan);
1200 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
1201 debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
1202 debugf4("\tchannel->label = '%s'\n", chan->label);
1203 debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
1207 EXPORT_SYMBOL(edac_mc_dump_csrow);
1209 void edac_mc_dump_csrow(struct csrow_info *csrow)
1211 debugf4("\tcsrow = %p\n", csrow);
1212 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
1213 debugf4("\tcsrow->first_page = 0x%lx\n",
1215 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
1216 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
1217 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
1218 debugf4("\tcsrow->nr_channels = %d\n",
1219 csrow->nr_channels);
1220 debugf4("\tcsrow->channels = %p\n", csrow->channels);
1221 debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
1225 EXPORT_SYMBOL(edac_mc_dump_mci);
1227 void edac_mc_dump_mci(struct mem_ctl_info *mci)
1229 debugf3("\tmci = %p\n", mci);
1230 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
1231 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
1232 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
1233 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1234 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1235 mci->nr_csrows, mci->csrows);
1236 debugf3("\tpdev = %p\n", mci->pdev);
1237 debugf3("\tmod_name:ctl_name = %s:%s\n",
1238 mci->mod_name, mci->ctl_name);
1239 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
1243 #endif /* CONFIG_EDAC_DEBUG */
1245 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
1246 * Adjust 'ptr' so that its alignment is at least as stringent as what the
1247 * compiler would provide for X and return the aligned result.
1249 * If 'size' is a constant, the compiler will optimize this whole function
1250 * down to either a no-op or the addition of a constant to the value of 'ptr'.
1252 static inline char * align_ptr (void *ptr, unsigned size)
1256 /* Here we assume that the alignment of a "long long" is the most
1257 * stringent alignment that the compiler will ever provide by default.
1258 * As far as I know, this is a reasonable assumption.
1260 if (size > sizeof(long))
1261 align = sizeof(long long);
1262 else if (size > sizeof(int))
1263 align = sizeof(long);
1264 else if (size > sizeof(short))
1265 align = sizeof(int);
1266 else if (size > sizeof(char))
1267 align = sizeof(short);
1269 return (char *) ptr;
1274 return (char *) ptr;
1276 return (char *) (((unsigned long) ptr) + align - r);
1280 EXPORT_SYMBOL(edac_mc_alloc);
1283 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
1284 * @sz_pvt: size of private storage needed
1285 * @nr_csrows: Number of CSROWS needed for this MC
1286 * @nr_chans: Number of channels for the MC
1288 * Everything is kmalloc'ed as one big chunk - more efficient.
1289 * Only can be used if all structures have the same lifetime - otherwise
1290 * you have to allocate and initialize your own structures.
1292 * Use edac_mc_free() to free mc structures allocated by this function.
1295 * NULL allocation failed
1296 * struct mem_ctl_info pointer
1298 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
1301 struct mem_ctl_info *mci;
1302 struct csrow_info *csi, *csrow;
1303 struct channel_info *chi, *chp, *chan;
1308 /* Figure out the offsets of the various items from the start of an mc
1309 * structure. We want the alignment of each item to be at least as
1310 * stringent as what the compiler would provide if we could simply
1311 * hardcode everything into a single struct.
1313 mci = (struct mem_ctl_info *) 0;
1314 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi));
1315 chi = (struct channel_info *)
1316 align_ptr(&csi[nr_csrows], sizeof(*chi));
1317 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
1318 size = ((unsigned long) pvt) + sz_pvt;
1320 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL)
1323 /* Adjust pointers so they point within the memory we just allocated
1324 * rather than an imaginary chunk of memory located at address 0.
1326 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi));
1327 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi));
1328 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL;
1330 memset(mci, 0, size); /* clear all fields */
1333 mci->pvt_info = pvt;
1334 mci->nr_csrows = nr_csrows;
1336 for (row = 0; row < nr_csrows; row++) {
1338 csrow->csrow_idx = row;
1340 csrow->nr_channels = nr_chans;
1341 chp = &chi[row * nr_chans];
1342 csrow->channels = chp;
1344 for (chn = 0; chn < nr_chans; chn++) {
1346 chan->chan_idx = chn;
1347 chan->csrow = csrow;
1355 EXPORT_SYMBOL(edac_mc_free);
1358 * edac_mc_free: Free a previously allocated 'mci' structure
1359 * @mci: pointer to a struct mem_ctl_info structure
1361 * Free up a previously allocated mci structure
1362 * A MCI structure can be in 2 states after being allocated
1363 * by edac_mc_alloc().
1364 * 1) Allocated in a MC driver's probe, but not yet committed
1365 * 2) Allocated and committed, by a call to edac_mc_add_mc()
1366 * edac_mc_add_mc() is the function that adds the sysfs entries
1367 * thus, this free function must determine which state the 'mci'
1368 * structure is in, then either free it directly or
1369 * perform kobject cleanup by calling edac_remove_sysfs_mci_device().
1373 void edac_mc_free(struct mem_ctl_info *mci)
1375 /* only if sysfs entries for this mci instance exist
1376 * do we remove them and defer the actual kfree via
1377 * the kobject 'release()' callback.
1379 * Otherwise, do a straight kfree now.
1381 if (mci->sysfs_active == MCI_SYSFS_ACTIVE)
1382 edac_remove_sysfs_mci_device(mci);
1389 EXPORT_SYMBOL(edac_mc_find_mci_by_pdev);
1391 struct mem_ctl_info *edac_mc_find_mci_by_pdev(struct pci_dev *pdev)
1393 struct mem_ctl_info *mci;
1394 struct list_head *item;
1396 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1398 list_for_each(item, &mc_devices) {
1399 mci = list_entry(item, struct mem_ctl_info, link);
1401 if (mci->pdev == pdev)
1408 static int add_mc_to_global_list (struct mem_ctl_info *mci)
1410 struct list_head *item, *insert_before;
1411 struct mem_ctl_info *p;
1414 if (list_empty(&mc_devices)) {
1416 insert_before = &mc_devices;
1418 if (edac_mc_find_mci_by_pdev(mci->pdev)) {
1420 "EDAC MC: %s (%s) %s %s already assigned %d\n",
1421 mci->pdev->dev.bus_id, pci_name(mci->pdev),
1422 mci->mod_name, mci->ctl_name, mci->mc_idx);
1426 insert_before = NULL;
1429 list_for_each(item, &mc_devices) {
1430 p = list_entry(item, struct mem_ctl_info, link);
1432 if (p->mc_idx != i) {
1433 insert_before = item;
1442 if (insert_before == NULL)
1443 insert_before = &mc_devices;
1446 list_add_tail_rcu(&mci->link, insert_before);
1452 EXPORT_SYMBOL(edac_mc_add_mc);
1455 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list
1456 * @mci: pointer to the mci structure to be added to the list
1463 /* FIXME - should a warning be printed if no error detection? correction? */
1464 int edac_mc_add_mc(struct mem_ctl_info *mci)
1468 debugf0("MC: " __FILE__ ": %s()\n", __func__);
1469 #ifdef CONFIG_EDAC_DEBUG
1470 if (edac_debug_level >= 3)
1471 edac_mc_dump_mci(mci);
1472 if (edac_debug_level >= 4) {
1475 for (i = 0; i < mci->nr_csrows; i++) {
1477 edac_mc_dump_csrow(&mci->csrows[i]);
1478 for (j = 0; j < mci->csrows[i].nr_channels; j++)
1479 edac_mc_dump_channel(&mci->csrows[i].
1484 down(&mem_ctls_mutex);
1486 if (add_mc_to_global_list(mci))
1489 /* set load time so that error rate can be tracked */
1490 mci->start_time = jiffies;
1492 if (edac_create_sysfs_mci_device(mci)) {
1494 "EDAC MC%d: failed to create sysfs device\n",
1496 /* FIXME - should there be an error code and unwind? */
1500 /* Report action taken */
1502 "EDAC MC%d: Giving out device to %s %s: PCI %s\n",
1503 mci->mc_idx, mci->mod_name, mci->ctl_name,
1504 pci_name(mci->pdev));
1510 up(&mem_ctls_mutex);
1516 static void complete_mc_list_del (struct rcu_head *head)
1518 struct mem_ctl_info *mci;
1520 mci = container_of(head, struct mem_ctl_info, rcu);
1521 INIT_LIST_HEAD(&mci->link);
1522 complete(&mci->complete);
1525 static void del_mc_from_global_list (struct mem_ctl_info *mci)
1527 list_del_rcu(&mci->link);
1528 init_completion(&mci->complete);
1529 call_rcu(&mci->rcu, complete_mc_list_del);
1530 wait_for_completion(&mci->complete);
1533 EXPORT_SYMBOL(edac_mc_del_mc);
1536 * edac_mc_del_mc: Remove the specified mci structure from global list
1537 * @mci: Pointer to struct mem_ctl_info structure
1543 int edac_mc_del_mc(struct mem_ctl_info *mci)
1547 debugf0("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1548 down(&mem_ctls_mutex);
1549 del_mc_from_global_list(mci);
1551 "EDAC MC%d: Removed device %d for %s %s: PCI %s\n",
1552 mci->mc_idx, mci->mc_idx, mci->mod_name, mci->ctl_name,
1553 pci_name(mci->pdev));
1555 up(&mem_ctls_mutex);
1561 EXPORT_SYMBOL(edac_mc_scrub_block);
1563 void edac_mc_scrub_block(unsigned long page, unsigned long offset,
1568 unsigned long flags = 0;
1570 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1572 /* ECC error page was not in our memory. Ignore it. */
1573 if(!pfn_valid(page))
1576 /* Find the actual page structure then map it and fix */
1577 pg = pfn_to_page(page);
1579 if (PageHighMem(pg))
1580 local_irq_save(flags);
1582 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
1584 /* Perform architecture specific atomic scrub operation */
1585 atomic_scrub(virt_addr + offset, size);
1587 /* Unmap and complete */
1588 kunmap_atomic(virt_addr, KM_BOUNCE_READ);
1590 if (PageHighMem(pg))
1591 local_irq_restore(flags);
1595 /* FIXME - should return -1 */
1596 EXPORT_SYMBOL(edac_mc_find_csrow_by_page);
1598 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
1601 struct csrow_info *csrows = mci->csrows;
1604 debugf1("MC%d: " __FILE__ ": %s(): 0x%lx\n", mci->mc_idx, __func__,
1608 for (i = 0; i < mci->nr_csrows; i++) {
1609 struct csrow_info *csrow = &csrows[i];
1611 if (csrow->nr_pages == 0)
1614 debugf3("MC%d: " __FILE__
1615 ": %s(): first(0x%lx) page(0x%lx)"
1616 " last(0x%lx) mask(0x%lx)\n", mci->mc_idx,
1617 __func__, csrow->first_page, page,
1618 csrow->last_page, csrow->page_mask);
1620 if ((page >= csrow->first_page) &&
1621 (page <= csrow->last_page) &&
1622 ((page & csrow->page_mask) ==
1623 (csrow->first_page & csrow->page_mask))) {
1631 "EDAC MC%d: could not look up page error address %lx\n",
1632 mci->mc_idx, (unsigned long) page);
1638 EXPORT_SYMBOL(edac_mc_handle_ce);
1640 /* FIXME - setable log (warning/emerg) levels */
1641 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
1642 void edac_mc_handle_ce(struct mem_ctl_info *mci,
1643 unsigned long page_frame_number,
1644 unsigned long offset_in_page,
1645 unsigned long syndrome, int row, int channel,
1648 unsigned long remapped_page;
1650 debugf3("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1652 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1653 if (row >= mci->nr_csrows || row < 0) {
1654 /* something is wrong */
1656 "EDAC MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n",
1657 mci->mc_idx, row, mci->nr_csrows);
1658 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1661 if (channel >= mci->csrows[row].nr_channels || channel < 0) {
1662 /* something is wrong */
1664 "EDAC MC%d: INTERNAL ERROR: channel out of range "
1666 mci->mc_idx, channel, mci->csrows[row].nr_channels);
1667 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
1672 /* FIXME - put in DIMM location */
1674 "EDAC MC%d: CE page 0x%lx, offset 0x%lx,"
1675 " grain %d, syndrome 0x%lx, row %d, channel %d,"
1676 " label \"%s\": %s\n", mci->mc_idx,
1677 page_frame_number, offset_in_page,
1678 mci->csrows[row].grain, syndrome, row, channel,
1679 mci->csrows[row].channels[channel].label, msg);
1682 mci->csrows[row].ce_count++;
1683 mci->csrows[row].channels[channel].ce_count++;
1685 if (mci->scrub_mode & SCRUB_SW_SRC) {
1687 * Some MC's can remap memory so that it is still available
1688 * at a different address when PCI devices map into memory.
1689 * MC's that can't do this lose the memory where PCI devices
1690 * are mapped. This mapping is MC dependant and so we call
1691 * back into the MC driver for it to map the MC page to
1692 * a physical (CPU) page which can then be mapped to a virtual
1693 * page - which can then be scrubbed.
1695 remapped_page = mci->ctl_page_to_phys ?
1696 mci->ctl_page_to_phys(mci, page_frame_number) :
1699 edac_mc_scrub_block(remapped_page, offset_in_page,
1700 mci->csrows[row].grain);
1705 EXPORT_SYMBOL(edac_mc_handle_ce_no_info);
1707 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
1712 "EDAC MC%d: CE - no information available: %s\n",
1714 mci->ce_noinfo_count++;
1719 EXPORT_SYMBOL(edac_mc_handle_ue);
1721 void edac_mc_handle_ue(struct mem_ctl_info *mci,
1722 unsigned long page_frame_number,
1723 unsigned long offset_in_page, int row,
1726 int len = EDAC_MC_LABEL_LEN * 4;
1727 char labels[len + 1];
1732 debugf3("MC%d: " __FILE__ ": %s()\n", mci->mc_idx, __func__);
1734 /* FIXME - maybe make panic on INTERNAL ERROR an option */
1735 if (row >= mci->nr_csrows || row < 0) {
1736 /* something is wrong */
1738 "EDAC MC%d: INTERNAL ERROR: row out of range (%d >= %d)\n",
1739 mci->mc_idx, row, mci->nr_csrows);
1740 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
1744 chars = snprintf(pos, len + 1, "%s",
1745 mci->csrows[row].channels[0].label);
1748 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
1750 chars = snprintf(pos, len + 1, ":%s",
1751 mci->csrows[row].channels[chan].label);
1758 "EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1759 " labels \"%s\": %s\n", mci->mc_idx,
1760 page_frame_number, offset_in_page,
1761 mci->csrows[row].grain, row, labels, msg);
1765 ("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, row %d,"
1766 " labels \"%s\": %s\n", mci->mc_idx,
1767 page_frame_number, offset_in_page,
1768 mci->csrows[row].grain, row, labels, msg);
1771 mci->csrows[row].ue_count++;
1775 EXPORT_SYMBOL(edac_mc_handle_ue_no_info);
1777 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
1781 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
1785 "EDAC MC%d: UE - no information available: %s\n",
1787 mci->ue_noinfo_count++;
1794 static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1799 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1800 pci_read_config_word(dev, where, &status);
1802 /* If we get back 0xFFFF then we must suspect that the card has been pulled but
1803 the Linux PCI layer has not yet finished cleaning up. We don't want to report
1806 if (status == 0xFFFF) {
1808 pci_read_config_dword(dev, 0, &sanity);
1809 if (sanity == 0xFFFFFFFF)
1812 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1816 /* reset only the bits we are interested in */
1817 pci_write_config_word(dev, where, status);
1822 typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1824 /* Clear any PCI parity errors logged by this device. */
1825 static void edac_pci_dev_parity_clear( struct pci_dev *dev )
1829 get_pci_parity_status(dev, 0);
1831 /* read the device TYPE, looking for bridges */
1832 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1834 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1835 get_pci_parity_status(dev, 1);
1839 * PCI Parity polling
1842 static void edac_pci_dev_parity_test(struct pci_dev *dev)
1847 /* read the STATUS register on this device
1849 status = get_pci_parity_status(dev, 0);
1851 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1853 /* check the status reg for errors */
1855 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1858 "Signaled System Error on %s\n",
1861 if (status & (PCI_STATUS_PARITY)) {
1864 "Master Data Parity Error on %s\n",
1867 atomic_inc(&pci_parity_count);
1870 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1873 "Detected Parity Error on %s\n",
1876 atomic_inc(&pci_parity_count);
1880 /* read the device TYPE, looking for bridges */
1881 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1883 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1885 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1886 /* On bridges, need to examine secondary status register */
1887 status = get_pci_parity_status(dev, 1);
1889 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1890 status, dev->dev.bus_id );
1892 /* check the secondary status reg for errors */
1894 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1897 "Signaled System Error on %s\n",
1900 if (status & (PCI_STATUS_PARITY)) {
1903 "Master Data Parity Error on %s\n",
1906 atomic_inc(&pci_parity_count);
1909 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1912 "Detected Parity Error on %s\n",
1915 atomic_inc(&pci_parity_count);
1922 * check_dev_on_list: Scan for a PCI device on a white/black list
1923 * @list: an EDAC &edac_pci_device_list white/black list pointer
1924 * @free_index: index of next free entry on the list
1925 * @pci_dev: PCI Device pointer
1927 * see if list contains the device.
1929 * Returns: 0 not found
1932 static int check_dev_on_list(struct edac_pci_device_list *list, int free_index,
1933 struct pci_dev *dev)
1936 int rc = 0; /* Assume not found */
1937 unsigned short vendor=dev->vendor;
1938 unsigned short device=dev->device;
1940 /* Scan the list, looking for a vendor/device match
1942 for (i = 0; i < free_index; i++, list++ ) {
1943 if ( (list->vendor == vendor ) &&
1944 (list->device == device )) {
1954 * pci_dev parity list iterator
1955 * Scan the PCI device list for one iteration, looking for SERRORs
1956 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
1958 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1960 struct pci_dev *dev=NULL;
1962 /* request for kernel access to the next PCI device, if any,
1963 * and while we are looking at it have its reference count
1964 * bumped until we are done with it
1966 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1968 /* if whitelist exists then it has priority, so only scan those
1969 * devices on the whitelist
1971 if (pci_whitelist_count > 0 ) {
1972 if (check_dev_on_list(pci_whitelist,
1973 pci_whitelist_count, dev))
1977 * if no whitelist, then check if this devices is
1980 if (!check_dev_on_list(pci_blacklist,
1981 pci_blacklist_count, dev))
1987 static void do_pci_parity_check(void)
1989 unsigned long flags;
1992 debugf3("MC: " __FILE__ ": %s()\n", __func__);
1994 if (!check_pci_parity)
1997 before_count = atomic_read(&pci_parity_count);
1999 /* scan all PCI devices looking for a Parity Error on devices and
2002 local_irq_save(flags);
2003 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
2004 local_irq_restore(flags);
2006 /* Only if operator has selected panic on PCI Error */
2007 if (panic_on_pci_parity) {
2008 /* If the count is different 'after' from 'before' */
2009 if (before_count != atomic_read(&pci_parity_count))
2010 panic("EDAC: PCI Parity Error");
2015 static inline void clear_pci_parity_errors(void)
2017 /* Clear any PCI bus parity errors that devices initially have logged
2018 * in their registers.
2020 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
2024 #else /* CONFIG_PCI */
/* Without PCI support there is nothing to poll. */
static inline void do_pci_parity_check(void)
{
}

/* Without PCI support there is nothing to clear. */
static inline void clear_pci_parity_errors(void)
{
}
2039 #endif /* CONFIG_PCI */
2042 * Iterate over all MC instances and check for ECC, et al, errors
2044 static inline void check_mc_devices (void)
2046 unsigned long flags;
2047 struct list_head *item;
2048 struct mem_ctl_info *mci;
2050 debugf3("MC: " __FILE__ ": %s()\n", __func__);
2052 /* during poll, have interrupts off */
2053 local_irq_save(flags);
2055 list_for_each(item, &mc_devices) {
2056 mci = list_entry(item, struct mem_ctl_info, link);
2058 if (mci->edac_check != NULL)
2059 mci->edac_check(mci);
2062 local_irq_restore(flags);
/*
 * One polling pass: check the memory controllers first, then the PCI
 * parity status.  Installed as the 'run' hook of the kedac thread and
 * also called once from module init.
 */
static void do_edac_check(void)
{
	debugf3("MC: " __FILE__ ": %s()\n", __func__);

	check_mc_devices();

	do_pci_parity_check();
}
/*
 * EDAC thread state information
 */
struct bs_thread_info
{
	struct task_struct *task;	/* set by the thread once running */
	struct completion *event;	/* start / exit handshake */
	char *name;			/* name passed to daemonize() */
	void (*run)(void);		/* poll function; NULL means stop */
};

/* state of the single polling thread */
static struct bs_thread_info bs_thread;
2099 * edac_kernel_thread
2100 * This the kernel thread that processes edac operations
2101 * in a normal thread environment
2103 static int edac_kernel_thread(void *arg)
2105 struct bs_thread_info *thread = (struct bs_thread_info *) arg;
2108 daemonize(thread->name);
2110 current->exit_signal = SIGCHLD;
2111 allow_signal(SIGKILL);
2112 thread->task = current;
2114 /* indicate to starting task we have started */
2115 complete(thread->event);
2117 /* loop forever, until we are told to stop */
2118 while(thread->run != NULL) {
2121 /* call the function to check the memory controllers */
2126 if (signal_pending(current))
2127 flush_signals(current);
2129 /* ensure we are interruptable */
2130 set_current_state(TASK_INTERRUPTIBLE);
2132 /* goto sleep for the interval */
2133 schedule_timeout((HZ * poll_msec) / 1000);
2137 /* notify waiter that we are exiting */
2138 complete(thread->event);
2145 * module initialization entry point
2147 static int __init edac_mc_init(void)
2150 struct completion event;
2152 printk(KERN_INFO "MC: " __FILE__ " version " EDAC_MC_VERSION "\n");
2155 * Harvest and clear any boot/initialization PCI parity errors
2157 * FIXME: This only clears errors logged by devices present at time of
2158 * module initialization. We should also do an initial clear
2159 * of each newly hotplugged device.
2161 clear_pci_parity_errors();
2163 /* perform check for first time to harvest boot leftovers */
2166 /* Create the MC sysfs entires */
2167 if (edac_sysfs_memctrl_setup()) {
2168 printk(KERN_ERR "EDAC MC: Error initializing sysfs code\n");
2172 /* Create the PCI parity sysfs entries */
2173 if (edac_sysfs_pci_setup()) {
2174 edac_sysfs_memctrl_teardown();
2175 printk(KERN_ERR "EDAC PCI: Error initializing sysfs code\n");
2179 /* Create our kernel thread */
2180 init_completion(&event);
2181 bs_thread.event = &event;
2182 bs_thread.name = "kedac";
2183 bs_thread.run = do_edac_check;
2185 /* create our kernel thread */
2186 ret = kernel_thread(edac_kernel_thread, &bs_thread, CLONE_KERNEL);
2188 /* remove the sysfs entries */
2189 edac_sysfs_memctrl_teardown();
2190 edac_sysfs_pci_teardown();
2194 /* wait for our kernel theard ack that it is up and running */
2195 wait_for_completion(&event);
2203 * module exit/termination functioni
2205 static void __exit edac_mc_exit(void)
2207 struct completion event;
2209 debugf0("MC: " __FILE__ ": %s()\n", __func__);
2211 init_completion(&event);
2212 bs_thread.event = &event;
2214 /* As soon as ->run is set to NULL, the task could disappear,
2215 * so we need to hold tasklist_lock until we have sent the signal
2217 read_lock(&tasklist_lock);
2218 bs_thread.run = NULL;
2219 send_sig(SIGKILL, bs_thread.task, 1);
2220 read_unlock(&tasklist_lock);
2221 wait_for_completion(&event);
2223 /* tear down the sysfs device */
2224 edac_sysfs_memctrl_teardown();
2225 edac_sysfs_pci_teardown();
2231 module_init(edac_mc_init);
2232 module_exit(edac_mc_exit);
2234 MODULE_LICENSE("GPL");
2235 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n"
2236 "Based on.work by Dan Hollis et al");
2237 MODULE_DESCRIPTION("Core library routines for MC reporting");
2239 module_param(panic_on_ue, int, 0644);
2240 MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2241 module_param(check_pci_parity, int, 0644);
2242 MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2243 module_param(panic_on_pci_parity, int, 0644);
2244 MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2245 module_param(log_ue, int, 0644);
2246 MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2247 module_param(log_ce, int, 0644);
2248 MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on");
2249 module_param(poll_msec, int, 0644);
2250 MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds");
2251 #ifdef CONFIG_EDAC_DEBUG
2252 module_param(edac_debug_level, int, 0644);
2253 MODULE_PARM_DESC(edac_debug_level, "Debug level");