2 * Routines to identify caches on Intel CPU.
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen : CPUID4 emulation on AMD.
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/device.h>
13 #include <linux/compiler.h>
14 #include <linux/cpu.h>
15 #include <linux/sched.h>
17 #include <asm/processor.h>
28 unsigned char descriptor;
33 /* all the cache descriptor types we care about (no TLB entries; the
 * trace cache descriptors 0x70-0x73 are listed as LVL_TRACE).
 * Each row is { descriptor byte, cache type, size }. The cpuid(2) scan in
 * init_intel_cacheinfo() adds up the size of every matching row per cache
 * type; the totals are printed with a "K" suffix ("K uops" for the trace
 * cache). That scan stops at a row whose descriptor is 0. */
34 static struct _cache_table cache_table[] __cpuinitdata =
36 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
37 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
38 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
39 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
40 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
41 { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
42 { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
43 { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
44 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
45 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
46 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
47 { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
48 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
49 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
50 { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
51 { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
52 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
53 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
54 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
55 { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
56 { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
57 { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
58 { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
59 { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
60 { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
61 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
62 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
63 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
64 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
65 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
66 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
67 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
68 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
69 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
70 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
71 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
72 { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
73 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
74 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
75 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
76 { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
77 { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
78 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
79 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
80 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
81 { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
82 { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
83 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
84 { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
94 CACHE_TYPE_UNIFIED = 3
/* Bit-field view of the EAX word of a cpuid(4) cache leaf. Callers load the
 * raw word through .full and read the individual fields through .split. */
97 union _cpuid4_leaf_eax {
99 enum _cache_type type:5; /* CACHE_TYPE_NULL ends the leaf enumeration */
100 unsigned int level:3;
101 unsigned int is_self_initializing:1;
102 unsigned int is_fully_associative:1;
103 unsigned int reserved:4;
104 unsigned int num_threads_sharing:12; /* stored minus one; readers add 1 */
105 unsigned int num_cores_on_die:6; /* amd_cpuid4() stores x86_max_cores - 1 */
/* Bit-field view of the EBX word of a cpuid(4) cache leaf. All three fields
 * are stored minus one: the size computation and the sysfs show routines
 * add 1 back before using them. */
110 union _cpuid4_leaf_ebx {
112 unsigned int coherency_line_size:12;
113 unsigned int physical_line_partition:10;
114 unsigned int ways_of_associativity:10;
/* View of the ECX word of a cpuid(4) cache leaf: the number of sets,
 * stored minus one (readers add 1). */
119 union _cpuid4_leaf_ecx {
121 unsigned int number_of_sets:32;
/* Everything recorded for one cache leaf of one CPU: the three decoded
 * cpuid(4) words plus the set of CPUs sharing the cache. A `size` member,
 * filled in by cpuid4_cache_lookup(), is declared on a line not visible in
 * this view. */
126 struct _cpuid4_info {
127 union _cpuid4_leaf_eax eax;
128 union _cpuid4_leaf_ebx ebx;
129 union _cpuid4_leaf_ecx ecx;
131 cpumask_t shared_cpu_map; /* maintained by cache_shared_cpu_map_setup() */
/* Number of cache leaves, assigned in init_intel_cacheinfo() from
 * find_num_cache_leaves(). The detection and sysfs setup paths test it
 * against 0 before doing any work. */
134 unsigned short num_cache_leaves;
136 /* AMD doesn't have CPUID4. Emulate it here to report the same
137 information to the user. This makes some assumptions about the machine:
138 no L3, L2 not shared, no SMT, etc. -- all of which currently holds on AMD CPUs.
140 In theory the TLBs could be reported as a fake type (they are in "dummy").
144 unsigned line_size : 8;
145 unsigned lines_per_tag : 8;
147 unsigned size_in_kb : 8;
154 unsigned line_size : 8;
155 unsigned lines_per_tag : 4;
157 unsigned size_in_kb : 16;
/* Way count for each associativity code; amd_cpuid4() indexes this table
 * with its `assoc` value. Further entries may follow on lines not visible
 * in this view. */
162 static unsigned short assocs[] = {
163 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
/* Indexed by the emulated leaf number (0..2) in amd_cpuid4(): the cache
 * level and cache type reported for that leaf. Leaf 0 is built from the L1
 * data descriptor, leaf 2 from the L2 descriptor; type 3 is
 * CACHE_TYPE_UNIFIED. */
167 static unsigned char levels[] = { 1, 1, 2 };
168 static unsigned char types[] = { 1, 2, 3 };
/*
 * Fill *eax, *ebx and *ecx with a cpuid(4)-style description of cache
 * `leaf`, built from the extended cpuid leaves 0x80000005 and 0x80000006.
 * Leaf 0 uses the L1 data descriptor (l1d), the other L1 leaf uses l1i,
 * and the L2 values come from l2 and current_cpu_data.x86_cache_size.
 * The test on leaf > 2 / zero descriptors guards a statement that is not
 * visible in this view.
 */
170 static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
171 union _cpuid4_leaf_ebx *ebx,
172 union _cpuid4_leaf_ecx *ecx)
175 unsigned line_size, lines_per_tag, assoc, size_in_kb;
176 union l1_cache l1i, l1d;
/* L1 descriptors: third and fourth outputs of 0x80000005;
 * L2 descriptor: third output of 0x80000006 */
183 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
184 cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
186 if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
189 eax->split.is_self_initializing = 1;
190 eax->split.type = types[leaf];
191 eax->split.level = levels[leaf];
/* stored minus one, so 0 means a single thread uses the cache */
192 eax->split.num_threads_sharing = 0;
193 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
196 union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
198 line_size = l1->line_size;
199 lines_per_tag = l1->lines_per_tag;
200 size_in_kb = l1->size_in_kb;
203 line_size = l2.line_size;
204 lines_per_tag = l2.lines_per_tag;
205 /* cpu_data has errata corrections for K7 applied */
206 size_in_kb = current_cpu_data.x86_cache_size;
/* NOTE(review): the condition guarding the next assignment is on a line
 * not visible in this view */
210 eax->split.is_fully_associative = 1;
/* cpuid(4) convention: every field below holds its value minus one */
211 ebx->split.coherency_line_size = line_size - 1;
212 ebx->split.ways_of_associativity = assocs[assoc] - 1;
213 ebx->split.physical_line_partition = lines_per_tag - 1;
214 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
215 (ebx->split.ways_of_associativity + 1) - 1;
/*
 * Read cache leaf `index` into *this_leaf. When the boot CPU vendor is AMD
 * the values come from the amd_cpuid4() emulation; the native path uses
 * cpuid_count(4, index, ...).
 * Returns -EIO when the leaf type is CACHE_TYPE_NULL. The success return
 * is on a line not visible in this view.
 */
218 static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
220 union _cpuid4_leaf_eax eax;
221 union _cpuid4_leaf_ebx ebx;
222 union _cpuid4_leaf_ecx ecx;
225 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
226 amd_cpuid4(index, &eax, &ebx, &ecx);
228 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
229 if (eax.split.type == CACHE_TYPE_NULL)
230 return -EIO; /* better error ? */
232 this_leaf->eax = eax;
233 this_leaf->ebx = ebx;
234 this_leaf->ecx = ecx;
/* each field is stored minus one, hence the +1 on every factor:
 * size = sets * line size * line partitions * ways */
235 this_leaf->size = (ecx.split.number_of_sets + 1) *
236 (ebx.split.coherency_line_size + 1) *
237 (ebx.split.physical_line_partition + 1) *
238 (ebx.split.ways_of_associativity + 1);
242 /* will only be called once (init_intel_cacheinfo() guards the call with
 * its static is_initialized flag); __init is safe here.
 * Issues cpuid(4) with sub-leaf index i until a leaf reports
 * CACHE_TYPE_NULL. The update of i and the return statement are on lines
 * not visible in this view; the caller stores the result in
 * num_cache_leaves. */
243 static int __init find_num_cache_leaves(void)
245 unsigned int eax, ebx, ecx, edx;
246 union _cpuid4_leaf_eax cache_eax;
251 /* Do cpuid(4) loop to find out num_cache_leaves */
252 cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
253 cache_eax.full = eax;
254 } while (cache_eax.split.type != CACHE_TYPE_NULL);
/*
 * Determine the cache sizes of CPU `c`, log them and record the result in
 * c->x86_cache_size (L3 if present, else L2, else L1 I + L1 D).
 *
 * When c->cpuid_level > 3 the cache leaves are read through
 * cpuid4_cache_lookup(). For the L2 and L3 leaves an id is also derived
 * from c->apicid shifted right by get_count_order() of the number of
 * sharing threads; that id is later stored in cpu_llc_id[cpu].
 *
 * When there are no cache leaves, or on family 15 (c->x86 == 15), the
 * cpuid(2) descriptor bytes are looked up in cache_table[]. On family 15
 * with cache leaves present the only_trace test restricts that lookup to
 * LVL_TRACE rows.
 *
 * The return statement is on a line not visible in this view.
 */
258 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
260 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
261 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
262 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
263 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
265 unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
268 if (c->cpuid_level > 3) {
269 static int is_initialized;
271 if (is_initialized == 0) {
272 /* Init num_cache_leaves from boot CPU */
273 num_cache_leaves = find_num_cache_leaves();
278 * Whenever possible use cpuid(4), deterministic cache
279 * parameters cpuid leaf to find the cache details
281 for (i = 0; i < num_cache_leaves; i++) {
282 struct _cpuid4_info this_leaf;
286 retval = cpuid4_cache_lookup(i, &this_leaf);
288 switch(this_leaf.eax.split.level) {
290 if (this_leaf.eax.split.type ==
292 new_l1d = this_leaf.size/1024;
293 else if (this_leaf.eax.split.type ==
295 new_l1i = this_leaf.size/1024;
298 new_l2 = this_leaf.size/1024;
299 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
300 index_msb = get_count_order(num_threads_sharing);
301 l2_id = c->apicid >> index_msb;
304 new_l3 = this_leaf.size/1024;
305 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
306 index_msb = get_count_order(num_threads_sharing);
307 l3_id = c->apicid >> index_msb;
316 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
319 if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
320 /* supports eax=2 call */
323 unsigned char *dp = (unsigned char *)regs;
326 if (num_cache_leaves != 0 && c->x86 == 15)
329 /* Number of times to iterate */
330 n = cpuid_eax(2) & 0xFF;
332 for ( i = 0 ; i < n ; i++ ) {
333 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
335 /* If bit 31 is set, this is an unknown format */
336 for ( j = 0 ; j < 3 ; j++ ) {
337 if ( regs[j] < 0 ) regs[j] = 0;
340 /* Byte 0 is level count, not a descriptor */
341 for ( j = 1 ; j < 16 ; j++ ) {
342 unsigned char des = dp[j];
345 /* look up this descriptor in the table */
346 while (cache_table[k].descriptor != 0)
348 if (cache_table[k].descriptor == des) {
349 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
351 switch (cache_table[k].cache_type) {
353 l1i += cache_table[k].size;
356 l1d += cache_table[k].size;
359 l2 += cache_table[k].size;
362 l3 += cache_table[k].size;
365 trace += cache_table[k].size;
387 cpu_llc_id[cpu] = l2_id;
394 cpu_llc_id[cpu] = l3_id;
399 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
401 printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
404 printk(", L1 D cache: %dK\n", l1d);
409 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
412 printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
/* report the largest level present: L3, else L2, else the two L1s combined */
414 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
419 /* per-CPU pointer to an array of num_cache_leaves _cpuid4_info records
 * (one for each cache leaf); allocated in detect_cache_attributes() and
 * reset to NULL by free_cache_attributes() */
420 static struct _cpuid4_info *cpuid4_info[NR_CPUS];
/* address of the record for cache leaf y of CPU x; dereferences
 * cpuid4_info[x], so that pointer must be valid */
421 #define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y]))
/*
 * Record in shared_cpu_map which CPUs share cache leaf `index` with `cpu`.
 * A leaf with a single sharing thread gets only `cpu` itself. In the
 * multi-thread path every online CPU whose apicid, shifted right by
 * get_count_order(num_threads_sharing), matches that of `cpu` is added to
 * this leaf's map, and `cpu` is added to that sibling's map when the
 * sibling already has cpuid4_info allocated.
 * (The line separating the two paths is not visible in this view.)
 */
424 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
426 struct _cpuid4_info *this_leaf, *sibling_leaf;
427 unsigned long num_threads_sharing;
429 struct cpuinfo_x86 *c = cpu_data;
431 this_leaf = CPUID4_INFO_IDX(cpu, index);
/* the field is stored minus one */
432 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
434 if (num_threads_sharing == 1)
435 cpu_set(cpu, this_leaf->shared_cpu_map);
437 index_msb = get_count_order(num_threads_sharing);
439 for_each_online_cpu(i) {
440 if (c[i].apicid >> index_msb ==
441 c[cpu].apicid >> index_msb) {
442 cpu_set(i, this_leaf->shared_cpu_map);
443 if (i != cpu && cpuid4_info[i]) {
444 sibling_leaf = CPUID4_INFO_IDX(i, index);
445 cpu_set(cpu, sibling_leaf->shared_cpu_map);
/*
 * Clear `cpu` from the shared_cpu_map of every CPU listed in its own map
 * for cache leaf `index`.
 * NOTE(review): unlike cache_shared_cpu_map_setup(), cpuid4_info[sibling]
 * is not checked for NULL before CPUID4_INFO_IDX() is used on it --
 * confirm every CPU in the map always has its info array allocated.
 */
451 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
453 struct _cpuid4_info *this_leaf, *sibling_leaf;
456 this_leaf = CPUID4_INFO_IDX(cpu, index);
457 for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
458 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
459 cpu_clear(cpu, sibling_leaf->shared_cpu_map);
/* No-op variants of the two shared-map helpers. They have the same names
 * and signatures as the real ones, so they are presumably selected by a
 * preprocessor conditional that is not visible in this view. */
463 static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
464 static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
/* Free the cache-leaf array of `cpu` and reset its slot to NULL, so that
 * the NULL tests on cpuid4_info[cpu] elsewhere in this file see it as
 * absent. */
467 static void free_cache_attributes(unsigned int cpu)
469 kfree(cpuid4_info[cpu]);
470 cpuid4_info[cpu] = NULL;
/*
 * Allocate and fill the cache-leaf array for `cpu`: a zeroed array of
 * num_cache_leaves records is stored in cpuid4_info[cpu], each leaf is read
 * with cpuid4_cache_lookup() and its sharing map is set up.
 * The return statements and the condition under which
 * free_cache_attributes() is called are on lines not visible in this view.
 */
473 static int __cpuinit detect_cache_attributes(unsigned int cpu)
475 struct _cpuid4_info *this_leaf;
480 if (num_cache_leaves == 0)
483 cpuid4_info[cpu] = kmalloc(
484 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
485 if (unlikely(cpuid4_info[cpu] == NULL))
487 memset(cpuid4_info[cpu], 0,
488 sizeof(struct _cpuid4_info) * num_cache_leaves);
/* restrict the current task to `cpu` for the lookups below (presumably so
 * that the cpuid instructions execute on that CPU); the saved mask is
 * restored after the loop */
490 oldmask = current->cpus_allowed;
491 retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
495 /* Do cpuid and store the results */
497 for (j = 0; j < num_cache_leaves; j++) {
498 this_leaf = CPUID4_INFO_IDX(cpu, j);
499 retval = cpuid4_cache_lookup(j, this_leaf);
500 if (unlikely(retval < 0))
502 cache_shared_cpu_map_setup(cpu, j);
504 set_cpus_allowed(current, oldmask);
508 free_cache_attributes(cpu);
514 #include <linux/kobject.h>
515 #include <linux/sysfs.h>
517 extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
519 /* per-CPU pointer to the kobject for cpuX/cache; allocated in
 * cpuid4_cache_sysfs_init(), freed and reset to NULL in
 * cpuid4_cache_sysfs_exit() */
520 static struct kobject * cache_kobject[NR_CPUS];
/* One sysfs object per (cpu, cache leaf) pair. The `kobj` and `cpu` members
 * used by to_object() and cache_add_dev() are declared on lines not visible
 * in this view. */
522 struct _index_kobject {
525 unsigned short index; /* cache leaf number, set in cache_add_dev() */
528 /* per-CPU pointer to an array of num_cache_leaves objects, one for each
 * cpuX/cache/indexY; allocated in cpuid4_cache_sysfs_init() */
529 static struct _index_kobject *index_kobject[NR_CPUS];
/* address of the object for cache leaf y of CPU x */
530 #define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y]))
/* Generates show_<file_name>(), which prints this_leaf-><object> + val as
 * an unsigned long followed by a newline. val is 1 for the fields that are
 * stored minus one and 0 for the cache level. */
532 #define show_one_plus(file_name, object, val) \
533 static ssize_t show_##file_name \
534 (struct _cpuid4_info *this_leaf, char *buf) \
536 return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
539 show_one_plus(level, eax.split.level, 0);
540 show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
541 show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
542 show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
543 show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
/* Print the leaf's size divided by 1024, followed by "K" and a newline.
 * Returns the sprintf() result. */
545 static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
547 return sprintf (buf, "%luK\n", this_leaf->size / 1024);
/* Format the leaf's shared_cpu_map with cpumask_scnprintf() into a local
 * buffer, then print it to buf followed by a newline.
 * NOTE(review): the local buffer is NR_CPUS bytes on the stack -- confirm
 * this is acceptable for large NR_CPUS configurations. */
550 static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
552 char mask_str[NR_CPUS];
553 cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
554 return sprintf(buf, "%s\n", mask_str);
/* Print the leaf's cache type as text: "Data", "Instruction" or "Unified"
 * for the three known types, "Unknown" for anything else. */
557 static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
558 switch(this_leaf->eax.split.type) {
559 case CACHE_TYPE_DATA:
560 return sprintf(buf, "Data\n");
562 case CACHE_TYPE_INST:
563 return sprintf(buf, "Instruction\n");
565 case CACHE_TYPE_UNIFIED:
566 return sprintf(buf, "Unified\n");
569 return sprintf(buf, "Unknown\n");
575 struct attribute attr;
576 ssize_t (*show)(struct _cpuid4_info *, char *);
577 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
/* Declares a struct _cache_attr named _name with mode 0444, show_<_name>
 * as its show handler and no store handler. */
580 #define define_one_ro(_name) \
581 static struct _cache_attr _name = \
582 __ATTR(_name, 0444, show_##_name, NULL)
584 define_one_ro(level);
586 define_one_ro(coherency_line_size);
587 define_one_ro(physical_line_partition);
588 define_one_ro(ways_of_associativity);
589 define_one_ro(number_of_sets);
591 define_one_ro(shared_cpu_map);
/* Attributes attached to ktype_cache, the type given to each indexY object
 * in cache_add_dev(). Some entries and the terminator are on lines not
 * visible in this view. */
593 static struct attribute * default_attrs[] = {
596 &coherency_line_size.attr,
597 &physical_line_partition.attr,
598 &ways_of_associativity.attr,
599 &number_of_sets.attr,
601 &shared_cpu_map.attr,
/* Recover the enclosing _index_kobject from a pointer to its kobj member */
605 #define to_object(k) container_of(k, struct _index_kobject, kobj)
/* Recover the enclosing _cache_attr from a pointer to its attr member */
606 #define to_attr(a) container_of(a, struct _cache_attr, attr)
/* Generic sysfs show entry point: maps kobj/attr back to the owning
 * _index_kobject and _cache_attr, then calls the attribute's show handler
 * on the _cpuid4_info record selected by the object's cpu and index.
 * The remainder of the body is on lines not visible in this view. */
608 static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
610 struct _cache_attr *fattr = to_attr(attr);
611 struct _index_kobject *this_leaf = to_object(kobj);
615 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
/* Generic sysfs store entry point. Its body is not visible in this view;
 * every attribute declared through define_one_ro() has a NULL store
 * handler. */
621 static ssize_t store(struct kobject * kobj, struct attribute * attr,
622 const char * buf, size_t count)
/* Operations table shared by both kobject types below; its initialisers are
 * on lines not visible in this view. */
627 static struct sysfs_ops sysfs_ops = {
/* Type of each cpuX/cache/indexY object: carries the default attributes */
632 static struct kobj_type ktype_cache = {
633 .sysfs_ops = &sysfs_ops,
634 .default_attrs = default_attrs,
/* Type of the cpuX/cache object itself: no default attributes are set here */
637 static struct kobj_type ktype_percpu_entry = {
638 .sysfs_ops = &sysfs_ops,
/* Release everything allocated for `cpu` by cpuid4_cache_sysfs_init(): the
 * cache kobject, the index-object array (both slots reset to NULL) and the
 * cache-leaf array. */
641 static void cpuid4_cache_sysfs_exit(unsigned int cpu)
643 kfree(cache_kobject[cpu]);
644 kfree(index_kobject[cpu]);
645 cache_kobject[cpu] = NULL;
646 index_kobject[cpu] = NULL;
647 free_cache_attributes(cpu);
/*
 * Prepare the per-CPU state needed before the sysfs objects are registered:
 * detect the cache leaves of `cpu`, then allocate and zero the cache
 * kobject and the array of num_cache_leaves index objects.
 * The return statements, and the path that leads to the
 * cpuid4_cache_sysfs_exit() call, are on lines not visible in this view.
 */
650 static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
653 if (num_cache_leaves == 0)
/* success is judged by cpuid4_info[cpu] being non-NULL afterwards, not by
 * the return value */
656 detect_cache_attributes(cpu);
657 if (cpuid4_info[cpu] == NULL)
660 /* Allocate all required memory */
661 cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
662 if (unlikely(cache_kobject[cpu] == NULL))
664 memset(cache_kobject[cpu], 0, sizeof(struct kobject));
666 index_kobject[cpu] = kmalloc(
667 sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
668 if (unlikely(index_kobject[cpu] == NULL))
670 memset(index_kobject[cpu], 0,
671 sizeof(struct _index_kobject) * num_cache_leaves);
676 cpuid4_cache_sysfs_exit(cpu);
680 /* Add/Remove cache interface for CPU device */
/*
 * Create the sysfs objects for the CPU behind sys_dev: a "cache" kobject
 * parented to the CPU's sysdev kobject, and under it one "index<i>" object
 * per cache leaf.
 * If registering an index object fails, the index objects registered so far
 * and the cache kobject are unregistered and the per-CPU allocations are
 * released. The return statements are on lines not visible in this view.
 */
681 static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
683 unsigned int cpu = sys_dev->id;
685 struct _index_kobject *this_object;
688 retval = cpuid4_cache_sysfs_init(cpu);
689 if (unlikely(retval < 0))
692 cache_kobject[cpu]->parent = &sys_dev->kobj;
693 kobject_set_name(cache_kobject[cpu], "%s", "cache");
694 cache_kobject[cpu]->ktype = &ktype_percpu_entry;
695 retval = kobject_register(cache_kobject[cpu]);
697 for (i = 0; i < num_cache_leaves; i++) {
698 this_object = INDEX_KOBJECT_PTR(cpu,i);
699 this_object->cpu = cpu;
700 this_object->index = i;
701 this_object->kobj.parent = cache_kobject[cpu];
702 kobject_set_name(&(this_object->kobj), "index%1lu", i);
703 this_object->kobj.ktype = &ktype_cache;
704 retval = kobject_register(&(this_object->kobj));
705 if (unlikely(retval)) {
706 for (j = 0; j < i; j++) {
708 &(INDEX_KOBJECT_PTR(cpu,j)->kobj));
710 kobject_unregister(cache_kobject[cpu]);
711 cpuid4_cache_sysfs_exit(cpu);
/* Tear down the sysfs objects of the CPU behind sys_dev: for every cache
 * leaf, drop the CPU from its siblings' shared maps and unregister the
 * index object; then unregister the cache kobject and free the per-CPU
 * allocations. */
718 static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
720 unsigned int cpu = sys_dev->id;
723 for (i = 0; i < num_cache_leaves; i++) {
724 cache_remove_shared_cpu_map(cpu, i);
725 kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
727 kobject_unregister(cache_kobject[cpu]);
728 cpuid4_cache_sysfs_exit(cpu);
/* CPU notifier callback: looks up the sysdev of the CPU passed in hcpu and
 * calls cache_add_dev() or cache_remove_dev() on it. The action values
 * selecting each call are on lines not visible in this view;
 * cache_sysfs_init() invokes this callback with CPU_ONLINE. */
732 static int cacheinfo_cpu_callback(struct notifier_block *nfb,
733 unsigned long action, void *hcpu)
735 unsigned int cpu = (unsigned long)hcpu;
736 struct sys_device *sys_dev;
738 sys_dev = get_cpu_sysdev(cpu);
741 cache_add_dev(sys_dev);
744 cache_remove_dev(sys_dev);
/* Notifier block registered by cache_sysfs_init(); routes CPU events to
 * cacheinfo_cpu_callback(). */
750 static struct notifier_block cacheinfo_cpu_notifier =
752 .notifier_call = cacheinfo_cpu_callback,
/* Initcall (registered through device_initcall): tests num_cache_leaves
 * against 0 first, then registers the CPU notifier and invokes the callback
 * with CPU_ONLINE for every CPU that is already online. The return
 * statements are on lines not visible in this view. */
755 static int __cpuinit cache_sysfs_init(void)
759 if (num_cache_leaves == 0)
762 register_cpu_notifier(&cacheinfo_cpu_notifier);
764 for_each_online_cpu(i) {
765 cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
772 device_initcall(cache_sysfs_init);