/*
 * Routines to identify caches on Intel CPUs.
 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen : CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>

#include <asm/processor.h>
	unsigned char descriptor;

/* All the cache descriptor types we care about (no TLB entries) */
static struct _cache_table cache_table[] __cpuinitdata =
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2-way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x22, LVL_3, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, 1024 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, 2048 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, 4096 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },		/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },		/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },		/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x41, LVL_2, 128 },		/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },		/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },		/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, 1024 },		/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, 2048 },		/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, 4096 },		/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, 8192 },		/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, 4096 },		/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, 6144 },		/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, 8192 },		/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, 12288 },		/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, 16384 },		/* 16-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, 1024 },		/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, 1024 },		/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, 2048 },		/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },		/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },		/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },		/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, 1024 },		/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, 2048 },		/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },		/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, 1024 },		/* 8-way set assoc, 64 byte line size */

	CACHE_TYPE_UNIFIED = 3

union _cpuid4_leaf_eax {
	enum _cache_type type:5;
	unsigned int level:3;
	unsigned int is_self_initializing:1;
	unsigned int is_fully_associative:1;
	unsigned int reserved:4;
	unsigned int num_threads_sharing:12;
	unsigned int num_cores_on_die:6;

union _cpuid4_leaf_ebx {
	unsigned int coherency_line_size:12;
	unsigned int physical_line_partition:10;
	unsigned int ways_of_associativity:10;

union _cpuid4_leaf_ecx {
	unsigned int number_of_sets:32;

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	cpumask_t shared_cpu_map;

unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   no L3, the L2 is not shared, no SMT, etc., which is currently true on
   AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy"). */
	unsigned line_size : 8;
	unsigned lines_per_tag : 8;
	unsigned size_in_kb : 8;

	unsigned line_size : 8;
	unsigned lines_per_tag : 4;
	unsigned size_in_kb : 16;

static const unsigned short assocs[] = {
	[1] = 1, [2] = 2, [4] = 4, [6] = 8,

static const unsigned char levels[] = { 1, 1, 2 };
static const unsigned char types[] = { 1, 2, 3 };

static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
				 union _cpuid4_leaf_ebx *ebx,
				 union _cpuid4_leaf_ecx *ecx)
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);

	if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
		return;

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;

		union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;

		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;

		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;

	eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assocs[assoc] - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
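
	/*
	 * Illustrative example (values assumed, not read from a real part):
	 * a 64KB, 2-way L1 data cache with 64-byte lines gives line_size = 64,
	 * assoc = 2 and size_in_kb = 64, so the emulated leaf encodes
	 * coherency_line_size = 63, ways_of_associativity = assocs[2] - 1 = 1
	 * and number_of_sets = 64*1024/64/2 - 1 = 511, matching the
	 * "value minus one" convention of the real cpuid(4) leaf.
	 */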

static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
		amd_cpuid4(index, &eax, &ebx, &ecx);
	else
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
		(ebx.split.coherency_line_size + 1) *
		(ebx.split.physical_line_partition + 1) *
		(ebx.split.ways_of_associativity + 1);
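
	/*
	 * Example of the size calculation above with assumed leaf values:
	 * 4096 sets, 64-byte lines, no sectoring (1 line per tag) and 16 ways
	 * give 4096 * 64 * 1 * 16 = 4MB; the raw fields would read 4095, 63,
	 * 0 and 15 before the "+1" corrections.
	 */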

/* will only be called once; __init is safe here */
static int __init find_num_cache_leaves(void)
	unsigned int eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax cache_eax;

		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
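
	/*
	 * Illustration (hypothetical topology): a CPU with separate L1I and
	 * L1D caches plus a unified L2 returns valid descriptors for
	 * sub-leaves 0, 1 and 2 of cpuid(4) and CACHE_TYPE_NULL for
	 * sub-leaf 3, so the loop above stops there and three cache leaves
	 * are counted.
	 */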

unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();

		/*
		 * Whenever possible use cpuid(4), the deterministic cache
		 * parameters leaf, to find the cache details.
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info this_leaf;

			retval = cpuid4_cache_lookup(i, &this_leaf);
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;

	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * the trace cache only.
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		unsigned char *dp = (unsigned char *)regs;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;
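
		/*
		 * cpuid(2) packs one cache/TLB descriptor per byte across
		 * eax, ebx, ecx and edx; a register with bit 31 set carries
		 * no valid descriptors, and byte 0 of eax is the repeat
		 * count read above.  For example, a byte of 0x2c anywhere in
		 * the valid bytes denotes the 32K L1 data cache listed in
		 * cache_table[].
		 */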

		for ( i = 0 ; i < n ; i++ ) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for ( j = 0 ; j < 3 ; j++ ) {
				if ( regs[j] < 0 ) regs[j] = 0;
			}

			/* Byte 0 is level count, not a descriptor */
			for ( j = 1 ; j < 16 ; j++ ) {
				unsigned char des = dp[j];

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

		cpu_llc_id[cpu] = l2_id;

		cpu_llc_id[cpu] = l3_id;

	printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);

	printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);

	printk(", L1 D cache: %dK\n", l1d);

	printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);

	printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
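
	/*
	 * On a hypothetical CPU with 32K L1I, 32K L1D and a 2MB L2 the
	 * messages above would read:
	 *	CPU: L1 I cache: 32K, L1 D cache: 32K
	 *	CPU: L2 cache: 2048K
	 * and x86_cache_size would be set to 2048.
	 */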

/* pointer to _cpuid4_info array (for each cache leaf) */
static struct _cpuid4_info *cpuid4_info[NR_CPUS];
#define CPUID4_INFO_IDX(x,y)	(&((cpuid4_info[x])[y]))

static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	struct cpuinfo_x86 *c = cpu_data;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpu_set(cpu, this_leaf->shared_cpu_map);

		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (c[i].apicid >> index_msb ==
			    c[cpu].apicid >> index_msb) {
				cpu_set(i, this_leaf->shared_cpu_map);
				if (i != cpu && cpuid4_info[i]) {
					sibling_leaf = CPUID4_INFO_IDX(i, index);
					cpu_set(cpu, sibling_leaf->shared_cpu_map);

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
	struct _cpuid4_info *this_leaf, *sibling_leaf;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpu_clear(cpu, sibling_leaf->shared_cpu_map);

static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}

static void free_cache_attributes(unsigned int cpu)
	kfree(cpuid4_info[cpu]);
	cpuid4_info[cpu] = NULL;

static int __cpuinit detect_cache_attributes(unsigned int cpu)
	struct _cpuid4_info *this_leaf;

	if (num_cache_leaves == 0)

	cpuid4_info[cpu] = kzalloc(
		sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(cpuid4_info[cpu] == NULL))

	oldmask = current->cpus_allowed;
	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(retval < 0))
		cache_shared_cpu_map_setup(cpu, j);
	set_cpus_allowed(current, oldmask);

	free_cache_attributes(cpu);

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static struct kobject *cache_kobject[NR_CPUS];

struct _index_kobject {
	unsigned short index;

/* pointer to array of kobjects for cpuX/cache/indexY */
static struct _index_kobject *index_kobject[NR_CPUS];
#define INDEX_KOBJECT_PTR(x,y)	(&((index_kobject[x])[y]))
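
/*
 * The objects below show up in sysfs as, e.g. (exact path depends on where
 * the CPU sysdev is registered, typically /sys/devices/system/cpu):
 *
 *	cpu0/cache/index0/{type,level,size,coherency_line_size,
 *			   physical_line_partition,ways_of_associativity,
 *			   number_of_sets,shared_cpu_map}
 *	cpu0/cache/index1/...
 *
 * with one indexN directory per cache leaf reported by cpuid(4).
 */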

#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
	(struct _cpuid4_info *this_leaf, char *buf) \
{ \
	return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
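
/*
 * The "+1" above undoes the minus-one encoding used by cpuid(4) for the
 * ebx/ecx fields; level is stored as-is, hence val == 0.  As an
 * illustration, show_one_plus(level, eax.split.level, 0) expands to:
 *
 *	static ssize_t show_level(struct _cpuid4_info *this_leaf, char *buf)
 *	{
 *		return sprintf(buf, "%lu\n",
 *			       (unsigned long)this_leaf->eax.split.level + 0);
 *	}
 */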

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
	return sprintf (buf, "%luK\n", this_leaf->size / 1024);

static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
	char mask_str[NR_CPUS];
	cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
	return sprintf(buf, "%s\n", mask_str);

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	return sprintf(buf, "Unknown\n");

	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(shared_cpu_map);

static struct attribute *default_attrs[] = {
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&shared_cpu_map.attr,

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);

	fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)

static struct sysfs_ops sysfs_ops = {

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,

static void cpuid4_cache_sysfs_exit(unsigned int cpu)
	kfree(cache_kobject[cpu]);
	kfree(index_kobject[cpu]);
	cache_kobject[cpu] = NULL;
	index_kobject[cpu] = NULL;
	free_cache_attributes(cpu);

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)

	if (num_cache_leaves == 0)

	detect_cache_attributes(cpu);
	if (cpuid4_info[cpu] == NULL)

	/* Allocate all required memory */
	cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(cache_kobject[cpu] == NULL))

	index_kobject[cpu] = kzalloc(
		sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(index_kobject[cpu] == NULL))

	cpuid4_cache_sysfs_exit(cpu);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device *sys_dev)
	unsigned int cpu = sys_dev->id;
	struct _index_kobject *this_object;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))

	cache_kobject[cpu]->parent = &sys_dev->kobj;
	kobject_set_name(cache_kobject[cpu], "%s", "cache");
	cache_kobject[cpu]->ktype = &ktype_percpu_entry;
	retval = kobject_register(cache_kobject[cpu]);

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu,i);
		this_object->cpu = cpu;
		this_object->index = i;
		this_object->kobj.parent = cache_kobject[cpu];
		kobject_set_name(&(this_object->kobj), "index%1lu", i);
		this_object->kobj.ktype = &ktype_cache;
		retval = kobject_register(&(this_object->kobj));
		if (unlikely(retval)) {
			for (j = 0; j < i; j++) {
				kobject_unregister(
					&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
			}
			kobject_unregister(cache_kobject[cpu]);
			cpuid4_cache_sysfs_exit(cpu);

static void __cpuexit cache_remove_dev(struct sys_device *sys_dev)
	unsigned int cpu = sys_dev->id;

	for (i = 0; i < num_cache_leaves; i++) {
		cache_remove_shared_cpu_map(cpu, i);
		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
	}
	kobject_unregister(cache_kobject[cpu]);
	cpuid4_cache_sysfs_exit(cpu);

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);

		cache_add_dev(sys_dev);

		cache_remove_dev(sys_dev);

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
	.notifier_call = cacheinfo_cpu_callback,

static int __cpuinit cache_sysfs_init(void)

	if (num_cache_leaves == 0)

	register_hotcpu_notifier(&cacheinfo_cpu_notifier);

	for_each_online_cpu(i) {
		cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,

device_initcall(cache_sysfs_init);