2 * linux/arch/x86-64/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Nov 2001 Dave Jones <davej@suse.de>
7 * Forked from i386 setup code.
13 * This file handles the architecture-dependent parts of initialization
16 #include <linux/errno.h>
17 #include <linux/sched.h>
18 #include <linux/kernel.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/slab.h>
24 #include <linux/user.h>
25 #include <linux/a.out.h>
26 #include <linux/tty.h>
27 #include <linux/ioport.h>
28 #include <linux/delay.h>
29 #include <linux/config.h>
30 #include <linux/init.h>
31 #include <linux/initrd.h>
32 #include <linux/highmem.h>
33 #include <linux/bootmem.h>
34 #include <linux/module.h>
35 #include <asm/processor.h>
36 #include <linux/console.h>
37 #include <linux/seq_file.h>
38 #include <linux/crash_dump.h>
39 #include <linux/root_dev.h>
40 #include <linux/pci.h>
41 #include <linux/acpi.h>
42 #include <linux/kallsyms.h>
43 #include <linux/edd.h>
44 #include <linux/mmzone.h>
45 #include <linux/kexec.h>
46 #include <linux/cpufreq.h>
47 #include <linux/dmi.h>
48 #include <linux/dma-mapping.h>
51 #include <asm/uaccess.h>
52 #include <asm/system.h>
57 #include <video/edid.h>
60 #include <asm/mpspec.h>
61 #include <asm/mmu_context.h>
62 #include <asm/bootsetup.h>
63 #include <asm/proto.h>
64 #include <asm/setup.h>
65 #include <asm/mach_apic.h>
67 #include <asm/swiotlb.h>
68 #include <asm/sections.h>
69 #include <asm/gart-mapping.h>
75 struct cpuinfo_x86 boot_cpu_data __read_mostly;
77 unsigned long mmu_cr4_features;
80 EXPORT_SYMBOL(acpi_disabled);
82 extern int __initdata acpi_ht;
83 extern acpi_interrupt_flags acpi_sci_flags;
84 int __initdata acpi_force = 0;
87 int acpi_numa __initdata;
89 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
92 unsigned long saved_video_mode;
97 struct drive_info_struct { char dummy[32]; } drive_info;
98 struct screen_info screen_info;
99 struct sys_desc_table_struct {
100 unsigned short length;
101 unsigned char table[0];
104 struct edid_info edid_info;
107 extern int root_mountflags;
109 char command_line[COMMAND_LINE_SIZE];
111 struct resource standard_io_resources[] = {
112 { .name = "dma1", .start = 0x00, .end = 0x1f,
113 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
114 { .name = "pic1", .start = 0x20, .end = 0x21,
115 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
116 { .name = "timer0", .start = 0x40, .end = 0x43,
117 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
118 { .name = "timer1", .start = 0x50, .end = 0x53,
119 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
120 { .name = "keyboard", .start = 0x60, .end = 0x6f,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
122 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
123 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
124 { .name = "pic2", .start = 0xa0, .end = 0xa1,
125 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
126 { .name = "dma2", .start = 0xc0, .end = 0xdf,
127 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
128 { .name = "fpu", .start = 0xf0, .end = 0xff,
129 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
132 #define STANDARD_IO_RESOURCES \
133 (sizeof standard_io_resources / sizeof standard_io_resources[0])
135 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
137 struct resource data_resource = {
138 .name = "Kernel data",
141 .flags = IORESOURCE_RAM,
143 struct resource code_resource = {
144 .name = "Kernel code",
147 .flags = IORESOURCE_RAM,
150 #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
152 static struct resource system_rom_resource = {
153 .name = "System ROM",
156 .flags = IORESOURCE_ROM,
159 static struct resource extension_rom_resource = {
160 .name = "Extension ROM",
163 .flags = IORESOURCE_ROM,
166 static struct resource adapter_rom_resources[] = {
167 { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
168 .flags = IORESOURCE_ROM },
169 { .name = "Adapter ROM", .start = 0, .end = 0,
170 .flags = IORESOURCE_ROM },
171 { .name = "Adapter ROM", .start = 0, .end = 0,
172 .flags = IORESOURCE_ROM },
173 { .name = "Adapter ROM", .start = 0, .end = 0,
174 .flags = IORESOURCE_ROM },
175 { .name = "Adapter ROM", .start = 0, .end = 0,
176 .flags = IORESOURCE_ROM },
177 { .name = "Adapter ROM", .start = 0, .end = 0,
178 .flags = IORESOURCE_ROM }
181 #define ADAPTER_ROM_RESOURCES \
182 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
184 static struct resource video_rom_resource = {
188 .flags = IORESOURCE_ROM,
191 static struct resource video_ram_resource = {
192 .name = "Video RAM area",
195 .flags = IORESOURCE_RAM,
198 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
200 static int __init romchecksum(unsigned char *rom, unsigned long length)
202 unsigned char *p, sum = 0;
204 for (p = rom; p < rom + length; p++)
209 static void __init probe_roms(void)
211 unsigned long start, length, upper;
216 upper = adapter_rom_resources[0].start;
217 for (start = video_rom_resource.start; start < upper; start += 2048) {
218 rom = isa_bus_to_virt(start);
219 if (!romsignature(rom))
222 video_rom_resource.start = start;
224 /* 0 < length <= 0x7f * 512, historically */
225 length = rom[2] * 512;
227 /* if checksum okay, trust length byte */
228 if (length && romchecksum(rom, length))
229 video_rom_resource.end = start + length - 1;
231 request_resource(&iomem_resource, &video_rom_resource);
235 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
240 request_resource(&iomem_resource, &system_rom_resource);
241 upper = system_rom_resource.start;
243 /* check for extension rom (ignore length byte!) */
244 rom = isa_bus_to_virt(extension_rom_resource.start);
245 if (romsignature(rom)) {
246 length = extension_rom_resource.end - extension_rom_resource.start + 1;
247 if (romchecksum(rom, length)) {
248 request_resource(&iomem_resource, &extension_rom_resource);
249 upper = extension_rom_resource.start;
253 /* check for adapter roms on 2k boundaries */
254 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
255 rom = isa_bus_to_virt(start);
256 if (!romsignature(rom))
259 /* 0 < length <= 0x7f * 512, historically */
260 length = rom[2] * 512;
262 /* but accept any length that fits if checksum okay */
263 if (!length || start + length > upper || !romchecksum(rom, length))
266 adapter_rom_resources[i].start = start;
267 adapter_rom_resources[i].end = start + length - 1;
268 request_resource(&iomem_resource, &adapter_rom_resources[i]);
270 start = adapter_rom_resources[i++].end & ~2047UL;
274 static __init void parse_cmdline_early (char ** cmdline_p)
276 char c = ' ', *to = command_line, *from = COMMAND_LINE;
280 /* Save unparsed command line copy for /proc/cmdline */
281 memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
282 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
290 * If the BIOS enumerates physical processors before logical,
291 * maxcpus=N at enumeration-time can be used to disable HT.
293 else if (!memcmp(from, "maxcpus=", 8)) {
294 extern unsigned int maxcpus;
296 maxcpus = simple_strtoul(from + 8, NULL, 0);
300 /* "acpi=off" disables both ACPI table parsing and interpreter init */
301 if (!memcmp(from, "acpi=off", 8))
304 if (!memcmp(from, "acpi=force", 10)) {
305 /* add later when we do DMI horrors: */
310 /* acpi=ht just means: do ACPI MADT parsing
311 at bootup, but don't enable the full ACPI interpreter */
312 if (!memcmp(from, "acpi=ht", 7)) {
317 else if (!memcmp(from, "pci=noacpi", 10))
319 else if (!memcmp(from, "acpi=noirq", 10))
322 else if (!memcmp(from, "acpi_sci=edge", 13))
323 acpi_sci_flags.trigger = 1;
324 else if (!memcmp(from, "acpi_sci=level", 14))
325 acpi_sci_flags.trigger = 3;
326 else if (!memcmp(from, "acpi_sci=high", 13))
327 acpi_sci_flags.polarity = 1;
328 else if (!memcmp(from, "acpi_sci=low", 12))
329 acpi_sci_flags.polarity = 3;
331 /* acpi=strict disables out-of-spec workarounds */
332 else if (!memcmp(from, "acpi=strict", 11)) {
335 #ifdef CONFIG_X86_IO_APIC
336 else if (!memcmp(from, "acpi_skip_timer_override", 24))
337 acpi_skip_timer_override = 1;
341 if (!memcmp(from, "disable_timer_pin_1", 19))
342 disable_timer_pin_1 = 1;
343 if (!memcmp(from, "enable_timer_pin_1", 18))
344 disable_timer_pin_1 = -1;
346 if (!memcmp(from, "nolapic", 7) ||
347 !memcmp(from, "disableapic", 11))
350 if (!memcmp(from, "noapic", 6))
351 skip_ioapic_setup = 1;
353 /* Make sure to not confuse with apic= */
354 if (!memcmp(from, "apic", 4) &&
355 (from[4] == ' ' || from[4] == 0)) {
356 skip_ioapic_setup = 0;
360 if (!memcmp(from, "mem=", 4))
361 parse_memopt(from+4, &from);
363 if (!memcmp(from, "memmap=", 7)) {
364 /* exactmap option is for user defined memory */
365 if (!memcmp(from+7, "exactmap", 8)) {
366 #ifdef CONFIG_CRASH_DUMP
367 /* If we are doing a crash dump, we
368 * still need to know the real mem
369 * size before original memory map is
372 saved_max_pfn = e820_end_of_ram();
380 parse_memmapopt(from+7, &from);
386 if (!memcmp(from, "numa=", 5))
390 if (!memcmp(from,"iommu=",6)) {
394 if (!memcmp(from,"oops=panic", 10))
397 if (!memcmp(from, "noexec=", 7))
398 nonx_setup(from + 7);
401 /* crashkernel=size@addr specifies the location to reserve for
402 * a crash kernel. By reserving this memory we guarantee
403 * that linux never sets it up as a DMA target.
404 * Useful for holding code to do something appropriate
405 * after a kernel panic.
407 else if (!memcmp(from, "crashkernel=", 12)) {
408 unsigned long size, base;
409 size = memparse(from+12, &from);
411 base = memparse(from+1, &from);
412 /* FIXME: Do I want a sanity check
413 * to validate the memory range?
415 crashk_res.start = base;
416 crashk_res.end = base + size - 1;
421 #ifdef CONFIG_PROC_VMCORE
422 /* elfcorehdr= specifies the location of elf core header
423 * stored by the crashed kernel. This option will be passed
424 * by kexec loader to the capture kernel.
426 else if(!memcmp(from, "elfcorehdr=", 11))
427 elfcorehdr_addr = memparse(from+11, &from);
433 if (COMMAND_LINE_SIZE <= ++len)
438 printk(KERN_INFO "user-defined physical RAM map:\n");
439 e820_print_map("user");
442 *cmdline_p = command_line;
447 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
449 unsigned long bootmap_size, bootmap;
451 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
452 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
454 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
455 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
456 e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
457 reserve_bootmem(bootmap, bootmap_size);
461 /* Use inline assembly to define this because the nops are defined
462 as inline assembly strings in the include files and we cannot
463 get them easily into strings. */
464 asm("\t.data\nk8nops: "
465 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
468 extern unsigned char k8nops[];
469 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
475 k8nops + 1 + 2 + 3 + 4,
476 k8nops + 1 + 2 + 3 + 4 + 5,
477 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
478 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
481 extern char __vsyscall_0;
483 /* Replace instructions with better alternatives for this CPU type.
485 This runs before SMP is initialized to avoid SMP problems with
486 self modifying code. This implies that asymmetric systems where
487 APs have fewer capabilities than the boot processor are not handled.
488 In this case boot with "noreplacement". */
489 void apply_alternatives(void *start, void *end)
493 for (a = start; (void *)a < end; a++) {
496 if (!boot_cpu_has(a->cpuid))
499 BUG_ON(a->replacementlen > a->instrlen);
501 /* vsyscall code is not mapped yet. resolve it manually. */
502 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
503 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
504 __inline_memcpy(instr, a->replacement, a->replacementlen);
505 diff = a->instrlen - a->replacementlen;
507 /* Pad the rest with nops */
508 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
512 __inline_memcpy(instr + i, k8_nops[k], k);
517 static int no_replacement __initdata = 0;
519 void __init alternative_instructions(void)
521 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
524 apply_alternatives(__alt_instructions, __alt_instructions_end);
527 static int __init noreplacement_setup(char *s)
533 __setup("noreplacement", noreplacement_setup);
535 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
537 #ifdef CONFIG_EDD_MODULE
541 * copy_edd() - Copy the BIOS EDD information
542 * from boot_params into a safe place.
545 static inline void copy_edd(void)
547 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
548 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
549 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
550 edd.edd_info_nr = EDD_NR;
553 static inline void copy_edd(void)
558 #define EBDA_ADDR_POINTER 0x40E
559 static void __init reserve_ebda_region(void)
563 * there is a real-mode segmented pointer pointing to the
564 * 4K EBDA area at 0x40E
566 addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
569 reserve_bootmem_generic(addr, PAGE_SIZE);
572 void __init setup_arch(char **cmdline_p)
574 unsigned long kernel_end;
576 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
577 drive_info = DRIVE_INFO;
578 screen_info = SCREEN_INFO;
579 edid_info = EDID_INFO;
580 saved_video_mode = SAVED_VIDEO_MODE;
581 bootloader_type = LOADER_TYPE;
583 #ifdef CONFIG_BLK_DEV_RAM
584 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
585 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
586 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
588 setup_memory_region();
591 if (!MOUNT_ROOT_RDONLY)
592 root_mountflags &= ~MS_RDONLY;
593 init_mm.start_code = (unsigned long) &_text;
594 init_mm.end_code = (unsigned long) &_etext;
595 init_mm.end_data = (unsigned long) &_edata;
596 init_mm.brk = (unsigned long) &_end;
598 code_resource.start = virt_to_phys(&_text);
599 code_resource.end = virt_to_phys(&_etext)-1;
600 data_resource.start = virt_to_phys(&_etext);
601 data_resource.end = virt_to_phys(&_edata)-1;
603 parse_cmdline_early(cmdline_p);
605 early_identify_cpu(&boot_cpu_data);
608 * partially used pages are not usable - thus
609 * we are rounding upwards:
611 end_pfn = e820_end_of_ram();
615 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
621 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
622 * Call this early for SRAT node setup.
624 acpi_boot_table_init();
627 #ifdef CONFIG_ACPI_NUMA
629 * Parse SRAT to discover nodes.
635 numa_initmem_init(0, end_pfn);
637 contig_initmem_init(0, end_pfn);
640 /* Reserve direct mapping */
641 reserve_bootmem_generic(table_start << PAGE_SHIFT,
642 (table_end - table_start) << PAGE_SHIFT);
645 kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
646 reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
649 * reserve physical page 0 - it's a special BIOS page on many boxes,
650 * enabling clean reboots, SMP operation, laptop functions.
652 reserve_bootmem_generic(0, PAGE_SIZE);
654 /* reserve ebda region */
655 reserve_ebda_region();
659 * But first pinch a few for the stack/trampoline stuff
660 * FIXME: Don't need the extra page at 4K, but need to fix
661 * trampoline before removing it. (see the GDT stuff)
663 reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
665 /* Reserve SMP trampoline */
666 reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
669 #ifdef CONFIG_ACPI_SLEEP
671 * Reserve low memory region for sleep support.
673 acpi_reserve_bootmem();
675 #ifdef CONFIG_X86_LOCAL_APIC
677 * Find and reserve possible boot-time SMP configuration:
681 #ifdef CONFIG_BLK_DEV_INITRD
682 if (LOADER_TYPE && INITRD_START) {
683 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
684 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
686 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
687 initrd_end = initrd_start+INITRD_SIZE;
690 printk(KERN_ERR "initrd extends beyond end of memory "
691 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
692 (unsigned long)(INITRD_START + INITRD_SIZE),
693 (unsigned long)(end_pfn << PAGE_SHIFT));
699 if (crashk_res.start != crashk_res.end) {
700 reserve_bootmem(crashk_res.start,
701 crashk_res.end - crashk_res.start + 1);
711 * Read APIC and some other early information from ACPI tables.
718 #ifdef CONFIG_X86_LOCAL_APIC
720 * get boot-time SMP configuration:
722 if (smp_found_config)
724 init_apic_mappings();
728 * Request address space for all standard RAM and ROM resources
729 * and also for regions reported as reserved by the e820.
732 e820_reserve_resources();
734 request_resource(&iomem_resource, &video_ram_resource);
738 /* request I/O space for devices used on all i[345]86 PCs */
739 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
740 request_resource(&ioport_resource, &standard_io_resources[i]);
745 #ifdef CONFIG_GART_IOMMU
750 #if defined(CONFIG_VGA_CONSOLE)
751 conswitchp = &vga_con;
752 #elif defined(CONFIG_DUMMY_CONSOLE)
753 conswitchp = &dummy_con;
758 static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
762 if (c->extended_cpuid_level < 0x80000004)
765 v = (unsigned int *) c->x86_model_id;
766 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
767 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
768 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
769 c->x86_model_id[48] = 0;
774 static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
776 unsigned int n, dummy, eax, ebx, ecx, edx;
778 n = c->extended_cpuid_level;
780 if (n >= 0x80000005) {
781 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
782 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
783 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
784 c->x86_cache_size=(ecx>>24)+(edx>>24);
785 /* On K8 L1 TLB is inclusive, so don't count it */
789 if (n >= 0x80000006) {
790 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
791 ecx = cpuid_ecx(0x80000006);
792 c->x86_cache_size = ecx >> 16;
793 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
795 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
796 c->x86_cache_size, ecx & 0xFF);
800 cpuid(0x80000007, &dummy, &dummy, &dummy, &c->x86_power);
801 if (n >= 0x80000008) {
802 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
803 c->x86_virt_bits = (eax >> 8) & 0xff;
804 c->x86_phys_bits = eax & 0xff;
809 static int nearby_node(int apicid)
812 for (i = apicid - 1; i >= 0; i--) {
813 int node = apicid_to_node[i];
814 if (node != NUMA_NO_NODE && node_online(node))
817 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
818 int node = apicid_to_node[i];
819 if (node != NUMA_NO_NODE && node_online(node))
822 return first_node(node_online_map); /* Shouldn't happen */
827 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
828 * Assumes number of cores is a power of two.
830 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
833 int cpu = smp_processor_id();
837 unsigned apicid = phys_proc_id[cpu];
841 while ((1 << bits) < c->x86_max_cores)
844 /* Low order bits define the core id (index of core in socket) */
845 cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
846 /* Convert the APIC ID into the socket ID */
847 phys_proc_id[cpu] >>= bits;
850 node = phys_proc_id[cpu];
851 if (apicid_to_node[apicid] != NUMA_NO_NODE)
852 node = apicid_to_node[apicid];
853 if (!node_online(node)) {
854 /* Two possibilities here:
855 - The CPU is missing memory and no node was created.
856 In that case try picking one from a nearby CPU
857 - The APIC IDs differ from the HyperTransport node IDs
858 which the K8 northbridge parsing fills in.
859 Assume they are all increased by a constant offset,
860 but in the same order as the HT nodeids.
861 If that doesn't result in a usable node fall back to the
862 path for the previous case. */
863 int ht_nodeid = apicid - (phys_proc_id[0] << bits);
864 if (ht_nodeid >= 0 &&
865 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
866 node = apicid_to_node[ht_nodeid];
867 /* Pick a nearby node */
868 if (!node_online(node))
869 node = nearby_node(apicid);
871 numa_set_node(cpu, node);
873 printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
874 cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
879 static int __init init_amd(struct cpuinfo_x86 *c)
887 * Disable TLB flush filter by setting HWCR.FFDIS on K8
888 * bit 6 of msr C001_0015
890 * Errata 63 for SH-B3 steppings
891 * Errata 122 for all steppings (F+ have it disabled by default)
894 rdmsrl(MSR_K8_HWCR, value);
896 wrmsrl(MSR_K8_HWCR, value);
900 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
901 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
902 clear_bit(0*32+31, &c->x86_capability);
904 r = get_model_name(c);
908 /* Should distinguish Models here, but this is only
909 a fallback anyways. */
910 strcpy(c->x86_model_id, "Hammer");
914 display_cacheinfo(c);
916 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
917 if (c->x86_power & (1<<8))
918 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
920 if (c->extended_cpuid_level >= 0x80000008) {
921 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
922 if (c->x86_max_cores & (c->x86_max_cores - 1))
923 c->x86_max_cores = 1;
931 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
934 u32 eax, ebx, ecx, edx;
935 int index_msb, core_bits;
936 int cpu = smp_processor_id();
938 cpuid(1, &eax, &ebx, &ecx, &edx);
940 c->apicid = phys_pkg_id(0);
942 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
945 smp_num_siblings = (ebx & 0xff0000) >> 16;
947 if (smp_num_siblings == 1) {
948 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
949 } else if (smp_num_siblings > 1 ) {
951 if (smp_num_siblings > NR_CPUS) {
952 printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
953 smp_num_siblings = 1;
957 index_msb = get_count_order(smp_num_siblings);
958 phys_proc_id[cpu] = phys_pkg_id(index_msb);
960 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
963 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
965 index_msb = get_count_order(smp_num_siblings) ;
967 core_bits = get_count_order(c->x86_max_cores);
969 cpu_core_id[cpu] = phys_pkg_id(index_msb) &
970 ((1 << core_bits) - 1);
972 if (c->x86_max_cores > 1)
973 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
980 * find out the number of processor cores on the die
982 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
986 if (c->cpuid_level < 4)
995 return ((eax >> 26) + 1);
1000 static void srat_detect_node(void)
1004 int cpu = smp_processor_id();
1006 /* Don't do the funky fallback heuristics the AMD version employs
1008 node = apicid_to_node[hard_smp_processor_id()];
1009 if (node == NUMA_NO_NODE)
1011 numa_set_node(cpu, node);
1014 printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
1018 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
1023 init_intel_cacheinfo(c);
1024 n = c->extended_cpuid_level;
1025 if (n >= 0x80000008) {
1026 unsigned eax = cpuid_eax(0x80000008);
1027 c->x86_virt_bits = (eax >> 8) & 0xff;
1028 c->x86_phys_bits = eax & 0xff;
1029 /* CPUID workaround for Intel 0F34 CPU */
1030 if (c->x86_vendor == X86_VENDOR_INTEL &&
1031 c->x86 == 0xF && c->x86_model == 0x3 &&
1033 c->x86_phys_bits = 36;
1037 c->x86_cache_alignment = c->x86_clflush_size * 2;
1038 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
1039 (c->x86 == 0x6 && c->x86_model >= 0x0e))
1040 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
1041 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1042 c->x86_max_cores = intel_num_cpu_cores(c);
1047 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
1049 char *v = c->x86_vendor_id;
1051 if (!strcmp(v, "AuthenticAMD"))
1052 c->x86_vendor = X86_VENDOR_AMD;
1053 else if (!strcmp(v, "GenuineIntel"))
1054 c->x86_vendor = X86_VENDOR_INTEL;
1056 c->x86_vendor = X86_VENDOR_UNKNOWN;
1059 struct cpu_model_info {
1062 char *model_names[16];
1065 /* Do some early cpuid on the boot CPU to get some parameters that are
1066 needed before check_bugs. Everything advanced is in identify_cpu
1068 void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
1072 c->loops_per_jiffy = loops_per_jiffy;
1073 c->x86_cache_size = -1;
1074 c->x86_vendor = X86_VENDOR_UNKNOWN;
1075 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1076 c->x86_vendor_id[0] = '\0'; /* Unset */
1077 c->x86_model_id[0] = '\0'; /* Unset */
1078 c->x86_clflush_size = 64;
1079 c->x86_cache_alignment = c->x86_clflush_size;
1080 c->x86_max_cores = 1;
1081 c->extended_cpuid_level = 0;
1082 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1084 /* Get vendor name */
1085 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1086 (unsigned int *)&c->x86_vendor_id[0],
1087 (unsigned int *)&c->x86_vendor_id[8],
1088 (unsigned int *)&c->x86_vendor_id[4]);
1092 /* Initialize the standard set of capabilities */
1093 /* Note that the vendor-specific code below might override */
1095 /* Intel-defined flags: level 0x00000001 */
1096 if (c->cpuid_level >= 0x00000001) {
1098 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1099 &c->x86_capability[0]);
1100 c->x86 = (tfms >> 8) & 0xf;
1101 c->x86_model = (tfms >> 4) & 0xf;
1102 c->x86_mask = tfms & 0xf;
1104 c->x86 += (tfms >> 20) & 0xff;
1106 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1107 if (c->x86_capability[0] & (1<<19))
1108 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1110 /* Have CPUID level 0 only - unheard of */
1115 phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
1120 * This does the hard work of actually picking apart the CPU stuff...
1122 void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1127 early_identify_cpu(c);
1129 /* AMD-defined flags: level 0x80000001 */
1130 xlvl = cpuid_eax(0x80000000);
1131 c->extended_cpuid_level = xlvl;
1132 if ((xlvl & 0xffff0000) == 0x80000000) {
1133 if (xlvl >= 0x80000001) {
1134 c->x86_capability[1] = cpuid_edx(0x80000001);
1135 c->x86_capability[6] = cpuid_ecx(0x80000001);
1137 if (xlvl >= 0x80000004)
1138 get_model_name(c); /* Default name */
1141 /* Transmeta-defined flags: level 0x80860001 */
1142 xlvl = cpuid_eax(0x80860000);
1143 if ((xlvl & 0xffff0000) == 0x80860000) {
1144 /* Don't set x86_cpuid_level here for now to not confuse. */
1145 if (xlvl >= 0x80860001)
1146 c->x86_capability[2] = cpuid_edx(0x80860001);
1150 * Vendor-specific initialization. In this section we
1151 * canonicalize the feature flags, meaning if there are
1152 * features a certain CPU supports which CPUID doesn't
1153 * tell us, CPUID claiming incorrect flags, or other bugs,
1154 * we handle them here.
1156 * At the end of this section, c->x86_capability better
1157 * indicate the features this CPU genuinely supports!
1159 switch (c->x86_vendor) {
1160 case X86_VENDOR_AMD:
1164 case X86_VENDOR_INTEL:
1168 case X86_VENDOR_UNKNOWN:
1170 display_cacheinfo(c);
1174 select_idle_routine(c);
1178 * On SMP, boot_cpu_data holds the common feature set between
1179 * all CPUs; so make sure that we indicate which features are
1180 * common between the CPUs. The first time this routine gets
1181 * executed, c == &boot_cpu_data.
1183 if (c != &boot_cpu_data) {
1184 /* AND the already accumulated flags with these */
1185 for (i = 0 ; i < NCAPINTS ; i++)
1186 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1189 #ifdef CONFIG_X86_MCE
1192 if (c == &boot_cpu_data)
1197 numa_add_cpu(smp_processor_id());
1202 void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1204 if (c->x86_model_id[0])
1205 printk("%s", c->x86_model_id);
1207 if (c->x86_mask || c->cpuid_level >= 0)
1208 printk(" stepping %02x\n", c->x86_mask);
1214 * Get CPU information for use by the procfs.
1217 static int show_cpuinfo(struct seq_file *m, void *v)
1219 struct cpuinfo_x86 *c = v;
1222 * These flag bits must match the definitions in <asm/cpufeature.h>.
1223 * NULL means this bit is undefined or reserved; either way it doesn't
1224 * have meaning as far as Linux is concerned. Note that it's important
1225 * to realize there is a difference between this table and CPUID -- if
1226 * applications want to get the raw CPUID data, they should access
1227 * /dev/cpu/<cpu_nr>/cpuid instead.
1229 static char *x86_cap_flags[] = {
1231 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1232 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1233 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1234 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1237 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1238 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
1239 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
1240 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
1242 /* Transmeta-defined */
1243 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
1244 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1245 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1246 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1248 /* Other (Linux-defined) */
1249 "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1250 "constant_tsc", NULL, NULL,
1251 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1252 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1253 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1255 /* Intel-defined (#2) */
1256 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
1257 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
1258 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1259 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1261 /* VIA/Cyrix/Centaur-defined */
1262 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1263 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1264 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1265 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1267 /* AMD-defined (#2) */
1268 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
1269 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1270 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1271 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1273 static char *x86_power_flags[] = {
1274 "ts", /* temperature sensor */
1275 "fid", /* frequency id control */
1276 "vid", /* voltage id control */
1277 "ttp", /* thermal trip */
1281 /* nothing */ /* constant_tsc - moved to flags */
1286 if (!cpu_online(c-cpu_data))
1290 seq_printf(m,"processor\t: %u\n"
1292 "cpu family\t: %d\n"
1294 "model name\t: %s\n",
1295 (unsigned)(c-cpu_data),
1296 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
1299 c->x86_model_id[0] ? c->x86_model_id : "unknown");
1301 if (c->x86_mask || c->cpuid_level >= 0)
1302 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
1304 seq_printf(m, "stepping\t: unknown\n");
1306 if (cpu_has(c,X86_FEATURE_TSC)) {
1307 unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
1310 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
1311 freq / 1000, (freq % 1000));
1315 if (c->x86_cache_size >= 0)
1316 seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
1319 if (smp_num_siblings * c->x86_max_cores > 1) {
1320 int cpu = c - cpu_data;
1321 seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
1322 seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
1323 seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
1324 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
1330 "fpu_exception\t: yes\n"
1331 "cpuid level\t: %d\n"
1338 for ( i = 0 ; i < 32*NCAPINTS ; i++ )
1339 if ( test_bit(i, &c->x86_capability) &&
1340 x86_cap_flags[i] != NULL )
1341 seq_printf(m, " %s", x86_cap_flags[i]);
1344 seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
1345 c->loops_per_jiffy/(500000/HZ),
1346 (c->loops_per_jiffy/(5000/HZ)) % 100);
1348 if (c->x86_tlbsize > 0)
1349 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
1350 seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
1351 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
1353 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
1354 c->x86_phys_bits, c->x86_virt_bits);
1356 seq_printf(m, "power management:");
1359 for (i = 0; i < 32; i++)
1360 if (c->x86_power & (1 << i)) {
1361 if (i < ARRAY_SIZE(x86_power_flags) &&
1363 seq_printf(m, "%s%s",
1364 x86_power_flags[i][0]?" ":"",
1365 x86_power_flags[i]);
1367 seq_printf(m, " [%d]", i);
1371 seq_printf(m, "\n\n");
1376 static void *c_start(struct seq_file *m, loff_t *pos)
1378 return *pos < NR_CPUS ? cpu_data + *pos : NULL;
1381 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
1384 return c_start(m, pos);
1387 static void c_stop(struct seq_file *m, void *v)
1391 struct seq_operations cpuinfo_op = {
1395 .show = show_cpuinfo,
1398 static int __init run_dmi_scan(void)
1403 core_initcall(run_dmi_scan);