/*
 *  linux/arch/i386/kernel/setup.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *
 *  Memory region support
 *	David Parsons <orc@pell.chi.il.us>, July-August 1999
 *
 *  Added E820 sanitization routine (removes overlapping memory regions);
 *	Brian Moyle <bmoyle@mvista.com>, February 2001
 *
 *  Moved CPU detection code to cpu/${cpu}.c
 *	Patrick Mochel <mochel@osdl.org>, March 2002
 *
 *  Provisions for empty E820 memory regions (reported by certain BIOSes).
 *	Alex Achenbach <xela@slit.de>, December 2002.
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mmzone.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/acpi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/mca.h>
#include <linux/root_dev.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/edd.h>
#include <linux/nodemask.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>

#include <video/edid.h>

#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/sections.h>
#include <asm/io_apic.h>

#include "setup_arch_pre.h"
#include <bios_ebda.h>
/* Forward Declaration. */
void __init find_max_pfn(void);

/* This value is set up by the early boot code to point to the value
   immediately after the boot time page tables.  It contains a *physical*
   address, and must not be in the .bss segment! */
unsigned long init_pg_tables_end __initdata = ~0UL;

int disable_pse __devinitdata = 0;

int efi_enabled = 0;
EXPORT_SYMBOL(efi_enabled);
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);

unsigned long mmu_cr4_features;

#ifdef	CONFIG_ACPI
	int acpi_disabled = 0;
#else
	int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

int __initdata acpi_force = 0;
extern acpi_interrupt_flags acpi_sci_flags;

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
EXPORT_SYMBOL(machine_id);
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;
/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;
EXPORT_SYMBOL(pci_mem_start);

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

/* user-defined highmem size */
static unsigned int highmem_pages = -1;
struct drive_info_struct { char dummy[32]; } drive_info;
#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
    defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
EXPORT_SYMBOL(drive_info);
#endif
struct screen_info screen_info;
EXPORT_SYMBOL(screen_info);
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);
struct sys_desc_table_struct {
        unsigned short length;
        unsigned char table[0];
};
struct edid_info edid_info;
EXPORT_SYMBOL_GPL(edid_info);
struct ist_info ist_info;
#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
	defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
EXPORT_SYMBOL(ist_info);
#endif
extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
extern void generic_apic_probe(char *);
extern int root_mountflags;

unsigned long saved_videomode;

#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000

static char command_line[COMMAND_LINE_SIZE];

unsigned char __initdata boot_params[PARAM_SIZE];
static struct resource data_resource = {
	.name	= "Kernel data",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

static struct resource code_resource = {
	.name	= "Kernel code",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

static struct resource system_rom_resource = {
	.name	= "System ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource extension_rom_resource = {
	.name	= "Extension ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource adapter_rom_resources[] = { {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])

static struct resource video_rom_resource = {
	.name	= "Video ROM",
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource video_ram_resource = {
	.name	= "Video RAM area",
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

static struct resource standard_io_resources[] = { {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma page reg",
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
} };

#define STANDARD_IO_RESOURCES \
	(sizeof standard_io_resources / sizeof standard_io_resources[0])
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)

static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char *p, sum = 0;

	for (p = rom; p < rom + length; p++)
		sum += *p;
	return sum == 0;
}
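/*
 * Worked example of the ROM header convention checked above (values are
 * illustrative): a card image at 0xc8000 beginning with the bytes 55 aa 40
 * matches romsignature() (little-endian word 0xaa55) and claims a length of
 * 0x40 * 512 = 32KB, which romchecksum() accepts only if all 32768 bytes
 * sum to 0 modulo 256.
 */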
static void __init probe_roms(void)
{
	unsigned long start, length, upper;
	unsigned char *rom;
	int i;

	/* video rom */
	upper = adapter_rom_resources[0].start;
	for (start = video_rom_resource.start; start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		video_rom_resource.start = start;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* if checksum okay, trust length byte */
		if (length && romchecksum(rom, length))
			video_rom_resource.end = start + length - 1;

		request_resource(&iomem_resource, &video_rom_resource);
		break;
	}

	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;

	/* system rom */
	request_resource(&iomem_resource, &system_rom_resource);
	upper = system_rom_resource.start;

	/* check for extension rom (ignore length byte!) */
	rom = isa_bus_to_virt(extension_rom_resource.start);
	if (romsignature(rom)) {
		length = extension_rom_resource.end - extension_rom_resource.start + 1;
		if (romchecksum(rom, length)) {
			request_resource(&iomem_resource, &extension_rom_resource);
			upper = extension_rom_resource.start;
		}
	}

	/* check for adapter roms on 2k boundaries */
	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* but accept any length that fits if checksum okay */
		if (!length || start + length > upper || !romchecksum(rom, length))
			continue;

		adapter_rom_resources[i].start = start;
		adapter_rom_resources[i].end = start + length - 1;
		request_resource(&iomem_resource, &adapter_rom_resources[i]);

		start = adapter_rom_resources[i++].end & ~2047UL;
	}
}
static void __init limit_regions(unsigned long long size)
{
	unsigned long long current_addr = 0;
	int i;

	if (efi_enabled) {
		efi_memory_desc_t *md;
		void *p;

		for (p = memmap.map, i = 0; p < memmap.map_end;
		     p += memmap.desc_size, i++) {
			md = p;
			current_addr = md->phys_addr + (md->num_pages << 12);
			if (md->type == EFI_CONVENTIONAL_MEMORY &&
			    current_addr >= size) {
				md->num_pages -=
					(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
				memmap.nr_map = i + 1;
				return;
			}
		}
	}
	for (i = 0; i < e820.nr_map; i++) {
		current_addr = e820.map[i].addr + e820.map[i].size;
		if (current_addr < size)
			continue;
		if (e820.map[i].type != E820_RAM)
			continue;
		if (e820.map[i].addr >= size) {
			/*
			 * This region starts past the end of the
			 * requested size, skip it completely.
			 */
			e820.nr_map = i;
		} else {
			e820.nr_map = i + 1;
			e820.map[i].size -= current_addr - size;
		}
		return;
	}
}
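/*
 * Example of the trimming above (made-up map): with "mem=512M" and a single
 * 1GB E820_RAM entry starting at 0, current_addr reaches 1GB >= size, so the
 * entry's size is reduced by (1GB - 512MB) to 512MB and nr_map is clipped so
 * that any later entries are dropped.
 */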
static void __init add_memory_region(unsigned long long start,
				     unsigned long long size, int type)
{
	int x = e820.nr_map;

	if (x == E820MAX) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].addr = start;
	e820.map[x].size = size;
	e820.map[x].type = type;
	e820.nr_map++;
} /* add_memory_region */
static void __init print_memory_map(char *who)
{
	int i;

	for (i = 0; i < e820.nr_map; i++) {
		printk(" %s: %016Lx - %016Lx ", who,
			e820.map[i].addr,
			e820.map[i].addr + e820.map[i].size);
		switch (e820.map[i].type) {
		case E820_RAM:		printk("(usable)\n"); break;
		case E820_RESERVED:	printk("(reserved)\n"); break;
		case E820_ACPI:		printk("(ACPI data)\n"); break;
		case E820_NVS:		printk("(ACPI NVS)\n"); break;
		default:		printk("type %lu\n", e820.map[i].type); break;
		}
	}
}
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps.
 */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;
	/*
	   Visually we're performing the following (1,2,3,4 = memory types)...

	   Sample memory map (w/overlaps):
		____22__________________
		______________________4_
		____1111________________
		_44_____________________
		11111111________________
		____________________33__
		___________44___________
		__________33333_________
		______________22________
		___________________2222_
		_________111111111______
		_____________________11_
		_________________4______

	   Sanitized equivalent (no overlap):
		1_______________________
		_44_____________________
		___1____________________
		____22__________________
		______11________________
		_________1______________
		__________3_____________
		___________44___________
		_____________33_________
		_______________2________
		________________1_______
		_________________4______
		___________________2____
		____________________33__
		______________________4_
	*/
	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;	/* true number of change-points */
	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i=1; i < chg_nr; i++) {
			/* if <current_addr> < <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   ) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}
	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	/* number of entries in the overlap table */
	new_bios_entry = 0;	/* index for creating new bios map entries */
	last_type = 0;		/* start with undefined memory type */
	last_addr = 0;		/* start with 0 as last starting address */
	/* loop through change-points, determining effect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) {
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++] = change_point[chgidx]->pbios;
		} else {
			/* remove entry from list (order independent, so swap with last) */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break;	/* no more space left for new bios entries */
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;	/* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
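/*
 * Tiny worked example of the change-point algorithm (hypothetical map): an
 * input of 0x00000000-0x000a0000 usable plus 0x00090000-0x00100000 reserved
 * yields change points at 0, 0x90000, 0xa0000 and 0x100000; since the larger
 * type value wins wherever entries overlap, the sanitized output is
 * 0x00000000-0x00090000 usable and 0x00090000-0x00100000 reserved.
 */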
/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 *
 * We check to see that the memory map contains at least 2 elements
 * before we'll use it, because the detection code in setup.S may
 * not be perfect and most every PC known to man has two memory
 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM && start < 0x100000ULL && end > 0xA0000ULL) {
			if (start < 0xA0000ULL)
				add_memory_region(start, 0xA0000ULL-start, type);
			if (end <= 0x100000ULL)
				continue;
			start = 0x100000ULL;
			size = end - start;
		}
		add_memory_region(start, size, type);
	} while (biosmap++, --nr_map);
	return 0;
}
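/*
 * Example of the 640k-1M fixup above (hypothetical BIOS entry): a single
 * E820_RAM region 0x00000000-0x00200000 is split into usable RAM at
 * 0x00000000-0x000a0000 and 0x00100000-0x00200000, keeping the legacy
 * 0xa0000-0xfffff hole (VGA and BIOS ROM space) out of the RAM map.
 */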
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 */
static inline void copy_edd(void)
{
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
}
#else
static inline void copy_edd(void)
{
}
#endif
/*
 * Do NOT EVER look at the BIOS memory size location.
 * It does not work on many machines.
 */
#define LOWMEMSIZE()	(0x9f000)
static void __init parse_cmdline_early (char ** cmdline_p)
{
	char c = ' ', *to = command_line, *from = saved_command_line;
	int len = 0;
	int userdef = 0;

	/* Save unparsed command line copy for /proc/cmdline */
	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';

	for (;;) {
		if (c != ' ')
			goto next_char;
		/*
		 * "mem=nopentium" disables the 4MB page tables.
		 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
		 * to <mem>, overriding the bios size.
		 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
		 * <start> to <start>+<mem>, overriding the bios size.
		 *
		 * HPA tells me bootloaders need to parse mem=, so no new
		 * option should be mem=  [also see Documentation/i386/boot.txt]
		 */
		if (!memcmp(from, "mem=", 4)) {
			if (to != command_line)
				to--;
			if (!memcmp(from+4, "nopentium", 9)) {
				from += 4+9;
				clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
				disable_pse = 1;
			} else {
				/* If the user specifies memory size, we
				 * limit the BIOS-provided memory map to
				 * that size. exactmap can be used to specify
				 * the exact map. mem=number can be used to
				 * trim the existing memory map.
				 */
				unsigned long long mem_size;

				mem_size = memparse(from+4, &from);
				limit_regions(mem_size);
				userdef = 1;
			}
		}
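		/*
		 * Illustrative command lines for the options handled here
		 * (values are made up): "mem=512M" clips the e820 map to the
		 * first 512MB of RAM, while "memmap=exactmap memmap=640K@0
		 * memmap=511M@1M" throws the BIOS map away and rebuilds it
		 * from the user-supplied regions.
		 */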
		else if (!memcmp(from, "memmap=", 7)) {
			if (to != command_line)
				to--;
			if (!memcmp(from+7, "exactmap", 8)) {
#ifdef CONFIG_CRASH_DUMP
				/* If we are doing a crash dump, we
				 * still need to know the real mem
				 * size before original memory map is
				 * reset.
				 */
				find_max_pfn();
				saved_max_pfn = max_pfn;
#endif
				from += 8+7;
				e820.nr_map = 0;
				userdef = 1;
			} else {
				/* If the user specifies memory size, we
				 * limit the BIOS-provided memory map to
				 * that size. exactmap can be used to specify
				 * the exact map. mem=number can be used to
				 * trim the existing memory map.
				 */
				unsigned long long start_at, mem_size;

				mem_size = memparse(from+7, &from);
				if (*from == '@') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_RAM);
				} else if (*from == '#') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_ACPI);
				} else if (*from == '$') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_RESERVED);
				} else {
					limit_regions(mem_size);
					userdef = 1;
				}
			}
		}
		else if (!memcmp(from, "noexec=", 7))
			noexec_setup(from + 7);

#ifdef  CONFIG_X86_SMP
		/*
		 * If the BIOS enumerates physical processors before logical,
		 * maxcpus=N at enumeration-time can be used to disable HT.
		 */
		else if (!memcmp(from, "maxcpus=", 8)) {
			extern unsigned int maxcpus;

			maxcpus = simple_strtoul(from + 8, NULL, 0);
		}
#endif
#ifdef CONFIG_ACPI
		/* "acpi=off" disables both ACPI table parsing and interpreter */
		else if (!memcmp(from, "acpi=off", 8)) {
			disable_acpi();
		}

		/* acpi=force to over-ride black-list */
		else if (!memcmp(from, "acpi=force", 10)) {
			acpi_force = 1;
			acpi_ht = 1;
			acpi_disabled = 0;
		}

		/* acpi=strict disables out-of-spec workarounds */
		else if (!memcmp(from, "acpi=strict", 11)) {
			acpi_strict = 1;
		}

		/* Limit ACPI just to boot-time to enable HT */
		else if (!memcmp(from, "acpi=ht", 7)) {
			if (!acpi_force)
				disable_acpi();
			acpi_ht = 1;
		}

		/* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
		else if (!memcmp(from, "pci=noacpi", 10)) {
			acpi_disable_pci();
		}
		/* "acpi=noirq" disables ACPI interrupt routing */
		else if (!memcmp(from, "acpi=noirq", 10)) {
			acpi_noirq_set();
		}

		else if (!memcmp(from, "acpi_sci=edge", 13))
			acpi_sci_flags.trigger = 1;

		else if (!memcmp(from, "acpi_sci=level", 14))
			acpi_sci_flags.trigger = 3;

		else if (!memcmp(from, "acpi_sci=high", 13))
			acpi_sci_flags.polarity = 1;

		else if (!memcmp(from, "acpi_sci=low", 12))
			acpi_sci_flags.polarity = 3;

#ifdef CONFIG_X86_IO_APIC
		else if (!memcmp(from, "acpi_skip_timer_override", 24))
			acpi_skip_timer_override = 1;

		if (!memcmp(from, "disable_timer_pin_1", 19))
			disable_timer_pin_1 = 1;
		if (!memcmp(from, "enable_timer_pin_1", 18))
			disable_timer_pin_1 = -1;

		/* disable IO-APIC */
		else if (!memcmp(from, "noapic", 6))
			disable_ioapic_setup();
#endif /* CONFIG_X86_IO_APIC */
#endif /* CONFIG_ACPI */
#ifdef CONFIG_X86_LOCAL_APIC
		/* enable local APIC */
		else if (!memcmp(from, "lapic", 5))
			lapic_enable();

		/* disable local APIC */
		else if (!memcmp(from, "nolapic", 6))
			lapic_disable();
#endif /* CONFIG_X86_LOCAL_APIC */
		/* crashkernel=size@addr specifies the location to reserve for
		 * a crash kernel.  By reserving this memory we guarantee
		 * that linux never sets it up as a DMA target.
		 * Useful for holding code to do something appropriate
		 * after a kernel panic.
		 */
		else if (!memcmp(from, "crashkernel=", 12)) {
			unsigned long size, base;
			size = memparse(from+12, &from);
			if (*from == '@') {
				base = memparse(from+1, &from);
				/* FIXME: Do I want a sanity check
				 * to validate the memory range?
				 */
				crashk_res.start = base;
				crashk_res.end   = base + size - 1;
			}
		}
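		/*
		 * Illustrative example (made-up numbers): booting with
		 * "crashkernel=64M@16M" sets crashk_res to the range
		 * 0x01000000-0x04ffffff, i.e. 64MB reserved starting at
		 * the 16MB physical address and kept out of normal use.
		 */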
#ifdef CONFIG_CRASH_DUMP
		/* elfcorehdr= specifies the location of elf core header
		 * stored by the crashed kernel.
		 */
		else if (!memcmp(from, "elfcorehdr=", 11))
			elfcorehdr_addr = memparse(from+11, &from);
#endif
		/*
		 * highmem=size forces highmem to be exactly 'size' bytes.
		 * This works even on boxes that have no highmem otherwise.
		 * This also works to reduce highmem size on bigger boxes.
		 */
		else if (!memcmp(from, "highmem=", 8))
			highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;

		/*
		 * vmalloc=size forces the vmalloc area to be exactly 'size'
		 * bytes. This can be used to increase (or decrease) the
		 * vmalloc area - the default is 128m.
		 */
		else if (!memcmp(from, "vmalloc=", 8))
			__VMALLOC_RESERVE = memparse(from+8, &from);
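		/*
		 * Illustrative example (made-up numbers): "highmem=512M"
		 * pins highmem at 131072 pages (512MB with 4KB pages), and
		 * "vmalloc=192M" grows the vmalloc reservation from the
		 * default 128MB to 192MB of kernel virtual address space.
		 */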
	next_char:
		c = *(from++);
		if (!c)
			break;
		if (COMMAND_LINE_SIZE <= ++len)
			break;
		*(to++) = c;
	}
	*to = '\0';
	*cmdline_p = command_line;

	if (userdef) {
		printk(KERN_INFO "user-defined physical RAM map:\n");
		print_memory_map("user");
	}
}
/*
 * Callback for efi_memory_walk.
 */
static int __init
efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
{
	unsigned long *max_pfn = arg, pfn;

	if (start < end) {
		pfn = PFN_UP(end -1);
		if (pfn > *max_pfn)
			*max_pfn = pfn;
	}
	return 0;
}

/*
 * Find the highest page frame number we have available
 */
void __init find_max_pfn(void)
{
	int i;

	max_pfn = 0;
	if (efi_enabled) {
		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
		return;
	}

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;
		/* RAM? */
		if (e820.map[i].type != E820_RAM)
			continue;
		start = PFN_UP(e820.map[i].addr);
		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
		if (start < end && end > max_pfn)
			max_pfn = end;
	}
}
/*
 * Determine low and high memory ranges:
 */
unsigned long __init find_max_low_pfn(void)
{
	unsigned long max_low_pfn;

	max_low_pfn = max_pfn;
	if (max_low_pfn > MAXMEM_PFN) {
		if (highmem_pages == -1)
			highmem_pages = max_pfn - MAXMEM_PFN;
		if (highmem_pages + MAXMEM_PFN < max_pfn)
			max_pfn = MAXMEM_PFN + highmem_pages;
		if (highmem_pages + MAXMEM_PFN > max_pfn) {
			printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
		/* Maximum memory usable is what is directly addressable */
		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
					MAXMEM>>20);
		if (max_pfn > MAX_NONPAE_PFN)
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		else
			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
		max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
		if (max_pfn > MAX_NONPAE_PFN) {
			max_pfn = MAX_NONPAE_PFN;
			printk(KERN_WARNING "Warning only 4GB will be used.\n");
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
	} else {
		if (highmem_pages == -1)
			highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
		if (highmem_pages >= max_pfn) {
			printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
			highmem_pages = 0;
		}
		if (highmem_pages) {
			if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
				printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
				highmem_pages = 0;
			}
			max_low_pfn -= highmem_pages;
		}
#else
		if (highmem_pages)
			printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
	}
	return max_low_pfn;
}
/*
 * Free all available memory for boot time allocation.  Used
 * as a callback function by efi_memory_walk()
 */
static int __init
free_available_memory(unsigned long start, unsigned long end, void *arg)
{
	/* check max_low_pfn */
	if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
		return 0;
	if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
		end = (max_low_pfn + 1) << PAGE_SHIFT;
	if (start < end)
		free_bootmem(start, end - start);

	return 0;
}
/*
 * Register fully available low RAM pages with the bootmem allocator.
 */
static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
{
	int i;

	if (efi_enabled) {
		efi_memmap_walk(free_available_memory, NULL);
		return;
	}
	for (i = 0; i < e820.nr_map; i++) {
		unsigned long curr_pfn, last_pfn, size;

		/* Reserve usable low memory */
		if (e820.map[i].type != E820_RAM)
			continue;
		/* We are rounding up the start address of usable memory: */
		curr_pfn = PFN_UP(e820.map[i].addr);
		if (curr_pfn >= max_low_pfn)
			continue;
		/* ... and at the end of the usable range downwards: */
		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
		if (last_pfn > max_low_pfn)
			last_pfn = max_low_pfn;

		/*
		 * .. finally, did all the rounding and playing
		 * around just make the area go away?
		 */
		if (last_pfn <= curr_pfn)
			continue;

		size = last_pfn - curr_pfn;
		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
	}
}
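/*
 * Rounding example for the PFN_UP/PFN_DOWN logic above (made-up region): an
 * e820 RAM entry 0x00000400-0x0009fc00 gives curr_pfn = 1 (the 0x400 start
 * rounded up to the next 4KB page) and last_pfn = 0x9f (the end rounded
 * down), so only the 158 fully usable pages in between are handed to
 * free_bootmem().
 */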
/*
 * workaround for Dell systems that neglect to reserve EBDA
 */
static void __init reserve_ebda_region(void)
{
	unsigned int addr;

	addr = get_bios_ebda();
	if (addr)
		reserve_bootmem(addr, PAGE_SIZE);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init setup_bootmem_allocator(void);
static unsigned long __init setup_memory(void)
{
	/*
	 * partially used pages are not usable - thus
	 * we are rounding upwards:
	 */
	min_low_pfn = PFN_UP(init_pg_tables_end);

	find_max_pfn();

	max_low_pfn = find_max_low_pfn();

#ifdef CONFIG_HIGHMEM
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > max_low_pfn) {
		highstart_pfn = max_low_pfn;
	}
	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
		pages_to_mb(highend_pfn - highstart_pfn));
#endif
	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
			pages_to_mb(max_low_pfn));

	setup_bootmem_allocator();

	return max_low_pfn;
}
void __init zone_sizes_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma, low;

	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	low = max_low_pfn;

	if (low < max_dma)
		zones_size[ZONE_DMA] = low;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
#endif
	}
	free_area_init(zones_size);
}
#else
extern unsigned long __init setup_memory(void);
extern void zone_sizes_init(void);
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
void __init setup_bootmem_allocator(void)
{
	unsigned long bootmap_size;
	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);

	register_bootmem_low_pages(max_low_pfn);

	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
			 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));

	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem(0, PAGE_SIZE);

	/* reserve EBDA region, it's a 4K region */
	reserve_ebda_region();

	/* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
	   PCI prefetch into it (errata #56). Usually the page is reserved anyways,
	   unless you have no PS/2 mouse plugged in. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 == 6)
		reserve_bootmem(0xa0000 - 4096, 4096);

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
#ifdef CONFIG_X86_FIND_SMP_CONFIG
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif

#ifdef CONFIG_BLK_DEV_INITRD
	if (LOADER_TYPE && INITRD_START) {
		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
			reserve_bootmem(INITRD_START, INITRD_SIZE);
			initrd_start =
				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
			initrd_end = initrd_start+INITRD_SIZE;
		} else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    INITRD_START + INITRD_SIZE,
			    max_low_pfn << PAGE_SHIFT);
			initrd_start = 0;
		}
	}
#endif
	if (crashk_res.start != crashk_res.end)
		reserve_bootmem(crashk_res.start,
			crashk_res.end - crashk_res.start + 1);
}
/*
 * The node 0 pgdat is initialized before all of these because
 * it's needed for bootmem.  node>0 pgdats have their virtual
 * space allocated before the pagetables are in place to access
 * them, so they can't be cleared then.
 *
 * This should all compile down to nothing when NUMA is off.
 */
void __init remapped_pgdat_init(void)
{
	int nid;

	for_each_online_node(nid) {
		if (nid != 0)
			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
	}
}
/*
 * Request address space for all standard RAM and ROM resources
 * and also for regions reported as reserved by the e820.
 */
static void __init
legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
{
	int i;

	probe_roms();
	for (i = 0; i < e820.nr_map; i++) {
		struct resource *res;
		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
			continue;
		res = alloc_bootmem_low(sizeof(struct resource));
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
		if (e820.map[i].type == E820_RAM) {
			/*
			 * We don't know which RAM region contains kernel data,
			 * so we try it repeatedly and let the resource manager
			 * test it.
			 */
			request_resource(res, code_resource);
			request_resource(res, data_resource);
			request_resource(res, &crashk_res);
		}
	}
}
/*
 * Request address space for all standard resources
 */
static void __init register_memory(void)
{
	unsigned long gapstart, gapsize, round;
	unsigned long long last;
	int i;

	if (efi_enabled)
		efi_initialize_iomem_resources(&code_resource, &data_resource);
	else
		legacy_init_iomem_resources(&code_resource, &data_resource);

	/* EFI systems may still have VGA */
	request_resource(&iomem_resource, &video_ram_resource);

	/* request I/O space for devices used on all i[345]86 PCs */
	for (i = 0; i < STANDARD_IO_RESOURCES; i++)
		request_resource(&ioport_resource, &standard_io_resources[i]);

	/*
	 * Search for the biggest gap in the low 32 bits of the e820
	 * memory space.
	 */
	last = 0x100000000ull;
	gapstart = 0x10000000;
	gapsize = 0x400000;
	i = e820.nr_map;
	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap > gapsize) {
				gapsize = gap;
				gapstart = end;
			}
		}
		if (start < last)
			last = start;
	}

	/*
	 * See how much we want to round up: start off with
	 * rounding to the next 1MB area.
	 */
	round = 0x100000;
	while ((gapsize >> 4) > round)
		round += round;
	/* Fun with two's complement */
	pci_mem_start = (gapstart + round) & -round;
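	/*
	 * Example of the rounding above (illustrative numbers): for a gap
	 * starting at 0xd0123456 that is 256MB long, round grows from 1MB to
	 * 16MB (the first power of two with gapsize/16 <= round), and
	 * (gapstart + round) & -round yields a 16MB-aligned 0xd1000000 for
	 * pci_mem_start.
	 */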
	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
		pci_mem_start, gapstart, gapsize);
}
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.data\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8);
asm("\t.data\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8);
asm("\t.data\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8);
extern unsigned char intelnops[], k8nops[], k7nops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
static struct nop {
	int cpuid;
	unsigned char **noptable;
} noptypes[] = {
	{ X86_FEATURE_K8, k8_nops },
	{ X86_FEATURE_K7, k7_nops },
	{ -1, NULL }
};
/* Replace instructions with better alternatives for this CPU type.

   This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */
void apply_alternatives(void *start, void *end)
{
	struct alt_instr *a;
	int diff, i, k;
	unsigned char **noptable = intel_nops;

	for (i = 0; noptypes[i].cpuid >= 0; i++) {
		if (boot_cpu_has(noptypes[i].cpuid)) {
			noptable = noptypes[i].noptable;
			break;
		}
	}
	for (a = start; (void *)a < end; a++) {
		if (!boot_cpu_has(a->cpuid))
			continue;
		BUG_ON(a->replacementlen > a->instrlen);
		memcpy(a->instr, a->replacement, a->replacementlen);
		diff = a->instrlen - a->replacementlen;
		/* Pad the rest with nops */
		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
			k = diff;
			if (k > ASM_NOP_MAX)
				k = ASM_NOP_MAX;
			memcpy(a->instr + i, noptable[k], k);
		}
	}
}
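/*
 * Example of the nop padding above (illustrative sizes): replacing a 9-byte
 * original sequence with a 2-byte alternative leaves diff = 7, which is
 * filled with the single 7-byte nop at noptable[7] (or, if ASM_NOP_MAX were
 * smaller, with several shorter nops back to back).
 */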
void __init alternative_instructions(void)
{
	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
	apply_alternatives(__alt_instructions, __alt_instructions_end);
}

static char * __init machine_specific_memory_setup(void);

#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
static void set_mca_bus(int x) { }
#endif
/*
 * Determine if we were loaded by an EFI loader.  If so, then we have also been
 * passed the efi memmap, systab, etc., so we should use these data structures
 * for initialization.  Note, the efi init code path is determined by the
 * global efi_enabled. This allows the same kernel image to be used on existing
 * systems (with a traditional BIOS) as well as on EFI systems.
 */
void __init setup_arch(char **cmdline_p)
{
	unsigned long max_low_pfn;

	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
	pre_setup_arch_hook();
	early_cpu_init();

	/*
	 * FIXME: This isn't an official loader_type right
	 * now but does currently work with elilo.
	 * If we were configured as an EFI kernel, check to make
	 * sure that we were loaded correctly from elilo and that
	 * the system table is valid.  If not, then initialize normally.
	 */
#ifdef CONFIG_EFI
	if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
		efi_enabled = 1;
#endif

	ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
	drive_info = DRIVE_INFO;
	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	apm_info.bios = APM_BIOS_INFO;
	ist_info = IST_INFO;
	saved_videomode = VIDEO_MODE;
	if( SYS_DESC_TABLE.length != 0 ) {
		set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
		machine_id = SYS_DESC_TABLE.table[0];
		machine_submodel_id = SYS_DESC_TABLE.table[1];
		BIOS_revision = SYS_DESC_TABLE.table[2];
	}
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	print_memory_map(machine_specific_memory_setup());

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	init_mm.brk = init_pg_tables_end + PAGE_OFFSET;

	code_resource.start = virt_to_phys(_text);
	code_resource.end = virt_to_phys(_etext)-1;
	data_resource.start = virt_to_phys(_etext);
	data_resource.end = virt_to_phys(_edata)-1;

	parse_cmdline_early(cmdline_p);
	max_low_pfn = setup_memory();

	/*
	 * NOTE: before this point _nobody_ is allowed to allocate
	 * any memory using the bootmem allocator.  Although the
	 * allocator is now initialised only the first 8Mb of the kernel
	 * virtual address space has been mapped. All allocations before
	 * paging_init() has completed must use the alloc_bootmem_low_pages()
	 * variant (which allocates DMA'able memory) and care must be taken
	 * not to exceed the 8Mb limit.
	 */

#ifdef CONFIG_SMP
	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
	paging_init();
	remapped_pgdat_init();

	/*
	 * NOTE: at this point the bootmem allocator is fully available.
	 */
#ifdef CONFIG_EARLY_PRINTK
	{
		char *s = strstr(*cmdline_p, "earlyprintk=");
		if (s) {
			extern void setup_early_printk(char *);

			setup_early_printk(s);
			printk("early console enabled\n");
		}
	}
#endif
#ifdef CONFIG_X86_GENERICARCH
	generic_apic_probe(*cmdline_p);
#endif

	/*
	 * Parse the ACPI tables for possible boot-time SMP configuration.
	 */
	acpi_boot_table_init();

#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
	if (def_to_bigsmp)
		printk(KERN_WARNING "More than 8 CPUs detected and "
			"CONFIG_X86_PC cannot handle it.\nUse "
			"CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
#endif
#ifdef CONFIG_X86_LOCAL_APIC
	if (smp_found_config)
		get_smp_config();
#endif

#if defined(CONFIG_VGA_CONSOLE)
	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
		conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
}

#include "setup_arch_post.h"

/*
 * Local Variables:
 * mode:c
 * c-file-style:"k&r"
 * c-basic-offset:8
 * End:
 */