2 * linux/arch/i386/kernel/setup.c
4 * Copyright (C) 1995 Linus Torvalds
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
23 * This file handles the architecture-dependent parts of initialization
26 #include <linux/config.h>
27 #include <linux/sched.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <video/edid.h>
48 #include <asm/mpspec.h>
49 #include <asm/setup.h>
50 #include <asm/arch_hooks.h>
51 #include <asm/sections.h>
52 #include <asm/io_apic.h>
55 #include "setup_arch_pre.h"
56 #include <bios_ebda.h>
58 /* This value is set up by the early boot code to point to the value
59 immediately after the boot time page tables. It contains a *physical*
60 address, and must not be in the .bss segment! */
61 unsigned long init_pg_tables_end __initdata = ~0UL;
/*
 * PSE-disable flag, initialised to 0.
 * NOTE(review): no visible line in this listing writes it; presumably the
 * mem=nopentium branch of parse_cmdline_early() sets it -- confirm.
 */
63 int disable_pse __initdata = 0;
/*
 * NOTE(review): the definition of efi_enabled is not part of this listing
 * (the lines between embedded numbers 63 and 71 are absent); only the
 * export below is visible.
 */
71 EXPORT_SYMBOL(efi_enabled);
/*
 * Boot-time CPU descriptions. Both objects start from the same positional
 * initialiser; setup_arch() begins by copying new_cpu_data (init-only data)
 * over boot_cpu_data with memcpy().
 */
74 /* cpu data as detected by the assembly code in head.S */
75 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
76 /* common cpu data for all cpus */
77 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
78 EXPORT_SYMBOL(boot_cpu_data);
/*
 * NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably a saved copy of CR4 feature bits -- confirm against users.
 */
80 unsigned long mmu_cr4_features;
/*
 * ACPI enable state. Two alternative definitions of acpi_disabled are
 * visible: 0 under CONFIG_ACPI_INTERPRETER and 1 otherwise.
 * NOTE(review): the #else / #endif lines that separate and close these
 * conditionals (embedded lines 84, 86, 92) are missing from this listing.
 */
82 #ifdef CONFIG_ACPI_INTERPRETER
83 int acpi_disabled = 0;
85 int acpi_disabled = 1;
87 EXPORT_SYMBOL(acpi_disabled);
89 #ifdef CONFIG_ACPI_BOOT
/* Init-only flag, starts at 0; the acpi=force handling is where it would
   be set, but that branch body is not visible in this listing. */
90 int __initdata acpi_force = 0;
/* Written by the acpi_sci= options in parse_cmdline_early()
   (trigger and polarity fields). */
91 extern acpi_interrupt_flags acpi_sci_flags;
94 /* for MCA, but anyone else can use it if they want */
95 unsigned int machine_id;
97 EXPORT_SYMBOL(machine_id);
99 unsigned int machine_submodel_id;
100 unsigned int BIOS_revision;
101 unsigned int mca_pentium_flag;
103 /* For PCI or other memory-mapped resources */
/* Default start address; register_memory() overwrites it with the start of
   the chosen e820 gap rounded up to a megabyte. */
104 unsigned long pci_mem_start = 0x10000000;
106 EXPORT_SYMBOL(pci_mem_start);
/* NOTE(review): the variable described by the next comment (embedded line
   110) is missing from this listing. */
109 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
112 /* user-defined highmem size */
/*
 * Size in pages. The initialiser -1 converts to the largest unsigned value
 * and acts as the not-specified sentinel tested by find_max_low_pfn();
 * the highmem= option stores (bytes >> PAGE_SHIFT) here.
 */
113 static unsigned int highmem_pages = -1;
/*
 * Global storage for firmware-provided information blocks.
 * NOTE(review): nothing in the visible listing fills these in; presumably
 * setup_arch() copies them out of boot_params -- confirm. The #endif lines
 * closing the two conditional exports and the closing brace of
 * struct sys_desc_table_struct are missing from this listing.
 */
118 struct drive_info_struct { char dummy[32]; } drive_info;
119 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
120 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
121 EXPORT_SYMBOL(drive_info);
123 struct screen_info screen_info;
125 EXPORT_SYMBOL(screen_info);
127 struct apm_info apm_info;
128 EXPORT_SYMBOL(apm_info);
/* A length field followed by a zero-length trailing array, i.e. a
   variable-sized table whose payload follows the header. */
129 struct sys_desc_table_struct {
130 unsigned short length;
131 unsigned char table[0];
133 struct edid_info edid_info;
134 struct ist_info ist_info;
135 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
136 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
137 EXPORT_SYMBOL(ist_info);
/* Helpers and data defined in other files. */
141 extern void early_cpu_init(void);
142 extern void dmi_scan_machine(void);
143 extern void generic_apic_probe(char *);
144 extern int root_mountflags;
146 unsigned long saved_videomode;
/*
 * Bit layout of a ramdisk flags word: low 11 bits (0x07FF) give the image
 * start, bit 15 (0x8000) is the prompt flag, bit 14 (0x4000) the load flag.
 * NOTE(review): the code applying these masks is not in the visible listing.
 */
148 #define RAMDISK_IMAGE_START_MASK 0x07FF
149 #define RAMDISK_PROMPT_FLAG 0x8000
150 #define RAMDISK_LOAD_FLAG 0x4000
/* Destination buffer of parse_cmdline_early(), which hands its address back
   to the caller through *cmdline_p. */
152 static char command_line[COMMAND_LINE_SIZE];
/* Init-only byte buffer of PARAM_SIZE bytes. */
154 unsigned char __initdata boot_params[PARAM_SIZE];
/*
 * Busy memory resources describing the kernel image. register_memory()
 * passes both to the iomem initialisation routines, which request them
 * as children of the matching RAM resource.
 * NOTE(review): the .start / .end initialisers and closing braces are
 * missing from this listing.
 */
156 static struct resource data_resource = {
157 .name = "Kernel data",
160 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
163 static struct resource code_resource = {
164 .name = "Kernel code",
167 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/*
 * Read-only, busy memory resources for the system and extension ROMs.
 * probe_roms() always requests the system ROM and requests the extension
 * ROM only when its signature and checksum are accepted.
 * NOTE(review): the .start / .end initialisers and closing braces are
 * missing from this listing.
 */
170 static struct resource system_rom_resource = {
171 .name = "System ROM",
174 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
177 static struct resource extension_rom_resource = {
178 .name = "Extension ROM",
181 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/*
 * Table of six slots for adapter ROMs discovered by probe_roms(), which
 * overwrites .start and .end of a slot before requesting it. Slot 0 .start
 * also serves as the initial upper bound of the video ROM scan.
 * NOTE(review): the .start / .end initialisers and the brace lines between
 * entries are missing from this listing.
 */
184 static struct resource adapter_rom_resources[] = { {
185 .name = "Adapter ROM",
188 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190 .name = "Adapter ROM",
193 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
195 .name = "Adapter ROM",
198 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
200 .name = "Adapter ROM",
203 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
205 .name = "Adapter ROM",
208 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
210 .name = "Adapter ROM",
213 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
/* Number of slots in the table above. */
216 #define ADAPTER_ROM_RESOURCES \
217 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
/*
 * Video ROM (read-only) and video RAM resources. probe_roms() narrows the
 * video ROM range to the ROM it actually finds; register_memory() requests
 * the video RAM area unconditionally.
 * NOTE(review): the .name of the video ROM entry, all .start / .end
 * initialisers and the closing braces are missing from this listing.
 */
219 static struct resource video_rom_resource = {
223 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
226 static struct resource video_ram_resource = {
227 .name = "Video RAM area",
230 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
/*
 * Table of busy I/O port ranges that register_memory() requests from
 * ioport_resource, one request per entry.
 * NOTE(review): nine entries are visible by their .flags lines, but all
 * .start / .end values and every .name except one are missing from this
 * listing.
 */
233 static struct resource standard_io_resources[] = { {
237 .flags = IORESOURCE_BUSY | IORESOURCE_IO
242 .flags = IORESOURCE_BUSY | IORESOURCE_IO
247 .flags = IORESOURCE_BUSY | IORESOURCE_IO
252 .flags = IORESOURCE_BUSY | IORESOURCE_IO
257 .flags = IORESOURCE_BUSY | IORESOURCE_IO
259 .name = "dma page reg",
262 .flags = IORESOURCE_BUSY | IORESOURCE_IO
267 .flags = IORESOURCE_BUSY | IORESOURCE_IO
272 .flags = IORESOURCE_BUSY | IORESOURCE_IO
277 .flags = IORESOURCE_BUSY | IORESOURCE_IO
/* Number of entries in the table above. */
280 #define STANDARD_IO_RESOURCES \
281 (sizeof standard_io_resources / sizeof standard_io_resources[0])
/* Non-zero when the unsigned short stored at address x equals 0xaa55;
   probe_roms() requires this before treating a location as a ROM. */
283 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * romchecksum - checksum a ROM image.
 * @rom:    first byte of the image
 * @length: number of bytes to cover
 *
 * Walks every byte in [rom, rom + length) with an unsigned char
 * accumulator, so any sum wraps modulo 256. Callers in probe_roms() treat
 * a non-zero return as checksum okay.
 * NOTE(review): the loop body and the return statement are missing from
 * this listing; presumably the bytes are added into sum and the result is
 * whether sum is zero -- confirm.
 */
285 static int __init romchecksum(unsigned char *rom, unsigned long length)
287 unsigned char *p, sum = 0;
289 for (p = rom; p < rom + length; p++)
/*
 * probe_roms - locate the video, system, extension and adapter ROMs and
 * register them with iomem_resource.
 *
 * Three phases are visible: a scan for the video ROM, an unconditional
 * request of the system ROM plus a checked request of the extension ROM,
 * and a scan for adapter ROMs between the end of the video ROM and the
 * lowest ROM found above it.
 * NOTE(review): declarations of rom and i, braces, and the statements under
 * several if lines (embedded lines 305, 317, 342, 349 and others) are
 * missing from this listing; presumably they are continue / break.
 */
294 static void __init probe_roms(void)
296 unsigned long start, length, upper;
/* Phase 1: step through 2K boundaries from the default video ROM start up
   to the start of the first adapter ROM slot. */
301 upper = adapter_rom_resources[0].start;
302 for (start = video_rom_resource.start; start < upper; start += 2048) {
303 rom = isa_bus_to_virt(start);
304 if (!romsignature(rom))
307 video_rom_resource.start = start;
309 /* 0 < length <= 0x7f * 512, historically */
/* Byte 2 of the image holds the length in 512-byte units. */
310 length = rom[2] * 512;
312 /* if checksum okay, trust length byte */
313 if (length && romchecksum(rom, length))
314 video_rom_resource.end = start + length - 1;
316 request_resource(&iomem_resource, &video_rom_resource);
/* First 2K boundary at or after the byte following the video ROM. */
320 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
/* Phase 2: the system ROM bounds the adapter scan from above ... */
325 request_resource(&iomem_resource, &system_rom_resource);
326 upper = system_rom_resource.start;
328 /* check for extension rom (ignore length byte!) */
329 rom = isa_bus_to_virt(extension_rom_resource.start);
330 if (romsignature(rom)) {
331 length = extension_rom_resource.end - extension_rom_resource.start + 1;
332 if (romchecksum(rom, length)) {
333 request_resource(&iomem_resource, &extension_rom_resource);
/* ... unless a valid extension ROM sits below it. */
334 upper = extension_rom_resource.start;
338 /* check for adapter roms on 2k boundaries */
339 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
340 rom = isa_bus_to_virt(start);
341 if (!romsignature(rom))
344 /* 0 < length <= 0x7f * 512, historically */
345 length = rom[2] * 512;
347 /* but accept any length that fits if checksum okay */
348 if (!length || start + length > upper || !romchecksum(rom, length))
351 adapter_rom_resources[i].start = start;
352 adapter_rom_resources[i].end = start + length - 1;
353 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* Round the last byte of this ROM down to a 2K boundary; the loop
   increment then moves on to the next 2K slot. Also advances i. */
355 start = adapter_rom_resources[i++].end & ~2047UL;
/*
 * limit_regions - trim the memory map so usable RAM ends at @size.
 * @size: byte address at which usable memory must stop
 *
 * Two loops are visible. The first walks the EFI memmap: for an
 * EFI_CONVENTIONAL_MEMORY entry whose end (phys_addr + num_pages << 12)
 * reaches @size, the page count is reduced by the excess rounded up to
 * whole pages and nr_map is cut to end at that entry. The second walks the
 * e820 map: for an E820_RAM entry whose end reaches @size, the entry size
 * is reduced by the excess.
 * NOTE(review): the declaration of i, the test selecting the EFI or e820
 * path, the statements following each truncation (presumably the e820
 * nr_map update and a return) and all closing braces are missing from this
 * listing. The EFI end computation shifts by a literal 12 while the
 * rounding uses PAGE_SHIFT.
 */
359 static void __init limit_regions(unsigned long long size)
361 unsigned long long current_addr = 0;
365 for (i = 0; i < memmap.nr_map; i++) {
366 current_addr = memmap.map[i].phys_addr +
367 (memmap.map[i].num_pages << 12);
368 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
369 if (current_addr >= size) {
370 memmap.map[i].num_pages -=
371 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
372 memmap.nr_map = i + 1;
378 for (i = 0; i < e820.nr_map; i++) {
379 if (e820.map[i].type == E820_RAM) {
380 current_addr = e820.map[i].addr + e820.map[i].size;
381 if (current_addr >= size) {
382 e820.map[i].size -= current_addr-size;
/*
 * add_memory_region - record one region in the e820 map.
 * @start: first byte address of the region
 * @size:  length in bytes
 * @type:  e820 type code (callers pass E820_RAM, E820_ACPI, E820_RESERVED
 *         or a type read from the BIOS map)
 *
 * Stores the three values into e820.map[x].
 * NOTE(review): the computation of x, the condition guarding the error
 * message (presumably x reaching E820MAX, followed by a return) and the
 * nr_map increment are missing from this listing.
 */
390 static void __init add_memory_region(unsigned long long start,
391 unsigned long long size, int type)
399 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
403 e820.map[x].addr = start;
404 e820.map[x].size = size;
405 e820.map[x].type = type;
408 } /* add_memory_region */
/*
 * print_memory_map - print every e820 entry via printk.
 * @who: label printed in front of each line (parse_cmdline_early passes
 *       the literal user)
 *
 * Each line shows a start and an end address as 16 hex digits followed by
 * a description of the entry type; unknown types are printed numerically.
 * NOTE(review): the declaration of i, the first address argument of the
 * printk, the case labels for the reserved / ACPI data / ACPI NVS strings
 * and the break statements are missing from this listing.
 */
412 static void __init print_memory_map(char *who)
416 for (i = 0; i < e820.nr_map; i++) {
417 printk(" %s: %016Lx - %016Lx ", who,
419 e820.map[i].addr + e820.map[i].size);
420 switch (e820.map[i].type) {
421 case E820_RAM: printk("(usable)\n");
424 printk("(reserved)\n");
427 printk("(ACPI data)\n");
430 printk("(ACPI NVS)\n");
432 default: printk("type %lu\n", e820.map[i].type);
439 * Sanitize the BIOS e820 map.
441 * Some e820 responses include overlapping entries. The following
442 * replaces the original e820 map with a new one, removing overlaps.
/*
 * Scratch data for sanitize_e820_map(), all init-only.
 * A change point is one boundary (start or end) of one BIOS entry, hence
 * the 2*E820MAX capacity. change_point[] holds pointers into
 * change_point_list[] so that sorting swaps pointers rather than structs.
 * overlap_list[] tracks the entries covering the current address and
 * new_bios[] receives the rebuilt map.
 * NOTE(review): the closing brace of struct change_member is missing from
 * this listing.
 */
445 struct change_member {
446 struct e820entry *pbios; /* pointer to original bios entry */
447 unsigned long long addr; /* address for this change point */
449 static struct change_member change_point_list[2*E820MAX] __initdata;
450 static struct change_member *change_point[2*E820MAX] __initdata;
451 static struct e820entry *overlap_list[E820MAX] __initdata;
452 static struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map - rebuild a BIOS e820 map without overlapping entries.
 * @biosmap: array of entries; overwritten in place with the new map
 * @pnr_map: pointer to the entry count (a char)
 *
 * Visible steps:
 *   1. reject maps in which addr + size wraps around;
 *   2. record a start and an end change point for every non-empty entry;
 *   3. sort the change points by address with a repeated adjacent-swap pass
 *      (at equal addresses a start point is moved before an end point);
 *   4. sweep the sorted points while maintaining the set of entries that
 *      cover the current address, taking the numerically largest type as
 *      the effective type, and emitting a new entry whenever the effective
 *      type changes;
 *   5. copy the new map over @biosmap.
 * At most E820MAX new entries are produced; the sweep stops when that
 * limit is reached.
 * NOTE(review): several declarations, the reads and writes of *pnr_map,
 * every return statement, loop-control assignments such as the resets of
 * chgidx, still_changing and current_type, and most braces are missing
 * from this listing, so the return values cannot be stated from here.
 */
454 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
456 struct change_member *change_tmp;
457 unsigned long current_type, last_type;
458 unsigned long long last_addr;
459 int chgidx, still_changing;
462 int old_nr, new_nr, chg_nr;
466 Visually we're performing the following (1,2,3,4 = memory types)...
468 Sample memory map (w/overlaps):
469 ____22__________________
470 ______________________4_
471 ____1111________________
472 _44_____________________
473 11111111________________
474 ____________________33__
475 ___________44___________
476 __________33333_________
477 ______________22________
478 ___________________2222_
479 _________111111111______
480 _____________________11_
481 _________________4______
483 Sanitized equivalent (no overlap):
484 1_______________________
485 _44_____________________
486 ___1____________________
487 ____22__________________
488 ______11________________
489 _________1______________
490 __________3_____________
491 ___________44___________
492 _____________33_________
493 _______________2________
494 ________________1_______
495 _________________4______
496 ___________________2____
497 ____________________33__
498 ______________________4_
501 /* if there's only one memory region, don't bother */
507 /* bail out if we find any unreasonable addresses in bios map */
508 for (i=0; i<old_nr; i++)
509 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
512 /* create pointers for initial change-point information (for sorting) */
513 for (i=0; i < 2*old_nr; i++)
514 change_point[i] = &change_point_list[i];
516 /* record all known change-points (starting and ending addresses),
517 omitting those that are for empty memory regions */
519 for (i=0; i < old_nr; i++) {
520 if (biosmap[i].size != 0) {
521 change_point[chgidx]->addr = biosmap[i].addr;
522 change_point[chgidx++]->pbios = &biosmap[i];
523 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
524 change_point[chgidx++]->pbios = &biosmap[i];
527 chg_nr = chgidx; /* true number of change-points */
529 /* sort change-point list by memory addresses (low -> high) */
531 while (still_changing) {
533 for (i=1; i < chg_nr; i++) {
534 /* if <current_addr> < <previous_addr>, swap */
535 /* or, if current=<start_addr> & last=<end_addr>, swap */
536 if ((change_point[i]->addr < change_point[i-1]->addr) ||
537 ((change_point[i]->addr == change_point[i-1]->addr) &&
538 (change_point[i]->addr == change_point[i]->pbios->addr) &&
539 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
542 change_tmp = change_point[i];
543 change_point[i] = change_point[i-1];
544 change_point[i-1] = change_tmp;
550 /* create a new bios memory map, removing overlaps */
551 overlap_entries=0; /* number of entries in the overlap table */
552 new_bios_entry=0; /* index for creating new bios map entries */
553 last_type = 0; /* start with undefined memory type */
554 last_addr = 0; /* start with 0 as last starting address */
555 /* loop through change-points, determining the effect on the new bios map */
556 for (chgidx=0; chgidx < chg_nr; chgidx++)
558 /* keep track of all overlapping bios entries */
/* A change point whose address equals its entry start is a start point. */
559 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
561 /* add map entry to overlap list (> 1 entry implies an overlap) */
562 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
566 /* remove entry from list (order independent, so swap with last) */
567 for (i=0; i<overlap_entries; i++)
569 if (overlap_list[i] == change_point[chgidx]->pbios)
570 overlap_list[i] = overlap_list[overlap_entries-1];
574 /* if there are overlapping entries, decide which "type" to use */
575 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
577 for (i=0; i<overlap_entries; i++)
578 if (overlap_list[i]->type > current_type)
579 current_type = overlap_list[i]->type;
580 /* continue building up new bios map based on this information */
581 if (current_type != last_type) {
/* Close the entry that was open, if any, at this address. */
582 if (last_type != 0) {
583 new_bios[new_bios_entry].size =
584 change_point[chgidx]->addr - last_addr;
585 /* move forward only if the new size was non-zero */
586 if (new_bios[new_bios_entry].size != 0)
587 if (++new_bios_entry >= E820MAX)
588 break; /* no more space left for new bios entries */
/* Open a new entry when some region covers this address. */
590 if (current_type != 0) {
591 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
592 new_bios[new_bios_entry].type = current_type;
593 last_addr=change_point[chgidx]->addr;
595 last_type = current_type;
598 new_nr = new_bios_entry; /* retain count for new bios entries */
600 /* copy new bios mapping into original location */
601 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
608 * Copy the BIOS e820 map into a safe place.
610 * Sanity-check it while we're at it..
612 * If we're lucky and live on a modern system, the setup code
613 * will have given us a memory map that we can use to properly
614 * set up memory. If we aren't, we'll fake a memory map.
616 * We check to see that the memory map contains at least 2 elements
617 * before we'll use it, because the detection code in setup.S may
618 * not be perfect and most every PC known to man has two memory
619 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
620 * thinkpad 560x, for example, does not cooperate with the memory
621 * detection code.)
/*
 * copy_e820_map - copy a BIOS-provided map into the kernel e820 table.
 * @biosmap: first entry of the BIOS map
 * @nr_map:  number of entries
 *
 * Iterates with a do/while that advances biosmap and pre-decrements nr_map.
 * Each entry is passed to add_memory_region(). For an E820_RAM entry that
 * intersects the range 0xA0000 - 0x100000, the part below 0xA0000 is added
 * as its own region.
 * NOTE(review): the early-exit tests and their return values, the opening
 * do, the statements after the end <= 0x100000 test (presumably continue,
 * followed by clamping start up to 0x100000 and recomputing size) and the
 * final return are missing from this listing.
 */
623 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
625 /* Only one memory region (or negative)? Ignore it */
630 unsigned long long start = biosmap->addr;
631 unsigned long long size = biosmap->size;
632 unsigned long long end = start + size;
633 unsigned long type = biosmap->type;
635 /* Overflow in 64 bits? Ignore the memory map. */
640 * Some BIOSes claim RAM in the 640k - 1M region.
641 * Not right. Fix it up.
643 if (type == E820_RAM) {
644 if (start < 0x100000ULL && end > 0xA0000ULL) {
645 if (start < 0xA0000ULL)
646 add_memory_region(start, 0xA0000ULL-start, type);
647 if (end <= 0x100000ULL)
653 add_memory_region(start, size, type);
654 } while (biosmap++,--nr_map);
/*
 * Two alternative definitions of copy_edd() are visible. The first, under
 * the CONFIG_EDD / CONFIG_EDD_MODULE conditional, copies the MBR signature
 * array and the EDD info array into the edd structure with memcpy() sized
 * by the destination, then stores the two element counts. The second has
 * no visible body.
 * NOTE(review): the #else / #endif lines, the braces, and whatever the
 * CONFIG_EDD_MODULE conditional guards (presumably an export of edd) are
 * missing from this listing.
 */
658 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
660 #ifdef CONFIG_EDD_MODULE
664 * copy_edd() - Copy the BIOS EDD information
665 * from boot_params into a safe place.
668 static inline void copy_edd(void)
670 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
671 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
672 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
673 edd.edd_info_nr = EDD_NR;
676 static inline void copy_edd(void)
/*
 * Fixed low-memory size of 0x9f000 bytes (636 KiB), used instead of any
 * value read from the BIOS. Not referenced in the visible part of this
 * file.
 */
682 * Do NOT EVER look at the BIOS memory size location.
683 * It does not work on many machines.
685 #define LOWMEMSIZE() (0x9f000)
/*
 * parse_cmdline_early - act on boot options that must be known before the
 * memory map is final, and hand back the parsed command line.
 * @cmdline_p: receives the address of command_line
 *
 * Reads from saved_command_line (forced to be NUL-terminated at
 * COMMAND_LINE_SIZE-1) and writes into command_line. Options are
 * recognised with memcmp() on a fixed-length prefix, so only the leading
 * characters of a word are compared.
 *
 * Visible option handling:
 *   mem=nopentium        clears X86_FEATURE_PSE in boot_cpu_data
 *   mem=SIZE             limit_regions(SIZE), SIZE parsed by memparse()
 *   memmap=exactmap      body not visible
 *   memmap=SIZE<c>START  adds a region at START of SIZE bytes; the type is
 *                        E820_ACPI for c = # and E820_RESERVED for c = $.
 *                        The first branch adds E820_RAM; its condition is
 *                        not visible (presumably c = @). With none of
 *                        these, limit_regions(SIZE) is called.
 *   noexec=              passed to noexec_setup()
 *   maxcpus=N            (CONFIG_X86_SMP) parsed with simple_strtoul()
 *   acpi=off, acpi=force, acpi=strict, acpi=ht, pci=noacpi, acpi=noirq
 *                        (CONFIG_ACPI_BOOT) bodies not visible
 *   acpi_sci=edge/level  sets acpi_sci_flags.trigger to 1 / 3
 *   acpi_sci=high/low    sets acpi_sci_flags.polarity to 1 / 3
 *   acpi_skip_timer_override  (CONFIG_X86_IO_APIC) sets the flag to 1
 *   noapic               (CONFIG_X86_LOCAL_APIC) disable_ioapic_setup()
 *   highmem=SIZE         highmem_pages = SIZE >> PAGE_SHIFT
 *   vmalloc=SIZE         __VMALLOC_RESERVE = SIZE
 *
 * Finally *cmdline_p is set and the user-defined map is printed.
 * NOTE(review): the main loop header, the declarations of len and
 * userdef, the character copy, the condition guarding the final printk
 * and most braces are missing from this listing. Two comment openers in
 * the mem= and memmap= branches have lost their closing lines.
 */
687 static void __init parse_cmdline_early (char ** cmdline_p)
689 char c = ' ', *to = command_line, *from = saved_command_line;
693 /* Save unparsed command line copy for /proc/cmdline */
694 saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
700 * "mem=nopentium" disables the 4MB page tables.
701 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
702 * to <mem>, overriding the bios size.
703 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
704 * <start> to <start>+<mem>, overriding the bios size.
706 * HPA tells me bootloaders need to parse mem=, so no new
707 * option should be mem= [also see Documentation/i386/boot.txt]
709 if (!memcmp(from, "mem=", 4)) {
710 if (to != command_line)
712 if (!memcmp(from+4, "nopentium", 9)) {
714 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
717 /* If the user specifies memory size, we
718 * limit the BIOS-provided memory map to
719 * that size. exactmap can be used to specify
720 * the exact map. mem=number can be used to
721 * trim the existing memory map.
723 unsigned long long mem_size;
725 mem_size = memparse(from+4, &from);
726 limit_regions(mem_size);
731 else if (!memcmp(from, "memmap=", 7)) {
732 if (to != command_line)
734 if (!memcmp(from+7, "exactmap", 8)) {
739 /* If the user specifies memory size, we
740 * limit the BIOS-provided memory map to
741 * that size. exactmap can be used to specify
742 * the exact map. mem=number can be used to
743 * trim the existing memory map.
745 unsigned long long start_at, mem_size;
747 mem_size = memparse(from+7, &from);
749 start_at = memparse(from+1, &from);
750 add_memory_region(start_at, mem_size, E820_RAM);
751 } else if (*from == '#') {
752 start_at = memparse(from+1, &from);
753 add_memory_region(start_at, mem_size, E820_ACPI);
754 } else if (*from == '$') {
755 start_at = memparse(from+1, &from);
756 add_memory_region(start_at, mem_size, E820_RESERVED);
758 limit_regions(mem_size);
764 else if (!memcmp(from, "noexec=", 7))
765 noexec_setup(from + 7);
768 #ifdef CONFIG_X86_SMP
770 * If the BIOS enumerates physical processors before logical,
771 * maxcpus=N at enumeration-time can be used to disable HT.
773 else if (!memcmp(from, "maxcpus=", 8)) {
774 extern unsigned int maxcpus;
776 maxcpus = simple_strtoul(from + 8, NULL, 0);
780 #ifdef CONFIG_ACPI_BOOT
781 /* "acpi=off" disables both ACPI table parsing and interpreter */
782 else if (!memcmp(from, "acpi=off", 8)) {
786 /* acpi=force to over-ride black-list */
787 else if (!memcmp(from, "acpi=force", 10)) {
793 /* acpi=strict disables out-of-spec workarounds */
794 else if (!memcmp(from, "acpi=strict", 11)) {
798 /* Limit ACPI just to boot-time to enable HT */
799 else if (!memcmp(from, "acpi=ht", 7)) {
805 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
806 else if (!memcmp(from, "pci=noacpi", 10)) {
809 /* "acpi=noirq" disables ACPI interrupt routing */
810 else if (!memcmp(from, "acpi=noirq", 10)) {
814 else if (!memcmp(from, "acpi_sci=edge", 13))
815 acpi_sci_flags.trigger = 1;
817 else if (!memcmp(from, "acpi_sci=level", 14))
818 acpi_sci_flags.trigger = 3;
820 else if (!memcmp(from, "acpi_sci=high", 13))
821 acpi_sci_flags.polarity = 1;
823 else if (!memcmp(from, "acpi_sci=low", 12))
824 acpi_sci_flags.polarity = 3;
826 #ifdef CONFIG_X86_IO_APIC
827 else if (!memcmp(from, "acpi_skip_timer_override", 24))
828 acpi_skip_timer_override = 1;
831 #ifdef CONFIG_X86_LOCAL_APIC
832 /* disable IO-APIC */
833 else if (!memcmp(from, "noapic", 6))
834 disable_ioapic_setup();
835 #endif /* CONFIG_X86_LOCAL_APIC */
836 #endif /* CONFIG_ACPI_BOOT */
839 * highmem=size forces highmem to be exactly 'size' bytes.
840 * This works even on boxes that have no highmem otherwise.
841 * This also works to reduce highmem size on bigger boxes.
843 else if (!memcmp(from, "highmem=", 8))
844 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
847 * vmalloc=size forces the vmalloc area to be exactly 'size'
848 * bytes. This can be used to increase (or decrease) the
849 * vmalloc area - the default is 128m.
851 else if (!memcmp(from, "vmalloc=", 8))
852 __VMALLOC_RESERVE = memparse(from+8, &from);
858 if (COMMAND_LINE_SIZE <= ++len)
863 *cmdline_p = command_line;
865 printk(KERN_INFO "user-defined physical RAM map:\n");
866 print_memory_map("user");
/*
 * efi_find_max_pfn - per-range callback handed to efi_memmap_walk() by
 * find_max_pfn().
 * @start: start of the range (unused in the visible lines)
 * @end:   end of the range; the frame number is derived from end - 1
 * @arg:   pointer to the unsigned long maximum being accumulated
 *
 * NOTE(review): the return type, the comparison that updates *max_pfn
 * and the return statement are missing from this listing.
 */
871 * Callback for efi_memory_walk.
874 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
876 unsigned long *max_pfn = arg, pfn;
879 pfn = PFN_UP(end -1);
/*
 * find_max_pfn - determine the highest usable page frame number.
 *
 * Either walks the EFI memmap with efi_find_max_pfn() or scans the e820
 * map. In the e820 scan only E820_RAM entries are considered; each entry
 * start is rounded up and its end rounded down to whole page frames.
 * NOTE(review): the initialisation of max_pfn, the test that selects the
 * EFI path, and the comparisons that update max_pfn from start and end
 * are missing from this listing.
 */
888 * Find the highest page frame number we have available
890 void __init find_max_pfn(void)
896 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
900 for (i = 0; i < e820.nr_map; i++) {
901 unsigned long start, end;
903 if (e820.map[i].type != E820_RAM)
905 start = PFN_UP(e820.map[i].addr);
906 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
/*
 * find_max_low_pfn - split memory into low and high parts.
 *
 * Starts from max_pfn. When that exceeds MAXMEM_PFN:
 *   - an unspecified highmem size (the -1 sentinel) defaults to everything
 *     above MAXMEM_PFN;
 *   - a smaller user value lowers the global max_pfn;
 *   - a larger user value is reported as unavailable;
 *   - low memory is capped at MAXMEM_PFN;
 *   - without CONFIG_HIGHMEM, max_pfn is cut to MAXMEM_PFN with warnings;
 *     with CONFIG_HIGHMEM but without CONFIG_X86_PAE, max_pfn is cut to
 *     MAX_NONPAE_PFN.
 * Otherwise a user-specified highmem size is validated and carved out of
 * low memory (max_low_pfn -= highmem_pages); it is rejected when it is at
 * least max_pfn or would leave less than 64MB of low memory. On kernels
 * without CONFIG_HIGHMEM it is ignored with an error message.
 * The comparisons of the unsigned highmem_pages with -1 rely on the usual
 * integer conversions.
 * NOTE(review): the else joining the two halves, the statements that reset
 * highmem_pages after each error message, the argument of the first
 * warning printk, the final return and most braces are missing from this
 * listing.
 */
915 * Determine low and high memory ranges:
917 unsigned long __init find_max_low_pfn(void)
919 unsigned long max_low_pfn;
921 max_low_pfn = max_pfn;
922 if (max_low_pfn > MAXMEM_PFN) {
923 if (highmem_pages == -1)
924 highmem_pages = max_pfn - MAXMEM_PFN;
925 if (highmem_pages + MAXMEM_PFN < max_pfn)
926 max_pfn = MAXMEM_PFN + highmem_pages;
927 if (highmem_pages + MAXMEM_PFN > max_pfn) {
928 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
931 max_low_pfn = MAXMEM_PFN;
932 #ifndef CONFIG_HIGHMEM
933 /* Maximum memory usable is what is directly addressable */
934 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
936 if (max_pfn > MAX_NONPAE_PFN)
937 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
939 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
940 max_pfn = MAXMEM_PFN;
941 #else /* !CONFIG_HIGHMEM */
942 #ifndef CONFIG_X86_PAE
943 if (max_pfn > MAX_NONPAE_PFN) {
944 max_pfn = MAX_NONPAE_PFN;
945 printk(KERN_WARNING "Warning only 4GB will be used.\n");
946 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
948 #endif /* !CONFIG_X86_PAE */
949 #endif /* !CONFIG_HIGHMEM */
951 if (highmem_pages == -1)
953 #ifdef CONFIG_HIGHMEM
954 if (highmem_pages >= max_pfn) {
955 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
959 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
960 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
963 max_low_pfn -= highmem_pages;
967 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
/*
 * free_available_memory - per-range callback handed to efi_memmap_walk()
 * by register_bootmem_low_pages().
 * @start: first byte address of the range
 * @end:   end byte address of the range
 * @arg:   unused in the visible lines
 *
 * Ranges starting at or above (max_low_pfn + 1) << PAGE_SHIFT are tested
 * for and an end beyond that limit is clamped to it; what remains is
 * released with free_bootmem().
 * NOTE(review): the return type, the statement under the first test
 * (presumably an early return) and the final return are missing from this
 * listing. The limit uses max_low_pfn + 1, whereas the e820 path in
 * register_bootmem_low_pages() clamps at max_low_pfn -- worth confirming
 * that the difference is intended.
 */
974 * Free all available memory for boot time allocation. Used
975 * as a callback function by efi_memory_walk()
979 free_available_memory(unsigned long start, unsigned long end, void *arg)
981 /* check max_low_pfn */
982 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
984 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
985 end = (max_low_pfn + 1) << PAGE_SHIFT;
987 free_bootmem(start, end - start);
/*
 * register_bootmem_low_pages - release usable low RAM to the bootmem
 * allocator.
 * @max_low_pfn: first page frame number that is no longer low memory
 *
 * Either walks the EFI memmap with free_available_memory() or scans the
 * e820 map. In the e820 scan each E820_RAM entry is shrunk to whole page
 * frames (start rounded up, end rounded down), clamped to @max_low_pfn,
 * and freed when at least one frame remains.
 * NOTE(review): the declaration of i, the test selecting the EFI path
 * with its return, and the statements under the four if tests (presumably
 * continue) are missing from this listing.
 */
992 * Register fully available low RAM pages with the bootmem allocator.
994 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
999 efi_memmap_walk(free_available_memory, NULL);
1002 for (i = 0; i < e820.nr_map; i++) {
1003 unsigned long curr_pfn, last_pfn, size;
1005 * Reserve usable low memory
1007 if (e820.map[i].type != E820_RAM)
1010 * We are rounding up the start address of usable memory:
1012 curr_pfn = PFN_UP(e820.map[i].addr);
1013 if (curr_pfn >= max_low_pfn)
1016 * ... and at the end of the usable range downwards:
1018 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1020 if (last_pfn > max_low_pfn)
1021 last_pfn = max_low_pfn;
1024 * .. finally, did all the rounding and playing
1025 * around just make the area go away?
1027 if (last_pfn <= curr_pfn)
1030 size = last_pfn - curr_pfn;
1031 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
/*
 * reserve_ebda_region - reserve one page at the address returned by
 * get_bios_ebda() so that the bootmem allocator does not hand it out.
 * NOTE(review): the declaration of addr and the test guarding the
 * reservation (presumably addr being non-zero) are missing from this
 * listing.
 */
1036 * workaround for Dell systems that neglect to reserve EBDA
1038 static void __init reserve_ebda_region(void)
1041 addr = get_bios_ebda();
1043 reserve_bootmem(addr, PAGE_SIZE);
/*
 * setup_memory - single-node memory setup, compiled only when
 * CONFIG_NEED_MULTIPLE_NODES is not defined.
 *
 * Sets min_low_pfn to the first whole page frame after the boot page
 * tables, obtains max_low_pfn from find_max_low_pfn(), records the highmem
 * frame range under CONFIG_HIGHMEM, prints the amounts of high and low
 * memory, and then initialises the bootmem allocator.
 * NOTE(review): the call that computes max_pfn (presumably find_max_pfn),
 * the assignment of highend_pfn inside the if, the #endif and the return
 * statement are missing from this listing.
 */
1046 #ifndef CONFIG_NEED_MULTIPLE_NODES
1047 void __init setup_bootmem_allocator(void);
1048 static unsigned long __init setup_memory(void)
1051 * partially used pages are not usable - thus
1052 * we are rounding upwards:
1054 min_low_pfn = PFN_UP(init_pg_tables_end);
1058 max_low_pfn = find_max_low_pfn();
1060 #ifdef CONFIG_HIGHMEM
1061 highstart_pfn = highend_pfn = max_pfn;
1062 if (max_pfn > max_low_pfn) {
1063 highstart_pfn = max_low_pfn;
1065 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1066 pages_to_mb(highend_pfn - highstart_pfn));
1068 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1069 pages_to_mb(max_low_pfn));
1071 setup_bootmem_allocator();
/*
 * zone_sizes_init - compute per-zone page counts and pass them to
 * free_area_init().
 *
 * max_dma is the page frame number corresponding to MAX_DMA_ADDRESS.
 * Two layouts are visible: all low pages in ZONE_DMA, or max_dma pages in
 * ZONE_DMA with the remainder of low memory in ZONE_NORMAL and, under
 * CONFIG_HIGHMEM, the frames from low up to highend_pfn in ZONE_HIGHMEM.
 * NOTE(review): the assignment of low, the test choosing between the two
 * layouts (presumably low < max_dma) and the #endif are missing from this
 * listing.
 */
1076 void __init zone_sizes_init(void)
1078 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1079 unsigned int max_dma, low;
1081 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1085 zones_size[ZONE_DMA] = low;
1087 zones_size[ZONE_DMA] = max_dma;
1088 zones_size[ZONE_NORMAL] = low - max_dma;
1089 #ifdef CONFIG_HIGHMEM
1090 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1093 free_area_init(zones_size);
/* Declarations used when the two functions are provided by another file.
   NOTE(review): the #else line introducing this branch is missing from
   this listing. */
1096 extern unsigned long __init setup_memory(void);
1097 extern void zone_sizes_init(void);
1098 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
/*
 * setup_bootmem_allocator - bring up the boot-time allocator over low
 * memory and reserve everything that must not be handed out.
 *
 * Order of the visible steps:
 *   1. init_bootmem() over [min_low_pfn, max_low_pfn), then free the
 *      usable low RAM via register_bootmem_low_pages();
 *   2. reserve from HIGH_MEMORY up to the end of the bootmem bitmap;
 *   3. reserve physical page 0;
 *   4. reserve the EBDA page;
 *   5. on AMD family 6 CPUs reserve the 4096 bytes just below 0xa0000;
 *   6. reserve the page at PAGE_SIZE;
 *   7. under CONFIG_ACPI_SLEEP call acpi_reserve_bootmem();
 *   8. under CONFIG_BLK_DEV_INITRD reserve the initrd when it ends within
 *      low memory and record initrd_start and initrd_end, otherwise print
 *      an error.
 * NOTE(review): the SMP condition around step 6, the call under
 * CONFIG_X86_FIND_SMP_CONFIG, the left-hand side of the initrd_start
 * assignment, the else with the statement disabling the initrd, and all
 * #endif lines are missing from this listing.
 */
1100 void __init setup_bootmem_allocator(void)
1102 unsigned long bootmap_size;
1104 * Initialize the boot-time allocator (with low memory only):
1106 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1108 register_bootmem_low_pages(max_low_pfn);
1111 * Reserve the bootmem bitmap itself as well. We do this in two
1112 * steps (first step was init_bootmem()) because this catches
1113 * the (very unlikely) case of us accidentally initializing the
1114 * bootmem allocator with an invalid RAM area.
1116 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1117 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1120 * reserve physical page 0 - it's a special BIOS page on many boxes,
1121 * enabling clean reboots, SMP operation, laptop functions.
1123 reserve_bootmem(0, PAGE_SIZE);
1125 /* reserve EBDA region, it's a 4K region */
1126 reserve_ebda_region();
1128 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1129 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1130 unless you have no PS/2 mouse plugged in. */
1131 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1132 boot_cpu_data.x86 == 6)
1133 reserve_bootmem(0xa0000 - 4096, 4096);
1137 * But first pinch a few for the stack/trampoline stuff
1138 * FIXME: Don't need the extra page at 4K, but need to fix
1139 * trampoline before removing it. (see the GDT stuff)
1141 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1143 #ifdef CONFIG_ACPI_SLEEP
1145 * Reserve low memory region for sleep support.
1147 acpi_reserve_bootmem();
1149 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1151 * Find and reserve possible boot-time SMP configuration:
1156 #ifdef CONFIG_BLK_DEV_INITRD
1157 if (LOADER_TYPE && INITRD_START) {
1158 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1159 reserve_bootmem(INITRD_START, INITRD_SIZE);
1161 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1162 initrd_end = initrd_start+INITRD_SIZE;
1165 printk(KERN_ERR "initrd extends beyond end of memory "
1166 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1167 INITRD_START + INITRD_SIZE,
1168 max_low_pfn << PAGE_SHIFT);
/*
 * remapped_pgdat_init - zero the pglist_data of online nodes.
 *
 * Iterates over every online node and clears NODE_DATA(nid) with memset().
 * NOTE(review): the declaration of nid and the line between the loop
 * header and the memset (embedded line 1188, presumably a test that skips
 * node 0 as the existing comment suggests) are missing from this listing.
 */
1176 * The node 0 pgdat is initialized before all of these because
1177 * it's needed for bootmem. node>0 pgdats have their virtual
1178 * space allocated before the pagetables are in place to access
1179 * them, so they can't be cleared then.
1181 * This should all compile down to nothing when NUMA is off.
1183 void __init remapped_pgdat_init(void)
1187 for_each_online_node(nid) {
1189 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
/*
 * legacy_init_iomem_resources - create an iomem resource for every e820
 * entry.
 * @code_resource: resource describing the kernel code
 * @data_resource: resource describing the kernel data
 *
 * For each entry a struct resource is taken from alloc_bootmem_low(),
 * named after the entry type (anything other than RAM, ACPI tables or
 * ACPI NVS is named reserved), given the inclusive range
 * [addr, addr + size - 1], flagged as busy memory and requested from
 * iomem_resource. For RAM entries both kernel resources are then requested
 * as children of it; as the existing comment says, this is attempted for
 * every RAM entry because the containing one is not known in advance.
 * Entries ending above 0x100000000 are tested for first.
 * NOTE(review): the return type, the declaration of i, the statement under
 * the 4GB test (presumably continue), any call made before the loop, and
 * the braces are missing from this listing.
 */
1194 * Request address space for all standard RAM and ROM resources
1195 * and also for regions reported as reserved by the e820.
1198 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1203 for (i = 0; i < e820.nr_map; i++) {
1204 struct resource *res;
1205 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1207 res = alloc_bootmem_low(sizeof(struct resource));
1208 switch (e820.map[i].type) {
1209 case E820_RAM: res->name = "System RAM"; break;
1210 case E820_ACPI: res->name = "ACPI Tables"; break;
1211 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1212 default: res->name = "reserved";
1214 res->start = e820.map[i].addr;
1215 res->end = res->start + e820.map[i].size - 1;
1216 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1217 request_resource(&iomem_resource, res);
1218 if (e820.map[i].type == E820_RAM) {
1220 * We don't know which RAM region contains kernel data,
1221 * so we try it repeatedly and let the resource manager
1224 request_resource(res, code_resource);
1225 request_resource(res, data_resource);
/*
 * register_memory - request the standard memory and I/O resources and
 * choose where dynamic PCI memory allocation starts.
 *
 * Visible steps:
 *   1. initialise the iomem resources through the EFI or the legacy
 *      routine, passing the kernel code and data resources;
 *   2. request the video RAM area;
 *   3. request every entry of standard_io_resources;
 *   4. search the e820 map for the largest gap, starting from
 *      last = 4GB and a default gapstart of 0x10000000;
 *   5. set pci_mem_start to gapstart rounded up to a 1MB boundary and
 *      print the result.
 * NOTE(review): the declaration of i, the test choosing the EFI path, the
 * initial gapsize, the loop header of the gap search, the comparison of
 * last with end, the updates of gapstart, gapsize and last, and the braces
 * are missing from this listing.
 */
1231 * Request address space for all standard resources
1233 static void __init register_memory(void)
1235 unsigned long gapstart, gapsize;
1236 unsigned long long last;
1240 efi_initialize_iomem_resources(&code_resource, &data_resource);
1242 legacy_init_iomem_resources(&code_resource, &data_resource);
1244 /* EFI systems may still have VGA */
1245 request_resource(&iomem_resource, &video_ram_resource);
1247 /* request I/O space for devices used on all i[345]86 PCs */
1248 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1249 request_resource(&ioport_resource, &standard_io_resources[i]);
1252 * Search for the bigest gap in the low 32 bits of the e820
1255 last = 0x100000000ull;
1256 gapstart = 0x10000000;
1260 unsigned long long start = e820.map[i].addr;
1261 unsigned long long end = start + e820.map[i].size;
1264 * Since "last" is at most 4GB, we know we'll
1265 * fit in 32 bits if this condition is true
1268 unsigned long gap = last - end;
1270 if (gap > gapsize) {
1280 * Start allocating dynamic PCI memory a bit into the gap,
1281 * aligned up to the nearest megabyte.
1283 * Question: should we try to pad it up a bit (do something
1284 * like " + (gapsize >> 3)" in there too?). We now have the
1287 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1289 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1290 pci_mem_start, gapstart, gapsize);
/*
 * Three byte blobs placed in the data section by file-scope asm, labelled
 * intelnops, k8nops and k7nops. Each is the concatenation of that family
 * nop macros in increasing order (NOP1, NOP2, ...).
 * NOTE(review): the continuation lines of the k8 and k7 blobs (embedded
 * lines 1301 and 1304, presumably the NOP7 NOP8 macros and the closing
 * parenthesis) are missing from this listing.
 */
1293 /* Use inline assembly to define this because the nops are defined
1294 as inline assembly strings in the include files and we cannot
1295 get them easily into strings. */
1296 asm("\t.data\nintelnops: "
1297 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1298 GENERIC_NOP7 GENERIC_NOP8);
1299 asm("\t.data\nk8nops: "
1300 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1302 asm("\t.data\nk7nops: "
1303 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
/*
 * Lookup tables into the nop blobs: entry k points at a nop sequence that
 * apply_alternatives() copies with a length of k bytes. The visible
 * offsets are running sums (1 + 2 + 3 for one entry, 1 + 2 + 3 + 4 for the
 * next, and so on), which assumes the NOPn macro in each blob is exactly
 * n bytes long -- TODO confirm against the nop macro definitions.
 * The final table pairs a CPU feature bit with the nop table to use;
 * apply_alternatives() scans it until it meets a negative cpuid and falls
 * back to intel_nops.
 * NOTE(review): the first entries of each pointer table, the struct header
 * and array name of the feature table (used as noptypes elsewhere), its
 * terminating entry and all closing braces are missing from this listing.
 */
1306 extern unsigned char intelnops[], k8nops[], k7nops[];
1307 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1312 intelnops + 1 + 2 + 3,
1313 intelnops + 1 + 2 + 3 + 4,
1314 intelnops + 1 + 2 + 3 + 4 + 5,
1315 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1316 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1318 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1324 k8nops + 1 + 2 + 3 + 4,
1325 k8nops + 1 + 2 + 3 + 4 + 5,
1326 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1327 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1329 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1335 k7nops + 1 + 2 + 3 + 4,
1336 k7nops + 1 + 2 + 3 + 4 + 5,
1337 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1338 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1342 unsigned char **noptable;
1344 { X86_FEATURE_K8, k8_nops },
1345 { X86_FEATURE_K7, k7_nops },
1349 /* Replace instructions with better alternatives for this CPU type.
1351 This runs before SMP is initialized to avoid SMP problems with
1352 self modifying code. This implies that asymmetric systems where
1353 APs have less capabilities than the boot processor are not handled.
1354 In this case boot with "noreplacement". */
/*
 * apply_alternatives - patch the instructions described by a table of
 * struct alt_instr records.
 * @start: first record
 * @end:   one past the last record
 *
 * First picks a nop table: the first noptypes entry whose feature the
 * boot CPU has, otherwise intel_nops. Then, for every record whose
 * feature the boot CPU has, copies the replacement bytes over the
 * original instruction and fills the remaining instrlen - replacementlen
 * bytes with nops, in chunks of at most ASM_NOP_MAX bytes. A replacement
 * longer than the original triggers BUG_ON.
 * NOTE(review): the declarations of diff, i and k, the assignment of k
 * from diff, the clamp under the ASM_NOP_MAX test, the statement under the
 * feature test (presumably continue) and the braces are missing from this
 * listing.
 */
1355 void apply_alternatives(void *start, void *end)
1357 struct alt_instr *a;
1359 unsigned char **noptable = intel_nops;
1360 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1361 if (boot_cpu_has(noptypes[i].cpuid)) {
1362 noptable = noptypes[i].noptable;
1366 for (a = start; (void *)a < end; a++) {
1367 if (!boot_cpu_has(a->cpuid))
1369 BUG_ON(a->replacementlen > a->instrlen);
1370 memcpy(a->instr, a->replacement, a->replacementlen);
1371 diff = a->instrlen - a->replacementlen;
1372 /* Pad the rest with nops */
1373 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1375 if (k > ASM_NOP_MAX)
1377 memcpy(a->instr + i, noptable[k], k);
1382 static int no_replacement __initdata = 0;
/* Boot-time entry point: runs apply_alternatives() over the table bounded
   by the extern symbols __alt_instructions and __alt_instructions_end. */
1384 void __init alternative_instructions(void)
1386 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1389 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* Handler for the "noreplacement" boot option; registered through the
   __setup() line below. */
1392 static int __init noreplacement_setup(char *s)
1398 __setup("noreplacement", noreplacement_setup);
1400 static char * __init machine_specific_memory_setup(void);
1403 static void set_mca_bus(int x)
1408 static void set_mca_bus(int x) { }
1412 * Determine if we were loaded by an EFI loader. If so, then we have also been
1413 * passed the efi memmap, systab, etc., so we should use these data structures
1414 * for initialization. Note, the efi init code path is determined by the
1415 * global efi_enabled. This allows the same kernel image to be used on existing
1416 * systems (with a traditional BIOS) as well as on EFI systems.
1418 void __init setup_arch(char **cmdline_p)
1420 unsigned long max_low_pfn;
1422 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1423 pre_setup_arch_hook();
1427 * FIXME: This isn't an official loader_type right
1428 * now but does currently work with elilo.
1429 * If we were configured as an EFI kernel, check to make
1430 * sure that we were loaded correctly from elilo and that
1431 * the system table is valid. If not, then initialize normally.
1434 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1438 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1439 drive_info = DRIVE_INFO;
1440 screen_info = SCREEN_INFO;
1441 edid_info = EDID_INFO;
1442 apm_info.bios = APM_BIOS_INFO;
1443 ist_info = IST_INFO;
1444 saved_videomode = VIDEO_MODE;
1445 if( SYS_DESC_TABLE.length != 0 ) {
1446 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1447 machine_id = SYS_DESC_TABLE.table[0];
1448 machine_submodel_id = SYS_DESC_TABLE.table[1];
1449 BIOS_revision = SYS_DESC_TABLE.table[2];
1451 bootloader_type = LOADER_TYPE;
1453 #ifdef CONFIG_BLK_DEV_RAM
1454 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1455 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1456 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1462 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1463 print_memory_map(machine_specific_memory_setup());
1468 if (!MOUNT_ROOT_RDONLY)
1469 root_mountflags &= ~MS_RDONLY;
1470 init_mm.start_code = (unsigned long) _text;
1471 init_mm.end_code = (unsigned long) _etext;
1472 init_mm.end_data = (unsigned long) _edata;
1473 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1475 code_resource.start = virt_to_phys(_text);
1476 code_resource.end = virt_to_phys(_etext)-1;
1477 data_resource.start = virt_to_phys(_etext);
1478 data_resource.end = virt_to_phys(_edata)-1;
1480 parse_cmdline_early(cmdline_p);
1482 max_low_pfn = setup_memory();
1485 * NOTE: before this point _nobody_ is allowed to allocate
1486 * any memory using the bootmem allocator. Although the
1487 allocator is now initialised, only the first 8Mb of the kernel
1488 * virtual address space has been mapped. All allocations before
1489 * paging_init() has completed must use the alloc_bootmem_low_pages()
1490 * variant (which allocates DMA'able memory) and care must be taken
1491 * not to exceed the 8Mb limit.
1495 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1498 remapped_pgdat_init();
1503 * NOTE: at this point the bootmem allocator is fully available.
1506 #ifdef CONFIG_EARLY_PRINTK
1508 char *s = strstr(*cmdline_p, "earlyprintk=");
1510 extern void setup_early_printk(char *);
1512 setup_early_printk(s);
1513 printk("early console enabled\n");
1521 #ifdef CONFIG_X86_GENERICARCH
1522 generic_apic_probe(*cmdline_p);
1527 #ifdef CONFIG_ACPI_BOOT
1529 * Parse the ACPI tables for possible boot-time SMP configuration.
1531 acpi_boot_table_init();
1535 #ifdef CONFIG_X86_LOCAL_APIC
1536 if (smp_found_config)
1543 #if defined(CONFIG_VGA_CONSOLE)
1544 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1545 conswitchp = &vga_con;
1546 #elif defined(CONFIG_DUMMY_CONSOLE)
1547 conswitchp = &dummy_con;
1552 #include "setup_arch_post.h"
1556 * c-file-style:"k&r"