1 /*
2  *  linux/arch/i386/kernel/setup.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7  *
8  *  Memory region support
9  *      David Parsons <orc@pell.chi.il.us>, July-August 1999
10  *
11  *  Added E820 sanitization routine (removes overlapping memory regions);
12  *  Brian Moyle <bmoyle@mvista.com>, February 2001
13  *
14  * Moved CPU detection code to cpu/${cpu}.c
15  *    Patrick Mochel <mochel@osdl.org>, March 2002
16  *
17  *  Provisions for empty E820 memory regions (reported by certain BIOSes).
18  *  Alex Achenbach <xela@slit.de>, December 2002.
19  *
20  */
21
22 /*
23  * This file handles the architecture-dependent parts of initialization
24  */
25
26 #include <linux/config.h>
27 #include <linux/sched.h>
28 #include <linux/mm.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kexec.h>
47 #include <linux/crash_dump.h>
48
49 #include <video/edid.h>
50
51 #include <asm/apic.h>
52 #include <asm/e820.h>
53 #include <asm/mpspec.h>
54 #include <asm/setup.h>
55 #include <asm/arch_hooks.h>
56 #include <asm/sections.h>
57 #include <asm/io_apic.h>
58 #include <asm/ist.h>
59 #include <asm/io.h>
60 #include "setup_arch_pre.h"
61 #include <bios_ebda.h>
62
63 /* Forward Declaration. */
64 void __init find_max_pfn(void);
65
66 /* This value is set up by the early boot code to point to the value
67    immediately after the boot time page tables.  It contains a *physical*
68    address, and must not be in the .bss segment! */
69 unsigned long init_pg_tables_end __initdata = ~0UL;
70
71 int disable_pse __devinitdata = 0;
72
73 /*
74  * Machine setup..
75  */
76
77 #ifdef CONFIG_EFI
78 int efi_enabled = 0;
79 EXPORT_SYMBOL(efi_enabled);
80 #endif
81
82 /* cpu data as detected by the assembly code in head.S */
83 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
84 /* common cpu data for all cpus */
85 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
86 EXPORT_SYMBOL(boot_cpu_data);
87
88 unsigned long mmu_cr4_features;
89
90 #ifdef  CONFIG_ACPI_INTERPRETER
91         int acpi_disabled = 0;
92 #else
93         int acpi_disabled = 1;
94 #endif
95 EXPORT_SYMBOL(acpi_disabled);
96
97 #ifdef  CONFIG_ACPI_BOOT
98 int __initdata acpi_force = 0;
99 extern acpi_interrupt_flags     acpi_sci_flags;
100 #endif
101
102 /* for MCA, but anyone else can use it if they want */
103 unsigned int machine_id;
104 #ifdef CONFIG_MCA
105 EXPORT_SYMBOL(machine_id);
106 #endif
107 unsigned int machine_submodel_id;
108 unsigned int BIOS_revision;
109 unsigned int mca_pentium_flag;
110
111 /* For PCI or other memory-mapped resources */
112 unsigned long pci_mem_start = 0x10000000;
113 #ifdef CONFIG_PCI
114 EXPORT_SYMBOL(pci_mem_start);
115 #endif
116
117 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
118 int bootloader_type;
119
120 /* user-defined highmem size */
121 static unsigned int highmem_pages = -1;
122
123 /*
124  * Setup options
125  */
126 struct drive_info_struct { char dummy[32]; } drive_info;
127 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
128     defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
129 EXPORT_SYMBOL(drive_info);
130 #endif
131 struct screen_info screen_info;
132 #ifdef CONFIG_VT
133 EXPORT_SYMBOL(screen_info);
134 #endif
135 struct apm_info apm_info;
136 EXPORT_SYMBOL(apm_info);
137 struct sys_desc_table_struct {
138         unsigned short length;
139         unsigned char table[0];
140 };
141 struct edid_info edid_info;
142 struct ist_info ist_info;
143 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
144         defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
145 EXPORT_SYMBOL(ist_info);
146 #endif
147 struct e820map e820;
148
149 extern void early_cpu_init(void);
150 extern void dmi_scan_machine(void);
151 extern void generic_apic_probe(char *);
152 extern int root_mountflags;
153
154 unsigned long saved_videomode;
155
156 #define RAMDISK_IMAGE_START_MASK        0x07FF
157 #define RAMDISK_PROMPT_FLAG             0x8000
158 #define RAMDISK_LOAD_FLAG               0x4000  
159
160 static char command_line[COMMAND_LINE_SIZE];
161
162 unsigned char __initdata boot_params[PARAM_SIZE];
163
164 static struct resource data_resource = {
165         .name   = "Kernel data",
166         .start  = 0,
167         .end    = 0,
168         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
169 };
170
171 static struct resource code_resource = {
172         .name   = "Kernel code",
173         .start  = 0,
174         .end    = 0,
175         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
176 };
177
178 static struct resource system_rom_resource = {
179         .name   = "System ROM",
180         .start  = 0xf0000,
181         .end    = 0xfffff,
182         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
183 };
184
185 static struct resource extension_rom_resource = {
186         .name   = "Extension ROM",
187         .start  = 0xe0000,
188         .end    = 0xeffff,
189         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
190 };
191
192 static struct resource adapter_rom_resources[] = { {
193         .name   = "Adapter ROM",
194         .start  = 0xc8000,
195         .end    = 0,
196         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
197 }, {
198         .name   = "Adapter ROM",
199         .start  = 0,
200         .end    = 0,
201         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
202 }, {
203         .name   = "Adapter ROM",
204         .start  = 0,
205         .end    = 0,
206         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
207 }, {
208         .name   = "Adapter ROM",
209         .start  = 0,
210         .end    = 0,
211         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212 }, {
213         .name   = "Adapter ROM",
214         .start  = 0,
215         .end    = 0,
216         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
217 }, {
218         .name   = "Adapter ROM",
219         .start  = 0,
220         .end    = 0,
221         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
222 } };
223
224 #define ADAPTER_ROM_RESOURCES \
225         (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
226
227 static struct resource video_rom_resource = {
228         .name   = "Video ROM",
229         .start  = 0xc0000,
230         .end    = 0xc7fff,
231         .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
232 };
233
234 static struct resource video_ram_resource = {
235         .name   = "Video RAM area",
236         .start  = 0xa0000,
237         .end    = 0xbffff,
238         .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
239 };
240
241 static struct resource standard_io_resources[] = { {
242         .name   = "dma1",
243         .start  = 0x0000,
244         .end    = 0x001f,
245         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
246 }, {
247         .name   = "pic1",
248         .start  = 0x0020,
249         .end    = 0x0021,
250         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
251 }, {
252         .name   = "timer0",
253         .start  = 0x0040,
254         .end    = 0x0043,
255         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
256 }, {
257         .name   = "timer1",
258         .start  = 0x0050,
259         .end    = 0x0053,
260         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
261 }, {
262         .name   = "keyboard",
263         .start  = 0x0060,
264         .end    = 0x006f,
265         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
266 }, {
267         .name   = "dma page reg",
268         .start  = 0x0080,
269         .end    = 0x008f,
270         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
271 }, {
272         .name   = "pic2",
273         .start  = 0x00a0,
274         .end    = 0x00a1,
275         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
276 }, {
277         .name   = "dma2",
278         .start  = 0x00c0,
279         .end    = 0x00df,
280         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
281 }, {
282         .name   = "fpu",
283         .start  = 0x00f0,
284         .end    = 0x00ff,
285         .flags  = IORESOURCE_BUSY | IORESOURCE_IO
286 } };
287
288 #define STANDARD_IO_RESOURCES \
289         (sizeof standard_io_resources / sizeof standard_io_resources[0])
290
291 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
292
293 static int __init romchecksum(unsigned char *rom, unsigned long length)
294 {
295         unsigned char *p, sum = 0;
296
297         for (p = rom; p < rom + length; p++)
298                 sum += *p;
299         return sum == 0;
300 }
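/*
 * Illustrative note (not part of the original source): a legacy PC
 * expansion ROM begins with the bytes 0x55 0xAA, read here as the
 * little-endian word 0xaa55 checked by romsignature(), and rom[2]
 * gives the image length in 512-byte units.  For example, a 32 KB
 * option ROM would have rom[2] == 64 (64 * 512 == 32768), and the sum
 * of all 32768 bytes modulo 256 must be zero for romchecksum() to
 * accept it.
 */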
301
302 static void __init probe_roms(void)
303 {
304         unsigned long start, length, upper;
305         unsigned char *rom;
306         int           i;
307
308         /* video rom */
309         upper = adapter_rom_resources[0].start;
310         for (start = video_rom_resource.start; start < upper; start += 2048) {
311                 rom = isa_bus_to_virt(start);
312                 if (!romsignature(rom))
313                         continue;
314
315                 video_rom_resource.start = start;
316
317                 /* 0 < length <= 0x7f * 512, historically */
318                 length = rom[2] * 512;
319
320                 /* if checksum okay, trust length byte */
321                 if (length && romchecksum(rom, length))
322                         video_rom_resource.end = start + length - 1;
323
324                 request_resource(&iomem_resource, &video_rom_resource);
325                 break;
326         }
327
328         start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
329         if (start < upper)
330                 start = upper;
331
332         /* system rom */
333         request_resource(&iomem_resource, &system_rom_resource);
334         upper = system_rom_resource.start;
335
336         /* check for extension rom (ignore length byte!) */
337         rom = isa_bus_to_virt(extension_rom_resource.start);
338         if (romsignature(rom)) {
339                 length = extension_rom_resource.end - extension_rom_resource.start + 1;
340                 if (romchecksum(rom, length)) {
341                         request_resource(&iomem_resource, &extension_rom_resource);
342                         upper = extension_rom_resource.start;
343                 }
344         }
345
346         /* check for adapter roms on 2k boundaries */
347         for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
348                 rom = isa_bus_to_virt(start);
349                 if (!romsignature(rom))
350                         continue;
351
352                 /* 0 < length <= 0x7f * 512, historically */
353                 length = rom[2] * 512;
354
355                 /* but accept any length that fits if checksum okay */
356                 if (!length || start + length > upper || !romchecksum(rom, length))
357                         continue;
358
359                 adapter_rom_resources[i].start = start;
360                 adapter_rom_resources[i].end = start + length - 1;
361                 request_resource(&iomem_resource, &adapter_rom_resources[i]);
362
363                 start = adapter_rom_resources[i++].end & ~2047UL;
364         }
365 }
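/*
 * Illustrative example: on a typical BIOS-based box the requests made
 * above show up in /proc/iomem roughly as
 *
 *     000c0000-000c7fff : Video ROM
 *     000f0000-000fffff : System ROM
 *
 * with any detected option ROMs listed as "Adapter ROM" entries
 * between 0xc8000 and the extension/system ROM area.
 */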
366
367 static void __init limit_regions(unsigned long long size)
368 {
369         unsigned long long current_addr = 0;
370         int i;
371
372         if (efi_enabled) {
373                 efi_memory_desc_t *md;
374                 void *p;
375
376                 for (p = memmap.map, i = 0; p < memmap.map_end;
377                         p += memmap.desc_size, i++) {
378                         md = p;
379                         current_addr = md->phys_addr + (md->num_pages << 12);
380                         if (md->type == EFI_CONVENTIONAL_MEMORY) {
381                                 if (current_addr >= size) {
382                                         md->num_pages -=
383                                                 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
384                                         memmap.nr_map = i + 1;
385                                         return;
386                                 }
387                         }
388                 }
389         }
390         for (i = 0; i < e820.nr_map; i++) {
391                 if (e820.map[i].type == E820_RAM) {
392                         current_addr = e820.map[i].addr + e820.map[i].size;
393                         if (current_addr >= size) {
394                                 e820.map[i].size -= current_addr-size;
395                                 e820.nr_map = i + 1;
396                                 return;
397                         }
398                 }
399         }
400 }
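/*
 * Worked example (illustrative): with a single E820_RAM entry covering
 * [0, 0x40000000) (1 GB) and "mem=512M" on the command line,
 * limit_regions(0x20000000) finds current_addr == 0x40000000 >= size,
 * shrinks the entry's size by 0x20000000 and sets e820.nr_map = 1, so
 * only the first 512 MB remain usable.
 */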
401
402 static void __init add_memory_region(unsigned long long start,
403                                   unsigned long long size, int type)
404 {
405         int x;
406
407         if (!efi_enabled) {
408                 x = e820.nr_map;
409
410                 if (x == E820MAX) {
411                     printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
412                     return;
413                 }
414
415                 e820.map[x].addr = start;
416                 e820.map[x].size = size;
417                 e820.map[x].type = type;
418                 e820.nr_map++;
419         }
420 } /* add_memory_region */
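/*
 * Illustrative call: the "memmap=" handling in parse_cmdline_early()
 * below turns "memmap=64M@1024M" into
 *
 *     add_memory_region(0x40000000, 0x4000000, E820_RAM);
 *
 * i.e. a 64 MB usable region starting at the 1 GB mark is appended to
 * the e820 map (unless EFI is providing the memory map).
 */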
421
422 #define E820_DEBUG      1
423
424 static void __init print_memory_map(char *who)
425 {
426         int i;
427
428         for (i = 0; i < e820.nr_map; i++) {
429                 printk(" %s: %016Lx - %016Lx ", who,
430                         e820.map[i].addr,
431                         e820.map[i].addr + e820.map[i].size);
432                 switch (e820.map[i].type) {
433                 case E820_RAM:  printk("(usable)\n");
434                                 break;
435                 case E820_RESERVED:
436                                 printk("(reserved)\n");
437                                 break;
438                 case E820_ACPI:
439                                 printk("(ACPI data)\n");
440                                 break;
441                 case E820_NVS:
442                                 printk("(ACPI NVS)\n");
443                                 break;
444                 default:        printk("type %lu\n", e820.map[i].type);
445                                 break;
446                 }
447         }
448 }
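/*
 * Example of the resulting boot output (illustrative), with "who"
 * typically being "BIOS-e820":
 *
 *  BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
 *  BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved)
 *  BIOS-e820: 0000000000100000 - 000000001fff0000 (usable)
 */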
449
450 /*
451  * Sanitize the BIOS e820 map.
452  *
453  * Some e820 responses include overlapping entries.  The following 
454  * replaces the original e820 map with a new one, removing overlaps.
455  *
456  */
457 struct change_member {
458         struct e820entry *pbios; /* pointer to original bios entry */
459         unsigned long long addr; /* address for this change point */
460 };
461 static struct change_member change_point_list[2*E820MAX] __initdata;
462 static struct change_member *change_point[2*E820MAX] __initdata;
463 static struct e820entry *overlap_list[E820MAX] __initdata;
464 static struct e820entry new_bios[E820MAX] __initdata;
465
466 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
467 {
468         struct change_member *change_tmp;
469         unsigned long current_type, last_type;
470         unsigned long long last_addr;
471         int chgidx, still_changing;
472         int overlap_entries;
473         int new_bios_entry;
474         int old_nr, new_nr, chg_nr;
475         int i;
476
477         /*
478                 Visually we're performing the following (1,2,3,4 = memory types)...
479
480                 Sample memory map (w/overlaps):
481                    ____22__________________
482                    ______________________4_
483                    ____1111________________
484                    _44_____________________
485                    11111111________________
486                    ____________________33__
487                    ___________44___________
488                    __________33333_________
489                    ______________22________
490                    ___________________2222_
491                    _________111111111______
492                    _____________________11_
493                    _________________4______
494
495                 Sanitized equivalent (no overlap):
496                    1_______________________
497                    _44_____________________
498                    ___1____________________
499                    ____22__________________
500                    ______11________________
501                    _________1______________
502                    __________3_____________
503                    ___________44___________
504                    _____________33_________
505                    _______________2________
506                    ________________1_______
507                    _________________4______
508                    ___________________2____
509                    ____________________33__
510                    ______________________4_
511         */
512
513         /* if there's only one memory region, don't bother */
514         if (*pnr_map < 2)
515                 return -1;
516
517         old_nr = *pnr_map;
518
519         /* bail out if we find any unreasonable addresses in bios map */
520         for (i=0; i<old_nr; i++)
521                 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
522                         return -1;
523
524         /* create pointers for initial change-point information (for sorting) */
525         for (i=0; i < 2*old_nr; i++)
526                 change_point[i] = &change_point_list[i];
527
528         /* record all known change-points (starting and ending addresses),
529            omitting those that are for empty memory regions */
530         chgidx = 0;
531         for (i=0; i < old_nr; i++)      {
532                 if (biosmap[i].size != 0) {
533                         change_point[chgidx]->addr = biosmap[i].addr;
534                         change_point[chgidx++]->pbios = &biosmap[i];
535                         change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
536                         change_point[chgidx++]->pbios = &biosmap[i];
537                 }
538         }
539         chg_nr = chgidx;        /* true number of change-points */
540
541         /* sort change-point list by memory addresses (low -> high) */
542         still_changing = 1;
543         while (still_changing)  {
544                 still_changing = 0;
545                 for (i=1; i < chg_nr; i++)  {
546                         /* if <current_addr> > <last_addr>, swap */
547                         /* or, if current=<start_addr> & last=<end_addr>, swap */
548                         if ((change_point[i]->addr < change_point[i-1]->addr) ||
549                                 ((change_point[i]->addr == change_point[i-1]->addr) &&
550                                  (change_point[i]->addr == change_point[i]->pbios->addr) &&
551                                  (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
552                            )
553                         {
554                                 change_tmp = change_point[i];
555                                 change_point[i] = change_point[i-1];
556                                 change_point[i-1] = change_tmp;
557                                 still_changing=1;
558                         }
559                 }
560         }
561
562         /* create a new bios memory map, removing overlaps */
563         overlap_entries=0;       /* number of entries in the overlap table */
564         new_bios_entry=0;        /* index for creating new bios map entries */
565         last_type = 0;           /* start with undefined memory type */
566         last_addr = 0;           /* start with 0 as last starting address */
568         /* loop through change-points, determining the effect on the new bios map */
568         for (chgidx=0; chgidx < chg_nr; chgidx++)
569         {
570                 /* keep track of all overlapping bios entries */
571                 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
572                 {
573                         /* add map entry to overlap list (> 1 entry implies an overlap) */
574                         overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
575                 }
576                 else
577                 {
578                         /* remove entry from list (order independent, so swap with last) */
579                         for (i=0; i<overlap_entries; i++)
580                         {
581                                 if (overlap_list[i] == change_point[chgidx]->pbios)
582                                         overlap_list[i] = overlap_list[overlap_entries-1];
583                         }
584                         overlap_entries--;
585                 }
586                 /* if there are overlapping entries, decide which "type" to use */
587                 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
588                 current_type = 0;
589                 for (i=0; i<overlap_entries; i++)
590                         if (overlap_list[i]->type > current_type)
591                                 current_type = overlap_list[i]->type;
592                 /* continue building up new bios map based on this information */
593                 if (current_type != last_type)  {
594                         if (last_type != 0)      {
595                                 new_bios[new_bios_entry].size =
596                                         change_point[chgidx]->addr - last_addr;
597                                 /* move forward only if the new size was non-zero */
598                                 if (new_bios[new_bios_entry].size != 0)
599                                         if (++new_bios_entry >= E820MAX)
600                                                 break;  /* no more space left for new bios entries */
601                         }
602                         if (current_type != 0)  {
603                                 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
604                                 new_bios[new_bios_entry].type = current_type;
605                                 last_addr=change_point[chgidx]->addr;
606                         }
607                         last_type = current_type;
608                 }
609         }
610         new_nr = new_bios_entry;   /* retain count for new bios entries */
611
612         /* copy new bios mapping into original location */
613         memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
614         *pnr_map = new_nr;
615
616         return 0;
617 }
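/*
 * Small worked example (illustrative): given the two overlapping BIOS
 * entries
 *
 *     [0x00000000, 0x00002000) type 1 (E820_RAM)
 *     [0x00001000, 0x00003000) type 2 (E820_RESERVED)
 *
 * the change-point walk above produces the sanitized map
 *
 *     [0x00000000, 0x00001000) type 1
 *     [0x00001000, 0x00003000) type 2
 *
 * because the larger type value wins wherever entries overlap.
 */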
618
619 /*
620  * Copy the BIOS e820 map into a safe place.
621  *
622  * Sanity-check it while we're at it..
623  *
624  * If we're lucky and live on a modern system, the setup code
625  * will have given us a memory map that we can use to properly
626  * set up memory.  If we aren't, we'll fake a memory map.
627  *
628  * We check to see that the memory map contains at least 2 elements
629  * before we'll use it, because the detection code in setup.S may
630  * not be perfect and most every PC known to man has two memory
631  * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
632  * thinkpad 560x, for example, does not cooperate with the memory
633  * detection code.)
634  */
635 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
636 {
637         /* Only one memory region (or negative)? Ignore it */
638         if (nr_map < 2)
639                 return -1;
640
641         do {
642                 unsigned long long start = biosmap->addr;
643                 unsigned long long size = biosmap->size;
644                 unsigned long long end = start + size;
645                 unsigned long type = biosmap->type;
646
647                 /* Overflow in 64 bits? Ignore the memory map. */
648                 if (start > end)
649                         return -1;
650
651                 /*
652                  * Some BIOSes claim RAM in the 640k - 1M region.
653                  * Not right. Fix it up.
654                  */
655                 if (type == E820_RAM) {
656                         if (start < 0x100000ULL && end > 0xA0000ULL) {
657                                 if (start < 0xA0000ULL)
658                                         add_memory_region(start, 0xA0000ULL-start, type);
659                                 if (end <= 0x100000ULL)
660                                         continue;
661                                 start = 0x100000ULL;
662                                 size = end - start;
663                         }
664                 }
665                 add_memory_region(start, size, type);
666         } while (biosmap++,--nr_map);
667         return 0;
668 }
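/*
 * Illustrative example of the 640k-1M fixup above: a BIOS entry that
 * claims E820_RAM for [0, 0x200000) is split into
 *
 *     add_memory_region(0x000000, 0x0a0000, E820_RAM);
 *     add_memory_region(0x100000, 0x100000, E820_RAM);
 *
 * so the legacy VGA/BIOS hole between 640k and 1M is never treated as
 * usable RAM.
 */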
669
670 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
671 struct edd edd;
672 #ifdef CONFIG_EDD_MODULE
673 EXPORT_SYMBOL(edd);
674 #endif
675 /**
676  * copy_edd() - Copy the BIOS EDD information
677  *              from boot_params into a safe place.
678  *
679  */
680 static inline void copy_edd(void)
681 {
682      memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
683      memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
684      edd.mbr_signature_nr = EDD_MBR_SIG_NR;
685      edd.edd_info_nr = EDD_NR;
686 }
687 #else
688 static inline void copy_edd(void)
689 {
690 }
691 #endif
692
693 /*
694  * Do NOT EVER look at the BIOS memory size location.
695  * It does not work on many machines.
696  */
697 #define LOWMEMSIZE()    (0x9f000)
698
699 static void __init parse_cmdline_early (char ** cmdline_p)
700 {
701         char c = ' ', *to = command_line, *from = saved_command_line;
702         int len = 0;
703         int userdef = 0;
704
705         /* Save unparsed command line copy for /proc/cmdline */
706         saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
707
708         for (;;) {
709                 if (c != ' ')
710                         goto next_char;
711                 /*
712                  * "mem=nopentium" disables the 4MB page tables.
713                  * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
714                  * to <mem>, overriding the bios size.
715                  * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
716                  * <start> to <start>+<mem>, overriding the bios size.
717                  *
718                  * HPA tells me bootloaders need to parse mem=, so no new
719                  * option should be mem=  [also see Documentation/i386/boot.txt]
720                  */
721                 if (!memcmp(from, "mem=", 4)) {
722                         if (to != command_line)
723                                 to--;
724                         if (!memcmp(from+4, "nopentium", 9)) {
725                                 from += 9+4;
726                                 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
727                                 disable_pse = 1;
728                         } else {
729                                 /* If the user specifies memory size, we
730                                  * limit the BIOS-provided memory map to
731                                  * that size. exactmap can be used to specify
732                                  * the exact map. mem=number can be used to
733                                  * trim the existing memory map.
734                                  */
735                                 unsigned long long mem_size;
736  
737                                 mem_size = memparse(from+4, &from);
738                                 limit_regions(mem_size);
739                                 userdef=1;
740                         }
741                 }
742
743                 else if (!memcmp(from, "memmap=", 7)) {
744                         if (to != command_line)
745                                 to--;
746                         if (!memcmp(from+7, "exactmap", 8)) {
747 #ifdef CONFIG_CRASH_DUMP
748                                 /* If we are doing a crash dump, we
749                                  * still need to know the real mem
750                                  * size before original memory map is
751                                  * reset.
752                                  */
753                                 find_max_pfn();
754                                 saved_max_pfn = max_pfn;
755 #endif
756                                 from += 8+7;
757                                 e820.nr_map = 0;
758                                 userdef = 1;
759                         } else {
760                                 /* If the user specifies memory size, we
761                                  * limit the BIOS-provided memory map to
762                                  * that size. exactmap can be used to specify
763                                  * the exact map. mem=number can be used to
764                                  * trim the existing memory map.
765                                  */
766                                 unsigned long long start_at, mem_size;
767  
768                                 mem_size = memparse(from+7, &from);
769                                 if (*from == '@') {
770                                         start_at = memparse(from+1, &from);
771                                         add_memory_region(start_at, mem_size, E820_RAM);
772                                 } else if (*from == '#') {
773                                         start_at = memparse(from+1, &from);
774                                         add_memory_region(start_at, mem_size, E820_ACPI);
775                                 } else if (*from == '$') {
776                                         start_at = memparse(from+1, &from);
777                                         add_memory_region(start_at, mem_size, E820_RESERVED);
778                                 } else {
779                                         limit_regions(mem_size);
780                                         userdef=1;
781                                 }
782                         }
783                 }
784
785                 else if (!memcmp(from, "noexec=", 7))
786                         noexec_setup(from + 7);
787
788
789 #ifdef  CONFIG_X86_SMP
790                 /*
791                  * If the BIOS enumerates physical processors before logical,
792                  * maxcpus=N at enumeration-time can be used to disable HT.
793                  */
794                 else if (!memcmp(from, "maxcpus=", 8)) {
795                         extern unsigned int maxcpus;
796
797                         maxcpus = simple_strtoul(from + 8, NULL, 0);
798                 }
799 #endif
800
801 #ifdef CONFIG_ACPI_BOOT
802                 /* "acpi=off" disables both ACPI table parsing and interpreter */
803                 else if (!memcmp(from, "acpi=off", 8)) {
804                         disable_acpi();
805                 }
806
807                 /* acpi=force to override the blacklist */
808                 else if (!memcmp(from, "acpi=force", 10)) {
809                         acpi_force = 1;
810                         acpi_ht = 1;
811                         acpi_disabled = 0;
812                 }
813
814                 /* acpi=strict disables out-of-spec workarounds */
815                 else if (!memcmp(from, "acpi=strict", 11)) {
816                         acpi_strict = 1;
817                 }
818
819                 /* Limit ACPI just to boot-time to enable HT */
820                 else if (!memcmp(from, "acpi=ht", 7)) {
821                         if (!acpi_force)
822                                 disable_acpi();
823                         acpi_ht = 1;
824                 }
825                 
826                 /* "pci=noacpi" disables ACPI IRQ routing and the ACPI PCI scan */
827                 else if (!memcmp(from, "pci=noacpi", 10)) {
828                         acpi_disable_pci();
829                 }
830                 /* "acpi=noirq" disables ACPI interrupt routing */
831                 else if (!memcmp(from, "acpi=noirq", 10)) {
832                         acpi_noirq_set();
833                 }
834
835                 else if (!memcmp(from, "acpi_sci=edge", 13))
836                         acpi_sci_flags.trigger =  1;
837
838                 else if (!memcmp(from, "acpi_sci=level", 14))
839                         acpi_sci_flags.trigger = 3;
840
841                 else if (!memcmp(from, "acpi_sci=high", 13))
842                         acpi_sci_flags.polarity = 1;
843
844                 else if (!memcmp(from, "acpi_sci=low", 12))
845                         acpi_sci_flags.polarity = 3;
846
847 #ifdef CONFIG_X86_IO_APIC
848                 else if (!memcmp(from, "acpi_skip_timer_override", 24))
849                         acpi_skip_timer_override = 1;
850 #endif
851
852 #ifdef CONFIG_X86_LOCAL_APIC
853                 /* disable IO-APIC */
854                 else if (!memcmp(from, "noapic", 6))
855                         disable_ioapic_setup();
856 #endif /* CONFIG_X86_LOCAL_APIC */
857 #endif /* CONFIG_ACPI_BOOT */
858
859 #ifdef CONFIG_X86_LOCAL_APIC
860                 /* enable local APIC */
861                 else if (!memcmp(from, "lapic", 5))
862                         lapic_enable();
863
864                 /* disable local APIC */
865                 else if (!memcmp(from, "nolapic", 6))
866                         lapic_disable();
867 #endif /* CONFIG_X86_LOCAL_APIC */
868
869 #ifdef CONFIG_KEXEC
870                 /* crashkernel=size@addr specifies the location to reserve for
871                  * a crash kernel.  By reserving this memory we guarantee
872                  * that Linux never sets it up as a DMA target.
873                  * Useful for holding code to do something appropriate
874                  * after a kernel panic.
875                  */
876                 else if (!memcmp(from, "crashkernel=", 12)) {
877                         unsigned long size, base;
878                         size = memparse(from+12, &from);
879                         if (*from == '@') {
880                                 base = memparse(from+1, &from);
881                                 /* FIXME: Do I want a sanity check
882                                  * to validate the memory range?
883                                  */
884                                 crashk_res.start = base;
885                                 crashk_res.end   = base + size - 1;
886                         }
887                 }
888 #endif
889 #ifdef CONFIG_CRASH_DUMP
890                 /* elfcorehdr= specifies the location of elf core header
891                  * stored by the crashed kernel.
892                  */
893                 else if (!memcmp(from, "elfcorehdr=", 11))
894                         elfcorehdr_addr = memparse(from+11, &from);
895 #endif
896
897                 /*
898                  * highmem=size forces highmem to be exactly 'size' bytes.
899                  * This works even on boxes that have no highmem otherwise.
900                  * This also works to reduce highmem size on bigger boxes.
901                  */
902                 else if (!memcmp(from, "highmem=", 8))
903                         highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
904         
905                 /*
906                  * vmalloc=size forces the vmalloc area to be exactly 'size'
907                  * bytes. This can be used to increase (or decrease) the
908                  * vmalloc area - the default is 128m.
909                  */
910                 else if (!memcmp(from, "vmalloc=", 8))
911                         __VMALLOC_RESERVE = memparse(from+8, &from);
912
913         next_char:
914                 c = *(from++);
915                 if (!c)
916                         break;
917                 if (COMMAND_LINE_SIZE <= ++len)
918                         break;
919                 *(to++) = c;
920         }
921         *to = '\0';
922         *cmdline_p = command_line;
923         if (userdef) {
924                 printk(KERN_INFO "user-defined physical RAM map:\n");
925                 print_memory_map("user");
926         }
927 }
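/*
 * Illustrative command lines and their effect in the parser above:
 *
 *     mem=512M          -> limit_regions(512 MB)
 *     mem=nopentium     -> clear X86_FEATURE_PSE, disable_pse = 1
 *     memmap=exactmap memmap=640K@0 memmap=255M@1M
 *                       -> drop the BIOS map and build one by hand
 *     highmem=1G        -> highmem_pages = 1 GB >> PAGE_SHIFT
 */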
928
929 /*
930  * Callback for efi_memory_walk.
931  */
932 static int __init
933 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
934 {
935         unsigned long *max_pfn = arg, pfn;
936
937         if (start < end) {
938                 pfn = PFN_UP(end -1);
939                 if (pfn > *max_pfn)
940                         *max_pfn = pfn;
941         }
942         return 0;
943 }
944
945
946 /*
947  * Find the highest page frame number we have available
948  */
949 void __init find_max_pfn(void)
950 {
951         int i;
952
953         max_pfn = 0;
954         if (efi_enabled) {
955                 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
956                 return;
957         }
958
959         for (i = 0; i < e820.nr_map; i++) {
960                 unsigned long start, end;
961                 /* RAM? */
962                 if (e820.map[i].type != E820_RAM)
963                         continue;
964                 start = PFN_UP(e820.map[i].addr);
965                 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
966                 if (start >= end)
967                         continue;
968                 if (end > max_pfn)
969                         max_pfn = end;
970         }
971 }
972
973 /*
974  * Determine low and high memory ranges:
975  */
976 unsigned long __init find_max_low_pfn(void)
977 {
978         unsigned long max_low_pfn;
979
980         max_low_pfn = max_pfn;
981         if (max_low_pfn > MAXMEM_PFN) {
982                 if (highmem_pages == -1)
983                         highmem_pages = max_pfn - MAXMEM_PFN;
984                 if (highmem_pages + MAXMEM_PFN < max_pfn)
985                         max_pfn = MAXMEM_PFN + highmem_pages;
986                 if (highmem_pages + MAXMEM_PFN > max_pfn) {
987                         printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
988                         highmem_pages = 0;
989                 }
990                 max_low_pfn = MAXMEM_PFN;
991 #ifndef CONFIG_HIGHMEM
992                 /* Maximum memory usable is what is directly addressable */
993                 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
994                                         MAXMEM>>20);
995                 if (max_pfn > MAX_NONPAE_PFN)
996                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
997                 else
998                         printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
999                 max_pfn = MAXMEM_PFN;
1000 #else /* !CONFIG_HIGHMEM */
1001 #ifndef CONFIG_X86_PAE
1002                 if (max_pfn > MAX_NONPAE_PFN) {
1003                         max_pfn = MAX_NONPAE_PFN;
1004                         printk(KERN_WARNING "Warning only 4GB will be used.\n");
1005                         printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1006                 }
1007 #endif /* !CONFIG_X86_PAE */
1008 #endif /* !CONFIG_HIGHMEM */
1009         } else {
1010                 if (highmem_pages == -1)
1011                         highmem_pages = 0;
1012 #ifdef CONFIG_HIGHMEM
1013                 if (highmem_pages >= max_pfn) {
1014                         printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1015                         highmem_pages = 0;
1016                 }
1017                 if (highmem_pages) {
1018                         if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1019                                 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1020                                 highmem_pages = 0;
1021                         }
1022                         max_low_pfn -= highmem_pages;
1023                 }
1024 #else
1025                 if (highmem_pages)
1026                         printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1027 #endif
1028         }
1029         return max_low_pfn;
1030 }
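/*
 * Arithmetic example (illustrative, assuming the common 3G/1G split
 * where MAXMEM works out to roughly 896 MB): on a 2 GB machine max_pfn
 * exceeds MAXMEM_PFN, so with no "highmem=" option highmem_pages
 * defaults to max_pfn - MAXMEM_PFN (about 1152 MB worth of pages),
 * max_low_pfn is clamped to MAXMEM_PFN, and the remainder is only
 * reachable on a CONFIG_HIGHMEM kernel.
 */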
1031
1032 /*
1033  * Free all available memory for boot time allocation.  Used
1034  * as a callback function by efi_memory_walk()
1035  */
1036
1037 static int __init
1038 free_available_memory(unsigned long start, unsigned long end, void *arg)
1039 {
1040         /* check max_low_pfn */
1041         if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1042                 return 0;
1043         if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1044                 end = (max_low_pfn + 1) << PAGE_SHIFT;
1045         if (start < end)
1046                 free_bootmem(start, end - start);
1047
1048         return 0;
1049 }
1050 /*
1051  * Register fully available low RAM pages with the bootmem allocator.
1052  */
1053 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1054 {
1055         int i;
1056
1057         if (efi_enabled) {
1058                 efi_memmap_walk(free_available_memory, NULL);
1059                 return;
1060         }
1061         for (i = 0; i < e820.nr_map; i++) {
1062                 unsigned long curr_pfn, last_pfn, size;
1063                 /*
1064                  * Reserve usable low memory
1065                  */
1066                 if (e820.map[i].type != E820_RAM)
1067                         continue;
1068                 /*
1069                  * We are rounding up the start address of usable memory:
1070                  */
1071                 curr_pfn = PFN_UP(e820.map[i].addr);
1072                 if (curr_pfn >= max_low_pfn)
1073                         continue;
1074                 /*
1075                  * ... and at the end of the usable range downwards:
1076                  */
1077                 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1078
1079                 if (last_pfn > max_low_pfn)
1080                         last_pfn = max_low_pfn;
1081
1082                 /*
1083                  * .. finally, did all the rounding and playing
1084                  * around just make the area go away?
1085                  */
1086                 if (last_pfn <= curr_pfn)
1087                         continue;
1088
1089                 size = last_pfn - curr_pfn;
1090                 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1091         }
1092 }
1093
1094 /*
1095  * workaround for Dell systems that neglect to reserve EBDA
1096  */
1097 static void __init reserve_ebda_region(void)
1098 {
1099         unsigned int addr;
1100         addr = get_bios_ebda();
1101         if (addr)
1102                 reserve_bootmem(addr, PAGE_SIZE);       
1103 }
1104
1105 #ifndef CONFIG_NEED_MULTIPLE_NODES
1106 void __init setup_bootmem_allocator(void);
1107 static unsigned long __init setup_memory(void)
1108 {
1109         /*
1110          * partially used pages are not usable - thus
1111          * we are rounding upwards:
1112          */
1113         min_low_pfn = PFN_UP(init_pg_tables_end);
1114
1115         find_max_pfn();
1116
1117         max_low_pfn = find_max_low_pfn();
1118
1119 #ifdef CONFIG_HIGHMEM
1120         highstart_pfn = highend_pfn = max_pfn;
1121         if (max_pfn > max_low_pfn) {
1122                 highstart_pfn = max_low_pfn;
1123         }
1124         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1125                 pages_to_mb(highend_pfn - highstart_pfn));
1126 #endif
1127         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1128                         pages_to_mb(max_low_pfn));
1129
1130         setup_bootmem_allocator();
1131
1132         return max_low_pfn;
1133 }
1134
1135 void __init zone_sizes_init(void)
1136 {
1137         unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1138         unsigned int max_dma, low;
1139
1140         max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1141         low = max_low_pfn;
1142
1143         if (low < max_dma)
1144                 zones_size[ZONE_DMA] = low;
1145         else {
1146                 zones_size[ZONE_DMA] = max_dma;
1147                 zones_size[ZONE_NORMAL] = low - max_dma;
1148 #ifdef CONFIG_HIGHMEM
1149                 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1150 #endif
1151         }
1152         free_area_init(zones_size);
1153 }
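/*
 * Illustrative split: MAX_DMA_ADDRESS on i386 corresponds to the 16 MB
 * ISA DMA limit, so with max_low_pfn at, say, 896 MB the code above
 * yields ZONE_DMA = pages below 16 MB, ZONE_NORMAL = 16 MB .. 896 MB,
 * and ZONE_HIGHMEM = everything from there up to highend_pfn (if any).
 */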
1154 #else
1155 extern unsigned long __init setup_memory(void);
1156 extern void zone_sizes_init(void);
1157 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1158
1159 void __init setup_bootmem_allocator(void)
1160 {
1161         unsigned long bootmap_size;
1162         /*
1163          * Initialize the boot-time allocator (with low memory only):
1164          */
1165         bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1166
1167         register_bootmem_low_pages(max_low_pfn);
1168
1169         /*
1170          * Reserve the bootmem bitmap itself as well. We do this in two
1171          * steps (first step was init_bootmem()) because this catches
1172          * the (very unlikely) case of us accidentally initializing the
1173          * bootmem allocator with an invalid RAM area.
1174          */
1175         reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1176                          bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1177
1178         /*
1179          * reserve physical page 0 - it's a special BIOS page on many boxes,
1180          * enabling clean reboots, SMP operation, laptop functions.
1181          */
1182         reserve_bootmem(0, PAGE_SIZE);
1183
1184         /* reserve EBDA region, it's a 4K region */
1185         reserve_ebda_region();
1186
1187     /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
1188        PCI prefetch into it (errata #56). Usually the page is reserved anyway,
1189        unless you have no PS/2 mouse plugged in. */
1190         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1191             boot_cpu_data.x86 == 6)
1192              reserve_bootmem(0xa0000 - 4096, 4096);
1193
1194 #ifdef CONFIG_SMP
1195         /*
1196          * But first pinch a few for the stack/trampoline stuff
1197          * FIXME: Don't need the extra page at 4K, but need to fix
1198          * trampoline before removing it. (see the GDT stuff)
1199          */
1200         reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1201 #endif
1202 #ifdef CONFIG_ACPI_SLEEP
1203         /*
1204          * Reserve low memory region for sleep support.
1205          */
1206         acpi_reserve_bootmem();
1207 #endif
1208 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1209         /*
1210          * Find and reserve possible boot-time SMP configuration:
1211          */
1212         find_smp_config();
1213 #endif
1214
1215 #ifdef CONFIG_BLK_DEV_INITRD
1216         if (LOADER_TYPE && INITRD_START) {
1217                 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1218                         reserve_bootmem(INITRD_START, INITRD_SIZE);
1219                         initrd_start =
1220                                 INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
1221                         initrd_end = initrd_start+INITRD_SIZE;
1222                 }
1223                 else {
1224                         printk(KERN_ERR "initrd extends beyond end of memory "
1225                             "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1226                             INITRD_START + INITRD_SIZE,
1227                             max_low_pfn << PAGE_SHIFT);
1228                         initrd_start = 0;
1229                 }
1230         }
1231 #endif
1232 #ifdef CONFIG_KEXEC
1233         if (crashk_res.start != crashk_res.end)
1234                 reserve_bootmem(crashk_res.start,
1235                         crashk_res.end - crashk_res.start + 1);
1236 #endif
1237 }
1238
1239 /*
1240  * The node 0 pgdat is initialized before all of these because
1241  * it's needed for bootmem.  node>0 pgdats have their virtual
1242  * space allocated before the pagetables are in place to access
1243  * them, so they can't be cleared then.
1244  *
1245  * This should all compile down to nothing when NUMA is off.
1246  */
1247 void __init remapped_pgdat_init(void)
1248 {
1249         int nid;
1250
1251         for_each_online_node(nid) {
1252                 if (nid != 0)
1253                         memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1254         }
1255 }
1256
1257 /*
1258  * Request address space for all standard RAM and ROM resources
1259  * and also for regions reported as reserved by the e820.
1260  */
1261 static void __init
1262 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1263 {
1264         int i;
1265
1266         probe_roms();
1267         for (i = 0; i < e820.nr_map; i++) {
1268                 struct resource *res;
1269                 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1270                         continue;
1271                 res = alloc_bootmem_low(sizeof(struct resource));
1272                 switch (e820.map[i].type) {
1273                 case E820_RAM:  res->name = "System RAM"; break;
1274                 case E820_ACPI: res->name = "ACPI Tables"; break;
1275                 case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
1276                 default:        res->name = "reserved";
1277                 }
1278                 res->start = e820.map[i].addr;
1279                 res->end = res->start + e820.map[i].size - 1;
1280                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1281                 request_resource(&iomem_resource, res);
1282                 if (e820.map[i].type == E820_RAM) {
1283                         /*
1284                          *  We don't know which RAM region contains kernel data,
1285                          *  so we try it repeatedly and let the resource manager
1286                          *  test it.
1287                          */
1288                         request_resource(res, code_resource);
1289                         request_resource(res, data_resource);
1290 #ifdef CONFIG_KEXEC
1291                         request_resource(res, &crashk_res);
1292 #endif
1293                 }
1294         }
1295 }
1296
1297 /*
1298  * Request address space for all standard resources
1299  */
1300 static void __init register_memory(void)
1301 {
1302         unsigned long gapstart, gapsize;
1303         unsigned long long last;
1304         int           i;
1305
1306         if (efi_enabled)
1307                 efi_initialize_iomem_resources(&code_resource, &data_resource);
1308         else
1309                 legacy_init_iomem_resources(&code_resource, &data_resource);
1310
1311         /* EFI systems may still have VGA */
1312         request_resource(&iomem_resource, &video_ram_resource);
1313
1314         /* request I/O space for devices used on all i[345]86 PCs */
1315         for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1316                 request_resource(&ioport_resource, &standard_io_resources[i]);
1317
1318         /*
1319          * Search for the biggest gap in the low 32 bits of the e820
1320          * memory space.
1321          */
1322         last = 0x100000000ull;
1323         gapstart = 0x10000000;
1324         gapsize = 0x400000;
1325         i = e820.nr_map;
1326         while (--i >= 0) {
1327                 unsigned long long start = e820.map[i].addr;
1328                 unsigned long long end = start + e820.map[i].size;
1329
1330                 /*
1331                  * Since "last" is at most 4GB, we know we'll
1332                  * fit in 32 bits if this condition is true
1333                  */
1334                 if (last > end) {
1335                         unsigned long gap = last - end;
1336
1337                         if (gap > gapsize) {
1338                                 gapsize = gap;
1339                                 gapstart = end;
1340                         }
1341                 }
1342                 if (start < last)
1343                         last = start;
1344         }
1345
1346         /*
1347          * Start allocating dynamic PCI memory a bit into the gap,
1348          * aligned up to the nearest megabyte.
1349          *
1350          * Question: should we try to pad it up a bit (do something
1351          * like " + (gapsize >> 3)" in there too?). We now have the
1352          * technology.
1353          */
1354         pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1355
1356         printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1357                 pci_mem_start, gapstart, gapsize);
1358 }
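/*
 * Illustrative gap search: with a single RAM entry ending at
 * 0xc0000000 (3 GB) and nothing else below 4 GB, the loop above finds
 * a 1 GB gap starting at 0xc0000000, so pci_mem_start becomes
 * 0xc0000000 and dynamic PCI memory is allocated from there upwards.
 */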
1359
1360 /* Use inline assembly to define this because the nops are defined 
1361    as inline assembly strings in the include files and we cannot 
1362    get them easily into strings. */
1363 asm("\t.data\nintelnops: " 
1364     GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1365     GENERIC_NOP7 GENERIC_NOP8); 
1366 asm("\t.data\nk8nops: " 
1367     K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1368     K8_NOP7 K8_NOP8); 
1369 asm("\t.data\nk7nops: " 
1370     K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1371     K7_NOP7 K7_NOP8); 
1372     
1373 extern unsigned char intelnops[], k8nops[], k7nops[];
1374 static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 
1375      NULL,
1376      intelnops,
1377      intelnops + 1,
1378      intelnops + 1 + 2,
1379      intelnops + 1 + 2 + 3,
1380      intelnops + 1 + 2 + 3 + 4,
1381      intelnops + 1 + 2 + 3 + 4 + 5,
1382      intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1383      intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1384 }; 
1385 static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
1386      NULL,
1387      k8nops,
1388      k8nops + 1,
1389      k8nops + 1 + 2,
1390      k8nops + 1 + 2 + 3,
1391      k8nops + 1 + 2 + 3 + 4,
1392      k8nops + 1 + 2 + 3 + 4 + 5,
1393      k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1394      k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1395 }; 
1396 static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 
1397      NULL,
1398      k7nops,
1399      k7nops + 1,
1400      k7nops + 1 + 2,
1401      k7nops + 1 + 2 + 3,
1402      k7nops + 1 + 2 + 3 + 4,
1403      k7nops + 1 + 2 + 3 + 4 + 5,
1404      k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1405      k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1406 }; 
1407 static struct nop { 
1408      int cpuid; 
1409      unsigned char **noptable; 
1410 } noptypes[] = { 
1411      { X86_FEATURE_K8, k8_nops }, 
1412      { X86_FEATURE_K7, k7_nops }, 
1413      { -1, NULL }
1414 }; 
1415
1416 /* Replace instructions with better alternatives for this CPU type.
1417
1418    This runs before SMP is initialized to avoid SMP problems with
1419    self-modifying code. This implies that asymmetric systems where
1420    APs have fewer capabilities than the boot processor are not handled.
1421    Tough. Make sure you disable such features by hand. */ 
1422 void apply_alternatives(void *start, void *end) 
1423 {
1424         struct alt_instr *a; 
1425         int diff, i, k;
1426         unsigned char **noptable = intel_nops; 
1427         for (i = 0; noptypes[i].cpuid >= 0; i++) { 
1428                 if (boot_cpu_has(noptypes[i].cpuid)) { 
1429                         noptable = noptypes[i].noptable;
1430                         break;
1431                 }
1432         } 
1433         for (a = start; (void *)a < end; a++) { 
1434                 if (!boot_cpu_has(a->cpuid))
1435                         continue;
1436                 BUG_ON(a->replacementlen > a->instrlen); 
1437                 memcpy(a->instr, a->replacement, a->replacementlen); 
1438                 diff = a->instrlen - a->replacementlen; 
1439                 /* Pad the rest with nops */
1440                 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1441                         k = diff;
1442                         if (k > ASM_NOP_MAX)
1443                                 k = ASM_NOP_MAX;
1444                         memcpy(a->instr + i, noptable[k], k); 
1445                 } 
1446         }
1447 }
1448
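/*
 * Example of the padding step in apply_alternatives() above
 * (illustrative): if an alt_instr describes a 5-byte original
 * instruction whose replacement is only 3 bytes, diff == 2 after the
 * memcpy(), so one 2-byte nop from noptable[2] is copied in to keep
 * the instruction stream the same length.
 */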
1449 void __init alternative_instructions(void)
1450 {
1451         extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1452         apply_alternatives(__alt_instructions, __alt_instructions_end);
1453 }
1454
1455 static char * __init machine_specific_memory_setup(void);
1456
1457 #ifdef CONFIG_MCA
1458 static void set_mca_bus(int x)
1459 {
1460         MCA_bus = x;
1461 }
1462 #else
1463 static void set_mca_bus(int x) { }
1464 #endif
1465
1466 /*
1467  * Determine if we were loaded by an EFI loader.  If so, then we have also been
1468  * passed the efi memmap, systab, etc., so we should use these data structures
1469  * for initialization.  Note, the efi init code path is determined by the
1470  * global efi_enabled. This allows the same kernel image to be used on existing
1471  * systems (with a traditional BIOS) as well as on EFI systems.
1472  */
1473 void __init setup_arch(char **cmdline_p)
1474 {
1475         unsigned long max_low_pfn;
1476
1477         memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1478         pre_setup_arch_hook();
1479         early_cpu_init();
1480
1481         /*
1482          * FIXME: This isn't an official loader_type right
1483          * now but does currently work with elilo.
1484          * If we were configured as an EFI kernel, check to make
1485          * sure that we were loaded correctly from elilo and that
1486          * the system table is valid.  If not, then initialize normally.
1487          */
1488 #ifdef CONFIG_EFI
1489         if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1490                 efi_enabled = 1;
1491 #endif
1492
1493         ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
1494         drive_info = DRIVE_INFO;
1495         screen_info = SCREEN_INFO;
1496         edid_info = EDID_INFO;
1497         apm_info.bios = APM_BIOS_INFO;
1498         ist_info = IST_INFO;
1499         saved_videomode = VIDEO_MODE;
1500         if( SYS_DESC_TABLE.length != 0 ) {
1501                 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1502                 machine_id = SYS_DESC_TABLE.table[0];
1503                 machine_submodel_id = SYS_DESC_TABLE.table[1];
1504                 BIOS_revision = SYS_DESC_TABLE.table[2];
1505         }
1506         bootloader_type = LOADER_TYPE;
1507
1508 #ifdef CONFIG_BLK_DEV_RAM
1509         rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1510         rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1511         rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1512 #endif
1513         ARCH_SETUP
1514         if (efi_enabled)
1515                 efi_init();
1516         else {
1517                 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1518                 print_memory_map(machine_specific_memory_setup());
1519         }
1520
1521         copy_edd();
1522
1523         if (!MOUNT_ROOT_RDONLY)
1524                 root_mountflags &= ~MS_RDONLY;
1525         init_mm.start_code = (unsigned long) _text;
1526         init_mm.end_code = (unsigned long) _etext;
1527         init_mm.end_data = (unsigned long) _edata;
1528         init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
1529
1530         code_resource.start = virt_to_phys(_text);
1531         code_resource.end = virt_to_phys(_etext)-1;
1532         data_resource.start = virt_to_phys(_etext);
1533         data_resource.end = virt_to_phys(_edata)-1;
1534
1535         parse_cmdline_early(cmdline_p);
1536
1537         max_low_pfn = setup_memory();
1538
1539         /*
1540          * NOTE: before this point _nobody_ is allowed to allocate
1541          * any memory using the bootmem allocator.  Although the
1542          * allocator is now initialised, only the first 8MB of the kernel
1543          * virtual address space has been mapped.  All allocations before
1544          * paging_init() has completed must use the alloc_bootmem_low_pages()
1545          * variant (which allocates DMA'able memory) and care must be taken
1546          * not to exceed the 8MB limit.
1547          */
1548
1549 #ifdef CONFIG_SMP
1550         smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1551 #endif
1552         paging_init();
1553         remapped_pgdat_init();
1554         sparse_init();
1555         zone_sizes_init();
1556
1557         /*
1558          * NOTE: at this point the bootmem allocator is fully available.
1559          */
1560
1561 #ifdef CONFIG_EARLY_PRINTK
1562         {
1563                 char *s = strstr(*cmdline_p, "earlyprintk=");
1564                 if (s) {
1565                         extern void setup_early_printk(char *);
1566
1567                         setup_early_printk(s);
1568                         printk("early console enabled\n");
1569                 }
1570         }
1571 #endif
1572
1573
1574         dmi_scan_machine();
1575
1576 #ifdef CONFIG_X86_GENERICARCH
1577         generic_apic_probe(*cmdline_p);
1578 #endif  
1579         if (efi_enabled)
1580                 efi_map_memmap();
1581
1582 #ifdef CONFIG_ACPI_BOOT
1583         /*
1584          * Parse the ACPI tables for possible boot-time SMP configuration.
1585          */
1586         acpi_boot_table_init();
1587         acpi_boot_init();
1588
1589 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1590         if (def_to_bigsmp)
1591                 printk(KERN_WARNING "More than 8 CPUs detected and "
1592                         "CONFIG_X86_PC cannot handle it.\nUse "
1593                         "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1594 #endif
1595 #endif
1596 #ifdef CONFIG_X86_LOCAL_APIC
1597         if (smp_found_config)
1598                 get_smp_config();
1599 #endif
1600
1601         register_memory();
1602
1603 #ifdef CONFIG_VT
1604 #if defined(CONFIG_VGA_CONSOLE)
1605         if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1606                 conswitchp = &vga_con;
1607 #elif defined(CONFIG_DUMMY_CONSOLE)
1608         conswitchp = &dummy_con;
1609 #endif
1610 #endif
1611 }
1612
1613 #include "setup_arch_post.h"
1614 /*
1615  * Local Variables:
1616  * mode:c
1617  * c-file-style:"k&r"
1618  * c-basic-offset:8
1619  * End:
1620  */