Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
[linux-2.6] / arch / x86 / mm / init_64.c
1 /*
2  *  linux/arch/x86_64/mm/init.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
6  *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7  */
8
9 #include <linux/signal.h>
10 #include <linux/sched.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/types.h>
15 #include <linux/ptrace.h>
16 #include <linux/mman.h>
17 #include <linux/mm.h>
18 #include <linux/swap.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/pagemap.h>
22 #include <linux/bootmem.h>
23 #include <linux/proc_fs.h>
24 #include <linux/pci.h>
25 #include <linux/pfn.h>
26 #include <linux/poison.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/module.h>
29 #include <linux/memory_hotplug.h>
30 #include <linux/nmi.h>
31
32 #include <asm/processor.h>
33 #include <asm/system.h>
34 #include <asm/uaccess.h>
35 #include <asm/pgtable.h>
36 #include <asm/pgalloc.h>
37 #include <asm/dma.h>
38 #include <asm/fixmap.h>
39 #include <asm/e820.h>
40 #include <asm/apic.h>
41 #include <asm/tlb.h>
42 #include <asm/mmu_context.h>
43 #include <asm/proto.h>
44 #include <asm/smp.h>
45 #include <asm/sections.h>
46
47 #ifndef Dprintk
48 #define Dprintk(x...)
49 #endif
50
51 const struct dma_mapping_ops* dma_ops;
52 EXPORT_SYMBOL(dma_ops);
53
54 static unsigned long dma_reserve __initdata;
55
56 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
57
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
63
64 void show_mem(void)
65 {
66         long i, total = 0, reserved = 0;
67         long shared = 0, cached = 0;
68         pg_data_t *pgdat;
69         struct page *page;
70
71         printk(KERN_INFO "Mem-info:\n");
72         show_free_areas();
73         printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
74
75         for_each_online_pgdat(pgdat) {
76                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77                         /* this loop can take a while with 256 GB and 4k pages
78                            so update the NMI watchdog */
79                         if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
80                                 touch_nmi_watchdog();
81                         }
82                         if (!pfn_valid(pgdat->node_start_pfn + i))
83                                 continue;
84                         page = pfn_to_page(pgdat->node_start_pfn + i);
85                         total++;
86                         if (PageReserved(page))
87                                 reserved++;
88                         else if (PageSwapCache(page))
89                                 cached++;
90                         else if (page_count(page))
91                                 shared += page_count(page) - 1;
92                }
93         }
94         printk(KERN_INFO "%lu pages of RAM\n", total);
95         printk(KERN_INFO "%lu reserved pages\n",reserved);
96         printk(KERN_INFO "%lu pages shared\n",shared);
97         printk(KERN_INFO "%lu pages swap cached\n",cached);
98 }
99
100 int after_bootmem;
101
102 static __init void *spp_getpage(void)
103
104         void *ptr;
105         if (after_bootmem)
106                 ptr = (void *) get_zeroed_page(GFP_ATOMIC); 
107         else
108                 ptr = alloc_bootmem_pages(PAGE_SIZE);
109         if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
110                 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
111
112         Dprintk("spp_getpage %p\n", ptr);
113         return ptr;
114
115
116 static __init void set_pte_phys(unsigned long vaddr,
117                          unsigned long phys, pgprot_t prot)
118 {
119         pgd_t *pgd;
120         pud_t *pud;
121         pmd_t *pmd;
122         pte_t *pte, new_pte;
123
124         Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
125
126         pgd = pgd_offset_k(vaddr);
127         if (pgd_none(*pgd)) {
128                 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
129                 return;
130         }
131         pud = pud_offset(pgd, vaddr);
132         if (pud_none(*pud)) {
133                 pmd = (pmd_t *) spp_getpage(); 
134                 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
135                 if (pmd != pmd_offset(pud, 0)) {
136                         printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
137                         return;
138                 }
139         }
140         pmd = pmd_offset(pud, vaddr);
141         if (pmd_none(*pmd)) {
142                 pte = (pte_t *) spp_getpage();
143                 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
144                 if (pte != pte_offset_kernel(pmd, 0)) {
145                         printk("PAGETABLE BUG #02!\n");
146                         return;
147                 }
148         }
149         new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
150
151         pte = pte_offset_kernel(pmd, vaddr);
152         if (!pte_none(*pte) &&
153             pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
154                 pte_ERROR(*pte);
155         set_pte(pte, new_pte);
156
157         /*
158          * It's enough to flush this one mapping.
159          * (PGE mappings get flushed as well)
160          */
161         __flush_tlb_one(vaddr);
162 }
163
164 /* NOTE: this is meant to be run only at boot */
165 void __init 
166 __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
167 {
168         unsigned long address = __fix_to_virt(idx);
169
170         if (idx >= __end_of_fixed_addresses) {
171                 printk("Invalid __set_fixmap\n");
172                 return;
173         }
174         set_pte_phys(address, phys, prot);
175 }
176
177 unsigned long __meminitdata table_start, table_end;
178
179 static __meminit void *alloc_low_page(unsigned long *phys)
180
181         unsigned long pfn = table_end++;
182         void *adr;
183
184         if (after_bootmem) {
185                 adr = (void *)get_zeroed_page(GFP_ATOMIC);
186                 *phys = __pa(adr);
187                 return adr;
188         }
189
190         if (pfn >= end_pfn) 
191                 panic("alloc_low_page: ran out of memory"); 
192
193         adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
194         memset(adr, 0, PAGE_SIZE);
195         *phys  = pfn * PAGE_SIZE;
196         return adr;
197 }
198
199 static __meminit void unmap_low_page(void *adr)
200
201
202         if (after_bootmem)
203                 return;
204
205         early_iounmap(adr, PAGE_SIZE);
206
207
208 /* Must run before zap_low_mappings */
209 __meminit void *early_ioremap(unsigned long addr, unsigned long size)
210 {
211         unsigned long vaddr;
212         pmd_t *pmd, *last_pmd;
213         int i, pmds;
214
215         pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
216         vaddr = __START_KERNEL_map;
217         pmd = level2_kernel_pgt;
218         last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
219         for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
220                 for (i = 0; i < pmds; i++) {
221                         if (pmd_present(pmd[i]))
222                                 goto next;
223                 }
224                 vaddr += addr & ~PMD_MASK;
225                 addr &= PMD_MASK;
226                 for (i = 0; i < pmds; i++, addr += PMD_SIZE)
227                         set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
228                 __flush_tlb();
229                 return (void *)vaddr;
230         next:
231                 ;
232         }
233         printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
234         return NULL;
235 }
236
237 /* To avoid virtual aliases later */
238 __meminit void early_iounmap(void *addr, unsigned long size)
239 {
240         unsigned long vaddr;
241         pmd_t *pmd;
242         int i, pmds;
243
244         vaddr = (unsigned long)addr;
245         pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
246         pmd = level2_kernel_pgt + pmd_index(vaddr);
247         for (i = 0; i < pmds; i++)
248                 pmd_clear(pmd + i);
249         __flush_tlb();
250 }
251
252 static void __meminit
253 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
254 {
255         int i = pmd_index(address);
256
257         for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
258                 unsigned long entry;
259                 pmd_t *pmd = pmd_page + pmd_index(address);
260
261                 if (address >= end) {
262                         if (!after_bootmem)
263                                 for (; i < PTRS_PER_PMD; i++, pmd++)
264                                         set_pmd(pmd, __pmd(0));
265                         break;
266                 }
267
268                 if (pmd_val(*pmd))
269                         continue;
270
271                 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
272                 entry &= __supported_pte_mask;
273                 set_pmd(pmd, __pmd(entry));
274         }
275 }
276
277 static void __meminit
278 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
279 {
280         pmd_t *pmd = pmd_offset(pud,0);
281         spin_lock(&init_mm.page_table_lock);
282         phys_pmd_init(pmd, address, end);
283         spin_unlock(&init_mm.page_table_lock);
284         __flush_tlb_all();
285 }
286
287 static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
288
289         int i = pud_index(addr);
290
291
292         for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
293                 unsigned long pmd_phys;
294                 pud_t *pud = pud_page + pud_index(addr);
295                 pmd_t *pmd;
296
297                 if (addr >= end)
298                         break;
299
300                 if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
301                         set_pud(pud, __pud(0)); 
302                         continue;
303                 } 
304
305                 if (pud_val(*pud)) {
306                         phys_pmd_update(pud, addr, end);
307                         continue;
308                 }
309
310                 pmd = alloc_low_page(&pmd_phys);
311                 spin_lock(&init_mm.page_table_lock);
312                 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
313                 phys_pmd_init(pmd, addr, end);
314                 spin_unlock(&init_mm.page_table_lock);
315                 unmap_low_page(pmd);
316         }
317         __flush_tlb();
318
319
320 static void __init find_early_table_space(unsigned long end)
321 {
322         unsigned long puds, pmds, tables, start;
323
324         puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
325         pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
326         tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
327                  round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
328
329         /* RED-PEN putting page tables only on node 0 could
330            cause a hotspot and fill up ZONE_DMA. The page tables
331            need roughly 0.5KB per GB. */
332         start = 0x8000;
333         table_start = find_e820_area(start, end, tables);
334         if (table_start == -1UL)
335                 panic("Cannot find space for the kernel page tables");
336
337         table_start >>= PAGE_SHIFT;
338         table_end = table_start;
339
340         early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
341                 end, table_start << PAGE_SHIFT,
342                 (table_start << PAGE_SHIFT) + tables);
343 }
344
345 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
346    This runs before bootmem is initialized and gets pages directly from the 
347    physical memory. To access them they are temporarily mapped. */
348 void __meminit init_memory_mapping(unsigned long start, unsigned long end)
349
350         unsigned long next; 
351
352         Dprintk("init_memory_mapping\n");
353
354         /* 
355          * Find space for the kernel direct mapping tables.
356          * Later we should allocate these tables in the local node of the memory
357          * mapped.  Unfortunately this is done currently before the nodes are 
358          * discovered.
359          */
360         if (!after_bootmem)
361                 find_early_table_space(end);
362
363         start = (unsigned long)__va(start);
364         end = (unsigned long)__va(end);
365
366         for (; start < end; start = next) {
367                 unsigned long pud_phys; 
368                 pgd_t *pgd = pgd_offset_k(start);
369                 pud_t *pud;
370
371                 if (after_bootmem)
372                         pud = pud_offset(pgd, start & PGDIR_MASK);
373                 else
374                         pud = alloc_low_page(&pud_phys);
375
376                 next = start + PGDIR_SIZE;
377                 if (next > end) 
378                         next = end; 
379                 phys_pud_init(pud, __pa(start), __pa(next));
380                 if (!after_bootmem)
381                         set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
382                 unmap_low_page(pud);
383         } 
384
385         if (!after_bootmem)
386                 mmu_cr4_features = read_cr4();
387         __flush_tlb_all();
388 }
389
390 #ifndef CONFIG_NUMA
391 void __init paging_init(void)
392 {
393         unsigned long max_zone_pfns[MAX_NR_ZONES];
394         memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
395         max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
396         max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
397         max_zone_pfns[ZONE_NORMAL] = end_pfn;
398
399         memory_present(0, 0, end_pfn);
400         sparse_init();
401         free_area_init_nodes(max_zone_pfns);
402 }
403 #endif
404
405 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
406    from the CPU leading to inconsistent cache lines. address and size
407    must be aligned to 2MB boundaries. 
408    Does nothing when the mapping doesn't exist. */
409 void __init clear_kernel_mapping(unsigned long address, unsigned long size) 
410 {
411         unsigned long end = address + size;
412
413         BUG_ON(address & ~LARGE_PAGE_MASK);
414         BUG_ON(size & ~LARGE_PAGE_MASK); 
415         
416         for (; address < end; address += LARGE_PAGE_SIZE) { 
417                 pgd_t *pgd = pgd_offset_k(address);
418                 pud_t *pud;
419                 pmd_t *pmd;
420                 if (pgd_none(*pgd))
421                         continue;
422                 pud = pud_offset(pgd, address);
423                 if (pud_none(*pud))
424                         continue; 
425                 pmd = pmd_offset(pud, address);
426                 if (!pmd || pmd_none(*pmd))
427                         continue; 
428                 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { 
429                         /* Could handle this, but it should not happen currently. */
430                         printk(KERN_ERR 
431                "clear_kernel_mapping: mapping has been split. will leak memory\n"); 
432                         pmd_ERROR(*pmd); 
433                 }
434                 set_pmd(pmd, __pmd(0));                 
435         }
436         __flush_tlb_all();
437
438
439 /*
440  * Memory hotplug specific functions
441  */
442 void online_page(struct page *page)
443 {
444         ClearPageReserved(page);
445         init_page_count(page);
446         __free_page(page);
447         totalram_pages++;
448         num_physpages++;
449 }
450
451 #ifdef CONFIG_MEMORY_HOTPLUG
452 /*
453  * Memory is added always to NORMAL zone. This means you will never get
454  * additional DMA/DMA32 memory.
455  */
456 int arch_add_memory(int nid, u64 start, u64 size)
457 {
458         struct pglist_data *pgdat = NODE_DATA(nid);
459         struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
460         unsigned long start_pfn = start >> PAGE_SHIFT;
461         unsigned long nr_pages = size >> PAGE_SHIFT;
462         int ret;
463
464         init_memory_mapping(start, (start + size -1));
465
466         ret = __add_pages(zone, start_pfn, nr_pages);
467         if (ret)
468                 goto error;
469
470         return ret;
471 error:
472         printk("%s: Problem encountered in __add_pages!\n", __func__);
473         return ret;
474 }
475 EXPORT_SYMBOL_GPL(arch_add_memory);
476
477 int remove_memory(u64 start, u64 size)
478 {
479         return -EINVAL;
480 }
481 EXPORT_SYMBOL_GPL(remove_memory);
482
#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
/*
 * Map a hot-added physical address to a node id.  This variant, built for
 * NUMA without ACPI NUMA, always answers node 0.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int nid = 0;

	return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
490
491 #endif /* CONFIG_MEMORY_HOTPLUG */
492
#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hot-add without sparsemem.  The mem_maps have been allocated in
 * advance, so this only onlines the pages.
 *
 * Every valid page frame in [@start_pfn, @start_pfn + @nr_pages) is onlined.
 * If at least one was valid, the spanned/present counters of zone @z and of
 * its node are increased and 0 is returned; otherwise nothing is updated
 * and -EIO is returned.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
	unsigned long spanned = 0, present = 0;
	unsigned long pfn;
	int err = -EIO;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		spanned++;
		if (!pfn_valid(pfn))
			continue;

		online_page(pfn_to_page(pfn));
		present++;
		err = 0;
	}

	if (err)
		return err;

	z->spanned_pages += spanned;
	z->present_pages += present;
	z->zone_pgdat->node_spanned_pages += spanned;
	z->zone_pgdat->node_present_pages += present;
	return 0;
}
#endif
520
521 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
522                          kcore_vsyscall;
523
524 void __init mem_init(void)
525 {
526         long codesize, reservedpages, datasize, initsize;
527
528         pci_iommu_alloc();
529
530         /* clear the zero-page */
531         memset(empty_zero_page, 0, PAGE_SIZE);
532
533         reservedpages = 0;
534
535         /* this will put all low memory onto the freelists */
536 #ifdef CONFIG_NUMA
537         totalram_pages = numa_free_all_bootmem();
538 #else
539         totalram_pages = free_all_bootmem();
540 #endif
541         reservedpages = end_pfn - totalram_pages -
542                                         absent_pages_in_range(0, end_pfn);
543
544         after_bootmem = 1;
545
546         codesize =  (unsigned long) &_etext - (unsigned long) &_text;
547         datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
548         initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
549
550         /* Register memory areas for /proc/kcore */
551         kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); 
552         kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, 
553                    VMALLOC_END-VMALLOC_START);
554         kclist_add(&kcore_kernel, &_stext, _end - _stext);
555         kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
556         kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 
557                                  VSYSCALL_END - VSYSCALL_START);
558
559         printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
560                 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
561                 end_pfn << (PAGE_SHIFT-10),
562                 codesize >> 10,
563                 reservedpages << (PAGE_SHIFT-10),
564                 datasize >> 10,
565                 initsize >> 10);
566 }
567
568 void free_init_pages(char *what, unsigned long begin, unsigned long end)
569 {
570         unsigned long addr;
571
572         if (begin >= end)
573                 return;
574
575         printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
576         for (addr = begin; addr < end; addr += PAGE_SIZE) {
577                 ClearPageReserved(virt_to_page(addr));
578                 init_page_count(virt_to_page(addr));
579                 memset((void *)(addr & ~(PAGE_SIZE-1)),
580                         POISON_FREE_INITMEM, PAGE_SIZE);
581                 if (addr >= __START_KERNEL_map)
582                         change_page_attr_addr(addr, 1, __pgprot(0));
583                 free_page(addr);
584                 totalram_pages++;
585         }
586         if (addr > __START_KERNEL_map)
587                 global_flush_tlb();
588 }
589
590 void free_initmem(void)
591 {
592         free_init_pages("unused kernel memory",
593                         (unsigned long)(&__init_begin),
594                         (unsigned long)(&__init_end));
595 }
596
#ifdef CONFIG_DEBUG_RODATA

/*
 * Write-protect the kernel's read-only data.
 *
 * The protected range ends at __end_rodata.  It starts at _stext, or at
 * _etext when CPU hotplug is configured and more than one CPU is possible,
 * or at __start_rodata when kprobes are configured.  The start is rounded
 * up and the end rounded down to page boundaries; an empty range is left
 * alone.
 */
void mark_rodata_ro(void)
{
	unsigned long ro_start = (unsigned long)_stext;
	unsigned long ro_end = (unsigned long)__end_rodata;

#ifdef CONFIG_HOTPLUG_CPU
	/* It must still be possible to apply SMP alternatives. */
	if (num_possible_cpus() > 1)
		ro_start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
	ro_start = (unsigned long)__start_rodata;
#endif

	ro_start = (ro_start + PAGE_SIZE - 1) & PAGE_MASK;
	ro_end &= PAGE_MASK;
	if (ro_end <= ro_start)
		return;

	change_page_attr_addr(ro_start, (ro_end - ro_start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
	       (ro_end - ro_start) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * The flush comes after the printk so that, if the attribute change
	 * went wrong, the message still gets out as a hint to the culprit.
	 */
	global_flush_tlb();
}
#endif
633
#ifdef CONFIG_BLK_DEV_INITRD
/* Free the initrd image occupying the virtual range [@start, @end). */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	char *region_name = "initrd memory";

	free_init_pages(region_name, start, end);
}
#endif
640
641 void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 
642
643 #ifdef CONFIG_NUMA
644         int nid = phys_to_nid(phys);
645 #endif
646         unsigned long pfn = phys >> PAGE_SHIFT;
647         if (pfn >= end_pfn) {
648                 /* This can happen with kdump kernels when accessing firmware
649                    tables. */
650                 if (pfn < end_pfn_map)
651                         return;
652                 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
653                                 phys, len);
654                 return;
655         }
656
657         /* Should check here against the e820 map to avoid double free */
658 #ifdef CONFIG_NUMA
659         reserve_bootmem_node(NODE_DATA(nid), phys, len);
660 #else                   
661         reserve_bootmem(phys, len);    
662 #endif
663         if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
664                 dma_reserve += len / PAGE_SIZE;
665                 set_dma_reserve(dma_reserve);
666         }
667 }
668
669 int kern_addr_valid(unsigned long addr) 
670
671         unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
672        pgd_t *pgd;
673        pud_t *pud;
674        pmd_t *pmd;
675        pte_t *pte;
676
677         if (above != 0 && above != -1UL)
678                 return 0; 
679         
680         pgd = pgd_offset_k(addr);
681         if (pgd_none(*pgd))
682                 return 0;
683
684         pud = pud_offset(pgd, addr);
685         if (pud_none(*pud))
686                 return 0; 
687
688         pmd = pmd_offset(pud, addr);
689         if (pmd_none(*pmd))
690                 return 0;
691         if (pmd_large(*pmd))
692                 return pfn_valid(pmd_pfn(*pmd));
693
694         pte = pte_offset_kernel(pmd, addr);
695         if (pte_none(*pte))
696                 return 0;
697         return pfn_valid(pte_pfn(*pte));
698 }
699
700 /* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
701    covers the 64bit vsyscall page now. 32bit has a real VMA now and does
702    not need special handling anymore. */
703
704 static struct vm_area_struct gate_vma = {
705         .vm_start = VSYSCALL_START,
706         .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
707         .vm_page_prot = PAGE_READONLY_EXEC,
708         .vm_flags = VM_READ | VM_EXEC
709 };
710
711 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
712 {
713 #ifdef CONFIG_IA32_EMULATION
714         if (test_tsk_thread_flag(tsk, TIF_IA32))
715                 return NULL;
716 #endif
717         return &gate_vma;
718 }
719
720 int in_gate_area(struct task_struct *task, unsigned long addr)
721 {
722         struct vm_area_struct *vma = get_gate_vma(task);
723         if (!vma)
724                 return 0;
725         return (addr >= vma->vm_start) && (addr < vma->vm_end);
726 }
727
728 /* Use this when you have no reliable task/vma, typically from interrupt
729  * context.  It is less reliable than using the task's vma and may give
730  * false positives.
731  */
732 int in_gate_area_no_task(unsigned long addr)
733 {
734         return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
735 }
736
737 void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
738 {
739         return __alloc_bootmem_core(pgdat->bdata, size,
740                         SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
741 }
742
743 const char *arch_vma_name(struct vm_area_struct *vma)
744 {
745         if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
746                 return "[vdso]";
747         if (vma == &gate_vma)
748                 return "[vsyscall]";
749         return NULL;
750 }