x86: change size of node ids from u8 to u16
arch/x86/mm/init_64.c
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */

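/* Dump a summary of page usage (total, reserved, shared, swap-cached) to the console. */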
void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
                                touch_nmi_watchdog();
                        }
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%lu pages of RAM\n", total);
        printk(KERN_INFO "%lu reserved pages\n", reserved);
        printk(KERN_INFO "%lu pages shared\n", shared);
        printk(KERN_INFO "%lu pages swap cached\n", cached);
}

int after_bootmem;

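/*
 * Allocate a zeroed page for kernel page-table construction: from the page
 * allocator once bootmem has been retired, from bootmem before that.
 */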
static __init void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

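/*
 * Install a kernel mapping of @vaddr to @phys with protection @prot,
 * allocating intermediate page-table levels with spp_getpage() as needed.
 */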
static __init void set_pte_phys(unsigned long vaddr,
                                unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

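/*
 * Hand out a zeroed page for building the early page tables.  Before bootmem
 * is available, the page comes from the region reserved by
 * find_early_table_space() and is temporarily mapped with early_ioremap();
 * the physical address is returned through @phys.
 */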
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys  = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

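/*
 * Fill one PMD page with 2MB kernel mappings covering [address, end).
 * During the pre-bootmem pass, entries beyond 'end' are explicitly cleared.
 */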
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);
        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

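/*
 * Fill one PUD page with mappings for [addr, end), allocating and populating
 * a PMD page for every 1GB slot that is covered by the e820 map.
 */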
static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

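/*
 * Estimate how much space the kernel direct-mapping page tables need for
 * memory up to 'end' and claim a matching block low in physical memory from
 * the e820 map.
 */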
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, table_start << PAGE_SHIFT,
                (table_start << PAGE_SHIFT) + tables);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from
   physical memory. To access them they are temporarily mapped. */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the memory
         * mapped.  Unfortunately this is done currently before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, (start + size - 1));

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear_bss() has already cleared the empty_zero_page */

        /* temporary debugging - double check it's true: */
        {
                int i;

                for (i = 0; i < 1024; i++)
                        WARN_ON_ONCE(empty_zero_page[i]);
        }

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize =  (unsigned long) &_etext - (unsigned long) &_text;
        datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
        initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                                 VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                end_pfn << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10);
}

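/*
 * Release a range of init memory back to the page allocator, poisoning the
 * contents first and clearing the page attributes of pages mapped through
 * the kernel text mapping.
 */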
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                        POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

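/*
 * Reserve a physical range with the bootmem allocator, routing it to the
 * owning node on NUMA and accounting any part that lies below the DMA limit.
 */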
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;
        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                                phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

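/*
 * Walk the kernel page tables to check whether a kernel virtual address is
 * backed by a present mapping.
 */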
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                                addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif