x86: change cpa to pfn based
[linux-2.6] / arch / x86 / mm / pageattr.c
1 /*
2  * Copyright 2002 Andi Kleen, SuSE Labs.
3  * Thanks to Ben LaHaise for precious feedback.
4  */
5 #include <linux/highmem.h>
6 #include <linux/bootmem.h>
7 #include <linux/module.h>
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include <linux/mm.h>
11
12 void clflush_cache_range(void *addr, int size)
13 {
14         int i;
15
16         for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
17                 clflush(addr+i);
18 }
19
20 #include <asm/processor.h>
21 #include <asm/tlbflush.h>
22 #include <asm/sections.h>
23 #include <asm/uaccess.h>
24 #include <asm/pgalloc.h>
25
26 /*
27  * We allow the BIOS range to be executable:
28  */
29 #define BIOS_BEGIN              0x000a0000
30 #define BIOS_END                0x00100000
31
32 static inline pgprot_t check_exec(pgprot_t prot, unsigned long address)
33 {
34         if (__pa(address) >= BIOS_BEGIN && __pa(address) < BIOS_END)
35                 pgprot_val(prot) &= ~_PAGE_NX;
36         /*
37          * Better fail early if someone sets the kernel text to NX.
38          * Does not cover __inittext
39          */
40         BUG_ON(address >= (unsigned long)&_text &&
41                 address < (unsigned long)&_etext &&
42                (pgprot_val(prot) & _PAGE_NX));
43
44         return prot;
45 }
46
47 pte_t *lookup_address(unsigned long address, int *level)
48 {
49         pgd_t *pgd = pgd_offset_k(address);
50         pud_t *pud;
51         pmd_t *pmd;
52
53         *level = PG_LEVEL_NONE;
54
55         if (pgd_none(*pgd))
56                 return NULL;
57         pud = pud_offset(pgd, address);
58         if (pud_none(*pud))
59                 return NULL;
60         pmd = pmd_offset(pud, address);
61         if (pmd_none(*pmd))
62                 return NULL;
63
64         *level = PG_LEVEL_2M;
65         if (pmd_large(*pmd))
66                 return (pte_t *)pmd;
67
68         *level = PG_LEVEL_4K;
69         return pte_offset_kernel(pmd, address);
70 }
71
72 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
73 {
74         /* change init_mm */
75         set_pte_atomic(kpte, pte);
76 #ifdef CONFIG_X86_32
77         if (!SHARED_KERNEL_PMD) {
78                 struct page *page;
79
80                 for (page = pgd_list; page; page = (struct page *)page->index) {
81                         pgd_t *pgd;
82                         pud_t *pud;
83                         pmd_t *pmd;
84
85                         pgd = (pgd_t *)page_address(page) + pgd_index(address);
86                         pud = pud_offset(pgd, address);
87                         pmd = pmd_offset(pud, address);
88                         set_pte_atomic((pte_t *)pmd, pte);
89                 }
90         }
91 #endif
92 }
93
94 static int split_large_page(pte_t *kpte, unsigned long address)
95 {
96         pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
97         gfp_t gfp_flags = GFP_KERNEL;
98         unsigned long flags;
99         unsigned long addr;
100         pte_t *pbase, *tmp;
101         struct page *base;
102         int i, level;
103
104 #ifdef CONFIG_DEBUG_PAGEALLOC
105         gfp_flags = GFP_ATOMIC;
106 #endif
107         base = alloc_pages(gfp_flags, 0);
108         if (!base)
109                 return -ENOMEM;
110
111         spin_lock_irqsave(&pgd_lock, flags);
112         /*
113          * Check for races, another CPU might have split this page
114          * up for us already:
115          */
116         tmp = lookup_address(address, &level);
117         if (tmp != kpte) {
118                 WARN_ON_ONCE(1);
119                 goto out_unlock;
120         }
121
122         address = __pa(address);
123         addr = address & LARGE_PAGE_MASK;
124         pbase = (pte_t *)page_address(base);
125 #ifdef CONFIG_X86_32
126         paravirt_alloc_pt(&init_mm, page_to_pfn(base));
127 #endif
128
129         for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
130                 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
131
132         /*
133          * Install the new, split up pagetable:
134          */
135         __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
136         base = NULL;
137
138 out_unlock:
139         spin_unlock_irqrestore(&pgd_lock, flags);
140
141         if (base)
142                 __free_pages(base, 0);
143
144         return 0;
145 }
146
147 static int
148 __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot)
149 {
150         struct page *kpte_page;
151         int level, err = 0;
152         pte_t *kpte;
153
154 #ifdef CONFIG_X86_32
155         BUG_ON(pfn > max_low_pfn);
156 #endif
157
158 repeat:
159         kpte = lookup_address(address, &level);
160         if (!kpte)
161                 return -EINVAL;
162
163         kpte_page = virt_to_page(kpte);
164         BUG_ON(PageLRU(kpte_page));
165         BUG_ON(PageCompound(kpte_page));
166
167         prot = check_exec(prot, address);
168
169         if (level == PG_LEVEL_4K) {
170                 set_pte_atomic(kpte, pfn_pte(pfn, canon_pgprot(prot)));
171         } else {
172                 err = split_large_page(kpte, address);
173                 if (!err)
174                         goto repeat;
175         }
176         return err;
177 }
178
179 /**
180  * change_page_attr_addr - Change page table attributes in linear mapping
181  * @address: Virtual address in linear mapping.
182  * @numpages: Number of pages to change
183  * @prot:    New page table attribute (PAGE_*)
184  *
185  * Change page attributes of a page in the direct mapping. This is a variant
186  * of change_page_attr() that also works on memory holes that do not have
187  * mem_map entry (pfn_valid() is false).
188  *
189  * See change_page_attr() documentation for more details.
190  */
191
192 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
193 {
194         int err = 0, kernel_map = 0, i;
195
196 #ifdef CONFIG_X86_64
197         if (address >= __START_KERNEL_map &&
198                         address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
199
200                 address = (unsigned long)__va(__pa(address));
201                 kernel_map = 1;
202         }
203 #endif
204
205         for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
206                 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
207
208                 if (!kernel_map || pte_present(pfn_pte(0, prot))) {
209                         err = __change_page_attr(address, pfn, prot);
210                         if (err)
211                                 break;
212                 }
213 #ifdef CONFIG_X86_64
214                 /*
215                  * Handle kernel mapping too which aliases part of
216                  * lowmem:
217                  */
218                 if (__pa(address) < KERNEL_TEXT_SIZE) {
219                         unsigned long addr2;
220                         pgprot_t prot2;
221
222                         addr2 = __START_KERNEL_map + __pa(address);
223                         /* Make sure the kernel mappings stay executable */
224                         prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
225                         err = __change_page_attr(addr2, pfn, prot2);
226                 }
227 #endif
228         }
229
230         return err;
231 }
232
233 /**
234  * change_page_attr - Change page table attributes in the linear mapping.
235  * @page: First page to change
236  * @numpages: Number of pages to change
237  * @prot: New protection/caching type (PAGE_*)
238  *
239  * Returns 0 on success, otherwise a negated errno.
240  *
241  * This should be used when a page is mapped with a different caching policy
242  * than write-back somewhere - some CPUs do not like it when mappings with
243  * different caching policies exist. This changes the page attributes of the
244  * in kernel linear mapping too.
245  *
246  * Caller must call global_flush_tlb() later to make the changes active.
247  *
248  * The caller needs to ensure that there are no conflicting mappings elsewhere
249  * (e.g. in user space) * This function only deals with the kernel linear map.
250  *
251  * For MMIO areas without mem_map use change_page_attr_addr() instead.
252  */
253 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
254 {
255         unsigned long addr = (unsigned long)page_address(page);
256
257         return change_page_attr_addr(addr, numpages, prot);
258 }
259 EXPORT_SYMBOL(change_page_attr);
260
261 static void flush_kernel_map(void *arg)
262 {
263         /*
264          * Flush all to work around Errata in early athlons regarding
265          * large page flushing.
266          */
267         __flush_tlb_all();
268
269         if (boot_cpu_data.x86_model >= 4)
270                 wbinvd();
271 }
272
273 void global_flush_tlb(void)
274 {
275         BUG_ON(irqs_disabled());
276
277         on_each_cpu(flush_kernel_map, NULL, 1, 1);
278 }
279 EXPORT_SYMBOL(global_flush_tlb);
280
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * kernel_map_pages - map or unmap pages in the kernel linear mapping
 * @page:     first page
 * @numpages: number of pages
 * @enable:   non-zero to map the pages with PAGE_KERNEL, zero to install an
 *            empty protection (__pgprot(0))
 *
 * Highmem pages are ignored.  When unmapping, the range is first checked
 * for locks that are still held.  Nothing is changed while
 * debug_pagealloc_enabled is not set.
 */
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	pgprot_t prot;

	if (PageHighMem(page))
		return;

	if (!enable)
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);

	/*
	 * If page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	if (enable)
		prot = PAGE_KERNEL;
	else
		prot = __pgprot(0);

	/*
	 * The return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, prot);

	/*
	 * We should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu:
	 */
	__flush_tlb_all();
}
#endif