Handle addresses beyond VMALLOC_END correctly.
[linux-2.6] / arch / i386 / mm / pageattr.c
/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

17 static DEFINE_SPINLOCK(cpa_lock);
18 static struct list_head df_list = LIST_HEAD_INIT(df_list);
19
20
21 pte_t *lookup_address(unsigned long address) 
22
23         pgd_t *pgd = pgd_offset_k(address);
24         pud_t *pud;
25         pmd_t *pmd;
26         if (pgd_none(*pgd))
27                 return NULL;
28         pud = pud_offset(pgd, address);
29         if (pud_none(*pud))
30                 return NULL;
31         pmd = pmd_offset(pud, address);
32         if (pmd_none(*pmd))
33                 return NULL;
34         if (pmd_large(*pmd))
35                 return (pte_t *)pmd;
36         return pte_offset_kernel(pmd, address);
37
38
39 static struct page *split_large_page(unsigned long address, pgprot_t prot)
40
41         int i; 
42         unsigned long addr;
43         struct page *base;
44         pte_t *pbase;
45
46         spin_unlock_irq(&cpa_lock);
47         base = alloc_pages(GFP_KERNEL, 0);
48         spin_lock_irq(&cpa_lock);
49         if (!base) 
50                 return NULL;
51
52         address = __pa(address);
53         addr = address & LARGE_PAGE_MASK; 
54         pbase = (pte_t *)page_address(base);
55         for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
56                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
57                                           addr == address ? prot : PAGE_KERNEL));
58         }
59         return base;
60
61
62 static void flush_kernel_map(void *dummy) 
63
64         /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
65         if (boot_cpu_data.x86_model >= 4) 
66                 wbinvd();
67         /* Flush all to work around Errata in early athlons regarding 
68          * large page flushing. 
69          */
70         __flush_tlb_all();      
71 }
72
73 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
74
75         struct page *page;
76         unsigned long flags;
77
78         set_pte_atomic(kpte, pte);      /* change init_mm */
79         if (PTRS_PER_PMD > 1)
80                 return;
81
82         spin_lock_irqsave(&pgd_lock, flags);
83         for (page = pgd_list; page; page = (struct page *)page->index) {
84                 pgd_t *pgd;
85                 pud_t *pud;
86                 pmd_t *pmd;
87                 pgd = (pgd_t *)page_address(page) + pgd_index(address);
88                 pud = pud_offset(pgd, address);
89                 pmd = pmd_offset(pud, address);
90                 set_pte_atomic((pte_t *)pmd, pte);
91         }
92         spin_unlock_irqrestore(&pgd_lock, flags);
93 }
94
95 /* 
96  * No more special protections in this 2/4MB area - revert to a
97  * large page again. 
98  */
99 static inline void revert_page(struct page *kpte_page, unsigned long address)
100 {
101         pte_t *linear = (pte_t *) 
102                 pmd_offset(pud_offset(pgd_offset_k(address), address), address);
103         set_pmd_pte(linear,  address,
104                     pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
105                             PAGE_KERNEL_LARGE));
106 }
107
108 static int
109 __change_page_attr(struct page *page, pgprot_t prot)
110
111         pte_t *kpte; 
112         unsigned long address;
113         struct page *kpte_page;
114
115         BUG_ON(PageHighMem(page));
116         address = (unsigned long)page_address(page);
117
118         kpte = lookup_address(address);
119         if (!kpte)
120                 return -EINVAL;
121         kpte_page = virt_to_page(kpte);
122         if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
123                 if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
124                         set_pte_atomic(kpte, mk_pte(page, prot)); 
125                 } else {
126                         struct page *split = split_large_page(address, prot); 
127                         if (!split)
128                                 return -ENOMEM;
129                         set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
130                         kpte_page = split;
131                 }       
132                 get_page(kpte_page);
133         } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
134                 set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
135                 __put_page(kpte_page);
136         } else
137                 BUG();
138
139         /*
140          * If the pte was reserved, it means it was created at boot
141          * time (not via split_large_page) and in turn we must not
142          * replace it with a largepage.
143          */
144         if (!PageReserved(kpte_page)) {
145                 /* memleak and potential failed 2M page regeneration */
146                 BUG_ON(!page_count(kpte_page));
147
148                 if (cpu_has_pse && (page_count(kpte_page) == 1)) {
149                         list_add(&kpte_page->lru, &df_list);
150                         revert_page(kpte_page, address);
151                 }
152         }
153         return 0;
154
155
156 static inline void flush_map(void)
157 {
158         on_each_cpu(flush_kernel_map, NULL, 1, 1);
159 }
160
161 /*
162  * Change the page attributes of an page in the linear mapping.
163  *
164  * This should be used when a page is mapped with a different caching policy
165  * than write-back somewhere - some CPUs do not like it when mappings with
166  * different caching policies exist. This changes the page attributes of the
167  * in kernel linear mapping too.
168  * 
169  * The caller needs to ensure that there are no conflicting mappings elsewhere.
170  * This function only deals with the kernel linear map.
171  * 
172  * Caller must call global_flush_tlb() after this.
173  */
174 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
175 {
176         int err = 0; 
177         int i; 
178         unsigned long flags;
179
180         spin_lock_irqsave(&cpa_lock, flags);
181         for (i = 0; i < numpages; i++, page++) { 
182                 err = __change_page_attr(page, prot);
183                 if (err) 
184                         break; 
185         }       
186         spin_unlock_irqrestore(&cpa_lock, flags);
187         return err;
188 }
189
190 void global_flush_tlb(void)
191
192         LIST_HEAD(l);
193         struct page *pg, *next;
194
195         BUG_ON(irqs_disabled());
196
197         spin_lock_irq(&cpa_lock);
198         list_splice_init(&df_list, &l);
199         spin_unlock_irq(&cpa_lock);
200         flush_map();
201         list_for_each_entry_safe(pg, next, &l, lru)
202                 __free_page(pg);
203
204
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Map (@enable != 0, PAGE_KERNEL) or unmap (@enable == 0, empty
 * protection) @numpages pages starting at @page in the kernel linear
 * mapping.  Highmem pages are ignored.
 */
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	/* the return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time.
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
	/* we should perform an IPI and flush all tlbs,
	 * but that can deadlock->flush only current cpu.
	 */
	__flush_tlb_all();
}
#endif

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);