/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

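/*
 * A page table has 256 entries: 1K on 31-bit, so four tables share a
 * 4K page (TABLES_PER_PAGE = 4); 2K on 64-bit, so two share a page.
 * FRAG_MASK covers the page->flags bits used as the allocation bitmap
 * for these fragments, SECOND_HALVES marks the fragments that hold
 * the no-exec shadow tables.  Region and segment (crst) tables are
 * allocated with order ALLOC_ORDER (8K on 31-bit, 16K on 64-bit).
 */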
#ifndef CONFIG_64BIT
#define ALLOC_ORDER     1
#define TABLES_PER_PAGE 4
#define FRAG_MASK       15UL
#define SECOND_HALVES   10UL
#else
#define ALLOC_ORDER     2
#define TABLES_PER_PAGE 2
#define FRAG_MASK       3UL
#define SECOND_HALVES   2UL
#endif

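/*
 * Allocate a region or segment table.  With no-exec emulation enabled
 * a shadow table is allocated as well and its address is kept in
 * page->index.  The page is linked into mm->context.crst_list so that
 * disable_noexec() can find and release the shadow later.
 */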
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

        if (!page)
                return NULL;
        page->index = 0;
        if (noexec) {
                struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
                if (!shadow) {
                        __free_pages(page, ALLOC_ORDER);
                        return NULL;
                }
                page->index = page_to_phys(shadow);
        }
        spin_lock(&mm->page_table_lock);
        list_add(&page->lru, &mm->context.crst_list);
        spin_unlock(&mm->page_table_lock);
        return (unsigned long *) page_to_phys(page);
}

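/*
 * Free a region or segment table together with its shadow, after
 * unlinking it from mm->context.crst_list.
 */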
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        unsigned long *shadow = get_shadow_table(table);
        struct page *page = virt_to_page(table);

        spin_lock(&mm->page_table_lock);
        list_del(&page->lru);
        spin_unlock(&mm->page_table_lock);
        if (shadow)
                free_pages((unsigned long) shadow, ALLOC_ORDER);
        free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
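/*
 * Grow the user address space by stacking additional table levels on
 * top of the current top-level table: from a segment table (2 GB limit)
 * to a region-third table (4 TB) and further to a region-second table
 * (8 PB).  The old top level is hooked in below the new one and the
 * updated ASCE is propagated with update_mm().
 */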
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
        unsigned long *table, *pgd;
        unsigned long entry;

        BUG_ON(limit > (1UL << 53));
repeat:
        table = crst_table_alloc(mm, mm->context.noexec);
        if (!table)
                return -ENOMEM;
        spin_lock(&mm->page_table_lock);
        if (mm->context.asce_limit < limit) {
                pgd = (unsigned long *) mm->pgd;
                if (mm->context.asce_limit <= (1UL << 31)) {
                        entry = _REGION3_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                } else {
                        entry = _REGION2_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 53;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION2;
                }
                crst_table_init(table, entry);
                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
                mm->pgd = (pgd_t *) table;
                table = NULL;
        }
        spin_unlock(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
        update_mm(mm, current);
        return 0;
}

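/*
 * Shrink the address space again by removing top-level tables until
 * mm->context.asce_limit fits the requested limit, freeing each
 * removed table.
 */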
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
        pgd_t *pgd;

        if (mm->context.asce_limit <= limit)
                return;
        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                case _REGION_ENTRY_TYPE_R2:
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                        break;
                case _REGION_ENTRY_TYPE_R3:
                        mm->context.asce_limit = 1UL << 31;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_SEGMENT;
                        break;
                default:
                        BUG();
                }
                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
                crst_table_free(mm, (unsigned long *) pgd);
        }
        update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 *
 * Page tables are handed out as fragments of 256 entries, so several
 * of them share one 4K page.  The low bits of page->flags record which
 * fragments are in use; with no-exec emulation the fragment following
 * a table is reserved for its shadow.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;
        unsigned long bits;

        bits = mm->context.noexec ? 3UL : 1UL;
        spin_lock(&mm->page_table_lock);
        page = NULL;
        if (!list_empty(&mm->context.pgtable_list)) {
                page = list_first_entry(&mm->context.pgtable_list,
                                        struct page, lru);
                if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
                        page = NULL;
        }
        if (!page) {
                spin_unlock(&mm->page_table_lock);
                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                if (!page)
                        return NULL;
                pgtable_page_ctor(page);
                page->flags &= ~FRAG_MASK;
                table = (unsigned long *) page_to_phys(page);
                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
                spin_lock(&mm->page_table_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
        }
        table = (unsigned long *) page_to_phys(page);
        while (page->flags & bits) {
                table += 256;
                bits <<= 1;
        }
        page->flags |= bits;
        if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
                list_move_tail(&page->lru, &mm->context.pgtable_list);
        spin_unlock(&mm->page_table_lock);
        return table;
}

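/*
 * Return a page table fragment.  The fragment bits are cleared in
 * page->flags; once no fragment of the 4K page is in use any more
 * the whole page is released.
 */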
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned long bits;

        bits = mm->context.noexec ? 3UL : 1UL;
        bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        spin_lock(&mm->page_table_lock);
        page->flags ^= bits;
        if (page->flags & FRAG_MASK) {
                /* Page now has some free pgtable fragments. */
                list_move(&page->lru, &mm->context.pgtable_list);
                page = NULL;
        } else
                /* All fragments of the 4K page have been freed. */
                list_del(&page->lru);
        spin_unlock(&mm->page_table_lock);
        if (page) {
                pgtable_page_dtor(page);
                __free_page(page);
        }
}

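/*
 * Switch off the no-exec emulation for a mm: free all shadow region
 * and segment tables, mark the shadow halves of the page tables as
 * available again and activate the changed context with update_mm().
 */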
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
        struct page *page;

        spin_lock(&mm->page_table_lock);
        /* Free shadow region and segment tables. */
        list_for_each_entry(page, &mm->context.crst_list, lru)
                if (page->index) {
                        free_pages((unsigned long) page->index, ALLOC_ORDER);
                        page->index = 0;
                }
        /* "Free" second halves of page tables. */
        list_for_each_entry(page, &mm->context.pgtable_list, lru)
                page->flags &= ~SECOND_HALVES;
        spin_unlock(&mm->page_table_lock);
        mm->context.noexec = 0;
        update_mm(mm, tsk);
}