Merge git://git.linux-nfs.org/pub/linux/nfs-2.6
[linux-2.6] / arch / sh / mm / fault.c
1 /*
2  * Page fault handler for SH with an MMU.
3  *
4  *  Copyright (C) 1999  Niibe Yutaka
5  *  Copyright (C) 2003 - 2007  Paul Mundt
6  *
7  *  Based on linux/arch/i386/mm/fault.c:
8  *   Copyright (C) 1995  Linus Torvalds
9  *
10  * This file is subject to the terms and conditions of the GNU General Public
11  * License.  See the file "COPYING" in the main directory of this archive
12  * for more details.
13  */
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <asm/system.h>
19 #include <asm/mmu_context.h>
20 #include <asm/tlbflush.h>
21 #include <asm/kgdb.h>
22
23 /*
24  * This routine handles page faults.  It determines the address,
25  * and the problem, and then passes it off to one of the appropriate
26  * routines.
27  */
28 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
29                                         unsigned long writeaccess,
30                                         unsigned long address)
31 {
32         struct task_struct *tsk;
33         struct mm_struct *mm;
34         struct vm_area_struct * vma;
35         int si_code;
36         int fault;
37         siginfo_t info;
38
39         trace_hardirqs_on();
40         local_irq_enable();
41
42 #ifdef CONFIG_SH_KGDB
43         if (kgdb_nofault && kgdb_bus_err_hook)
44                 kgdb_bus_err_hook();
45 #endif
46
47         tsk = current;
48         mm = tsk->mm;
49         si_code = SEGV_MAPERR;
50
51         if (unlikely(address >= TASK_SIZE)) {
52                 /*
53                  * Synchronize this task's top level page-table
54                  * with the 'reference' page table.
55                  *
56                  * Do _not_ use "tsk" here. We might be inside
57                  * an interrupt in the middle of a task switch..
58                  */
59                 int offset = pgd_index(address);
60                 pgd_t *pgd, *pgd_k;
61                 pud_t *pud, *pud_k;
62                 pmd_t *pmd, *pmd_k;
63
64                 pgd = get_TTB() + offset;
65                 pgd_k = swapper_pg_dir + offset;
66
67                 /* This will never happen with the folded page table. */
68                 if (!pgd_present(*pgd)) {
69                         if (!pgd_present(*pgd_k))
70                                 goto bad_area_nosemaphore;
71                         set_pgd(pgd, *pgd_k);
72                         return;
73                 }
74
75                 pud = pud_offset(pgd, address);
76                 pud_k = pud_offset(pgd_k, address);
77                 if (pud_present(*pud) || !pud_present(*pud_k))
78                         goto bad_area_nosemaphore;
79                 set_pud(pud, *pud_k);
80
81                 pmd = pmd_offset(pud, address);
82                 pmd_k = pmd_offset(pud_k, address);
83                 if (pmd_present(*pmd) || !pmd_present(*pmd_k))
84                         goto bad_area_nosemaphore;
85                 set_pmd(pmd, *pmd_k);
86
87                 return;
88         }
89
90         /*
91          * If we're in an interrupt or have no user
92          * context, we must not take the fault..
93          */
94         if (in_atomic() || !mm)
95                 goto no_context;
96
97         down_read(&mm->mmap_sem);
98
99         vma = find_vma(mm, address);
100         if (!vma)
101                 goto bad_area;
102         if (vma->vm_start <= address)
103                 goto good_area;
104         if (!(vma->vm_flags & VM_GROWSDOWN))
105                 goto bad_area;
106         if (expand_stack(vma, address))
107                 goto bad_area;
108 /*
109  * Ok, we have a good vm_area for this memory access, so
110  * we can handle it..
111  */
112 good_area:
113         si_code = SEGV_ACCERR;
114         if (writeaccess) {
115                 if (!(vma->vm_flags & VM_WRITE))
116                         goto bad_area;
117         } else {
118                 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
119                         goto bad_area;
120         }
121
122         /*
123          * If for any reason at all we couldn't handle the fault,
124          * make sure we exit gracefully rather than endlessly redo
125          * the fault.
126          */
127 survive:
128         fault = handle_mm_fault(mm, vma, address, writeaccess);
129         if (unlikely(fault & VM_FAULT_ERROR)) {
130                 if (fault & VM_FAULT_OOM)
131                         goto out_of_memory;
132                 else if (fault & VM_FAULT_SIGBUS)
133                         goto do_sigbus;
134                 BUG();
135         }
136         if (fault & VM_FAULT_MAJOR)
137                 tsk->maj_flt++;
138         else
139                 tsk->min_flt++;
140
141         up_read(&mm->mmap_sem);
142         return;
143
144 /*
145  * Something tried to access memory that isn't in our memory map..
146  * Fix it, but check if it's kernel or user first..
147  */
148 bad_area:
149         up_read(&mm->mmap_sem);
150
151 bad_area_nosemaphore:
152         if (user_mode(regs)) {
153                 info.si_signo = SIGSEGV;
154                 info.si_errno = 0;
155                 info.si_code = si_code;
156                 info.si_addr = (void *) address;
157                 force_sig_info(SIGSEGV, &info, tsk);
158                 return;
159         }
160
161 no_context:
162         /* Are we prepared to handle this kernel fault?  */
163         if (fixup_exception(regs))
164                 return;
165
166 /*
167  * Oops. The kernel tried to access some bad page. We'll have to
168  * terminate things with extreme prejudice.
169  *
170  */
171
172         bust_spinlocks(1);
173
174         if (oops_may_print()) {
175                 __typeof__(pte_val(__pte(0))) page;
176
177                 if (address < PAGE_SIZE)
178                         printk(KERN_ALERT "Unable to handle kernel NULL "
179                                           "pointer dereference");
180                 else
181                         printk(KERN_ALERT "Unable to handle kernel paging "
182                                           "request");
183                 printk(" at virtual address %08lx\n", address);
184                 printk(KERN_ALERT "pc = %08lx\n", regs->pc);
185                 page = (unsigned long)get_TTB();
186                 if (page) {
187                         page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
188                         printk(KERN_ALERT "*pde = %08lx\n", page);
189                         if (page & _PAGE_PRESENT) {
190                                 page &= PAGE_MASK;
191                                 address &= 0x003ff000;
192                                 page = ((__typeof__(page) *)
193                                                 __va(page))[address >>
194                                                             PAGE_SHIFT];
195                                 printk(KERN_ALERT "*pte = %08lx\n", page);
196                         }
197                 }
198         }
199
200         die("Oops", regs, writeaccess);
201         bust_spinlocks(0);
202         do_exit(SIGKILL);
203
204 /*
205  * We ran out of memory, or some other thing happened to us that made
206  * us unable to handle the page fault gracefully.
207  */
208 out_of_memory:
209         up_read(&mm->mmap_sem);
210         if (is_global_init(current)) {
211                 yield();
212                 down_read(&mm->mmap_sem);
213                 goto survive;
214         }
215         printk("VM: killing process %s\n", tsk->comm);
216         if (user_mode(regs))
217                 do_group_exit(SIGKILL);
218         goto no_context;
219
220 do_sigbus:
221         up_read(&mm->mmap_sem);
222
223         /*
224          * Send a sigbus, regardless of whether we were in kernel
225          * or user mode.
226          */
227         info.si_signo = SIGBUS;
228         info.si_errno = 0;
229         info.si_code = BUS_ADRERR;
230         info.si_addr = (void *)address;
231         force_sig_info(SIGBUS, &info, tsk);
232
233         /* Kernel mode? Handle exceptions or die */
234         if (!user_mode(regs))
235                 goto no_context;
236 }
237
/*
 * Exclusive upper bound of the address range that __do_page_fault()
 * resolves through the kernel page table.
 *
 * Without store queue support the range ends at P4SEG.  With
 * CONFIG_SH_STORE_QUEUES it is extended to cover the SH-4 store queue
 * space, because pages for that space still need to be faulted in
 * before the store queue cache can be flushed for writeout to the
 * remapped region.
 */
#ifndef CONFIG_SH_STORE_QUEUES
#define P3_ADDR_MAX		P4SEG
#else
#define P3_ADDR_MAX		(P4SEG_STORE_QUE + 0x04000000)
#endif
248
249 /*
250  * Called with interrupts disabled.
251  */
252 asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
253                                          unsigned long writeaccess,
254                                          unsigned long address)
255 {
256         pgd_t *pgd;
257         pud_t *pud;
258         pmd_t *pmd;
259         pte_t *pte;
260         pte_t entry;
261
262 #ifdef CONFIG_SH_KGDB
263         if (kgdb_nofault && kgdb_bus_err_hook)
264                 kgdb_bus_err_hook();
265 #endif
266
267         /*
268          * We don't take page faults for P1, P2, and parts of P4, these
269          * are always mapped, whether it be due to legacy behaviour in
270          * 29-bit mode, or due to PMB configuration in 32-bit mode.
271          */
272         if (address >= P3SEG && address < P3_ADDR_MAX) {
273                 pgd = pgd_offset_k(address);
274         } else {
275                 if (unlikely(address >= TASK_SIZE || !current->mm))
276                         return 1;
277
278                 pgd = pgd_offset(current->mm, address);
279         }
280
281         pud = pud_offset(pgd, address);
282         if (pud_none_or_clear_bad(pud))
283                 return 1;
284         pmd = pmd_offset(pud, address);
285         if (pmd_none_or_clear_bad(pmd))
286                 return 1;
287
288         pte = pte_offset_kernel(pmd, address);
289         entry = *pte;
290         if (unlikely(pte_none(entry) || pte_not_present(entry)))
291                 return 1;
292         if (unlikely(writeaccess && !pte_write(entry)))
293                 return 1;
294
295         if (writeaccess)
296                 entry = pte_mkdirty(entry);
297         entry = pte_mkyoung(entry);
298
299         set_pte(pte, entry);
300         update_mmu_cache(NULL, address, entry);
301
302         return 0;
303 }