sh: Kill off duplicate page fault notifiers in slow path.
[linux-2.6] / arch / sh / mm / fault_32.c
1 /*
2  * Page fault handler for SH with an MMU.
3  *
4  *  Copyright (C) 1999  Niibe Yutaka
5  *  Copyright (C) 2003 - 2008  Paul Mundt
6  *
7  *  Based on linux/arch/i386/mm/fault.c:
8  *   Copyright (C) 1995  Linus Torvalds
9  *
10  * This file is subject to the terms and conditions of the GNU General Public
11  * License.  See the file "COPYING" in the main directory of this archive
12  * for more details.
13  */
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <asm/io_trapped.h>
19 #include <asm/system.h>
20 #include <asm/mmu_context.h>
21 #include <asm/tlbflush.h>
22 #include <asm/kgdb.h>
23
/*
 * Offer a fault to the kprobes fault handler.
 *
 * Only faults taken outside of user mode are considered, and only while a
 * kprobe is actually running.  Preemption is disabled across the
 * kprobe_running()/kprobe_fault_handler() pair.
 *
 * Returns 1 if kprobe_fault_handler() claimed the fault, 0 otherwise
 * (always 0 when CONFIG_KPROBES is not set).
 */
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
#ifdef CONFIG_KPROBES
	int claimed = 0;

	/* User-mode faults are never handed to kprobes. */
	if (user_mode(regs))
		return 0;

	preempt_disable();
	if (kprobe_running()) {
		if (kprobe_fault_handler(regs, trap))
			claimed = 1;
	}
	preempt_enable();

	return claimed;
#else
	return 0;
#endif
}
39
40 /*
41  * This routine handles page faults.  It determines the address,
42  * and the problem, and then passes it off to one of the appropriate
43  * routines.
44  */
45 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
46                                         unsigned long writeaccess,
47                                         unsigned long address)
48 {
49         struct task_struct *tsk;
50         struct mm_struct *mm;
51         struct vm_area_struct * vma;
52         int si_code;
53         int fault;
54         siginfo_t info;
55
56         /*
57          * We don't bother with any notifier callbacks here, as they are
58          * all handled through the __do_page_fault() fast-path.
59          */
60
61         tsk = current;
62         si_code = SEGV_MAPERR;
63
64         if (unlikely(address >= TASK_SIZE)) {
65                 /*
66                  * Synchronize this task's top level page-table
67                  * with the 'reference' page table.
68                  *
69                  * Do _not_ use "tsk" here. We might be inside
70                  * an interrupt in the middle of a task switch..
71                  */
72                 int offset = pgd_index(address);
73                 pgd_t *pgd, *pgd_k;
74                 pud_t *pud, *pud_k;
75                 pmd_t *pmd, *pmd_k;
76
77                 pgd = get_TTB() + offset;
78                 pgd_k = swapper_pg_dir + offset;
79
80                 if (!pgd_present(*pgd)) {
81                         if (!pgd_present(*pgd_k))
82                                 goto bad_area_nosemaphore;
83                         set_pgd(pgd, *pgd_k);
84                         return;
85                 }
86
87                 pud = pud_offset(pgd, address);
88                 pud_k = pud_offset(pgd_k, address);
89
90                 if (!pud_present(*pud)) {
91                         if (!pud_present(*pud_k))
92                                 goto bad_area_nosemaphore;
93                         set_pud(pud, *pud_k);
94                         return;
95                 }
96
97                 pmd = pmd_offset(pud, address);
98                 pmd_k = pmd_offset(pud_k, address);
99                 if (pmd_present(*pmd) || !pmd_present(*pmd_k))
100                         goto bad_area_nosemaphore;
101                 set_pmd(pmd, *pmd_k);
102
103                 return;
104         }
105
106         /* Only enable interrupts if they were on before the fault */
107         if ((regs->sr & SR_IMASK) != SR_IMASK) {
108                 trace_hardirqs_on();
109                 local_irq_enable();
110         }
111
112         mm = tsk->mm;
113
114         /*
115          * If we're in an interrupt or have no user
116          * context, we must not take the fault..
117          */
118         if (in_atomic() || !mm)
119                 goto no_context;
120
121         down_read(&mm->mmap_sem);
122
123         vma = find_vma(mm, address);
124         if (!vma)
125                 goto bad_area;
126         if (vma->vm_start <= address)
127                 goto good_area;
128         if (!(vma->vm_flags & VM_GROWSDOWN))
129                 goto bad_area;
130         if (expand_stack(vma, address))
131                 goto bad_area;
132 /*
133  * Ok, we have a good vm_area for this memory access, so
134  * we can handle it..
135  */
136 good_area:
137         si_code = SEGV_ACCERR;
138         if (writeaccess) {
139                 if (!(vma->vm_flags & VM_WRITE))
140                         goto bad_area;
141         } else {
142                 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
143                         goto bad_area;
144         }
145
146         /*
147          * If for any reason at all we couldn't handle the fault,
148          * make sure we exit gracefully rather than endlessly redo
149          * the fault.
150          */
151 survive:
152         fault = handle_mm_fault(mm, vma, address, writeaccess);
153         if (unlikely(fault & VM_FAULT_ERROR)) {
154                 if (fault & VM_FAULT_OOM)
155                         goto out_of_memory;
156                 else if (fault & VM_FAULT_SIGBUS)
157                         goto do_sigbus;
158                 BUG();
159         }
160         if (fault & VM_FAULT_MAJOR)
161                 tsk->maj_flt++;
162         else
163                 tsk->min_flt++;
164
165         up_read(&mm->mmap_sem);
166         return;
167
168 /*
169  * Something tried to access memory that isn't in our memory map..
170  * Fix it, but check if it's kernel or user first..
171  */
172 bad_area:
173         up_read(&mm->mmap_sem);
174
175 bad_area_nosemaphore:
176         if (user_mode(regs)) {
177                 info.si_signo = SIGSEGV;
178                 info.si_errno = 0;
179                 info.si_code = si_code;
180                 info.si_addr = (void *) address;
181                 force_sig_info(SIGSEGV, &info, tsk);
182                 return;
183         }
184
185 no_context:
186         /* Are we prepared to handle this kernel fault?  */
187         if (fixup_exception(regs))
188                 return;
189
190         if (handle_trapped_io(regs, address))
191                 return;
192 /*
193  * Oops. The kernel tried to access some bad page. We'll have to
194  * terminate things with extreme prejudice.
195  *
196  */
197
198         bust_spinlocks(1);
199
200         if (oops_may_print()) {
201                 unsigned long page;
202
203                 if (address < PAGE_SIZE)
204                         printk(KERN_ALERT "Unable to handle kernel NULL "
205                                           "pointer dereference");
206                 else
207                         printk(KERN_ALERT "Unable to handle kernel paging "
208                                           "request");
209                 printk(" at virtual address %08lx\n", address);
210                 printk(KERN_ALERT "pc = %08lx\n", regs->pc);
211                 page = (unsigned long)get_TTB();
212                 if (page) {
213                         page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
214                         printk(KERN_ALERT "*pde = %08lx\n", page);
215                         if (page & _PAGE_PRESENT) {
216                                 page &= PAGE_MASK;
217                                 address &= 0x003ff000;
218                                 page = ((__typeof__(page) *)
219                                                 __va(page))[address >>
220                                                             PAGE_SHIFT];
221                                 printk(KERN_ALERT "*pte = %08lx\n", page);
222                         }
223                 }
224         }
225
226         die("Oops", regs, writeaccess);
227         bust_spinlocks(0);
228         do_exit(SIGKILL);
229
230 /*
231  * We ran out of memory, or some other thing happened to us that made
232  * us unable to handle the page fault gracefully.
233  */
234 out_of_memory:
235         up_read(&mm->mmap_sem);
236         if (is_global_init(current)) {
237                 yield();
238                 down_read(&mm->mmap_sem);
239                 goto survive;
240         }
241         printk("VM: killing process %s\n", tsk->comm);
242         if (user_mode(regs))
243                 do_group_exit(SIGKILL);
244         goto no_context;
245
246 do_sigbus:
247         up_read(&mm->mmap_sem);
248
249         /*
250          * Send a sigbus, regardless of whether we were in kernel
251          * or user mode.
252          */
253         info.si_signo = SIGBUS;
254         info.si_errno = 0;
255         info.si_code = BUS_ADRERR;
256         info.si_addr = (void *)address;
257         force_sig_info(SIGBUS, &info, tsk);
258
259         /* Kernel mode? Handle exceptions or die */
260         if (!user_mode(regs))
261                 goto no_context;
262 }
263
/*
 * P3_ADDR_MAX is the exclusive upper bound of the range, starting at
 * P3SEG, for which __do_page_fault() walks the kernel page tables.
 *
 * Normally that range stops at P4SEG.  The SH-4 store queues are a
 * special case: pages for that space still need to be faulted in before
 * it's possible to flush the store queue cache for writeout to the
 * remapped region, so with CONFIG_SH_STORE_QUEUES the bound is pushed
 * out to 0x04000000 bytes past P4SEG_STORE_QUE.
 */
#ifndef CONFIG_SH_STORE_QUEUES
#define P3_ADDR_MAX		P4SEG
#else
#define P3_ADDR_MAX		(P4SEG_STORE_QUE + 0x04000000)
#endif
274
275 /*
276  * Called with interrupts disabled.
277  */
278 asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
279                                          unsigned long writeaccess,
280                                          unsigned long address)
281 {
282         pgd_t *pgd;
283         pud_t *pud;
284         pmd_t *pmd;
285         pte_t *pte;
286         pte_t entry;
287
288         if (notify_page_fault(regs, lookup_exception_vector()))
289                 return 0;
290
291 #ifdef CONFIG_SH_KGDB
292         if (kgdb_nofault && kgdb_bus_err_hook)
293                 kgdb_bus_err_hook();
294 #endif
295
296         /*
297          * We don't take page faults for P1, P2, and parts of P4, these
298          * are always mapped, whether it be due to legacy behaviour in
299          * 29-bit mode, or due to PMB configuration in 32-bit mode.
300          */
301         if (address >= P3SEG && address < P3_ADDR_MAX) {
302                 pgd = pgd_offset_k(address);
303         } else {
304                 if (unlikely(address >= TASK_SIZE || !current->mm))
305                         return 1;
306
307                 pgd = pgd_offset(current->mm, address);
308         }
309
310         pud = pud_offset(pgd, address);
311         if (pud_none_or_clear_bad(pud))
312                 return 1;
313         pmd = pmd_offset(pud, address);
314         if (pmd_none_or_clear_bad(pmd))
315                 return 1;
316
317         pte = pte_offset_kernel(pmd, address);
318         entry = *pte;
319         if (unlikely(pte_none(entry) || pte_not_present(entry)))
320                 return 1;
321         if (unlikely(writeaccess && !pte_write(entry)))
322                 return 1;
323
324         if (writeaccess)
325                 entry = pte_mkdirty(entry);
326         entry = pte_mkyoung(entry);
327
328 #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
329         /*
330          * ITLB is not affected by "ldtlb" instruction.
331          * So, we need to flush the entry by ourselves.
332          */
333         local_flush_tlb_one(get_asid(), address & PAGE_MASK);
334 #endif
335
336         set_pte(pte, entry);
337         update_mmu_cache(NULL, address, entry);
338
339         return 0;
340 }