/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2008  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/marker.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/kgdb.h>

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
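 *
 * 'writeaccess' is non-zero for faults caused by a write access, and
 * 'address' is the faulting virtual address, both handed down from the
 * low-level exception entry code.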
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long writeaccess,
					unsigned long address)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	int si_code;
	int fault;
	siginfo_t info;

	/*
	 * We don't bother with any notifier callbacks here, as they are
	 * all handled through the __do_page_fault() fast-path.
	 */

	tsk = current;
	si_code = SEGV_MAPERR;

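	/*
	 * Faults above TASK_SIZE are kernel-mode faults (e.g. in the
	 * vmalloc range); these are fixed up from the reference page
	 * table and never touch the process mm.
	 */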
	if (unlikely(address >= TASK_SIZE)) {
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;

		pgd = get_TTB() + offset;
		pgd_k = swapper_pg_dir + offset;

		if (!pgd_present(*pgd)) {
			if (!pgd_present(*pgd_k))
				goto bad_area_nosemaphore;
			set_pgd(pgd, *pgd_k);
			return;
		}

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);

		if (!pud_present(*pud)) {
			if (!pud_present(*pud_k))
				goto bad_area_nosemaphore;
			set_pud(pud, *pud_k);
			return;
		}

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
			goto bad_area_nosemaphore;
		set_pmd(pmd, *pmd_k);

		return;
	}

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK) {
		trace_hardirqs_on();
		local_irq_enable();
	}

	mm = tsk->mm;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);

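	/*
	 * find_vma() returns the first VMA ending above the faulting
	 * address; if it doesn't actually contain the address, the fault
	 * is only valid when the VMA is a stack that can be grown
	 * downwards to cover it.
	 */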
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
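	/*
	 * The VMA covers the address; any remaining failure is an access
	 * rights problem rather than a missing mapping.
	 */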
	si_code = SEGV_ACCERR;
	if (writeaccess) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
survive:
	fault = handle_mm_fault(mm, vma, address, writeaccess);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
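	/* Account the fault: major faults needed I/O, minor faults did not. */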
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return;

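	/* Next, see whether a trapped I/O access handler can fix up the fault. */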
	if (handle_trapped_io(regs, address))
		return;
/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	bust_spinlocks(1);

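	/*
	 * Dump what we can find about the faulting address before dying:
	 * the first-level entry out of the TTB and, if that is present,
	 * the PTE it points to.
	 */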
	if (oops_may_print()) {
		unsigned long page;

		if (address < PAGE_SIZE)
			printk(KERN_ALERT "Unable to handle kernel NULL "
					  "pointer dereference");
		else
			printk(KERN_ALERT "Unable to handle kernel paging "
					  "request");
		printk(" at virtual address %08lx\n", address);
		printk(KERN_ALERT "pc = %08lx\n", regs->pc);
		page = (unsigned long)get_TTB();
		if (page) {
			page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
			printk(KERN_ALERT "*pde = %08lx\n", page);
			if (page & _PAGE_PRESENT) {
				page &= PAGE_MASK;
				address &= 0x003ff000;
				page = ((__typeof__(page) *)
						__va(page))[address >>
							    PAGE_SHIFT];
				printk(KERN_ALERT "*pte = %08lx\n", page);
			}
		}
	}

	die("Oops", regs, writeaccess);
	bust_spinlocks(0);
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
}

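/*
 * Notify instrumentation of the trap and give kprobes a chance to
 * handle the fault.  Returns non-zero if a kprobe fault handler
 * consumed the fault.
 */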
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	trace_mark(kernel_arch_trap_entry, "trap_id %d ip #p%ld",
		   trap >> 5, instruction_pointer(regs));

#ifdef CONFIG_KPROBES
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}
#endif

	return ret;
}

#ifdef CONFIG_SH_STORE_QUEUES
/*
 * This is a special case for the SH-4 store queues, as pages for this
 * space still need to be faulted in before it's possible to flush the
 * store queue cache for writeout to the remapped region.
 */
#define P3_ADDR_MAX		(P4SEG_STORE_QUE + 0x04000000)
#else
#define P3_ADDR_MAX		P4SEG
#endif

/*
 * Called with interrupts disabled.
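 *
 * This is the fast-path TLB miss handler: it tries to set up the TLB
 * entry directly from the page tables.  It returns 0 when the entry
 * could be loaded here, and non-zero when the fault has to be passed
 * on to do_page_fault() above.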
 */
asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
					 unsigned long writeaccess,
					 unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	int ret = 0;

	if (notify_page_fault(regs, lookup_exception_vector()))
		goto out;

#ifdef CONFIG_SH_KGDB
	if (kgdb_nofault && kgdb_bus_err_hook)
		kgdb_bus_err_hook();
#endif

	ret = 1;

	/*
	 * We don't take page faults for P1, P2, and parts of P4, these
	 * are always mapped, whether it be due to legacy behaviour in
	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
	 */
	if (address >= P3SEG && address < P3_ADDR_MAX) {
		pgd = pgd_offset_k(address);
	} else {
		if (unlikely(address >= TASK_SIZE || !current->mm))
			goto out;

		pgd = pgd_offset(current->mm, address);
	}

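	/*
	 * Walk the page tables; bail out to the slow path if any level
	 * is missing, the PTE is not present, or a write is attempted
	 * on a read-only entry.
	 */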
	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
		goto out;
	pmd = pmd_offset(pud, address);
	if (pmd_none_or_clear_bad(pmd))
		goto out;
	pte = pte_offset_kernel(pmd, address);
	entry = *pte;
	if (unlikely(pte_none(entry) || pte_not_present(entry)))
		goto out;
	if (unlikely(writeaccess && !pte_write(entry)))
		goto out;

	if (writeaccess)
		entry = pte_mkdirty(entry);
	entry = pte_mkyoung(entry);

#if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP)
	/*
	 * ITLB is not affected by "ldtlb" instruction.
	 * So, we need to flush the entry by ourselves.
	 */
	local_flush_tlb_one(get_asid(), address & PAGE_MASK);
#endif

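	/* Write the young/dirty PTE back and load it into the TLB. */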
	set_pte(pte, entry);
	update_mmu_cache(NULL, address, entry);

	ret = 0;
out:
	trace_mark(kernel_arch_trap_exit, MARK_NOARGS);
	return ret;
}