5 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
7 * Derived from "arch/i386/mm/fault.c"
8 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
10 * Modified by Cort Dougan and Paul Mackerras.
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <linux/signal.h>
20 #include <linux/sched.h>
21 #include <linux/kernel.h>
22 #include <linux/errno.h>
23 #include <linux/string.h>
24 #include <linux/types.h>
25 #include <linux/ptrace.h>
26 #include <linux/mman.h>
28 #include <linux/interrupt.h>
29 #include <linux/highmem.h>
30 #include <linux/module.h>
33 #include <asm/pgtable.h>
35 #include <asm/mmu_context.h>
36 #include <asm/system.h>
37 #include <asm/uaccess.h>
38 #include <asm/tlbflush.h>
40 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
41 extern void (*debugger)(struct pt_regs *);
42 extern void (*debugger_fault_handler)(struct pt_regs *);
43 extern int (*debugger_dabr_match)(struct pt_regs *);
44 int debugger_kernel_faults = 1;
47 unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
48 unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
49 unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
50 unsigned long pte_misses; /* updated by do_page_fault() */
51 unsigned long pte_errors; /* updated by do_page_fault() */
52 unsigned int probingmem;
55 * Check whether the instruction at regs->nip is a store using
56 * an update addressing form which will update r1.
58 static int store_updates_sp(struct pt_regs *regs)
62 if (get_user(inst, (unsigned int __user *)regs->nip))
64 /* check for 1 in the rA field */
65 if (((inst >> 16) & 0x1f) != 1)
67 /* check major opcode */
76 /* check minor opcode */
77 switch ((inst >> 1) & 0x3ff) {
81 case 695: /* stfsux */
82 case 759: /* stfdux */
90 * For 600- and 800-family processors, the error_code parameter is DSISR
91 * for a data fault, SRR1 for an instruction fault. For 400-family processors
92 * the error_code parameter is ESR for a data fault, 0 for an instruction fault.
95 int do_page_fault(struct pt_regs *regs, unsigned long address,
96 unsigned long error_code)
98 struct vm_area_struct * vma;
99 struct mm_struct *mm = current->mm;
101 int code = SEGV_MAPERR;
102 #if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
103 int is_write = error_code & ESR_DST;
108 * Fortunately the bit assignments in SRR1 for an instruction
109 * fault and DSISR for a data fault are mostly the same for the
110 * bits we are interested in. But there are some bits which
111 * indicate errors in DSISR but can validly be set in SRR1.
113 if (TRAP(regs) == 0x400)
114 error_code &= 0x48200000;
116 is_write = error_code & 0x02000000;
117 #endif /* CONFIG_4xx || CONFIG_BOOKE */
119 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
120 if (debugger_fault_handler && TRAP(regs) == 0x300) {
121 debugger_fault_handler(regs);
124 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
125 if (error_code & 0x00400000) {
127 if (debugger_dabr_match(regs))
130 #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
131 #endif /* CONFIG_XMON || CONFIG_KGDB */
133 if (in_atomic() || mm == NULL)
136 down_read(&mm->mmap_sem);
137 vma = find_vma(mm, address);
140 if (vma->vm_start <= address)
142 if (!(vma->vm_flags & VM_GROWSDOWN))
148 * N.B. The rs6000/xcoff ABI allows programs to access up to
149 * a few hundred bytes below the stack pointer.
150 * The kernel signal delivery code writes up to about 1.5kB
151 * below the stack pointer (r1) before decrementing it.
152 * The exec code can write slightly over 640kB to the stack
153 * before setting the user r1. Thus we allow the stack to
154 * expand to 1MB without further checks.
156 if (address + 0x100000 < vma->vm_end) {
157 /* get user regs even if this fault is in kernel mode */
158 struct pt_regs *uregs = current->thread.regs;
163 * A user-mode access to an address a long way below
164 * the stack pointer is only valid if the instruction
165 * is one which would update the stack pointer to the
166 * address accessed if the instruction completed,
167 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
168 * (or the byte, halfword, float or double forms).
170 * If we don't check this then any write to the area
171 * between the last mapped region and the stack will
172 * expand the stack rather than segfaulting.
174 if (address + 2048 < uregs->gpr[1]
175 && (!user_mode(regs) || !store_updates_sp(regs)))
178 if (expand_stack(vma, address))
183 #if defined(CONFIG_6xx)
184 if (error_code & 0x95700000)
185 /* an error such as lwarx to I/O controller space,
186 address matching DABR, eciwx, etc. */
188 #endif /* CONFIG_6xx */
189 #if defined(CONFIG_8xx)
190 /* The MPC8xx seems to always set 0x80000000, which is
191 * "undefined". Of those that can be set, this is the only
192 * one which seems bad.
194 if (error_code & 0x10000000)
195 /* Guarded storage error. */
197 #endif /* CONFIG_8xx */
201 if (!(vma->vm_flags & VM_WRITE))
203 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
204 /* an exec - 4xx/Book-E allows for per-page execute permission */
205 } else if (TRAP(regs) == 0x400) {
209 /* It would be nice to actually enforce the VM execute
210 permission on CPUs which can do so, but far too
211 much stuff in userspace doesn't get the permissions
212 right, so we let any page be executed for now. */
213 if (! (vma->vm_flags & VM_EXEC))
217 /* Since 4xx/Book-E supports per-page execute permission,
218 * we lazily flush dcache to icache. */
220 if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
221 struct page *page = pte_page(*ptep);
223 if (! test_bit(PG_arch_1, &page->flags)) {
224 flush_dcache_icache_page(page);
225 set_bit(PG_arch_1, &page->flags);
227 pte_update(ptep, 0, _PAGE_HWEXEC);
230 up_read(&mm->mmap_sem);
238 /* protection fault */
239 if (error_code & 0x08000000)
241 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
246 * If for any reason at all we couldn't handle the fault,
247 * make sure we exit gracefully rather than endlessly redo the fault.
251 switch (handle_mm_fault(mm, vma, address, is_write)) {
258 case VM_FAULT_SIGBUS:
266 up_read(&mm->mmap_sem);
268 * keep track of tlb+htab misses that are good addrs but
269 * just need pte's created via handle_mm_fault()
276 up_read(&mm->mmap_sem);
279 /* User mode accesses cause a SIGSEGV */
280 if (user_mode(regs)) {
281 info.si_signo = SIGSEGV;
284 info.si_addr = (void __user *) address;
285 force_sig_info(SIGSEGV, &info, current);
292 * We ran out of memory, or some other thing happened to us that made
293 * us unable to handle the page fault gracefully.
296 up_read(&mm->mmap_sem);
297 if (current->pid == 1) {
299 down_read(&mm->mmap_sem);
302 printk("VM: killing process %s\n", current->comm);
308 up_read(&mm->mmap_sem);
309 info.si_signo = SIGBUS;
311 info.si_code = BUS_ADRERR;
312 info.si_addr = (void __user *)address;
313 force_sig_info (SIGBUS, &info, current);
314 if (!user_mode(regs))
320 * bad_page_fault is called when we have a bad access from the kernel.
321 * It is called from the DSI and ISI handlers in head.S and from some
322 * of the procedures in traps.c.
325 bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
327 const struct exception_table_entry *entry;
329 /* Are we prepared to handle this fault? */
330 if ((entry = search_exception_tables(regs->nip)) != NULL) {
331 regs->nip = entry->fixup;
335 /* kernel has accessed a bad area */
336 #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
337 if (debugger_kernel_faults)
340 die("kernel access of bad area", regs, sig);
345 /* pgtable.h claims that some functions exist generically, but I
346 * can't find them...
348 pte_t *va_to_pte(unsigned long address)
354 if (address < TASK_SIZE)
357 dir = pgd_offset(&init_mm, address);
359 pmd = pmd_offset(dir, address & PAGE_MASK);
360 if (pmd && pmd_present(*pmd)) {
361 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
362 if (pte && pte_present(*pte))
369 unsigned long va_to_phys(unsigned long address)
373 pte = va_to_pte(address);
375 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
380 print_8xx_pte(struct mm_struct *mm, unsigned long addr)
386 printk(" pte @ 0x%8lx: ", addr);
387 pgd = pgd_offset(mm, addr & PAGE_MASK);
389 pmd = pmd_offset(pgd, addr & PAGE_MASK);
390 if (pmd && pmd_present(*pmd)) {
391 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
393 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
394 (long)pgd, (long)pte, (long)pte_val(*pte));
395 #define pp ((long)pte_val(*pte))
396 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
400 (pp>>3)&1, /* small */
401 (pp>>2)&1, /* shared */
402 (pp>>1)&1, /* cache inhibit */
421 get_8xx_pte(struct mm_struct *mm, unsigned long addr)
428 pgd = pgd_offset(mm, addr & PAGE_MASK);
430 pmd = pmd_offset(pgd, addr & PAGE_MASK);
431 if (pmd && pmd_present(*pmd)) {
432 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
434 retval = (int)pte_val(*pte);
440 #endif /* CONFIG_8xx */