Merge branch 'cfq' of git://git.kernel.dk/data/git/linux-2.6-block
[linux-2.6] / arch / sh / mm / fault.c
1 /*
2  * Page fault handler for SH with an MMU.
3  *
4  *  Copyright (C) 1999  Niibe Yutaka
5  *  Copyright (C) 2003 - 2007  Paul Mundt
6  *
7  *  Based on linux/arch/i386/mm/fault.c:
8  *   Copyright (C) 1995  Linus Torvalds
9  *
10  * This file is subject to the terms and conditions of the GNU General Public
11  * License.  See the file "COPYING" in the main directory of this archive
12  * for more details.
13  */
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <asm/system.h>
19 #include <asm/mmu_context.h>
20 #include <asm/tlbflush.h>
21 #include <asm/kgdb.h>
22
23 /*
24  * This routine handles page faults.  It determines the address,
25  * and the problem, and then passes it off to one of the appropriate
26  * routines.
27  */
28 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
29                                         unsigned long writeaccess,
30                                         unsigned long address)
31 {
32         struct task_struct *tsk;
33         struct mm_struct *mm;
34         struct vm_area_struct * vma;
35         int si_code;
36         int fault;
37         siginfo_t info;
38
39         trace_hardirqs_on();
40         local_irq_enable();
41
42 #ifdef CONFIG_SH_KGDB
43         if (kgdb_nofault && kgdb_bus_err_hook)
44                 kgdb_bus_err_hook();
45 #endif
46
47         tsk = current;
48         mm = tsk->mm;
49         si_code = SEGV_MAPERR;
50
51         if (unlikely(address >= TASK_SIZE)) {
52                 /*
53                  * Synchronize this task's top level page-table
54                  * with the 'reference' page table.
55                  *
56                  * Do _not_ use "tsk" here. We might be inside
57                  * an interrupt in the middle of a task switch..
58                  */
59                 int offset = pgd_index(address);
60                 pgd_t *pgd, *pgd_k;
61                 pud_t *pud, *pud_k;
62                 pmd_t *pmd, *pmd_k;
63
64                 pgd = get_TTB() + offset;
65                 pgd_k = swapper_pg_dir + offset;
66
67                 /* This will never happen with the folded page table. */
68                 if (!pgd_present(*pgd)) {
69                         if (!pgd_present(*pgd_k))
70                                 goto bad_area_nosemaphore;
71                         set_pgd(pgd, *pgd_k);
72                         return;
73                 }
74
75                 pud = pud_offset(pgd, address);
76                 pud_k = pud_offset(pgd_k, address);
77                 if (pud_present(*pud) || !pud_present(*pud_k))
78                         goto bad_area_nosemaphore;
79                 set_pud(pud, *pud_k);
80
81                 pmd = pmd_offset(pud, address);
82                 pmd_k = pmd_offset(pud_k, address);
83                 if (pmd_present(*pmd) || !pmd_present(*pmd_k))
84                         goto bad_area_nosemaphore;
85                 set_pmd(pmd, *pmd_k);
86
87                 return;
88         }
89
90         /*
91          * If we're in an interrupt or have no user
92          * context, we must not take the fault..
93          */
94         if (in_atomic() || !mm)
95                 goto no_context;
96
97         down_read(&mm->mmap_sem);
98
99         vma = find_vma(mm, address);
100         if (!vma)
101                 goto bad_area;
102         if (vma->vm_start <= address)
103                 goto good_area;
104         if (!(vma->vm_flags & VM_GROWSDOWN))
105                 goto bad_area;
106         if (expand_stack(vma, address))
107                 goto bad_area;
108 /*
109  * Ok, we have a good vm_area for this memory access, so
110  * we can handle it..
111  */
112 good_area:
113         si_code = SEGV_ACCERR;
114         if (writeaccess) {
115                 if (!(vma->vm_flags & VM_WRITE))
116                         goto bad_area;
117         } else {
118                 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
119                         goto bad_area;
120         }
121
122         /*
123          * If for any reason at all we couldn't handle the fault,
124          * make sure we exit gracefully rather than endlessly redo
125          * the fault.
126          */
127 survive:
128         fault = handle_mm_fault(mm, vma, address, writeaccess);
129         if (unlikely(fault & VM_FAULT_ERROR)) {
130                 if (fault & VM_FAULT_OOM)
131                         goto out_of_memory;
132                 else if (fault & VM_FAULT_SIGBUS)
133                         goto do_sigbus;
134                 BUG();
135         }
136         if (fault & VM_FAULT_MAJOR)
137                 tsk->maj_flt++;
138         else
139                 tsk->min_flt++;
140
141         up_read(&mm->mmap_sem);
142         return;
143
144 /*
145  * Something tried to access memory that isn't in our memory map..
146  * Fix it, but check if it's kernel or user first..
147  */
148 bad_area:
149         up_read(&mm->mmap_sem);
150
151 bad_area_nosemaphore:
152         if (user_mode(regs)) {
153                 info.si_signo = SIGSEGV;
154                 info.si_errno = 0;
155                 info.si_code = si_code;
156                 info.si_addr = (void *) address;
157                 force_sig_info(SIGSEGV, &info, tsk);
158                 return;
159         }
160
161 no_context:
162         /* Are we prepared to handle this kernel fault?  */
163         if (fixup_exception(regs))
164                 return;
165
166 /*
167  * Oops. The kernel tried to access some bad page. We'll have to
168  * terminate things with extreme prejudice.
169  *
170  */
171
172         bust_spinlocks(1);
173
174         if (oops_may_print()) {
175                 __typeof__(pte_val(__pte(0))) page;
176
177                 if (address < PAGE_SIZE)
178                         printk(KERN_ALERT "Unable to handle kernel NULL "
179                                           "pointer dereference");
180                 else
181                         printk(KERN_ALERT "Unable to handle kernel paging "
182                                           "request");
183                 printk(" at virtual address %08lx\n", address);
184                 printk(KERN_ALERT "pc = %08lx\n", regs->pc);
185                 page = (unsigned long)get_TTB();
186                 if (page) {
187                         page = ((__typeof__(page) *) __va(page))[address >>
188                                                                  PGDIR_SHIFT];
189                         printk(KERN_ALERT "*pde = %08lx\n", page);
190                         if (page & _PAGE_PRESENT) {
191                                 page &= PAGE_MASK;
192                                 address &= 0x003ff000;
193                                 page = ((__typeof__(page) *)
194                                                 __va(page))[address >>
195                                                             PAGE_SHIFT];
196                                 printk(KERN_ALERT "*pte = %08lx\n", page);
197                         }
198                 }
199         }
200
201         die("Oops", regs, writeaccess);
202         bust_spinlocks(0);
203         do_exit(SIGKILL);
204
205 /*
206  * We ran out of memory, or some other thing happened to us that made
207  * us unable to handle the page fault gracefully.
208  */
209 out_of_memory:
210         up_read(&mm->mmap_sem);
211         if (is_init(current)) {
212                 yield();
213                 down_read(&mm->mmap_sem);
214                 goto survive;
215         }
216         printk("VM: killing process %s\n", tsk->comm);
217         if (user_mode(regs))
218                 do_exit(SIGKILL);
219         goto no_context;
220
221 do_sigbus:
222         up_read(&mm->mmap_sem);
223
224         /*
225          * Send a sigbus, regardless of whether we were in kernel
226          * or user mode.
227          */
228         info.si_signo = SIGBUS;
229         info.si_errno = 0;
230         info.si_code = BUS_ADRERR;
231         info.si_addr = (void *)address;
232         force_sig_info(SIGBUS, &info, tsk);
233
234         /* Kernel mode? Handle exceptions or die */
235         if (!user_mode(regs))
236                 goto no_context;
237 }
238
239 #ifdef CONFIG_SH_STORE_QUEUES
240 /*
241  * This is a special case for the SH-4 store queues, as pages for this
242  * space still need to be faulted in before it's possible to flush the
243  * store queue cache for writeout to the remapped region.
244  */
245 #define P3_ADDR_MAX             (P4SEG_STORE_QUE + 0x04000000)
246 #else
247 #define P3_ADDR_MAX             P4SEG
248 #endif
249
250 /*
251  * Called with interrupts disabled.
252  */
253 asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
254                                          unsigned long writeaccess,
255                                          unsigned long address)
256 {
257         pgd_t *pgd;
258         pud_t *pud;
259         pmd_t *pmd;
260         pte_t *pte;
261         pte_t entry;
262         struct mm_struct *mm = current->mm;
263         spinlock_t *ptl = NULL;
264         int ret = 1;
265
266 #ifdef CONFIG_SH_KGDB
267         if (kgdb_nofault && kgdb_bus_err_hook)
268                 kgdb_bus_err_hook();
269 #endif
270
271         /*
272          * We don't take page faults for P1, P2, and parts of P4, these
273          * are always mapped, whether it be due to legacy behaviour in
274          * 29-bit mode, or due to PMB configuration in 32-bit mode.
275          */
276         if (address >= P3SEG && address < P3_ADDR_MAX) {
277                 pgd = pgd_offset_k(address);
278                 mm = NULL;
279         } else {
280                 if (unlikely(address >= TASK_SIZE || !mm))
281                         return 1;
282
283                 pgd = pgd_offset(mm, address);
284         }
285
286         pud = pud_offset(pgd, address);
287         if (pud_none_or_clear_bad(pud))
288                 return 1;
289         pmd = pmd_offset(pud, address);
290         if (pmd_none_or_clear_bad(pmd))
291                 return 1;
292
293         if (mm)
294                 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
295         else
296                 pte = pte_offset_kernel(pmd, address);
297
298         entry = *pte;
299         if (unlikely(pte_none(entry) || pte_not_present(entry)))
300                 goto unlock;
301         if (unlikely(writeaccess && !pte_write(entry)))
302                 goto unlock;
303
304         if (writeaccess)
305                 entry = pte_mkdirty(entry);
306         entry = pte_mkyoung(entry);
307
308 #ifdef CONFIG_CPU_SH4
309         /*
310          * ITLB is not affected by "ldtlb" instruction.
311          * So, we need to flush the entry by ourselves.
312          */
313         local_flush_tlb_one(get_asid(), address & PAGE_MASK);
314 #endif
315
316         set_pte(pte, entry);
317         update_mmu_cache(NULL, address, entry);
318         ret = 0;
319 unlock:
320         if (mm)
321                 pte_unmap_unlock(pte, ptl);
322         return ret;
323 }