Btrfs: Raise thresholds for metadata writeback
[linux-2.6] / arch / sparc / mm / fault.c
1 /*
2  * fault.c:  Page fault handlers for the Sparc.
3  *
4  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
5  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
6  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
7  */
8
9 #include <asm/head.h>
10
11 #include <linux/string.h>
12 #include <linux/types.h>
13 #include <linux/sched.h>
14 #include <linux/ptrace.h>
15 #include <linux/mman.h>
16 #include <linux/threads.h>
17 #include <linux/kernel.h>
18 #include <linux/signal.h>
19 #include <linux/mm.h>
20 #include <linux/smp.h>
21 #include <linux/interrupt.h>
22 #include <linux/module.h>
23 #include <linux/kdebug.h>
24
25 #include <asm/system.h>
26 #include <asm/page.h>
27 #include <asm/pgtable.h>
28 #include <asm/memreg.h>
29 #include <asm/openprom.h>
30 #include <asm/oplib.h>
31 #include <asm/smp.h>
32 #include <asm/traps.h>
33 #include <asm/uaccess.h>
34
extern int prom_node_root;

/*
 * Determined at boot time: the two values needed for setting up the
 * segment maps and page table entries (ptes).
 */
int num_segmaps;
int num_contexts;

/* Compared against sun4c_get_segmap() results in do_sun4c_fault(). */
int invalid_segment;

/* Various Virtual Address Cache (VAC) parameters found at boot time. */
int vac_size;
int vac_linesize;
int vac_do_hw_vac_flushes;
int vac_entries_per_context;
int vac_entries_per_segment;
int vac_entries_per_page;
49
50 /* Return how much physical memory we have.  */
51 unsigned long probe_memory(void)
52 {
53         unsigned long total = 0;
54         int i;
55
56         for (i = 0; sp_banks[i].num_bytes; i++)
57                 total += sp_banks[i].num_bytes;
58
59         return total;
60 }
61
62 extern void sun4c_complete_all_stores(void);
63
64 /* Whee, a level 15 NMI interrupt memory error.  Let's have fun... */
65 asmlinkage void sparc_lvl15_nmi(struct pt_regs *regs, unsigned long serr,
66                                 unsigned long svaddr, unsigned long aerr,
67                                 unsigned long avaddr)
68 {
69         sun4c_complete_all_stores();
70         printk("FAULT: NMI received\n");
71         printk("SREGS: Synchronous Error %08lx\n", serr);
72         printk("       Synchronous Vaddr %08lx\n", svaddr);
73         printk("      Asynchronous Error %08lx\n", aerr);
74         printk("      Asynchronous Vaddr %08lx\n", avaddr);
75         if (sun4c_memerr_reg)
76                 printk("     Memory Parity Error %08lx\n", *sun4c_memerr_reg);
77         printk("REGISTER DUMP:\n");
78         show_regs(regs);
79         prom_halt();
80 }
81
82 static void unhandled_fault(unsigned long, struct task_struct *,
83                 struct pt_regs *) __attribute__ ((noreturn));
84
85 static void unhandled_fault(unsigned long address, struct task_struct *tsk,
86                      struct pt_regs *regs)
87 {
88         if((unsigned long) address < PAGE_SIZE) {
89                 printk(KERN_ALERT
90                     "Unable to handle kernel NULL pointer dereference\n");
91         } else {
92                 printk(KERN_ALERT "Unable to handle kernel paging request "
93                        "at virtual address %08lx\n", address);
94         }
95         printk(KERN_ALERT "tsk->{mm,active_mm}->context = %08lx\n",
96                 (tsk->mm ? tsk->mm->context : tsk->active_mm->context));
97         printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %08lx\n",
98                 (tsk->mm ? (unsigned long) tsk->mm->pgd :
99                         (unsigned long) tsk->active_mm->pgd));
100         die_if_kernel("Oops", regs);
101 }
102
103 asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, 
104                             unsigned long address)
105 {
106         struct pt_regs regs;
107         unsigned long g2;
108         unsigned int insn;
109         int i;
110         
111         i = search_extables_range(ret_pc, &g2);
112         switch (i) {
113         case 3:
114                 /* load & store will be handled by fixup */
115                 return 3;
116
117         case 1:
118                 /* store will be handled by fixup, load will bump out */
119                 /* for _to_ macros */
120                 insn = *((unsigned int *) pc);
121                 if ((insn >> 21) & 1)
122                         return 1;
123                 break;
124
125         case 2:
126                 /* load will be handled by fixup, store will bump out */
127                 /* for _from_ macros */
128                 insn = *((unsigned int *) pc);
129                 if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15)
130                         return 2; 
131                 break; 
132
133         default:
134                 break;
135         };
136
137         memset(&regs, 0, sizeof (regs));
138         regs.pc = pc;
139         regs.npc = pc + 4;
140         __asm__ __volatile__(
141                 "rd %%psr, %0\n\t"
142                 "nop\n\t"
143                 "nop\n\t"
144                 "nop\n" : "=r" (regs.psr));
145         unhandled_fault(address, current, &regs);
146
147         /* Not reached */
148         return 0;
149 }
150
151 extern unsigned long safe_compute_effective_address(struct pt_regs *,
152                                                     unsigned int);
153
154 static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
155 {
156         unsigned int insn;
157
158         if (text_fault)
159                 return regs->pc;
160
161         if (regs->psr & PSR_PS) {
162                 insn = *(unsigned int *) regs->pc;
163         } else {
164                 __get_user(insn, (unsigned int *) regs->pc);
165         }
166
167         return safe_compute_effective_address(regs, insn);
168 }
169
170 asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
171                                unsigned long address)
172 {
173         struct vm_area_struct *vma;
174         struct task_struct *tsk = current;
175         struct mm_struct *mm = tsk->mm;
176         unsigned int fixup;
177         unsigned long g2;
178         siginfo_t info;
179         int from_user = !(regs->psr & PSR_PS);
180         int fault;
181
182         if(text_fault)
183                 address = regs->pc;
184
185         /*
186          * We fault-in kernel-space virtual memory on-demand. The
187          * 'reference' page table is init_mm.pgd.
188          *
189          * NOTE! We MUST NOT take any locks for this case. We may
190          * be in an interrupt or a critical region, and should
191          * only copy the information from the master page table,
192          * nothing more.
193          */
194         if (!ARCH_SUN4C_SUN4 && address >= TASK_SIZE)
195                 goto vmalloc_fault;
196
197         info.si_code = SEGV_MAPERR;
198
199         /*
200          * If we're in an interrupt or have no user
201          * context, we must not take the fault..
202          */
203         if (in_atomic() || !mm)
204                 goto no_context;
205
206         down_read(&mm->mmap_sem);
207
208         /*
209          * The kernel referencing a bad kernel pointer can lock up
210          * a sun4c machine completely, so we must attempt recovery.
211          */
212         if(!from_user && address >= PAGE_OFFSET)
213                 goto bad_area;
214
215         vma = find_vma(mm, address);
216         if(!vma)
217                 goto bad_area;
218         if(vma->vm_start <= address)
219                 goto good_area;
220         if(!(vma->vm_flags & VM_GROWSDOWN))
221                 goto bad_area;
222         if(expand_stack(vma, address))
223                 goto bad_area;
224         /*
225          * Ok, we have a good vm_area for this memory access, so
226          * we can handle it..
227          */
228 good_area:
229         info.si_code = SEGV_ACCERR;
230         if(write) {
231                 if(!(vma->vm_flags & VM_WRITE))
232                         goto bad_area;
233         } else {
234                 /* Allow reads even for write-only mappings */
235                 if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
236                         goto bad_area;
237         }
238
239         /*
240          * If for any reason at all we couldn't handle the fault,
241          * make sure we exit gracefully rather than endlessly redo
242          * the fault.
243          */
244         fault = handle_mm_fault(mm, vma, address, write);
245         if (unlikely(fault & VM_FAULT_ERROR)) {
246                 if (fault & VM_FAULT_OOM)
247                         goto out_of_memory;
248                 else if (fault & VM_FAULT_SIGBUS)
249                         goto do_sigbus;
250                 BUG();
251         }
252         if (fault & VM_FAULT_MAJOR)
253                 current->maj_flt++;
254         else
255                 current->min_flt++;
256         up_read(&mm->mmap_sem);
257         return;
258
259         /*
260          * Something tried to access memory that isn't in our memory map..
261          * Fix it, but check if it's kernel or user first..
262          */
263 bad_area:
264         up_read(&mm->mmap_sem);
265
266 bad_area_nosemaphore:
267         /* User mode accesses just cause a SIGSEGV */
268         if(from_user) {
269 #if 0
270                 printk("Fault whee %s [%d]: segfaults at %08lx pc=%08lx\n",
271                        tsk->comm, tsk->pid, address, regs->pc);
272 #endif
273                 info.si_signo = SIGSEGV;
274                 info.si_errno = 0;
275                 /* info.si_code set above to make clear whether
276                    this was a SEGV_MAPERR or SEGV_ACCERR fault.  */
277                 info.si_addr = (void __user *)compute_si_addr(regs, text_fault);
278                 info.si_trapno = 0;
279                 force_sig_info (SIGSEGV, &info, tsk);
280                 return;
281         }
282
283         /* Is this in ex_table? */
284 no_context:
285         g2 = regs->u_regs[UREG_G2];
286         if (!from_user && (fixup = search_extables_range(regs->pc, &g2))) {
287                 if (fixup > 10) { /* Values below are reserved for other things */
288                         extern const unsigned __memset_start[];
289                         extern const unsigned __memset_end[];
290                         extern const unsigned __csum_partial_copy_start[];
291                         extern const unsigned __csum_partial_copy_end[];
292
293 #ifdef DEBUG_EXCEPTIONS
294                         printk("Exception: PC<%08lx> faddr<%08lx>\n", regs->pc, address);
295                         printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
296                                 regs->pc, fixup, g2);
297 #endif
298                         if ((regs->pc >= (unsigned long)__memset_start &&
299                              regs->pc < (unsigned long)__memset_end) ||
300                             (regs->pc >= (unsigned long)__csum_partial_copy_start &&
301                              regs->pc < (unsigned long)__csum_partial_copy_end)) {
302                                 regs->u_regs[UREG_I4] = address;
303                                 regs->u_regs[UREG_I5] = regs->pc;
304                         }
305                         regs->u_regs[UREG_G2] = g2;
306                         regs->pc = fixup;
307                         regs->npc = regs->pc + 4;
308                         return;
309                 }
310         }
311         
312         unhandled_fault (address, tsk, regs);
313         do_exit(SIGKILL);
314
315 /*
316  * We ran out of memory, or some other thing happened to us that made
317  * us unable to handle the page fault gracefully.
318  */
319 out_of_memory:
320         up_read(&mm->mmap_sem);
321         printk("VM: killing process %s\n", tsk->comm);
322         if (from_user)
323                 do_group_exit(SIGKILL);
324         goto no_context;
325
326 do_sigbus:
327         up_read(&mm->mmap_sem);
328         info.si_signo = SIGBUS;
329         info.si_errno = 0;
330         info.si_code = BUS_ADRERR;
331         info.si_addr = (void __user *) compute_si_addr(regs, text_fault);
332         info.si_trapno = 0;
333         force_sig_info (SIGBUS, &info, tsk);
334         if (!from_user)
335                 goto no_context;
336
337 vmalloc_fault:
338         {
339                 /*
340                  * Synchronize this task's top level page-table
341                  * with the 'reference' page table.
342                  */
343                 int offset = pgd_index(address);
344                 pgd_t *pgd, *pgd_k;
345                 pmd_t *pmd, *pmd_k;
346
347                 pgd = tsk->active_mm->pgd + offset;
348                 pgd_k = init_mm.pgd + offset;
349
350                 if (!pgd_present(*pgd)) {
351                         if (!pgd_present(*pgd_k))
352                                 goto bad_area_nosemaphore;
353                         pgd_val(*pgd) = pgd_val(*pgd_k);
354                         return;
355                 }
356
357                 pmd = pmd_offset(pgd, address);
358                 pmd_k = pmd_offset(pgd_k, address);
359
360                 if (pmd_present(*pmd) || !pmd_present(*pmd_k))
361                         goto bad_area_nosemaphore;
362                 *pmd = *pmd_k;
363                 return;
364         }
365 }
366
367 asmlinkage void do_sun4c_fault(struct pt_regs *regs, int text_fault, int write,
368                                unsigned long address)
369 {
370         extern void sun4c_update_mmu_cache(struct vm_area_struct *,
371                                            unsigned long,pte_t);
372         extern pte_t *sun4c_pte_offset_kernel(pmd_t *,unsigned long);
373         struct task_struct *tsk = current;
374         struct mm_struct *mm = tsk->mm;
375         pgd_t *pgdp;
376         pte_t *ptep;
377
378         if (text_fault) {
379                 address = regs->pc;
380         } else if (!write &&
381                    !(regs->psr & PSR_PS)) {
382                 unsigned int insn, __user *ip;
383
384                 ip = (unsigned int __user *)regs->pc;
385                 if (!get_user(insn, ip)) {
386                         if ((insn & 0xc1680000) == 0xc0680000)
387                                 write = 1;
388                 }
389         }
390
391         if (!mm) {
392                 /* We are oopsing. */
393                 do_sparc_fault(regs, text_fault, write, address);
394                 BUG();  /* P3 Oops already, you bitch */
395         }
396
397         pgdp = pgd_offset(mm, address);
398         ptep = sun4c_pte_offset_kernel((pmd_t *) pgdp, address);
399
400         if (pgd_val(*pgdp)) {
401             if (write) {
402                 if ((pte_val(*ptep) & (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT))
403                                    == (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT)) {
404                         unsigned long flags;
405
406                         *ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED |
407                                       _SUN4C_PAGE_MODIFIED |
408                                       _SUN4C_PAGE_VALID |
409                                       _SUN4C_PAGE_DIRTY);
410
411                         local_irq_save(flags);
412                         if (sun4c_get_segmap(address) != invalid_segment) {
413                                 sun4c_put_pte(address, pte_val(*ptep));
414                                 local_irq_restore(flags);
415                                 return;
416                         }
417                         local_irq_restore(flags);
418                 }
419             } else {
420                 if ((pte_val(*ptep) & (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT))
421                                    == (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT)) {
422                         unsigned long flags;
423
424                         *ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED |
425                                       _SUN4C_PAGE_VALID);
426
427                         local_irq_save(flags);
428                         if (sun4c_get_segmap(address) != invalid_segment) {
429                                 sun4c_put_pte(address, pte_val(*ptep));
430                                 local_irq_restore(flags);
431                                 return;
432                         }
433                         local_irq_restore(flags);
434                 }
435             }
436         }
437
438         /* This conditional is 'interesting'. */
439         if (pgd_val(*pgdp) && !(write && !(pte_val(*ptep) & _SUN4C_PAGE_WRITE))
440             && (pte_val(*ptep) & _SUN4C_PAGE_VALID))
441                 /* Note: It is safe to not grab the MMAP semaphore here because
442                  *       we know that update_mmu_cache() will not sleep for
443                  *       any reason (at least not in the current implementation)
444                  *       and therefore there is no danger of another thread getting
445                  *       on the CPU and doing a shrink_mmap() on this vma.
446                  */
447                 sun4c_update_mmu_cache (find_vma(current->mm, address), address,
448                                         *ptep);
449         else
450                 do_sparc_fault(regs, text_fault, write, address);
451 }
452
453 /* This always deals with user addresses. */
454 static void force_user_fault(unsigned long address, int write)
455 {
456         struct vm_area_struct *vma;
457         struct task_struct *tsk = current;
458         struct mm_struct *mm = tsk->mm;
459         siginfo_t info;
460
461         info.si_code = SEGV_MAPERR;
462
463 #if 0
464         printk("wf<pid=%d,wr=%d,addr=%08lx>\n",
465                tsk->pid, write, address);
466 #endif
467         down_read(&mm->mmap_sem);
468         vma = find_vma(mm, address);
469         if(!vma)
470                 goto bad_area;
471         if(vma->vm_start <= address)
472                 goto good_area;
473         if(!(vma->vm_flags & VM_GROWSDOWN))
474                 goto bad_area;
475         if(expand_stack(vma, address))
476                 goto bad_area;
477 good_area:
478         info.si_code = SEGV_ACCERR;
479         if(write) {
480                 if(!(vma->vm_flags & VM_WRITE))
481                         goto bad_area;
482         } else {
483                 if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
484                         goto bad_area;
485         }
486         switch (handle_mm_fault(mm, vma, address, write)) {
487         case VM_FAULT_SIGBUS:
488         case VM_FAULT_OOM:
489                 goto do_sigbus;
490         }
491         up_read(&mm->mmap_sem);
492         return;
493 bad_area:
494         up_read(&mm->mmap_sem);
495 #if 0
496         printk("Window whee %s [%d]: segfaults at %08lx\n",
497                tsk->comm, tsk->pid, address);
498 #endif
499         info.si_signo = SIGSEGV;
500         info.si_errno = 0;
501         /* info.si_code set above to make clear whether
502            this was a SEGV_MAPERR or SEGV_ACCERR fault.  */
503         info.si_addr = (void __user *) address;
504         info.si_trapno = 0;
505         force_sig_info (SIGSEGV, &info, tsk);
506         return;
507
508 do_sigbus:
509         up_read(&mm->mmap_sem);
510         info.si_signo = SIGBUS;
511         info.si_errno = 0;
512         info.si_code = BUS_ADRERR;
513         info.si_addr = (void __user *) address;
514         info.si_trapno = 0;
515         force_sig_info (SIGBUS, &info, tsk);
516 }
517
518 void window_overflow_fault(void)
519 {
520         unsigned long sp;
521
522         sp = current_thread_info()->rwbuf_stkptrs[0];
523         if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
524                 force_user_fault(sp + 0x38, 1);
525         force_user_fault(sp, 1);
526 }
527
528 void window_underflow_fault(unsigned long sp)
529 {
530         if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
531                 force_user_fault(sp + 0x38, 0);
532         force_user_fault(sp, 0);
533 }
534
535 void window_ret_fault(struct pt_regs *regs)
536 {
537         unsigned long sp;
538
539         sp = regs->u_regs[UREG_FP];
540         if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
541                 force_user_fault(sp + 0x38, 0);
542         force_user_fault(sp, 0);
543 }