git.oblomov.eu Git - linux-2.6/blob - arch/x86/kernel/traps.c

   1 /*
   2  *  Copyright (C) 1991, 1992  Linus Torvalds
   3  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
   4  *
   5  *  Pentium III FXSR, SSE support
   6  *      Gareth Hughes <gareth@valinux.com>, May 2000
   7  */
   8
   9 /*
  10  * Handle hardware traps and faults.
  11  */
  12 #include <linux/interrupt.h>
  13 #include <linux/kallsyms.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/kprobes.h>
  16 #include <linux/uaccess.h>
  17 #include <linux/utsname.h>
  18 #include <linux/kdebug.h>
  19 #include <linux/kernel.h>
  20 #include <linux/module.h>
  21 #include <linux/ptrace.h>
  22 #include <linux/string.h>
  23 #include <linux/delay.h>
  24 #include <linux/errno.h>
  25 #include <linux/kexec.h>
  26 #include <linux/sched.h>
  27 #include <linux/timer.h>
  28 #include <linux/init.h>
  29 #include <linux/bug.h>
  30 #include <linux/nmi.h>
  31 #include <linux/mm.h>
  32 #include <linux/smp.h>
  33 #include <linux/io.h>
  34
  35 #ifdef CONFIG_EISA
  36 #include <linux/ioport.h>
  37 #include <linux/eisa.h>
  38 #endif
  39
  40 #ifdef CONFIG_MCA
  41 #include <linux/mca.h>
  42 #endif
  43
  44 #if defined(CONFIG_EDAC)
  45 #include <linux/edac.h>
  46 #endif
  47
  48 #include <asm/stacktrace.h>
  49 #include <asm/processor.h>
  50 #include <asm/debugreg.h>
  51 #include <asm/atomic.h>
  52 #include <asm/system.h>
  53 #include <asm/traps.h>
  54 #include <asm/desc.h>
  55 #include <asm/i387.h>
  56
  57 #include <mach_traps.h>
  58
  59 #ifdef CONFIG_X86_64
  60 #include <asm/pgalloc.h>
  61 #include <asm/proto.h>
  62 #include <asm/pda.h>
  63 #else
  64 #include <asm/processor-flags.h>
  65 #include <asm/arch_hooks.h>
  66 #include <asm/traps.h>
  67
  68 #include "cpu/mcheck/mce.h"
  69
  70 asmlinkage int system_call(void);
  71
  72 /* Do we ignore FPU interrupts ? */
  73 char ignore_fpu_irq;
  74
  75 /*
  76  * The IDT has to be page-aligned to simplify the Pentium
  77  * F0 0F bug workaround.. We have a special link segment
  78  * for this.
  79  */
  80 gate_desc idt_table[256]
  81         __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
  82 #endif
  83
  84 DECLARE_BITMAP(used_vectors, NR_VECTORS);
  85 EXPORT_SYMBOL_GPL(used_vectors);
  86
  87 static int ignore_nmis;
  88
  89 static inline void conditional_sti(struct pt_regs *regs)
  90 {
  91         if (regs->flags & X86_EFLAGS_IF)
  92                 local_irq_enable();
  93 }
  94
  95 static inline void preempt_conditional_sti(struct pt_regs *regs)
  96 {
  97         inc_preempt_count();
  98         if (regs->flags & X86_EFLAGS_IF)
  99                 local_irq_enable();
 100 }
 101
 102 static inline void preempt_conditional_cli(struct pt_regs *regs)
 103 {
 104         if (regs->flags & X86_EFLAGS_IF)
 105                 local_irq_disable();
 106         dec_preempt_count();
 107 }
 108
 109 #ifdef CONFIG_X86_32
 110 static inline void
 111 die_if_kernel(const char *str, struct pt_regs *regs, long err)
 112 {
 113         if (!user_mode_vm(regs))
 114                 die(str, regs, err);
 115 }
 116
 117 /*
 118  * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
 119  * invalid offset set (the LAZY one) and the faulting thread has
 120  * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
 121  * we set the offset field correctly and return 1.
 122  */
 123 static int lazy_iobitmap_copy(void)
 124 {
 125         struct thread_struct *thread;
 126         struct tss_struct *tss;
 127         int cpu;
 128
 129         cpu = get_cpu();
 130         tss = &per_cpu(init_tss, cpu);
 131         thread = &current->thread;
 132
 133         if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
 134             thread->io_bitmap_ptr) {
 135                 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
 136                        thread->io_bitmap_max);
 137                 /*
 138                  * If the previously set map was extending to higher ports
 139                  * than the current one, pad extra space with 0xff (no access).
 140                  */
 141                 if (thread->io_bitmap_max < tss->io_bitmap_max) {
 142                         memset((char *) tss->io_bitmap +
 143                                 thread->io_bitmap_max, 0xff,
 144                                 tss->io_bitmap_max - thread->io_bitmap_max);
 145                 }
 146                 tss->io_bitmap_max = thread->io_bitmap_max;
 147                 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 148                 tss->io_bitmap_owner = thread;
 149                 put_cpu();
 150
 151                 return 1;
 152         }
 153         put_cpu();
 154
 155         return 0;
 156 }
 157 #endif
 158
 159 static void __kprobes
 160 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 161         long error_code, siginfo_t *info)
 162 {
 163         struct task_struct *tsk = current;
 164
 165 #ifdef CONFIG_X86_32
 166         if (regs->flags & X86_VM_MASK) {
 167                 /*
 168                  * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 169                  * On nmi (interrupt 2), do_trap should not be called.
 170                  */
 171                 if (trapnr < 6)
 172                         goto vm86_trap;
 173                 goto trap_signal;
 174         }
 175 #endif
 176
 177         if (!user_mode(regs))
 178                 goto kernel_trap;
 179
 180 #ifdef CONFIG_X86_32
 181 trap_signal:
 182 #endif
 183         /*
 184          * We want error_code and trap_no set for userspace faults and
 185          * kernelspace faults which result in die(), but not
 186          * kernelspace faults which are fixed up.  die() gives the
 187          * process no chance to handle the signal and notice the
 188          * kernel fault information, so that won't result in polluting
 189          * the information about previously queued, but not yet
 190          * delivered, faults.  See also do_general_protection below.
 191          */
 192         tsk->thread.error_code = error_code;
 193         tsk->thread.trap_no = trapnr;
 194
 195 #ifdef CONFIG_X86_64
 196         if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
 197             printk_ratelimit()) {
 198                 printk(KERN_INFO
 199                        "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
 200                        tsk->comm, tsk->pid, str,
 201                        regs->ip, regs->sp, error_code);
 202                 print_vma_addr(" in ", regs->ip);
 203                 printk("\n");
 204         }
 205 #endif
 206
 207         if (info)
 208                 force_sig_info(signr, info, tsk);
 209         else
 210                 force_sig(signr, tsk);
 211         return;
 212
 213 kernel_trap:
 214         if (!fixup_exception(regs)) {
 215                 tsk->thread.error_code = error_code;
 216                 tsk->thread.trap_no = trapnr;
 217                 die(str, regs, error_code);
 218         }
 219         return;
 220
 221 #ifdef CONFIG_X86_32
 222 vm86_trap:
 223         if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
 224                                                 error_code, trapnr))
 225                 goto trap_signal;
 226         return;
 227 #endif
 228 }
 229
 230 #define DO_ERROR(trapnr, signr, str, name)                              \
 231 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)     \
 232 {                                                                       \
 233         if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
 234                                                         == NOTIFY_STOP) \
 235                 return;                                                 \
 236         conditional_sti(regs);                                          \
 237         do_trap(trapnr, signr, str, regs, error_code, NULL);            \
 238 }
 239
 240 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)         \
 241 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)     \
 242 {                                                                       \
 243         siginfo_t info;                                                 \
 244         info.si_signo = signr;                                          \
 245         info.si_errno = 0;                                              \
 246         info.si_code = sicode;                                          \
 247         info.si_addr = (void __user *)siaddr;                           \
 248         if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
 249                                                         == NOTIFY_STOP) \
 250                 return;                                                 \
 251         conditional_sti(regs);                                          \
 252         do_trap(trapnr, signr, str, regs, error_code, &info);           \
 253 }
 254
 255 DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
 256 DO_ERROR(4, SIGSEGV, "overflow", overflow)
 257 DO_ERROR(5, SIGSEGV, "bounds", bounds)
 258 DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
 259 DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 260 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 261 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 262 #ifdef CONFIG_X86_32
 263 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
 264 #endif
 265 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
 266
 267 #ifdef CONFIG_X86_64
 268 /* Runs on IST stack */
 269 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 270 {
 271         if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
 272                         12, SIGBUS) == NOTIFY_STOP)
 273                 return;
 274         preempt_conditional_sti(regs);
 275         do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
 276         preempt_conditional_cli(regs);
 277 }
 278
 279 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 280 {
 281         static const char str[] = "double fault";
 282         struct task_struct *tsk = current;
 283
 284         /* Return not checked because double check cannot be ignored */
 285         notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
 286
 287         tsk->thread.error_code = error_code;
 288         tsk->thread.trap_no = 8;
 289
 290         /*
 291          * This is always a kernel trap and never fixable (and thus must
 292          * never return).
 293          */
 294         for (;;)
 295                 die(str, regs, error_code);
 296 }
 297 #endif
 298
 299 dotraplinkage void __kprobes
 300 do_general_protection(struct pt_regs *regs, long error_code)
 301 {
 302         struct task_struct *tsk;
 303
 304         conditional_sti(regs);
 305
 306 #ifdef CONFIG_X86_32
 307         if (lazy_iobitmap_copy()) {
 308                 /* restart the faulting instruction */
 309                 return;
 310         }
 311
 312         if (regs->flags & X86_VM_MASK)
 313                 goto gp_in_vm86;
 314 #endif
 315
 316         tsk = current;
 317         if (!user_mode(regs))
 318                 goto gp_in_kernel;
 319
 320         tsk->thread.error_code = error_code;
 321         tsk->thread.trap_no = 13;
 322
 323         if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 324                         printk_ratelimit()) {
 325                 printk(KERN_INFO
 326                         "%s[%d] general protection ip:%lx sp:%lx error:%lx",
 327                         tsk->comm, task_pid_nr(tsk),
 328                         regs->ip, regs->sp, error_code);
 329                 print_vma_addr(" in ", regs->ip);
 330                 printk("\n");
 331         }
 332
 333         force_sig(SIGSEGV, tsk);
 334         return;
 335
 336 #ifdef CONFIG_X86_32
 337 gp_in_vm86:
 338         local_irq_enable();
 339         handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
 340         return;
 341 #endif
 342
 343 gp_in_kernel:
 344         if (fixup_exception(regs))
 345                 return;
 346
 347         tsk->thread.error_code = error_code;
 348         tsk->thread.trap_no = 13;
 349         if (notify_die(DIE_GPF, "general protection fault", regs,
 350                                 error_code, 13, SIGSEGV) == NOTIFY_STOP)
 351                 return;
 352         die("general protection fault", regs, error_code);
 353 }
 354
 355 static notrace __kprobes void
 356 mem_parity_error(unsigned char reason, struct pt_regs *regs)
 357 {
 358         printk(KERN_EMERG
 359                 "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 360                         reason, smp_processor_id());
 361
 362         printk(KERN_EMERG
 363                 "You have some hardware problem, likely on the PCI bus.\n");
 364
 365 #if defined(CONFIG_EDAC)
 366         if (edac_handler_set()) {
 367                 edac_atomic_assert_error();
 368                 return;
 369         }
 370 #endif
 371
 372         if (panic_on_unrecovered_nmi)
 373                 panic("NMI: Not continuing");
 374
 375         printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 376
 377         /* Clear and disable the memory parity error line. */
 378         reason = (reason & 0xf) | 4;
 379         outb(reason, 0x61);
 380 }
 381
 382 static notrace __kprobes void
 383 io_check_error(unsigned char reason, struct pt_regs *regs)
 384 {
 385         unsigned long i;
 386
 387         printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
 388         show_registers(regs);
 389
 390         /* Re-enable the IOCK line, wait for a few seconds */
 391         reason = (reason & 0xf) | 8;
 392         outb(reason, 0x61);
 393
 394         i = 2000;
 395         while (--i)
 396                 udelay(1000);
 397
 398         reason &= ~8;
 399         outb(reason, 0x61);
 400 }
 401
 402 static notrace __kprobes void
 403 unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 404 {
 405         if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
 406                         NOTIFY_STOP)
 407                 return;
 408 #ifdef CONFIG_MCA
 409         /*
 410          * Might actually be able to figure out what the guilty party
 411          * is:
 412          */
 413         if (MCA_bus) {
 414                 mca_handle_nmi();
 415                 return;
 416         }
 417 #endif
 418         printk(KERN_EMERG
 419                 "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
 420                         reason, smp_processor_id());
 421
 422         printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
 423         if (panic_on_unrecovered_nmi)
 424                 panic("NMI: Not continuing");
 425
 426         printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 427 }
 428
 429 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 430 {
 431         unsigned char reason = 0;
 432         int cpu;
 433
 434         cpu = smp_processor_id();
 435
 436         /* Only the BSP gets external NMIs from the system. */
 437         if (!cpu)
 438                 reason = get_nmi_reason();
 439
 440         if (!(reason & 0xc0)) {
 441                 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 442                                                                 == NOTIFY_STOP)
 443                         return;
 444 #ifdef CONFIG_X86_LOCAL_APIC
 445                 /*
 446                  * Ok, so this is none of the documented NMI sources,
 447                  * so it must be the NMI watchdog.
 448                  */
 449                 if (nmi_watchdog_tick(regs, reason))
 450                         return;
 451                 if (!do_nmi_callback(regs, cpu))
 452                         unknown_nmi_error(reason, regs);
 453 #else
 454                 unknown_nmi_error(reason, regs);
 455 #endif
 456
 457                 return;
 458         }
 459         if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
 460                 return;
 461
 462         /* AK: following checks seem to be broken on modern chipsets. FIXME */
 463         if (reason & 0x80)
 464                 mem_parity_error(reason, regs);
 465         if (reason & 0x40)
 466                 io_check_error(reason, regs);
 467 #ifdef CONFIG_X86_32
 468         /*
 469          * Reassert NMI in case it became active meanwhile
 470          * as it's edge-triggered:
 471          */
 472         reassert_nmi();
 473 #endif
 474 }
 475
 476 dotraplinkage notrace __kprobes void
 477 do_nmi(struct pt_regs *regs, long error_code)
 478 {
 479         nmi_enter();
 480
 481         inc_irq_stat(__nmi_count);
 482
 483         if (!ignore_nmis)
 484                 default_do_nmi(regs);
 485
 486         nmi_exit();
 487 }
 488
 489 void stop_nmi(void)
 490 {
 491         acpi_nmi_disable();
 492         ignore_nmis++;
 493 }
 494
 495 void restart_nmi(void)
 496 {
 497         ignore_nmis--;
 498         acpi_nmi_enable();
 499 }
 500
 501 /* May run on IST stack. */
 502 dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 503 {
 504 #ifdef CONFIG_KPROBES
 505         if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 506                         == NOTIFY_STOP)
 507                 return;
 508 #else
 509         if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
 510                         == NOTIFY_STOP)
 511                 return;
 512 #endif
 513
 514         preempt_conditional_sti(regs);
 515         do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 516         preempt_conditional_cli(regs);
 517 }
 518
 519 #ifdef CONFIG_X86_64
 520 /*
 521  * Help handler running on IST stack to switch back to user stack
 522  * for scheduling or signal handling. The actual stack switch is done in
 523  * entry.S
 524  */
 525 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 526 {
 527         struct pt_regs *regs = eregs;
 528         /* Did already sync */
 529         if (eregs == (struct pt_regs *)eregs->sp)
 530                 ;
 531         /* Exception from user space */
 532         else if (user_mode(eregs))
 533                 regs = task_pt_regs(current);
 534         /*
 535          * Exception from kernel and interrupts are enabled. Move to
 536          * kernel process stack.
 537          */
 538         else if (eregs->flags & X86_EFLAGS_IF)
 539                 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 540         if (eregs != regs)
 541                 *regs = *eregs;
 542         return regs;
 543 }
 544 #endif
 545
 546 /*
 547  * Our handling of the processor debug registers is non-trivial.
 548  * We do not clear them on entry and exit from the kernel. Therefore
 549  * it is possible to get a watchpoint trap here from inside the kernel.
 550  * However, the code in ./ptrace.c has ensured that the user can
 551  * only set watchpoints on userspace addresses. Therefore the in-kernel
 552  * watchpoint trap can only occur in code which is reading/writing
 553  * from user space. Such code must not hold kernel locks (since it
 554  * can equally take a page fault), therefore it is safe to call
 555  * force_sig_info even though that claims and releases locks.
 556  *
 557  * Code in ./signal.c ensures that the debug control register
 558  * is restored before we deliver any signal, and therefore that
 559  * user code runs with the correct debug control register even though
 560  * we clear it here.
 561  *
 562  * Being careful here means that we don't have to be as careful in a
 563  * lot of more complicated places (task switching can be a bit lazy
 564  * about restoring all the debug state, and ptrace doesn't have to
 565  * find every occurrence of the TF bit that could be saved away even
 566  * by user code)
 567  *
 568  * May run on IST stack.
 569  */
 570 dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 571 {
 572         struct task_struct *tsk = current;
 573         unsigned long condition;
 574         int si_code;
 575
 576         get_debugreg(condition, 6);
 577
 578         /*
 579          * The processor cleared BTF, so don't mark that we need it set.
 580          */
 581         clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
 582         tsk->thread.debugctlmsr = 0;
 583
 584         if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 585                                                 SIGTRAP) == NOTIFY_STOP)
 586                 return;
 587
 588         /* It's safe to allow irq's after DR6 has been saved */
 589         preempt_conditional_sti(regs);
 590
 591         /* Mask out spurious debug traps due to lazy DR7 setting */
 592         if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
 593                 if (!tsk->thread.debugreg7)
 594                         goto clear_dr7;
 595         }
 596
 597 #ifdef CONFIG_X86_32
 598         if (regs->flags & X86_VM_MASK)
 599                 goto debug_vm86;
 600 #endif
 601
 602         /* Save debug status register where ptrace can see it */
 603         tsk->thread.debugreg6 = condition;
 604
 605         /*
 606          * Single-stepping through TF: make sure we ignore any events in
 607          * kernel space (but re-enable TF when returning to user mode).
 608          */
 609         if (condition & DR_STEP) {
 610                 if (!user_mode(regs))
 611                         goto clear_TF_reenable;
 612         }
 613
 614         si_code = get_si_code(condition);
 615         /* Ok, finally something we can handle */
 616         send_sigtrap(tsk, regs, error_code, si_code);
 617
 618         /*
 619          * Disable additional traps. They'll be re-enabled when
 620          * the signal is delivered.
 621          */
 622 clear_dr7:
 623         set_debugreg(0, 7);
 624         preempt_conditional_cli(regs);
 625         return;
 626
 627 #ifdef CONFIG_X86_32
 628 debug_vm86:
 629         handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
 630         preempt_conditional_cli(regs);
 631         return;
 632 #endif
 633
 634 clear_TF_reenable:
 635         set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 636         regs->flags &= ~X86_EFLAGS_TF;
 637         preempt_conditional_cli(regs);
 638         return;
 639 }
 640
 641 #ifdef CONFIG_X86_64
 642 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 643 {
 644         if (fixup_exception(regs))
 645                 return 1;
 646
 647         notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
 648         /* Illegal floating point operation in the kernel */
 649         current->thread.trap_no = trapnr;
 650         die(str, regs, 0);
 651         return 0;
 652 }
 653 #endif
 654
 655 /*
 656  * Note that we play around with the 'TS' bit in an attempt to get
 657  * the correct behaviour even in the presence of the asynchronous
 658  * IRQ13 behaviour
 659  */
 660 void math_error(void __user *ip)
 661 {
 662         struct task_struct *task;
 663         siginfo_t info;
 664         unsigned short cwd, swd, err;
 665
 666         /*
 667          * Save the info for the exception handler and clear the error.
 668          */
 669         task = current;
 670         save_init_fpu(task);
 671         task->thread.trap_no = 16;
 672         task->thread.error_code = 0;
 673         info.si_signo = SIGFPE;
 674         info.si_errno = 0;
 675         info.si_addr = ip;
 676         /*
 677          * (~cwd & swd) will mask out exceptions that are not set to unmasked
 678          * status.  0x3f is the exception bits in these regs, 0x200 is the
 679          * C1 reg you need in case of a stack fault, 0x040 is the stack
 680          * fault bit.  We should only be taking one exception at a time,
 681          * so if this combination doesn't produce any single exception,
 682          * then we have a bad program that isn't synchronizing its FPU usage
 683          * and it will suffer the consequences since we won't be able to
 684          * fully reproduce the context of the exception
 685          */
 686         cwd = get_fpu_cwd(task);
 687         swd = get_fpu_swd(task);
 688
 689         err = swd & ~cwd;
 690
 691         if (err & 0x001) {      /* Invalid op */
 692                 /*
 693                  * swd & 0x240 == 0x040: Stack Underflow
 694                  * swd & 0x240 == 0x240: Stack Overflow
 695                  * User must clear the SF bit (0x40) if set
 696                  */
 697                 info.si_code = FPE_FLTINV;
 698         } else if (err & 0x004) { /* Divide by Zero */
 699                 info.si_code = FPE_FLTDIV;
 700         } else if (err & 0x008) { /* Overflow */
 701                 info.si_code = FPE_FLTOVF;
 702         } else if (err & 0x012) { /* Denormal, Underflow */
 703                 info.si_code = FPE_FLTUND;
 704         } else if (err & 0x020) { /* Precision */
 705                 info.si_code = FPE_FLTRES;
 706         } else {
 707                 /*
 708                  * If we're using IRQ 13, or supposedly even some trap 16
 709                  * implementations, it's possible we get a spurious trap...
 710                  */
 711                 return;         /* Spurious trap, no error */
 712         }
 713         force_sig_info(SIGFPE, &info, task);
 714 }
 715
 716 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 717 {
 718         conditional_sti(regs);
 719
 720 #ifdef CONFIG_X86_32
 721         ignore_fpu_irq = 1;
 722 #else
 723         if (!user_mode(regs) &&
 724             kernel_math_error(regs, "kernel x87 math error", 16))
 725                 return;
 726 #endif
 727
 728         math_error((void __user *)regs->ip);
 729 }
 730
 731 static void simd_math_error(void __user *ip)
 732 {
 733         struct task_struct *task;
 734         siginfo_t info;
 735         unsigned short mxcsr;
 736
 737         /*
 738          * Save the info for the exception handler and clear the error.
 739          */
 740         task = current;
 741         save_init_fpu(task);
 742         task->thread.trap_no = 19;
 743         task->thread.error_code = 0;
 744         info.si_signo = SIGFPE;
 745         info.si_errno = 0;
 746         info.si_code = __SI_FAULT;
 747         info.si_addr = ip;
 748         /*
 749          * The SIMD FPU exceptions are handled a little differently, as there
 750          * is only a single status/control register.  Thus, to determine which
 751          * unmasked exception was caught we must mask the exception mask bits
 752          * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 753          */
 754         mxcsr = get_fpu_mxcsr(task);
 755         switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
 756         case 0x000:
 757         default:
 758                 break;
 759         case 0x001: /* Invalid Op */
 760                 info.si_code = FPE_FLTINV;
 761                 break;
 762         case 0x002: /* Denormalize */
 763         case 0x010: /* Underflow */
 764                 info.si_code = FPE_FLTUND;
 765                 break;
 766         case 0x004: /* Zero Divide */
 767                 info.si_code = FPE_FLTDIV;
 768                 break;
 769         case 0x008: /* Overflow */
 770                 info.si_code = FPE_FLTOVF;
 771                 break;
 772         case 0x020: /* Precision */
 773                 info.si_code = FPE_FLTRES;
 774                 break;
 775         }
 776         force_sig_info(SIGFPE, &info, task);
 777 }
 778
 779 dotraplinkage void
 780 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 781 {
 782         conditional_sti(regs);
 783
 784 #ifdef CONFIG_X86_32
 785         if (cpu_has_xmm) {
 786                 /* Handle SIMD FPU exceptions on PIII+ processors. */
 787                 ignore_fpu_irq = 1;
 788                 simd_math_error((void __user *)regs->ip);
 789                 return;
 790         }
 791         /*
 792          * Handle strange cache flush from user space exception
 793          * in all other cases.  This is undocumented behaviour.
 794          */
 795         if (regs->flags & X86_VM_MASK) {
 796                 handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
 797                 return;
 798         }
 799         current->thread.trap_no = 19;
 800         current->thread.error_code = error_code;
 801         die_if_kernel("cache flush denied", regs, error_code);
 802         force_sig(SIGSEGV, current);
 803 #else
 804         if (!user_mode(regs) &&
 805                         kernel_math_error(regs, "kernel simd math error", 19))
 806                 return;
 807         simd_math_error((void __user *)regs->ip);
 808 #endif
 809 }
 810
 811 dotraplinkage void
 812 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
 813 {
 814         conditional_sti(regs);
 815 #if 0
 816         /* No need to warn about this any longer. */
 817         printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
 818 #endif
 819 }
 820
 821 #ifdef CONFIG_X86_32
 822 unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
 823 {
 824         struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
 825         unsigned long base = (kesp - uesp) & -THREAD_SIZE;
 826         unsigned long new_kesp = kesp - base;
 827         unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
 828         __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
 829
 830         /* Set up base for espfix segment */
 831         desc &= 0x00f0ff0000000000ULL;
 832         desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
 833                 ((((__u64)base) << 32) & 0xff00000000000000ULL) |
 834                 ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
 835                 (lim_pages & 0xffff);
 836         *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
 837
 838         return new_kesp;
 839 }
 840 #else
 841 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 842 {
 843 }
 844
 845 asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 846 {
 847 }
 848 #endif
 849
 850 /*
 851  * 'math_state_restore()' saves the current math information in the
 852  * old math state array, and gets the new ones from the current task
 853  *
 854  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 855  * Don't touch unless you *really* know how it works.
 856  *
 857  * Must be called with kernel preemption disabled (in this case,
 858  * local interrupts are disabled at the call-site in entry.S).
 859  */
 860 asmlinkage void math_state_restore(void)
 861 {
 862         struct thread_info *thread = current_thread_info();
 863         struct task_struct *tsk = thread->task;
 864
 865         if (!tsk_used_math(tsk)) {
 866                 local_irq_enable();
 867                 /*
 868                  * does a slab alloc which can sleep
 869                  */
 870                 if (init_fpu(tsk)) {
 871                         /*
 872                          * ran out of memory!
 873                          */
 874                         do_group_exit(SIGKILL);
 875                         return;
 876                 }
 877                 local_irq_disable();
 878         }
 879
 880         clts();                         /* Allow maths ops (or we recurse) */
 881 #ifdef CONFIG_X86_32
 882         restore_fpu(tsk);
 883 #else
 884         /*
 885          * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 886          */
 887         if (unlikely(restore_fpu_checking(tsk))) {
 888                 stts();
 889                 force_sig(SIGSEGV, tsk);
 890                 return;
 891         }
 892 #endif
 893         thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
 894         tsk->fpu_counter++;
 895 }
 896 EXPORT_SYMBOL_GPL(math_state_restore);
 897
 898 #ifndef CONFIG_MATH_EMULATION
 899 void math_emulate(struct math_emu_info *info)
 900 {
 901         printk(KERN_EMERG
 902                 "math-emulation not enabled and no coprocessor found.\n");
 903         printk(KERN_EMERG "killing %s.\n", current->comm);
 904         force_sig(SIGFPE, current);
 905         schedule();
 906 }
 907 #endif /* CONFIG_MATH_EMULATION */
 908
 909 dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
 910 {
 911 #ifdef CONFIG_X86_32
 912         if (read_cr0() & X86_CR0_EM) {
 913                 struct math_emu_info info = { };
 914
 915                 conditional_sti(&regs);
 916
 917                 info.regs = &regs;
 918                 math_emulate(&info);
 919         } else {
 920                 math_state_restore(); /* interrupts still off */
 921                 conditional_sti(&regs);
 922         }
 923 #else
 924         math_state_restore();
 925 #endif
 926 }
 927
 928 #ifdef CONFIG_X86_32
 929 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 930 {
 931         siginfo_t info;
 932         local_irq_enable();
 933
 934         info.si_signo = SIGILL;
 935         info.si_errno = 0;
 936         info.si_code = ILL_BADSTK;
 937         info.si_addr = 0;
 938         if (notify_die(DIE_TRAP, "iret exception",
 939                         regs, error_code, 32, SIGILL) == NOTIFY_STOP)
 940                 return;
 941         do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
 942 }
 943 #endif
 944
 945 void __init trap_init(void)
 946 {
 947         int i;
 948
 949 #ifdef CONFIG_EISA
 950         void __iomem *p = early_ioremap(0x0FFFD9, 4);
 951
 952         if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
 953                 EISA_bus = 1;
 954         early_iounmap(p, 4);
 955 #endif
 956
 957         set_intr_gate(0, &divide_error);
 958         set_intr_gate_ist(1, &debug, DEBUG_STACK);
 959         set_intr_gate_ist(2, &nmi, NMI_STACK);
 960         /* int3 can be called from all */
 961         set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
 962         /* int4 can be called from all */
 963         set_system_intr_gate(4, &overflow);
 964         set_intr_gate(5, &bounds);
 965         set_intr_gate(6, &invalid_op);
 966         set_intr_gate(7, &device_not_available);
 967 #ifdef CONFIG_X86_32
 968         set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
 969 #else
 970         set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
 971 #endif
 972         set_intr_gate(9, &coprocessor_segment_overrun);
 973         set_intr_gate(10, &invalid_TSS);
 974         set_intr_gate(11, &segment_not_present);
 975         set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
 976         set_intr_gate(13, &general_protection);
 977         set_intr_gate(14, &page_fault);
 978         set_intr_gate(15, &spurious_interrupt_bug);
 979         set_intr_gate(16, &coprocessor_error);
 980         set_intr_gate(17, &alignment_check);
 981 #ifdef CONFIG_X86_MCE
 982         set_intr_gate_ist(18, &machine_check, MCE_STACK);
 983 #endif
 984         set_intr_gate(19, &simd_coprocessor_error);
 985
 986 #ifdef CONFIG_IA32_EMULATION
 987         set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 988 #endif
 989
 990 #ifdef CONFIG_X86_32
 991         if (cpu_has_fxsr) {
 992                 printk(KERN_INFO "Enabling fast FPU save and restore... ");
 993                 set_in_cr4(X86_CR4_OSFXSR);
 994                 printk("done.\n");
 995         }
 996         if (cpu_has_xmm) {
 997                 printk(KERN_INFO
 998                         "Enabling unmasked SIMD FPU exception support... ");
 999                 set_in_cr4(X86_CR4_OSXMMEXCPT);
1000                 printk("done.\n");
1001         }
1002
1003         set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1004 #endif
1005
1006         /* Reserve all the builtin and the syscall vector: */
1007         for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1008                 set_bit(i, used_vectors);
1009
1010 #ifdef CONFIG_X86_64
1011         set_bit(IA32_SYSCALL_VECTOR, used_vectors);
1012 #else
1013         set_bit(SYSCALL_VECTOR, used_vectors);
1014 #endif
1015         /*
1016          * Should be a barrier for any external CPU state:
1017          */
1018         cpu_init();
1019
1020 #ifdef CONFIG_X86_32
1021         trap_init_hook();
1022 #endif
1023 }