2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
12 * entry.S contains the system-call and fault low-level handling routines.
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals or fork/exec et.al.
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all registers saved.
27 * - schedule it carefully for the final hardware.
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/segment.h>
35 #include <asm/cache.h>
36 #include <asm/errno.h>
37 #include <asm/dwarf2.h>
38 #include <asm/calling.h>
39 #include <asm/offset.h>
41 #include <asm/unistd.h>
42 #include <asm/thread_info.h>
43 #include <asm/hw_irq.h>
47 #ifndef CONFIG_PREEMPT
48 #define retint_kernel retint_restore_args
52 * C code is not supposed to know about undefined top of stack. Every time
53 * a C function with a pt_regs argument is called from the SYSCALL based
54 * fast path FIXUP_TOP_OF_STACK is needed.
55 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
59 /* %rsp:at FRAMEEND */
/*
 * FIXUP_TOP_OF_STACK: reconstruct the architecturally-defined top of
 * stack (SS/CS/EFLAGS slots) after a SYSCALL fast-path entry, so C code
 * taking a pt_regs argument sees a well-formed frame.  \tmp is a scratch
 * register clobbered by the macro.
 * NOTE(review): interior lines and the closing .endm are missing from
 * this chunk -- the body shown here is incomplete; confirm against the
 * full file.
 */
60 .macro FIXUP_TOP_OF_STACK tmp
61 movq %gs:pda_oldrsp,\tmp	# user RSP stashed in the PDA at syscall entry
63 movq $__USER_DS,SS(%rsp)	# fill in user segment selectors
64 movq $__USER_CS,CS(%rsp)
66 movq R11(%rsp),\tmp /* get eflags */	# SYSCALL saved user EFLAGS in r11
67 movq \tmp,EFLAGS(%rsp)	# ...copy it into the frame's EFLAGS slot
/*
 * RESTORE_TOP_OF_STACK: inverse of FIXUP_TOP_OF_STACK -- sync state a
 * ptregs-taking C function may have modified back into the places the
 * SYSRET fast path expects (user RSP into the PDA, EFLAGS into the R11
 * slot).  \tmp is a scratch register; \offset adjusts for extra pushes.
 * NOTE(review): the closing .endm is not visible in this chunk.
 */
70 .macro RESTORE_TOP_OF_STACK tmp,offset=0
71 movq RSP-\offset(%rsp),\tmp	# possibly-updated user RSP from the frame
72 movq \tmp,%gs:pda_oldrsp	# ...back into the PDA for sysret
73 movq EFLAGS-\offset(%rsp),\tmp	# possibly-updated user EFLAGS
74 movq \tmp,R11-\offset(%rsp)	# SYSRET restores EFLAGS from the R11 slot
/*
 * FAKE_STACK_FRAME: build a synthetic interrupt-style frame (plus orig_rax)
 * on the kernel stack so a kernel thread / exec path can return through the
 * normal exit code.  \child_rip becomes the frame's RIP.
 * NOTE(review): the ss and rsp pushes named in the comment below (and the
 * closing .endm) fall in lines missing from this chunk -- only the
 * eflags/cs/rip/orig-rax pushes are visible here.
 */
77 .macro FAKE_STACK_FRAME child_rip
78 /* push in order ss, rsp, eflags, cs, rip */
81 CFI_ADJUST_CFA_OFFSET 8
83 CFI_ADJUST_CFA_OFFSET 8
85 pushq $(1<<9) /* eflags - interrupts on */
86 CFI_ADJUST_CFA_OFFSET 8
87 pushq $__KERNEL_CS /* cs */
88 CFI_ADJUST_CFA_OFFSET 8
89 pushq \child_rip /* rip */
90 CFI_ADJUST_CFA_OFFSET 8
92 pushq %rax /* orig rax */
93 CFI_ADJUST_CFA_OFFSET 8
/*
 * UNFAKE_STACK_FRAME: discard the 6 quadwords pushed by FAKE_STACK_FRAME.
 * NOTE(review): the stack-pointer adjustment itself and the .endm are in
 * lines missing from this chunk; only the CFI bookkeeping is visible.
 */
96 .macro UNFAKE_STACK_FRAME
98 CFI_ADJUST_CFA_OFFSET -(6*8)
/*
 * CFI_DEFAULT_STACK: emit the dwarf2 unwind annotations describing a full
 * pt_regs save on the stack -- one CFI_OFFSET per saved register, relative
 * to the SS slot.  Used by entry points that build a complete frame.
 * NOTE(review): the r9/r8 annotations and the closing .endm fall in lines
 * missing from this chunk.
 */
101 .macro CFI_DEFAULT_STACK
102 CFI_ADJUST_CFA_OFFSET (SS)
103 CFI_OFFSET r15,R15-SS
104 CFI_OFFSET r14,R14-SS
105 CFI_OFFSET r13,R13-SS
106 CFI_OFFSET r12,R12-SS
107 CFI_OFFSET rbp,RBP-SS
108 CFI_OFFSET rbx,RBX-SS
109 CFI_OFFSET r11,R11-SS
110 CFI_OFFSET r10,R10-SS
113 CFI_OFFSET rax,RAX-SS
114 CFI_OFFSET rcx,RCX-SS
115 CFI_OFFSET rdx,RDX-SS
116 CFI_OFFSET rsi,RSI-SS
117 CFI_OFFSET rdi,RDI-SS
118 CFI_OFFSET rsp,RSP-SS
119 CFI_OFFSET rip,RIP-SS
122 * A newly forked process directly context switches into this.
/*
 * ret_from_fork region.  NOTE(review): the ENTRY() label, the conditional
 * jump consuming the trace-flag test below, and several branch targets fall
 * in lines missing from this chunk -- flow between the visible
 * instructions cannot be fully reconstructed here.
 */
129 GET_THREAD_INFO(%rcx)
130 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)	# child being traced/audited?
134 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
135 je int_ret_from_sys_call	# kernel CS => kernel thread, use IRET path
136 testl $_TIF_IA32,threadinfo_flags(%rcx)	# 32-bit task can't use 64-bit SYSRET path
137 jnz int_ret_from_sys_call
138 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET	# resync frame for the SYSRET fast path
139 jmp ret_from_sys_call
142 call syscall_trace_leave	# traced child: report syscall exit first
143 GET_THREAD_INFO(%rcx)
148 * System call entry. Up to 6 arguments in registers are supported.
150 * SYSCALL does not save anything on the stack and does not change the
156 * rax system call number
158 * rcx return address for syscall/sysret, C arg3
161 * r10 arg3 (--> moved to rcx for C)
164 * r11 eflags for syscall/sysret, temporary for C
165 * r12-r15,rbp,rbx saved by C code, not touched.
167 * Interrupts are off on entry.
168 * Only called from user space.
170 * XXX if we had a free scratch register we could save the RSP into the stack frame
171 * and report it properly in ps. Unfortunately we haven't.
/*
 * SYSCALL fast-path body.  NOTE(review): the ENTRY(system_call) label,
 * the SAVE_ARGS invocation, and the badsys/tracesys conditional jumps
 * fall in lines missing from this chunk.
 */
177 movq %rsp,%gs:pda_oldrsp	# stash user RSP (SYSCALL doesn't switch stacks)
178 movq %gs:pda_kernelstack,%rsp	# switch to the kernel stack
181 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)	# remember syscall number for restart
182 movq %rcx,RIP-ARGOFFSET(%rsp)	# SYSCALL put the return RIP in rcx
183 GET_THREAD_INFO(%rcx)
184 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)	# slow path needed?
186 cmpq $__NR_syscall_max,%rax	# range-check the syscall number
189 call *sys_call_table(,%rax,8) # XXX: rip relative
190 movq %rax,RAX-ARGOFFSET(%rsp)	# store return value where RESTORE_ARGS finds it
192 * Syscall return path ending with SYSRET (fast path)
193 * Has incomplete stack frame and undefined top of stack.
/*
 * NOTE(review): the sysret_check label, the work-flag test/branch, the
 * interrupt-disable, and the final sysretq fall in lines missing from
 * this chunk.
 */
195 .globl ret_from_sys_call
197 movl $_TIF_ALLWORK_MASK,%edi	# edi = mask of work bits forcing the slow path
200 GET_THREAD_INFO(%rcx)
202 movl threadinfo_flags(%rcx),%edx	# edx = pending-work flags
205 movq RIP-ARGOFFSET(%rsp),%rcx	# SYSRET returns to rcx
206 RESTORE_ARGS 0,-ARG_SKIP,1
207 movq %gs:pda_oldrsp,%rsp	# back to the user stack before sysret
/*
 * Slow-path work on the SYSRET return: reschedule and signal handling.
 * NOTE(review): the sysret_careful/sysret_signal labels, the jnc/branch
 * consuming the bt below, the schedule call, and the interrupt toggling
 * fall in lines missing from this chunk.
 */
211 /* Handle reschedules */
212 /* edx: work, edi: workmask */
214 bt $TIF_NEED_RESCHED,%edx	# CF = need_resched bit
222 /* Handle a signal */
223 /* edx: work flags (arg3) */
226 leaq do_notify_resume(%rip),%rax	# handler for ptregscall_common to invoke
227 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
228 xorl %esi,%esi # oldset -> arg2
229 call ptregscall_common	# builds the full frame do_notify_resume needs
/*
 * tracesys: traced-syscall slow path -- report entry to the tracer, reload
 * possibly-modified args, dispatch, then report exit.  badsys stores
 * -ENOSYS for out-of-range syscall numbers.
 * NOTE(review): the tracesys/badsys labels, SAVE_REST/RESTORE_REST, and
 * the out-of-range branch fall in lines missing from this chunk.
 */
232 /* Do syscall tracing */
235 movq $-ENOSYS,RAX(%rsp)	# default return if tracer kills the syscall
236 FIXUP_TOP_OF_STACK %rdi
238 call syscall_trace_enter
239 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
241 cmpq $__NR_syscall_max,%rax	# re-check: ptrace may have changed rax
243 movq %r10,%rcx /* fixup for C */	# syscall ABI uses r10 where C ABI uses rcx
244 call *sys_call_table(,%rax,8)
245 movq %rax,RAX-ARGOFFSET(%rsp)
248 call syscall_trace_leave
249 RESTORE_TOP_OF_STACK %rbx
251 jmp ret_from_sys_call
254 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)	# badsys: invalid syscall number
255 jmp ret_from_sys_call
258 * Syscall return path ending with IRET.
259 * Has correct top of stack, but partial stack frame.
/*
 * NOTE(review): several labels (the check/careful loop heads), interrupt
 * enable/disable, SAVE_REST/RESTORE_REST pairs, and the branches consuming
 * the tests below fall in lines missing from this chunk.
 */
261 ENTRY(int_ret_from_sys_call)
263 testl $3,CS-ARGOFFSET(%rsp)	# returning to kernel?
264 je retint_restore_args	# yes: skip user-mode work
265 movl $_TIF_ALLWORK_MASK,%edi
266 /* edi: mask to check */
268 GET_THREAD_INFO(%rcx)
269 movl threadinfo_flags(%rcx),%edx
274 /* Either reschedule or signal or syscall exit tracking needed. */
275 /* First do a reschedule test. */
276 /* edx: work, edi: workmask */
278 bt $TIF_NEED_RESCHED,%edx	# CF = need_resched
287 /* handle signals and tracing -- both require a full stack frame */
291 /* Check for syscall exit trace */
292 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
295 leaq 8(%rsp),%rdi # &ptregs -> arg1
296 call syscall_trace_leave
298 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi	# don't re-test handled bits
302 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx	# signal work pending?
304 movq %rsp,%rdi # &ptregs -> arg1
305 xorl %esi,%esi # oldset -> arg2
306 call do_notify_resume
307 1: movl $_TIF_NEED_RESCHED,%edi	# recheck only reschedule from here on
314 * Certain special system calls that need to save a complete full stack frame.
/*
 * PTREGSCALL: generate a tiny stub that loads the real handler into rax,
 * points \arg at the pt_regs on the stack, and tail-jumps to
 * ptregscall_common (which builds/tears down the full frame).
 * NOTE(review): the stub's ENTRY/label line and the .endm are missing
 * from this chunk.
 */
317 .macro PTREGSCALL label,func,arg
320 leaq \func(%rip),%rax	# handler for ptregscall_common to call
321 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
322 jmp ptregscall_common
325 PTREGSCALL stub_clone, sys_clone, %r8
326 PTREGSCALL stub_fork, sys_fork, %rdi
327 PTREGSCALL stub_vfork, sys_vfork, %rdi
328 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
329 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
330 PTREGSCALL stub_iopl, sys_iopl, %rsi
/*
 * ptregscall_common: shared tail for PTREGSCALL stubs -- completes the
 * full stack frame around the real handler call, then returns.
 * Followed (per the original file) by the execve stub.
 * NOTE(review): this region is badly truncated in this chunk: the
 * popq/pushq of the return address, SAVE_REST/RESTORE_REST, the indirect
 * call through %rax, the ENTRY(stub_execve) label, and the sys_execve
 * call all fall in missing lines.  The FIXUP/RESTORE_TOP_OF_STACK pairs
 * below bracket those missing calls.
 */
332 ENTRY(ptregscall_common)
335 CFI_ADJUST_CFA_OFFSET -8
338 FIXUP_TOP_OF_STACK %r11
340 RESTORE_TOP_OF_STACK %r11
344 CFI_ADJUST_CFA_OFFSET 8
351 CFI_ADJUST_CFA_OFFSET -8
354 FIXUP_TOP_OF_STACK %r11
356 GET_THREAD_INFO(%rcx)
357 bt $TIF_IA32,threadinfo_flags(%rcx)	# 32-bit task? (branch line missing here)
359 RESTORE_TOP_OF_STACK %r11
366 CFI_ADJUST_CFA_OFFSET REST_SKIP
369 jmp int_ret_from_sys_call	# exec must return via IRET to set all state
373 * sigreturn is special because it needs to restore all registers on return.
374 * This cannot be done with SYSRET, so use the IRET return path instead.
/*
 * NOTE(review): SAVE_REST/RESTORE_REST and the CFI bookkeeping around
 * this stub fall in lines missing from this chunk.
 */
376 ENTRY(stub_rt_sigreturn)
381 FIXUP_TOP_OF_STACK %r11
382 call sys_rt_sigreturn
383 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
385 jmp int_ret_from_sys_call	# full-register restore requires IRET
389 * Interrupt entry/exit.
391 * Interrupt entry points save only callee clobbered registers in fast path.
393 * Entry runs with interrupts off.
/*
 * interrupt macro: common interrupt prologue -- save args, set up CFI,
 * optionally a frame pointer for debuggers, switch to the per-CPU IRQ
 * stack, then call \func.  NOTE(review): SAVE_ARGS, the rbp push, the
 * irq-stack nesting test, the cmove/stack switch, the call itself, and
 * the .endm fall in lines missing from this chunk.
 */
396 /* 0(%rsp): interrupt number */
397 .macro interrupt func
399 CFI_DEF_CFA rsp,(SS-RDI)
400 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
401 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
403 #ifdef CONFIG_DEBUG_INFO
407 * Setup a stack frame pointer. This allows gdb to trace
408 * back to the original stack.
411 CFI_DEF_CFA_REGISTER rbp
414 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
419 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
420 movq %gs:pda_irqstackptr,%rax	# per-CPU interrupt stack
422 pushq %rdi # save old stack
/*
 * common_interrupt and the shared interrupt-return path: leave the IRQ
 * stack, decide kernel vs. user return, and IRET (with a fixup entry for
 * a faulting iret).  NOTE(review): the interrupt-macro invocation, the
 * ret_from_intr/exit_intr/iret_label labels, the popping of the saved
 * stack pointer, RESTORE_ARGS, and the iretq itself fall in lines
 * missing from this chunk.
 */
426 ENTRY(common_interrupt)
428 /* 0(%rsp): oldrsp-ARGOFFSET */
432 subl $1,%gs:pda_irqcount	# leaving the IRQ-stack nesting
433 #ifdef CONFIG_DEBUG_INFO
436 leaq ARGOFFSET(%rdi),%rsp	# back to the task stack (rdi = saved rsp)
438 GET_THREAD_INFO(%rcx)
439 testl $3,CS-ARGOFFSET(%rsp)	# interrupted user mode?
442 /* Interrupt came from user space */
444 * Has a correct top of stack, but a partial stack frame
445 * %rcx: thread info. Interrupts off.
447 retint_with_reschedule:
448 movl $_TIF_WORK_MASK,%edi	# work bits that force the slow path
450 movl threadinfo_flags(%rcx),%edx
461 .section __ex_table,"a"
462 .quad iret_label,bad_iret	# recover if iretq itself faults
465 /* force a signal here? this matches i386 behaviour */
466 /* running with kernel gs */
468 movq $-9999,%rdi /* better code? */	# bad_iret: fabricated error code
/*
 * retint_careful / retint_signal: handle pending reschedule or signal
 * work before returning to user space.  NOTE(review): the labels, the
 * branches consuming the bt below, the schedule call, SAVE_REST /
 * RESTORE_REST, and the interrupt toggling fall in lines missing from
 * this chunk.
 */
472 /* edi: workmask, edx: work */
474 bt $TIF_NEED_RESCHED,%edx	# CF = need_resched
480 GET_THREAD_INFO(%rcx)
487 movq $-1,ORIG_RAX(%rsp)	# not in a syscall: block syscall restart
488 xorq %rsi,%rsi # oldset
489 movq %rsp,%rdi # &pt_regs
490 call do_notify_resume
493 GET_THREAD_INFO(%rcx)	# thread info may have moved across the call
/*
 * retint_kernel: kernel preemption check on return to kernel space.
 * Preempt only if the preempt count is zero, a reschedule is pending,
 * and the interrupted context had interrupts enabled.
 * NOTE(review): the retint_kernel label and the jump back after
 * preempt_schedule_irq fall in lines missing from this chunk.  (When
 * CONFIG_PREEMPT is off, retint_kernel aliases retint_restore_args --
 * see the #define near the top of the file.)
 */
496 #ifdef CONFIG_PREEMPT
497 /* Returning to kernel space. Check if we need preemption */
498 /* rcx: threadinfo. interrupts off. */
501 cmpl $0,threadinfo_preempt_count(%rcx)
502 jnz retint_restore_args	# preemption disabled
503 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
504 jnc retint_restore_args	# no reschedule requested
505 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
506 jnc retint_restore_args	# interrupted context had IRQs off
507 call preempt_schedule_irq
/*
 * apicinterrupt: entry stub for an APIC vector -- pushes the vector
 * number and funnels into the common interrupt path with \func as the
 * handler.  NOTE(review): the macro body (pushq of the vector, the
 * interrupt-macro invocation, jmp ret_from_intr, .endm) and the END
 * markers of the entries below fall in lines missing from this chunk.
 */
515 .macro apicinterrupt num,func
522 ENTRY(thermal_interrupt)
523 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
526 ENTRY(reschedule_interrupt)
527 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
529 ENTRY(invalidate_interrupt)
530 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
532 ENTRY(call_function_interrupt)
533 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
536 #ifdef CONFIG_X86_LOCAL_APIC
537 ENTRY(apic_timer_interrupt)
538 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
540 ENTRY(error_interrupt)
541 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
543 ENTRY(spurious_interrupt)
544 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
548 * Exception entry points.
/*
 * Exception-entry helper macros.  The two pushq lines below belong to the
 * zeroentry-style prologue (synthesize a 0 error code, park rax in the
 * rdi slot).  paranoidentry handles NMI-like exceptions that can occur
 * anywhere, including with user gs still loaded: it reads MSR_GS_BASE to
 * decide whether swapgs is needed.
 * NOTE(review): the macro bodies are mostly missing from this chunk
 * (leaq/jmp into error_entry, SAVE_ALL, rdmsr and the swapgs branch, the
 * call of the handler, and every .endm) -- the lines shown are fragments.
 */
551 pushq $0 /* push error code/oldrax */
552 pushq %rax /* push real oldrax to the rdi slot */
557 .macro errorentry sym
563 /* error code is on the stack already */
564 /* handle NMI like exceptions that can happen everywhere */
565 .macro paranoidentry sym
569 movl $MSR_GS_BASE,%ecx	# rdmsr (missing here) tells kernel vs user gs
576 movq ORIG_RAX(%rsp),%rsi	# arg2 = error code
577 movq $-1,ORIG_RAX(%rsp)	# mark "not a syscall" to block restart
583 * Exception entry point. This expects an error code/orig_rax on the stack
584 * and the exception handler in %rax.
/*
 * error_entry: build a full pt_regs frame for ordinary exceptions, then
 * (in lines missing from this chunk) swapgs if we came from user mode,
 * call the handler in %rax, and fall into the error_exit work loop whose
 * tail is visible at the bottom.  The interleaved movq/CFI lines save
 * each register and annotate it for the unwinder.
 * NOTE(review): roughly every other line (the actual movq stores for
 * rdi/rsi/rdx/..., the cld, the swapgs decision, the handler call, and
 * the exit branches) is missing from this chunk.
 */
588 CFI_DEF_CFA rsp,(SS-RDI)
589 CFI_REL_OFFSET rsp,(RSP-RDI)
590 CFI_REL_OFFSET rip,(RIP-RDI)
591 /* rdi slot contains rax, oldrax contains error code */
594 CFI_ADJUST_CFA_OFFSET (14*8)
596 CFI_REL_OFFSET rsi,RSI
597 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
599 CFI_REL_OFFSET rdx,RDX
601 CFI_REL_OFFSET rcx,RCX
602 movq %rsi,10*8(%rsp) /* store rax */
603 CFI_REL_OFFSET rax,RAX
609 CFI_REL_OFFSET r10,R10
611 CFI_REL_OFFSET r11,R11
613 CFI_REL_OFFSET rbx,RBX
615 CFI_REL_OFFSET rbp,RBP
617 CFI_REL_OFFSET r12,R12
619 CFI_REL_OFFSET r13,R13
621 CFI_REL_OFFSET r14,R14
623 CFI_REL_OFFSET r15,R15
632 movq ORIG_RAX(%rsp),%rsi /* get error code */
633 movq $-1,ORIG_RAX(%rsp)	# block syscall restart
635 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
640 GET_THREAD_INFO(%rcx)
643 movl threadinfo_flags(%rcx),%edx
644 movl $_TIF_WORK_MASK,%edi
654 /* There are two places in the kernel that can potentially fault with
655 usergs. Handle them here. The exception handlers after
656 iret run with kernel gs again, so don't set the user space flag.
657 B stepping K8s sometimes report a truncated RIP for IRET
658 exceptions returning to compat mode. Check for these here too. */
/*
 * error_kernelspace / load_gs_index tail / bad_gs: recognize the two
 * known places that can fault with user gs still loaded (the iret and
 * the gs_change mov) and recover.  NOTE(review): the comparisons and
 * branches between these fragments, the swapgs in load_gs_index, the
 * gs_change label itself, and the fixup's final store fall in lines
 * missing from this chunk.
 */
659 leaq iret_label(%rip),%rbp	# candidate faulting RIP #1: the iretq
662 movl %ebp,%ebp /* zero extend */	# K8 erratum: RIP may come back truncated
665 cmpq $gs_change,RIP(%rsp)	# candidate faulting RIP #2: the gs load
669 /* Reload gs selector with exception handling */
670 /* edi: new selector */
677 2: mfence /* workaround */
682 .section __ex_table,"a"
684 .quad gs_change,bad_gs	# route a faulting gs load to the fixup below
687 /* running with kernelgs */
689 swapgs /* switch back to user gs */
696 * Create a kernel thread.
698 * C extern interface:
699 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
701 * asm input arguments:
702 * rdi: fn, rsi: arg, rdx: flags
/*
 * kernel_thread body fragment: fake an interrupt frame with child_rip as
 * the return address, then clone.  NOTE(review): the ENTRY(kernel_thread)
 * label, the clone-argument setup around these lines, the do_fork call,
 * and UNFAKE_STACK_FRAME fall in lines missing from this chunk.
 */
706 FAKE_STACK_FRAME $child_rip	# child will "return" into child_rip
709 # rdi: flags, rsi: usp, rdx: will be &pt_regs
711 orq kernel_thread_flags(%rip),%rdi	# force kernel-thread clone flags on
724 * It isn't worth checking for reschedule here,
725 * so internally to the x86_64 port you can rely on kernel_thread()
726 * not to reschedule the child before returning, this avoids the need
727 * of hacks for example to fork off the per-CPU idle tasks.
728 * [Hopefully no generic code relies on the reschedule -AK]
738 * Here we are in the child and the registers are set as they were
739 * at kernel_thread() invocation in the parent.
749 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
751 * C extern interface:
752 * extern long execve(char *name, char **argv, char **envp)
754 * asm input arguments:
755 * rdi: name, rsi: argv, rdx: envp
757 * We want to fallback into:
758 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
760 * do_sys_execve asm fallback arguments:
761 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
/*
 * execve tail fragment.  NOTE(review): the ENTRY(execve) label, the
 * FAKE_STACK_FRAME/do_execve call and the test feeding this je fall in
 * lines missing from this chunk; on success exec must return via the
 * IRET path to establish the fresh user state.
 */
771 je int_ret_from_sys_call
/*
 * Simple exception entry stubs: each expands errorentry (CPU pushed an
 * error code) or zeroentry (no error code; one is synthesized) with its
 * C handler.  NOTE(review): the ENTRY(page_fault) label and the END
 * markers fall in lines missing from this chunk.
 */
778 errorentry do_page_fault
780 ENTRY(coprocessor_error)
781 zeroentry do_coprocessor_error
783 ENTRY(simd_coprocessor_error)
784 zeroentry do_simd_coprocessor_error
786 ENTRY(device_not_available)
787 zeroentry math_state_restore	# lazy-FPU: fault triggers state restore
/*
 * debug (#DB) and nmi entries -- both run on a dedicated exception stack
 * and therefore use the paranoid entry path.  NOTE(review): the
 * ENTRY(debug)/ENTRY(nmi) labels, the error-code pushes preceding the
 * visible CFI adjustments, the nmi's paranoidentry invocation, and the
 * jumps into paranoid_exit fall in lines missing from this chunk.
 */
789 /* runs on exception stack */
793 CFI_ADJUST_CFA_OFFSET 8
794 paranoidentry do_debug
798 /* runs on exception stack */
802 CFI_ADJUST_CFA_OFFSET 8
805 * "Paranoid" exit path from exception stack.
806 * Paranoid because this is used by NMIs and cannot take
807 * any kernel state for granted.
808 * We don't do kernel preemption checks here, because only
809 * NMI should be common and it does not enable IRQs and
810 * cannot get reschedule ticks.
/*
 * NOTE(review): the paranoid_exit/paranoid_userspace/paranoid_schedule
 * labels, the conditional swapgs and RESTORE_ALL/iretq, the schedule
 * call, and the interrupt/stack toggling around the visible lines fall
 * in lines missing from this chunk.
 */
812 /* ebx: no swapgs flag */
814 testl %ebx,%ebx /* swapgs needed? */	# set by paranoidentry (1 = already kernel gs)
817 jnz paranoid_userspace
824 GET_THREAD_INFO(%rcx)
825 movl threadinfo_flags(%rcx),%ebx
826 andl $_TIF_WORK_MASK,%ebx	# any user-return work pending?
828 movq %rsp,%rdi /* &pt_regs */
830 movq %rax,%rsp /* switch stack for scheduling */	# leave the exception stack first
831 testl $_TIF_NEED_RESCHED,%ebx
832 jnz paranoid_schedule
833 movl %ebx,%edx /* arg3: thread flags */
835 xorl %esi,%esi /* arg2: oldset */
836 movq %rsp,%rdi /* arg1: &pt_regs */
837 call do_notify_resume
839 jmp paranoid_userspace	# re-check work flags after the signal delivery
844 jmp paranoid_userspace	# ...and after scheduling
/*
 * Remaining exception entry stubs.  double_fault, stack_segment and
 * machine_check run on dedicated exception stacks and use the paranoid
 * path; the rest are plain zeroentry/errorentry expansions.
 * NOTE(review): most ENTRY labels, the error-code pushes before the
 * paranoid entries, the jumps into paranoid_exit, and the END markers
 * fall in lines missing from this chunk.
 */
851 zeroentry do_overflow
857 zeroentry do_invalid_op
859 ENTRY(coprocessor_segment_overrun)
860 zeroentry do_coprocessor_segment_overrun
863 zeroentry do_reserved
865 /* runs on exception stack */
868 paranoidentry do_double_fault
873 errorentry do_invalid_TSS
875 ENTRY(segment_not_present)
876 errorentry do_segment_not_present
878 /* runs on exception stack */
881 paranoidentry do_stack_segment
885 ENTRY(general_protection)
886 errorentry do_general_protection
888 ENTRY(alignment_check)
889 errorentry do_alignment_check
892 zeroentry do_divide_error
894 ENTRY(spurious_interrupt_bug)
895 zeroentry do_spurious_interrupt_bug
897 #ifdef CONFIG_X86_MCE
898 /* runs on exception stack */
902 CFI_ADJUST_CFA_OFFSET 8
903 paranoidentry do_machine_check
909 zeroentry do_call_debug