/*
 *  linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *   (See the struct pt_regs sketch after this comment block.)
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers are
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
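
/*
 * For orientation, a C sketch of the frame the terminology above refers
 * to. This mirrors the struct pt_regs layout used by asm-offsets on this
 * architecture; it is a reference sketch only, not the authoritative
 * definition (see <asm/ptrace.h>):
 *
 *	struct pt_regs {
 *		// full frame only: saved/restored by SAVE_REST/RESTORE_REST
 *		unsigned long r15, r14, r13, r12, rbp, rbx;
 *		// partial frame: saved by SAVE_ARGS, "up to R11"
 *		unsigned long r11, r10, r9, r8, rax, rcx, rdx, rsi, rdi;
 *		unsigned long orig_rax;
 *		// "top of stack": hardware frame from RIP up to SS
 *		unsigned long rip, cs, eflags, rsp, ss;
 *	};
 */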
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/irqflags.h>
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? (EFLAGS bit 9 is IF) */
#endif
.endm
/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	CFI_ADJUST_CFA_OFFSET	8
	pushq	$(1<<9)	/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	pushq	%rax	/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm
.macro CFI_DEFAULT_STACK start=1
	CFI_DEF_CFA_OFFSET SS+8
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
ENTRY(ret_from_fork)
	push	kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8		# push moves %rsp by 8 on x86-64
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 *
 * Register setup:
 * rax	system call number
 * rcx	return address for syscall/sysret, C arg3
 * r10	arg3	(--> moved to rcx for C)
 * r11	eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx	saved by C code, not touched
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET: it deals with
 * non-canonical addresses better. SYSRET has trouble with them due to
 * bugs in both AMD and Intel CPUs.
 */
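
/*
 * For illustration, a minimal userspace sketch of this register
 * convention using GCC inline asm (not part of the kernel; the clobber
 * list mirrors the rcx/r11 notes above):
 *
 *	static long raw_syscall3(long nr, long a0, long a1, long a2)
 *	{
 *		long ret;
 *		asm volatile ("syscall"
 *			      : "=a" (ret)
 *			      : "a" (nr), "D" (a0), "S" (a1), "d" (a2)
 *			      : "rcx", "r11", "memory");
 *		return ret;
 *	}
 *
 *	// e.g.: raw_syscall3(__NR_write, 1, (long)"hi\n", 3);
 */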
ENTRY(system_call)
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	/*CFI_REGISTER	rflags,r11*/
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * enabled interrupts in user mode already.
	 */
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	cmpq	$__NR_syscall_max,%rax
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
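
	/*
	 * The indirect call above is, in C terms, a table dispatch like
	 * the following sketch (types hypothetical; the scale factor 8 is
	 * sizeof(void *) on x86-64):
	 *
	 *	typedef long (*sys_call_ptr_t)(long, long, long,
	 *				       long, long, long);
	 *	extern sys_call_ptr_t sys_call_table[];
	 *	regs->rax = sys_call_table[nr](a0, a1, a2, a3, a4, a5);
	 */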
/*
 * Syscall return path ending with SYSRET (fast path).
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	/*
	 * sysretq will re-enable interrupts:
	 */
	movq	RIP-ARGOFFSET(%rsp),%rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	/* Handle reschedules */
	/* edx: work, edi: workmask */
	bt	$TIF_NEED_RESCHED,%edx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_ADJUST_CFA_OFFSET -8

	/* Handle a signal */
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl	%esi,%esi		# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	/*
	 * Use IRET because the user could have changed the frame. This
	 * works because ptregscall_common has called FIXUP_TOP_OF_STACK.
	 */
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call
	/* Do syscall tracing */
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	cmpq	$__NR_syscall_max,%rax
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
1:	movq	%rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because the user could have changed the frame */
	jmp	int_ret_from_sys_call
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	$~TS_COMPAT,threadinfo_status(%rcx)

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
	bt	$TIF_NEED_RESCHED,%edx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_ADJUST_CFA_OFFSET -8

	/* handle signals and tracing -- both require a full stack frame */
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	CFI_ADJUST_CFA_OFFSET 8
	leaq	8(%rsp),%rdi		# &ptregs -> arg1
	call	syscall_trace_leave
	CFI_ADJUST_CFA_OFFSET -8
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi

	testl	$(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	movq	%rsp,%rdi		# &ptregs -> arg1
	xorl	%esi,%esi		# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
END(int_ret_from_sys_call)
/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	RESTORE_TOP_OF_STACK %r11
	CFI_REGISTER rip, r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
END(ptregscall_common)
ENTRY(stub_execve)
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	FIXUP_TOP_OF_STACK %r11
	RESTORE_TOP_OF_STACK %r11
	jmp	int_ret_from_sys_call
END(stub_execve)
/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_ADJUST_CFA_OFFSET	-8
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme: this could be done at the higher layer
	jmp	int_ret_from_sys_call
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_DEF_CFA	rsp,SS+8-\ref
	/*CFI_REL_OFFSET	ss,SS-\ref*/
	CFI_REL_OFFSET	rsp,RSP-\ref
	/*CFI_REL_OFFSET	rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET	cs,CS-\ref*/
	CFI_REL_OFFSET	rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the
 * fast path. (See the C sketch below for the overall flow.)
 *
 * Entry runs with interrupts off.
 */
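
/*
 * In C terms, the interrupt macro below plus the retint_* exit code
 * behave roughly like this sketch (helper names hypothetical, for
 * orientation only; irqcount/irqstackptr live in the per-CPU PDA):
 *
 *	struct x8664_pda { int irqcount; void *irqstackptr; };
 *
 *	static void irq_flow(struct x8664_pda *pda, struct pt_regs *regs,
 *			     void (*handler)(struct pt_regs *))
 *	{
 *		if (pda->irqcount++ == 0) {
 *			// first interrupt on this CPU: switch %rsp to the
 *			// dedicated per-CPU interrupt stack (irqstackptr)
 *		}
 *		handler(regs);		// e.g. do_IRQ
 *		pda->irqcount--;
 *		// on return to user space: handle signals/reschedule first
 *	}
 */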
/* 0(%rsp): interrupt number */
	.macro interrupt func
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	CFI_DEF_CFA_REGISTER	rbp
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl	%gs:pda_irqcount
	cmoveq	%gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
ENTRY(common_interrupt)
	/* 0(%rsp): oldrsp-ARGOFFSET */
	decl	%gs:pda_irqcount
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)
	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame.
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
	movl	threadinfo_flags(%rcx),%edx
	/*
	 * The iretq could re-enable interrupts:
	 */
	.section __ex_table,"a"
	.quad	iret_label,bad_iret
	.previous
bad_iret:
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
	movq	$11,%rdi	/* SIGSEGV */
	/* edi: workmask, edx: work */
	bt	$TIF_NEED_RESCHED,%edx
	CFI_ADJUST_CFA_OFFSET	8
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)

	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi		# oldset
	movq	%rsp,%rdi		# &pt_regs
	call	do_notify_resume
	movl	$_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption. */
	/* rcx: threadinfo. Interrupts off. */
ENTRY(retint_kernel)
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
#endif
END(common_interrupt)
	.macro apicinterrupt num,func
	CFI_ADJUST_CFA_OFFSET 8
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	pushq	$0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq	%rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	.endm

	.macro errorentry sym
	CFI_ADJUST_CFA_OFFSET 8
	.endm
	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	movl	$MSR_GS_BASE,%ecx
	movq	%gs:pda_data_offset, %rbp
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endm
770 * "Paranoid" exit path from exception stack.
771 * Paranoid because this is used by NMIs and cannot take
772 * any kernel state for granted.
773 * We don't do kernel preemption checks here, because only
774 * NMI should be common and it does not enable IRQs and
775 * cannot get reschedule ticks.
777 * "trace" is 0 for the NMI handler only, because irq-tracing
778 * is fundamentally NMI-unsafe. (we cannot change the soft and
779 * hard flags at once, atomically)
	.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
	testl	%ebx,%ebx			/* swapgs needed? */
	jnz	paranoid_restore\trace
	jnz	paranoid_userspace\trace
paranoid_swapgs\trace:
paranoid_restore\trace:
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	paranoid_swapgs\trace
	movq	%rsp,%rdi			/* &pt_regs */
	movq	%rax,%rsp			/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	paranoid_schedule\trace
	movl	%ebx,%edx			/* arg3: thread flags */
	xorl	%esi,%esi			/* arg2: oldset */
	movq	%rsp,%rdi			/* arg1: &pt_regs */
	call	do_notify_resume
	jmp	paranoid_userspace\trace
paranoid_schedule\trace:
	jmp	paranoid_userspace\trace
	.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	/* rdi slot contains rax, oldrax contains error code */
	CFI_ADJUST_CFA_OFFSET	(14*8)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r15,R15
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	/*
	 * The iret might restore flags:
	 */
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after iret run
	 * with kernel gs again, so don't set the user space flag.
	 * B-stepping K8s sometimes report a truncated RIP for IRET exceptions
	 * returning to compat mode. Check for these here too.
	 */
	leaq	iret_label(%rip),%rbp
	movl	%ebp,%ebp	/* zero extend */
	cmpq	$gs_change,RIP(%rsp)
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
	CFI_ADJUST_CFA_OFFSET 8
2:	mfence		/* workaround */
	CFI_ADJUST_CFA_OFFSET -8
ENDPROC(load_gs_index)
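
/*
 * C-side view (a sketch): the kernel declares this as
 *	extern void load_gs_index(unsigned gs);
 * and uses it where loading a possibly-stale %gs selector may fault,
 * e.g. in the context switch path:
 *	load_gs_index(next->gsindex);
 */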
	.section __ex_table,"a"
	.quad	gs_change,bad_gs
	.previous
bad_gs:
	/* running with kernelgs */
	swapgs			/* switch back to user gs */
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
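
/*
 * A minimal usage sketch (hedged; my_worker and the flag choice are
 * illustrative, not taken from this file):
 *
 *	static int my_worker(void *arg)
 *	{
 *		// runs in the new kernel thread
 *		return 0;
 *	}
 *
 *	pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
 */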
ENTRY(kernel_thread)
	FAKE_STACK_FRAME $child_rip
	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	orq	kernel_thread_flags(%rip),%rdi
	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not rescheduling the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
ENDPROC(kernel_thread)
child_rip:
	pushq	$0		# fake return address
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
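
/*
 * Usage sketch (hedged; mirrors how the boot path traditionally execs
 * init, the names here are illustrative):
 *
 *	static char *argv_init[] = { "/sbin/init", NULL };
 *	static char *envp_init[] = { "HOME=/", NULL };
 *	kernel_execve("/sbin/init", argv_init, envp_init);
 */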
ENTRY(kernel_execve)
	movq	%rax, RAX(%rsp)
	je	int_ret_from_sys_call
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)
	/* runs on exception stack */
KPROBE_ENTRY(debug)
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
#endif

KPROBE_ENTRY(int3)
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)
	/* runs on exception stack */
ENTRY(double_fault)
	paranoidentry do_double_fault
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	paranoidentry do_stack_segment
END(stack_segment)
KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
END(machine_check)
#endif
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbp,0
	CFI_DEF_CFA_REGISTER rbp
	incl	%gs:pda_irqcount
	cmove	%gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	CFI_DEF_CFA_REGISTER rsp
	CFI_ADJUST_CFA_OFFSET -8
	decl	%gs:pda_irqcount
ENDPROC(call_softirq)
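
/*
 * C-side caller sketch (hedged; modeled on the arch irq code of this
 * era): do_softirq() uses call_softirq to run softirqs on the interrupt
 * stack when invoked from process context:
 *
 *	extern void call_softirq(void);
 *
 *	asmlinkage void do_softirq(void)
 *	{
 *		unsigned long flags;
 *		if (in_interrupt())
 *			return;
 *		local_irq_save(flags);
 *		if (local_softirq_pending())
 *			call_softirq();	// switches to the irq stack
 *		local_irq_restore(flags);
 *	}
 */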
#ifdef CONFIG_STACK_UNWIND
ENTRY(arch_unwind_init_running)
	/* save the callee-saved registers into the pt_regs at %rdi */
	movq	%r15, R15(%rdi)
	movq	%r14, R14(%rdi)
	movq	%r13, R13(%rdi)
	movq	%r12, R12(%rdi)
	movq	%rbp, RBP(%rdi)
	movq	%rbx, RBX(%rdi)
	/* fill the caller-saved/scratch slots from %rax */
	movq	%rax, R11(%rdi)
	movq	%rax, R10(%rdi)
	movq	%rax, RAX(%rdi)
	movq	%rax, RCX(%rdi)
	movq	%rax, RDX(%rdi)
	movq	%rax, RSI(%rdi)
	movq	%rax, RDI(%rdi)
	movq	%rax, ORIG_RAX(%rdi)
	/* synthesize a kernel-mode frame describing our caller */
	movq	%rcx, RIP(%rdi)
	movq	$__KERNEL_CS, CS(%rdi)
	movq	%rax, EFLAGS(%rdi)
	movq	%rcx, RSP(%rdi)
	movq	$__KERNEL_DS, SS(%rdi)
ENDPROC(arch_unwind_init_running)
#endif