/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *      CPU hotplug support - ashok.raj@intel.com
 *
 * This file handles the architecture-dependent parts of process handling.
 */
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/syscalls.h>
asmlinkage extern void ret_from_fork(void);

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);
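/*
 * Drivers can hook this notifier chain to learn when a CPU enters or
 * leaves the idle loop (the IDLE_START/IDLE_END events raised below).
 */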
void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
void enter_idle(void)
{
        percpu_write(is_idle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}
static inline void play_dead(void)
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
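        /*
         * TS_POLLING (set above) tells the scheduler that the idle task
         * polls need_resched(), so a remote CPU can wake it by setting
         * the flag instead of sending a reschedule IPI.
         */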
        /*
         * If we're the non-boot CPU, nothing set the stack canary up
         * for us.  CPU0 already has it initialized but no harm in
         * doing it again.  This is a good place for updating it, as
         * we won't ever return from this function (so the invalid
         * canaries already on the stack won't ever trigger).
         */
        boot_init_stack_canary();
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {

                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
        const char *board;

        board = dmi_get_system_info(DMI_PRODUCT_NAME);
        if (!board)
                board = "";
175 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
176 current->pid, current->comm, print_tainted(),
177 init_utsname()->release,
178 (int)strcspn(init_utsname()->version, " "),
179 init_utsname()->version, board);
180 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
181 printk_address(regs->ip, 1);
182 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
183 regs->sp, regs->flags);
184 printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
185 regs->ax, regs->bx, regs->cx);
186 printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
187 regs->dx, regs->si, regs->di);
188 printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
189 regs->bp, regs->r8, regs->r9);
190 printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
191 regs->r10, regs->r11, regs->r12);
192 printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
193 regs->r13, regs->r14, regs->r15);
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));
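        /*
         * On x86-64 the live FS/GS base addresses are held in MSRs, not
         * in the descriptors the selectors point at, so read them
         * directly; the selectors saved above may legitimately be zero.
         */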
        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
213 printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
214 fs, fsindex, gs, gsindex, shadowgs);
215 printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
217 printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
223 printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
227 printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
void show_regs(struct pt_regs *regs)
{
        printk(KERN_INFO "CPU %d:", smp_processor_id());
        __show_regs(regs, 1);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);
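        /*
         * TIF_FORK, set just above, is what sends the new task through
         * ret_from_fork the first time it is switched to.
         */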
        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        savesegment(gs, p->thread.gsindex);
        savesegment(fs, p->thread.fsindex);
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);
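        /*
         * If the parent has an I/O permission bitmap from ioperm(), the
         * child gets its own copy so later ioperm() calls in either task
         * stay private to that task.
         */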
        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }
        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
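        /*
         * Note the clone ABI used above: a 32-bit child passes a
         * struct user_desc pointer in %esi, while a 64-bit child passes
         * the new FS base directly in %r8.
         */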
        ds_copy_thread(p, me);

        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}
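/*
 * start_thread() is used by the exec path: it resets the user-visible
 * register state so the new program begins at new_ip with its stack at
 * new_sp, with flat segments and freshly cleared FPU/extended state.
 */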
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        loadsegment(fs, 0);
        loadsegment(es, 0);
        loadsegment(ds, 0);
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        percpu_write(old_rsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here; set the probe on schedule() instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        unsigned fsindex, gsindex;

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);
        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
         * (e.g. xen_load_tls())
         */
        savesegment(fs, fsindex);
        savesegment(gs, gsindex);

        load_TLS(next, cpu);
        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_end_context_switch(next_p);
        /*
         * Switch FS and GS.
         *
         * Segment register != 0 always requires a reload. Also
         * reload when it has changed. When prev process used 64bit
         * base always reload to avoid an information leak.
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
                /*
                 * Check if the user used a selector != 0; if yes
                 * clear 64bit base, since overloaded base is always
                 * mapped to the Null selector
                 */
                if (fsindex)
                        prev->fs = 0;
        }
        /* when next process has a 64bit base use it */
        if (next->fs)
                wrmsrl(MSR_FS_BASE, next->fs);
        prev->fsindex = fsindex;

        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
                if (gsindex)
                        prev->gs = 0;
        }
        if (next->gs)
                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
        prev->gsindex = gsindex;
        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = percpu_read(old_rsp);
        percpu_write(old_rsp, next->usersp);
        percpu_write(current_task, next_p);

        percpu_write(kernel_stack,
                (unsigned long)task_stack_page(next_p) +
                THREAD_SIZE - KERNEL_STACK_OFFSET);
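        /*
         * kernel_stack, updated above, is the per-CPU value the syscall
         * entry code loads as its initial stack pointer, so it must track
         * the incoming task on every switch.
         */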
        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);
        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         *
         * tsk_used_math() checks prevent calling math_state_restore(),
         * which can sleep in the case of !tsk_used_math()
         */
        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();

        return prev_p;
}
/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}
void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
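/*
 * get_wchan() reports the kernel function a sleeping task is blocked in
 * (the "wait channel" shown by ps and /proc) by walking the task's saved
 * frame-pointer chain.
 */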
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp >= (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}
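/*
 * do_arch_prctl() implements ARCH_SET_FS/ARCH_SET_GS and the matching
 * ARCH_GET_* codes.  Bases below 4GB are installed through a GDT entry so
 * that context switches stay cheap; larger bases go through the FS/GS
 * base MSRs.
 */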
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                loadsegment(fs, 0);
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}
long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
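/*
 * Illustrative use from a 64-bit user process (a sketch, not part of this
 * file; some_base is a placeholder value).  glibc of this era has no
 * arch_prctl() wrapper, so the raw syscall is used:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);     // read current FS base
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, some_base); // move the GS base
 */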