[PATCH] Factor out common io apic routing entry access
[linux-2.6] / arch / x86_64 / kernel / process.c
1 /*
2  *  linux/arch/x86-64/kernel/process.c
3  *
4  *  Copyright (C) 1995  Linus Torvalds
5  *
6  *  Pentium III FXSR, SSE support
7  *      Gareth Hughes <gareth@valinux.com>, May 2000
8  * 
9  *  X86-64 port
10  *      Andi Kleen.
11  *
12  *      CPU hotplug support - ashok.raj@intel.com
13  */
14
15 /*
16  * This file handles the architecture-dependent parts of process handling..
17  */
18
19 #include <stdarg.h>
20
21 #include <linux/cpu.h>
22 #include <linux/errno.h>
23 #include <linux/sched.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/elfcore.h>
27 #include <linux/smp.h>
28 #include <linux/slab.h>
29 #include <linux/user.h>
30 #include <linux/module.h>
31 #include <linux/a.out.h>
32 #include <linux/interrupt.h>
33 #include <linux/delay.h>
34 #include <linux/ptrace.h>
35 #include <linux/utsname.h>
36 #include <linux/random.h>
37 #include <linux/notifier.h>
38 #include <linux/kprobes.h>
39
40 #include <asm/uaccess.h>
41 #include <asm/pgtable.h>
42 #include <asm/system.h>
43 #include <asm/io.h>
44 #include <asm/processor.h>
45 #include <asm/i387.h>
46 #include <asm/mmu_context.h>
47 #include <asm/pda.h>
48 #include <asm/prctl.h>
49 #include <asm/kdebug.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54
/* Routine with assembly linkage; only declared here, not defined in this part of the file. */
asmlinkage extern void ret_from_fork(void);

/* Clone flag set kept in a variable; nothing else in the visible code reads it. */
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

/* Set to 1 by idle_setup() whenever an "idle=" boot option was parsed. */
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power-management idle function, if any.  cpu_idle() calls it when it is
 * non-NULL and falls back to default_idle() otherwise.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
/*
 * Per-CPU handshake flag used by cpu_idle_wait(): the waiter sets it to 1
 * and the idle loop in cpu_idle() clears it again on its next pass.
 */
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

/* Notifier chain invoked with IDLE_START / IDLE_END by enter_idle() / __exit_idle(). */
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70
/*
 * idle_notifier_register - add a callback to the idle notifier chain.
 * @n: notifier block to register.
 *
 * Blocks on this chain are called with IDLE_START from enter_idle() and
 * with IDLE_END from __exit_idle().
 */
void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);
76
/*
 * idle_notifier_unregister - remove a callback from the idle notifier chain.
 * @n: notifier block previously passed to idle_notifier_register().
 *
 * NOTE(review): this is exported with EXPORT_SYMBOL while the register side
 * uses EXPORT_SYMBOL_GPL - confirm the asymmetry is intended.
 */
void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
82
/* Idle bookkeeping values written by enter_idle() and __exit_idle(). */
enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
/* Per-CPU idle marker; every CPU starts out as CPU_NOT_IDLE. */
static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
85
/*
 * Mark the executing CPU as idle and tell the idle notifier chain
 * (IDLE_START, no payload).
 */
void enter_idle(void)
{
        __get_cpu_var(idle_state) = CPU_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
91
/*
 * Mark the executing CPU as no longer idle and tell the idle notifier
 * chain (IDLE_END, no payload).  Called unconditionally by cpu_idle() and
 * conditionally by exit_idle().
 */
static void __exit_idle(void)
{
        __get_cpu_var(idle_state) = CPU_NOT_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
97
98 /* Called from interrupts to signify idle end */
99 void exit_idle(void)
100 {
101         if (current->pid | read_pda(irqcount))
102                 return;
103         __exit_idle();
104 }
105
/*
 * We use this if we don't have any better
 * idle routine..
 *
 * Enables interrupts, drops TS_POLLING from the thread status for the
 * duration of the wait, and halts repeatedly until need_resched() is true.
 */
static void default_idle(void)
{
        local_irq_enable();

        /* Clear TS_POLLING before sleeping; the barrier orders the clear
           ahead of the need_resched() tests below. */
        current_thread_info()->status &= ~TS_POLLING;
        smp_mb__after_clear_bit();
        while (!need_resched()) {
                /*
                 * Re-test need_resched() with interrupts off before halting.
                 * Presumably safe_halt() re-enables interrupts as it halts -
                 * TODO confirm against its definition.
                 */
                local_irq_disable();
                if (!need_resched())
                        safe_halt();
                else
                        local_irq_enable();
        }
        /* Restore the flag that cpu_idle() set before entering its loop. */
        current_thread_info()->status |= TS_POLLING;
}
125
/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 *
 * Installed as pm_idle by idle_setup() when "idle=poll" is given.
 */
static void poll_idle (void)
{
        local_irq_enable();

        /*
         * Spin at label 2: test _TIF_NEED_RESCHED against the thread flags
         * word, execute "rep; nop" as a spin-wait hint, and jump back while
         * the test result is zero, i.e. while the flag is still clear.
         */
        asm volatile(
                "2:"
                "testl %0,%1;"
                "rep; nop;"
                "je 2b;"
                : :
                "i" (_TIF_NEED_RESCHED),
                "m" (current_thread_info()->flags));
}
144
/*
 * cpu_idle_wait - wait until every online CPU has gone round its idle loop.
 *
 * Sets cpu_idle_state to 1 on all online CPUs (then back to 0 on the
 * calling CPU) and sleeps in one-second steps until each remaining CPU has
 * cleared its flag, which cpu_idle() does on every loop iteration.  CPUs
 * that go offline meanwhile are dropped from the wait set.
 */
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        /* Restrict the caller to the CPU it is currently running on. */
        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        /* The calling CPU's own flag is reset straight away. */
        __get_cpu_var(cpu_idle_state) = 0;

        /* Order the flag stores before polling; cpu_idle() has the matching rmb(). */
        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) &&
                                        !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                /* Forget CPUs that are no longer online. */
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
173
#ifdef CONFIG_HOTPLUG_CPU
/* Per-CPU state word defined elsewhere; play_dead() stores CPU_DEAD in it. */
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * We halt the CPU with physical CPU hotplug.
 *
 * Called from cpu_idle() once the executing CPU is reported offline;
 * never returns.
 */
static inline void play_dead(void)
{
        idle_task_exit();
        /* Write back and invalidate caches, then order that before the ack. */
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        /* Park the CPU: interrupts off, halt forever. */
        local_irq_disable();
        while (1)
                halt();
}
#else
/* Without CPU hotplug an idle CPU must never be found offline. */
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
197
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 *
 * Never returns: alternates between the chosen idle routine and schedule().
 */
void cpu_idle (void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        /* Acknowledge a pending cpu_idle_wait() request. */
                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        /* Pairs with the wmb() in cpu_idle_wait(). */
                        rmb();
                        /* Re-read pm_idle each pass; fall back to default_idle. */
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /* Bracket the idle routine with idle notifications. */
                        enter_idle();
                        idle();
                        __exit_idle();
                }

                /* Preemption is re-enabled only around the schedule() call. */
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}
231
/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * Installed as pm_idle by select_idle_routine().
 */
static void mwait_idle(void)
{
        local_irq_enable();

        while (!need_resched()) {
                /* Arm the monitor on the thread flags word. */
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                /* Re-check after arming so a flag set in between is not slept through. */
                if (need_resched())
                        break;
                __mwait(0, 0);
        }
}
251
252 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
253 {
254         static int printed;
255         if (cpu_has(c, X86_FEATURE_MWAIT)) {
256                 /*
257                  * Skip, if setup has overridden idle.
258                  * One CPU supports mwait => All CPUs supports mwait
259                  */
260                 if (!pm_idle) {
261                         if (!printed) {
262                                 printk("using mwait in idle threads.\n");
263                                 printed = 1;
264                         }
265                         pm_idle = mwait_idle;
266                 }
267         }
268 }
269
270 static int __init idle_setup (char *str)
271 {
272         if (!strncmp(str, "poll", 4)) {
273                 printk("using polling idle threads.\n");
274                 pm_idle = poll_idle;
275         }
276
277         boot_option_idle_override = 1;
278         return 1;
279 }
280
281 __setup("idle=", idle_setup);
282
/*
 * Prints also some state that isn't saved in the pt_regs
 *
 * @regs: saved register frame to dump.
 *
 * Output covers the loaded modules, an identification line for the current
 * task, every general register held in @regs, and then state read live
 * from the executing CPU: segment selectors, the FS/GS/kernel-GS base MSRs
 * and CR0/CR2/CR3/CR4.
 */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned int fsindex,gsindex;
        unsigned int ds,cs,es;

        printk("\n");
        print_modules();
        /* Only the first space-delimited word of the version string is printed. */
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
                regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        /* Segment selectors as currently loaded, not taken from @regs. */
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        /* Segment base addresses come from MSRs. */
        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        /* Control registers of the executing CPU. */
        asm("movq %%cr0, %0": "=r" (cr0));
        asm("movq %%cr2, %0": "=r" (cr2));
        asm("movq %%cr3, %0": "=r" (cr3));
        asm("movq %%cr4, %0": "=r" (cr4));

        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs,fsindex,gs,gsindex,shadowgs);
        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}
332
/*
 * show_regs - print the CPU number, the register dump and a stack trace.
 * @regs: saved register frame; the trace is started at the address
 *        immediately following it.
 */
void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1));
}
339
340 /*
341  * Free current thread data structures etc..
342  */
343 void exit_thread(void)
344 {
345         struct task_struct *me = current;
346         struct thread_struct *t = &me->thread;
347
348         if (me->thread.io_bitmap_ptr) { 
349                 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
350
351                 kfree(t->io_bitmap_ptr);
352                 t->io_bitmap_ptr = NULL;
353                 clear_thread_flag(TIF_IO_BITMAP);
354                 /*
355                  * Careful, clear this in the TSS too:
356                  */
357                 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
358                 t->io_bitmap_max = 0;
359                 put_cpu();
360         }
361 }
362
/*
 * flush_thread - reset per-thread architecture state of the current task.
 *
 * Applies a pending ABI switch, drops the debug flag, and clears the debug
 * registers, the TLS descriptors and the FPU state.
 */
void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        if (t->flags & _TIF_ABI_PENDING) {
                /* Clears the pending bit (known set) and toggles _TIF_IA32. */
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
                if (t->flags & _TIF_IA32)
                        current_thread_info()->status |= TS_COMPAT;
        }
        t->flags &= ~_TIF_DEBUG;

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}
388
389 void release_thread(struct task_struct *dead_task)
390 {
391         if (dead_task->mm) {
392                 if (dead_task->mm->context.size) {
393                         printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
394                                         dead_task->comm,
395                                         dead_task->mm->context.ldt,
396                                         dead_task->mm->context.size);
397                         BUG();
398                 }
399         }
400 }
401
402 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
403 {
404         struct user_desc ud = { 
405                 .base_addr = addr,
406                 .limit = 0xfffff,
407                 .seg_32bit = 1,
408                 .limit_in_pages = 1,
409                 .useable = 1,
410         };
411         struct n_desc_struct *desc = (void *)t->thread.tls_array;
412         desc += tls;
413         desc->a = LDT_entry_a(&ud); 
414         desc->b = LDT_entry_b(&ud); 
415 }
416
417 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
418 {
419         struct desc_struct *desc = (void *)t->thread.tls_array;
420         desc += tls;
421         return desc->base0 | 
422                 (((u32)desc->base1) << 16) | 
423                 (((u32)desc->base2) << 24);
424 }
425
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * @tsk: task about to be copied; its lazily held FPU state is handed to
 *       unlazy_fpu() first.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}
434
/*
 * copy_thread - set up the architecture-specific thread state of a child.
 * @nr:          not used in this function.
 * @clone_flags: clone flags; only CLONE_SETTLS is examined here.
 * @rsp:         stack pointer for the child, or ~0UL to make it point at
 *               the child's own register frame.
 * @unused:      not used in this function.
 * @p:           the new task.
 * @regs:        parent's register frame, copied to the top of the child's
 *               stack area.
 *
 * Returns 0 on success, -ENOMEM when the I/O bitmap copy cannot be
 * allocated, or the error reported by the TLS setup.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
        struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        /* The child's pt_regs occupy the very top of its stack area. */
        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        /* The child sees 0 in rax. */
        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        /* Selectors are sampled from the live segment registers. */
        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        /* A parent with an I/O bitmap gives the child a private copy. */
        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        /* 64-bit callers pass the new FS base in r8. */
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        /* On failure release the bitmap copy made above. */
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}
498
/*
 * This special macro can be used to load a debugging register
 *
 * @thread: pointer to a thread_struct; its debugreg<r> member is the value.
 * @r:      literal register number, pasted into the member name and also
 *          passed to set_debugreg().
 */
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
503
504 static inline void __switch_to_xtra(struct task_struct *prev_p,
505                                     struct task_struct *next_p,
506                                     struct tss_struct *tss)
507 {
508         struct thread_struct *prev, *next;
509
510         prev = &prev_p->thread,
511         next = &next_p->thread;
512
513         if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
514                 loaddebug(next, 0);
515                 loaddebug(next, 1);
516                 loaddebug(next, 2);
517                 loaddebug(next, 3);
518                 /* no 4 and 5 */
519                 loaddebug(next, 6);
520                 loaddebug(next, 7);
521         }
522
523         if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
524                 /*
525                  * Copy the relevant range of the IO bitmap.
526                  * Normally this is 128 bytes or less:
527                  */
528                 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
529                        max(prev->io_bitmap_max, next->io_bitmap_max));
530         } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
531                 /*
532                  * Clear any possible leftover bits:
533                  */
534                 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
535         }
536 }
537
/*
 *      switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 *
 * @prev_p: task being switched out.
 * @next_p: task being switched in.
 *
 * Returns @prev_p.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        tss->rsp0 = next->rsp0;

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        /* Save the live selector, reload only if either side is non-zero. */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        /* NOTE: the assignment below is the body of this if. */
                        if (fsindex)
                        prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                /* Same scheme as for FS, using load_gs_index() and
                   MSR_KERNEL_GS_BASE. */
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        /* NOTE: the assignment below is the body of this if. */
                        if (gsindex)
                        prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);

        /* This must be here to ensure both math_state_restore() and
           kernel_fpu_begin() work consistently.
           And the AMD workaround requires it to be after DS reload. */
        unlazy_fpu(prev_p);
        write_pda(kernelstack,
                  task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
            || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
                __switch_to_xtra(prev_p, next_p, tss);

        return prev_p;
}
634
635 /*
636  * sys_execve() executes a new program.
637  */
638 asmlinkage 
639 long sys_execve(char __user *name, char __user * __user *argv,
640                 char __user * __user *envp, struct pt_regs regs)
641 {
642         long error;
643         char * filename;
644
645         filename = getname(name);
646         error = PTR_ERR(filename);
647         if (IS_ERR(filename)) 
648                 return error;
649         error = do_execve(filename, argv, envp, &regs); 
650         if (error == 0) {
651                 task_lock(current);
652                 current->ptrace &= ~PT_DTRACE;
653                 task_unlock(current);
654         }
655         putname(filename);
656         return error;
657 }
658
/*
 * set_personality_64bit - put the current task into 64-bit mode.
 *
 * Clears TIF_IA32 and removes READ_IMPLIES_EXEC from the personality;
 * all other personality bits are left as they were.
 */
void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit childs are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}
672
/*
 * sys_fork - fork system call.
 * @regs: caller's register frame; its rsp becomes the child's stack pointer.
 *
 * Calls do_fork() with SIGCHLD as the only flag and returns its result.
 */
asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}
677
678 asmlinkage long
679 sys_clone(unsigned long clone_flags, unsigned long newsp,
680           void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
681 {
682         if (!newsp)
683                 newsp = regs->rsp;
684         return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
685 }
686
/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 *
 * @regs: caller's register frame; its rsp is reused for the child.
 *
 * Calls do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD and returns its
 * result.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}
702
703 unsigned long get_wchan(struct task_struct *p)
704 {
705         unsigned long stack;
706         u64 fp,rip;
707         int count = 0;
708
709         if (!p || p == current || p->state==TASK_RUNNING)
710                 return 0; 
711         stack = (unsigned long)task_stack_page(p);
712         if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
713                 return 0;
714         fp = *(u64 *)(p->thread.rsp);
715         do { 
716                 if (fp < (unsigned long)stack ||
717                     fp > (unsigned long)stack+THREAD_SIZE)
718                         return 0; 
719                 rip = *(u64 *)(fp+8); 
720                 if (!in_sched_functions(rip))
721                         return rip; 
722                 fp = *(u64 *)fp; 
723         } while (count++ < 16); 
724         return 0;
725 }
726
727 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
728
729         int ret = 0; 
730         int doit = task == current;
731         int cpu;
732
733         switch (code) { 
734         case ARCH_SET_GS:
735                 if (addr >= TASK_SIZE_OF(task))
736                         return -EPERM; 
737                 cpu = get_cpu();
738                 /* handle small bases via the GDT because that's faster to 
739                    switch. */
740                 if (addr <= 0xffffffff) {  
741                         set_32bit_tls(task, GS_TLS, addr); 
742                         if (doit) { 
743                                 load_TLS(&task->thread, cpu);
744                                 load_gs_index(GS_TLS_SEL); 
745                         }
746                         task->thread.gsindex = GS_TLS_SEL; 
747                         task->thread.gs = 0;
748                 } else { 
749                         task->thread.gsindex = 0;
750                         task->thread.gs = addr;
751                         if (doit) {
752                                 load_gs_index(0);
753                                 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
754                         } 
755                 }
756                 put_cpu();
757                 break;
758         case ARCH_SET_FS:
759                 /* Not strictly needed for fs, but do it for symmetry
760                    with gs */
761                 if (addr >= TASK_SIZE_OF(task))
762                         return -EPERM; 
763                 cpu = get_cpu();
764                 /* handle small bases via the GDT because that's faster to 
765                    switch. */
766                 if (addr <= 0xffffffff) { 
767                         set_32bit_tls(task, FS_TLS, addr);
768                         if (doit) { 
769                                 load_TLS(&task->thread, cpu); 
770                                 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
771                         }
772                         task->thread.fsindex = FS_TLS_SEL;
773                         task->thread.fs = 0;
774                 } else { 
775                         task->thread.fsindex = 0;
776                         task->thread.fs = addr;
777                         if (doit) {
778                                 /* set the selector to 0 to not confuse
779                                    __switch_to */
780                                 asm volatile("movl %0,%%fs" :: "r" (0));
781                                 ret = checking_wrmsrl(MSR_FS_BASE, addr);
782                         }
783                 }
784                 put_cpu();
785                 break;
786         case ARCH_GET_FS: { 
787                 unsigned long base; 
788                 if (task->thread.fsindex == FS_TLS_SEL)
789                         base = read_32bit_tls(task, FS_TLS);
790                 else if (doit)
791                         rdmsrl(MSR_FS_BASE, base);
792                 else
793                         base = task->thread.fs;
794                 ret = put_user(base, (unsigned long __user *)addr); 
795                 break; 
796         }
797         case ARCH_GET_GS: { 
798                 unsigned long base;
799                 unsigned gsindex;
800                 if (task->thread.gsindex == GS_TLS_SEL)
801                         base = read_32bit_tls(task, GS_TLS);
802                 else if (doit) {
803                         asm("movl %%gs,%0" : "=r" (gsindex));
804                         if (gsindex)
805                                 rdmsrl(MSR_KERNEL_GS_BASE, base);
806                         else
807                                 base = task->thread.gs;
808                 }
809                 else
810                         base = task->thread.gs;
811                 ret = put_user(base, (unsigned long __user *)addr); 
812                 break;
813         }
814
815         default:
816                 ret = -EINVAL;
817                 break;
818         } 
819
820         return ret;     
821
822
823 long sys_arch_prctl(int code, unsigned long addr)
824 {
825         return do_arch_prctl(current, code, addr);
826
827
828 /* 
829  * Capture the user space registers if the task is not running (in user space)
830  */
831 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
832 {
833         struct pt_regs *pp, ptregs;
834
835         pp = task_pt_regs(tsk);
836
837         ptregs = *pp; 
838         ptregs.cs &= 0xffff;
839         ptregs.ss &= 0xffff;
840
841         elf_core_copy_regs(regs, &ptregs);
842  
843         return 1;
844 }
845
846 unsigned long arch_align_stack(unsigned long sp)
847 {
848         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
849                 sp -= get_random_int() % 8192;
850         return sp & ~0xf;
851 }