x86: clean up switch_to()
[linux-2.6] / include / asm-x86 / system.h
1 #ifndef _ASM_X86_SYSTEM_H_
2 #define _ASM_X86_SYSTEM_H_
3
4 #include <asm/asm.h>
5 #include <asm/segment.h>
6 #include <asm/cpufeature.h>
7 #include <asm/cmpxchg.h>
8 #include <asm/nops.h>
9
10 #include <linux/kernel.h>
11 #include <linux/irqflags.h>
12
13 /* entries in ARCH_DLINFO: 2 when CONFIG_IA32_EMULATION is set, 1 otherwise */
14 #ifdef CONFIG_IA32_EMULATION
15 # define AT_VECTOR_SIZE_ARCH 2
16 #else
17 # define AT_VECTOR_SIZE_ARCH 1
18 #endif
19
20 #ifdef CONFIG_X86_32
21
22 struct task_struct; /* forward declaration: this header only needs pointers to it */
/*
 * Reached from the switch_to() asm in this header with prev/next as its
 * arguments; the value it returns is what switch_to() stores into 'last'.
 */
23 struct task_struct *__switch_to(struct task_struct *prev,
24                                 struct task_struct *next);
25
26 /*
27  * Saving eflags is important. It switches not only IOPL between tasks,
28  * it also protects other tasks from NT leaking through sysenter etc.
29  */
30 #define switch_to(prev, next, last)                                     \
31 do {                                                                    \
32         unsigned long esi, edi;                                         \
33                                                                         \
34         asm volatile(                                                   \
35                 "pushfl                 \n\t"   /* save    flags */     \
36                 "pushl %%ebp            \n\t"   /* save    EBP   */     \
37                 "movl %%esp,%[prev_sp]  \n\t"   /* save    ESP   */     \
38                 "movl %[next_sp],%%esp  \n\t"   /* restore ESP   */     \
39                 "movl $1f,%[prev_ip]    \n\t"   /* save    EIP   */     \
40                 "pushl %[next_ip]       \n\t"   /* restore EIP   */     \
41                 "jmp __switch_to        \n"     /* regparm call  */     \
42                 "1:                     \t"                             \
43                 "popl %%ebp             \n\t"   /* restore EBP   */     \
44                 "popfl                  \n"     /* restore flags */     \
45                                                                         \
46                   /* output parameters */                               \
47                 : [prev_sp] "=m" (prev->thread.sp),                     \
48                   [prev_ip] "=m" (prev->thread.ip),                     \
49                             "=a" (last),                                \
50                                                                         \
51                   /* clobbered output registers: */                     \
52                   "=S" (esi), "=D" (edi)                                \
53                                                                         \
54                   /* input parameters: */                               \
55                 : [next_sp]  "m" (next->thread.sp),                     \
56                   [next_ip]  "m" (next->thread.ip),                     \
57                                                                         \
58                   /* regparm parameters for __switch_to(): */           \
59                   [prev]     "a" (prev),                                \
60                   [next]     "d" (next)                                 \
61         );                                                              \
62 } while (0)
63
64 /*
65  * disable hlt during certain critical i/o operations
 *
 * Defined only in the CONFIG_X86_32 branch of this header.
 * NOTE(review): presumably advertises disable_hlt()/enable_hlt(), which are
 * declared further down in this file - confirm against the users.
66  */
67 #define HAVE_DISABLE_HLT
68 #else
/*
 * String fragments for the 64-bit switch_to() asm.
 *
 * __SAVE/__RESTORE build a "movq" that stores/loads register 'reg' at
 * (14 - offset) * 8 bytes above %rsp. They are not referenced anywhere in
 * the visible part of this file.
 */
69 #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
70 #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
71
72 /* frame pointer must be last for get_wchan */
/*
 * SAVE_CONTEXT pushes the flags and %rbp, then copies %rsi into %rbp;
 * RESTORE_CONTEXT undoes this in reverse order (%rbp back into %rsi,
 * pop %rbp, pop flags).
 */
73 #define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
74 #define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
75
/* Extra registers appended to switch_to()'s clobber list (note the leading comma). */
76 #define __EXTRA_CLOBBER  \
77         , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
78           "r12", "r13", "r14", "r15"
79
80 /* Save and restore the flags to handle a leaking NT flag. */
/*
 * 64-bit switch_to(prev, next, last), with next in %rsi and prev in %rdi:
 *   1. SAVE_CONTEXT, then store %rsp at offsetof(task_struct, thread.sp) in
 *      prev and load %rsp from the same offset in next.
 *   2. call __switch_to; execution continues at the global label
 *      thread_return.
 *   3. Reload the current task pointer from the PDA (%gs:pcurrent) into
 *      %rsi, fetch its 'stack' field into %r8, and atomically test-and-clear
 *      bit TIF_FORK in the flags word at that address.
 *   4. Copy %rax (the __switch_to return value) into %rdi; if the TIF_FORK
 *      bit was set, jump to ret_from_fork, otherwise RESTORE_CONTEXT.
 * 'last' is taken from %rax.
 * NOTE(review): %rdi is declared as an input only but is overwritten in
 * step 4 - confirm callers never rely on it still holding 'prev'.
 */
81 #define switch_to(prev, next, last) \
82         asm volatile(SAVE_CONTEXT                                                   \
83              "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
84              "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */    \
85              "call __switch_to\n\t"                                       \
86              ".globl thread_return\n"                                     \
87              "thread_return:\n\t"                                         \
88              "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"                       \
89              "movq %P[thread_info](%%rsi),%%r8\n\t"                       \
90              LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"        \
91              "movq %%rax,%%rdi\n\t"                                       \
92              "jc   ret_from_fork\n\t"                                     \
93              RESTORE_CONTEXT                                              \
94              : "=a" (last)                                                \
95              : [next] "S" (next), [prev] "D" (prev),                      \
96                [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
97                [ti_flags] "i" (offsetof(struct thread_info, flags)),      \
98                [tif_fork] "i" (TIF_FORK),                                 \
99                [thread_info] "i" (offsetof(struct task_struct, stack)),   \
100                [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
101              : "memory", "cc" __EXTRA_CLOBBER)
102 #endif
103
104 #ifdef __KERNEL__
/*
 * _set_base(addr, base): scatter 'base' over three locations relative to
 * 'addr' (byte offsets when addr is a char *, as set_base() passes it):
 *   bits  0-15 -> addr+2 (16-bit store)
 *   bits 16-23 -> addr+4
 *   bits 24-31 -> addr+7
 * 'base' is loaded into EDX through the "0" constraint tied to the dummy
 * output __pr; the asm rotates EDX by 16 to reach the upper half.
 * NOTE(review): the three destinations are declared as "m" inputs although
 * the asm stores to them, and there is no "memory" clobber - confirm that
 * this is intended.
 */
105 #define _set_base(addr, base) do { unsigned long __pr; \
106 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
107         "rorl $16,%%edx\n\t" \
108         "movb %%dl,%2\n\t" \
109         "movb %%dh,%3" \
110         :"=&d" (__pr) \
111         :"m" (*((addr)+2)), \
112          "m" (*((addr)+4)), \
113          "m" (*((addr)+7)), \
114          "0" (base) \
115         ); } while (0)
116
/*
 * _set_limit(addr, limit): store 'limit' relative to 'addr' (byte offsets
 * when addr is a char *, as set_limit() passes it):
 *   bits 0-15 -> addr (16-bit store)
 *   the byte at addr+6 is read, its upper nibble kept (and 0xf0), OR-ed
 *   with bits 16-23 of 'limit', and written back to addr+6.
 * 'limit' is loaded into EDX through the "0" constraint tied to the dummy
 * output __lr.
 * NOTE(review): both destinations are declared as "m" inputs although the
 * asm stores to them - confirm that this is intended.
 */
117 #define _set_limit(addr, limit) do { unsigned long __lr; \
118 __asm__ __volatile__ ("movw %%dx,%1\n\t" \
119         "rorl $16,%%edx\n\t" \
120         "movb %2,%%dh\n\t" \
121         "andb $0xf0,%%dh\n\t" \
122         "orb %%dh,%%dl\n\t" \
123         "movb %%dl,%2" \
124         :"=&d" (__lr) \
125         :"m" (*(addr)), \
126          "m" (*((addr)+6)), \
127          "0" (limit) \
128         ); } while (0)
129
/*
 * Convenience wrappers: take the object 'ldt' itself, view its address as a
 * char pointer and forward to _set_base()/_set_limit(). set_limit() stores
 * limit - 1.
 */
130 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
131 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
132
/* Defined outside this header. NOTE(review): presumably loads the given selector into %gs - confirm. */
133 extern void load_gs_index(unsigned);
134
135 /*
136  * Load a segment. Fall back on loading the zero
137  * segment if something goes wrong..
 *
 * loadsegment(seg, value): label 1 moves 'value' into segment register
 * 'seg'. The _ASM_EXTABLE(1b,3b) entry pairs that instruction with the
 * fixup code at label 3 in the .fixup section, which loads the second
 * operand (the constant 0) into 'seg' instead and jumps back to label 2.
 * %k0/%k1 select the 32-bit name of the operand register.
138  */
139 #define loadsegment(seg, value)                 \
140         asm volatile("\n"                       \
141                 "1:\t"                          \
142                 "movl %k0,%%" #seg "\n"         \
143                 "2:\n"                          \
144                 ".section .fixup,\"ax\"\n"      \
145                 "3:\t"                          \
146                 "movl %k1, %%" #seg "\n\t"      \
147                 "jmp 2b\n"                      \
148                 ".previous\n"                   \
149                 _ASM_EXTABLE(1b,3b)             \
150                 : :"r" (value), "r" (0))
151
152
153 /*
154  * Save a segment register away
 *
 * savesegment(seg, value): copy segment register 'seg' into 'value', which
 * the compiler may place in a register or in memory ("=rm").
155  */
156 #define savesegment(seg, value) \
157         asm volatile("mov %%" #seg ",%0":"=rm" (value))
158
/*
 * Query the limit of the segment selected by 'segment' with the LSL
 * instruction and return that limit plus one.
 */
159 static inline unsigned long get_limit(unsigned long segment)
160 {
161         unsigned long seg_limit;
162         __asm__("lsll %1,%0"
163                 : "=r" (seg_limit) : "r" (segment));
164         return seg_limit + 1;
165 }
166
/* Execute the CLTS instruction directly (the non-paravirt clts() maps to this). */
167 static inline void native_clts(void)
168 {
169         asm volatile ("clts");
170 }
171
172 /*
173  * Volatile isn't enough to prevent the compiler from reordering the
174  * read/write functions for the control registers and messing everything up.
175  * A memory clobber would solve the problem, but would prevent reordering of
176  * all loads and stores around it, which can hurt performance. The solution
177  * is to use a variable and mimic reads and writes to it to enforce
 * serialization: the cr0-cr4 readers below list it as an "=m" output and
 * the cr0-cr4 writers list it as an "m" input.
178  */
179 static unsigned long __force_order;
180
/*
 * Return the current contents of %cr0. __force_order is named as a dummy
 * memory output so the compiler keeps this asm ordered against the other
 * control-register accessors that reference the same variable.
 */
181 static inline unsigned long native_read_cr0(void)
182 {
183         unsigned long cr0;
184         asm volatile("mov %%cr0,%0\n\t" : "=r" (cr0), "=m" (__force_order));
185         return cr0;
186 }
187
/*
 * Load 'val' into %cr0. __force_order is passed as a dummy memory input so
 * this asm depends on the variable the control-register readers write.
 */
188 static inline void native_write_cr0(unsigned long val)
189 {
190         asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
191 }
192
/*
 * Return the current contents of %cr2. __force_order is named as a dummy
 * memory output so the compiler keeps this asm ordered against the other
 * control-register accessors that reference the same variable.
 */
193 static inline unsigned long native_read_cr2(void)
194 {
195         unsigned long cr2;
196         asm volatile("mov %%cr2,%0\n\t" : "=r" (cr2), "=m" (__force_order));
197         return cr2;
198 }
199
/*
 * Load 'val' into %cr2. __force_order is passed as a dummy memory input so
 * this asm depends on the variable the control-register readers write.
 */
200 static inline void native_write_cr2(unsigned long val)
201 {
202         asm volatile("mov %0,%%cr2": :"r" (val), "m" (__force_order));
203 }
204
/*
 * Return the current contents of %cr3. __force_order is named as a dummy
 * memory output so the compiler keeps this asm ordered against the other
 * control-register accessors that reference the same variable.
 */
205 static inline unsigned long native_read_cr3(void)
206 {
207         unsigned long cr3;
208         asm volatile("mov %%cr3,%0\n\t" : "=r" (cr3), "=m" (__force_order));
209         return cr3;
210 }
211
/*
 * Load 'val' into %cr3. __force_order is passed as a dummy memory input so
 * this asm depends on the variable the control-register readers write.
 */
212 static inline void native_write_cr3(unsigned long val)
213 {
214         asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
215 }
216
/*
 * Return the current contents of %cr4. __force_order is named as a dummy
 * memory output so the compiler keeps this asm ordered against the other
 * control-register accessors that reference the same variable.
 */
217 static inline unsigned long native_read_cr4(void)
218 {
219         unsigned long cr4;
220         asm volatile("mov %%cr4,%0\n\t" : "=r" (cr4), "=m" (__force_order));
221         return cr4;
222 }
223
224 static inline unsigned long native_read_cr4_safe(void)
225 {
226         unsigned long val;
227         /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
228          * exists, so it will never fail. */
229 #ifdef CONFIG_X86_32
230         asm volatile("1: mov %%cr4, %0\n"
231                      "2:\n"
232                      _ASM_EXTABLE(1b,2b)
233                      : "=r" (val), "=m" (__force_order) : "0" (0));
234 #else
235         val = native_read_cr4();
236 #endif
237         return val;
238 }
239
/*
 * Load 'val' into %cr4. __force_order is passed as a dummy memory input so
 * this asm depends on the variable the control-register readers write.
 */
240 static inline void native_write_cr4(unsigned long val)
241 {
242         asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
243 }
244
245 #ifdef CONFIG_X86_64
/*
 * Return the current contents of %cr8 (CONFIG_X86_64 only). Unlike the
 * cr0-cr4 readers, this asm does not reference __force_order.
 */
246 static inline unsigned long native_read_cr8(void)
247 {
248         unsigned long val;
249         asm volatile("movq %%cr8,%0" : "=r" (val));
250         return val;
251 }
252
/*
 * Load 'val' into %cr8 (CONFIG_X86_64 only). Uses a full "memory" clobber
 * rather than the __force_order dummy operand used by the cr0-cr4 writers.
 */
253 static inline void native_write_cr8(unsigned long val)
254 {
255         asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
256 }
257 #endif
258
/*
 * Execute WBINVD; the "memory" clobber keeps the compiler from moving
 * memory accesses across it.
 */
259 static inline void native_wbinvd(void)
260 {
261         asm volatile("wbinvd": : :"memory");
262 }
/*
 * Public control-register / cache / TS accessors.
 * With CONFIG_PARAVIRT, <asm/paravirt.h> is included instead (presumably it
 * supplies these names - confirm there). Otherwise each accessor is a
 * direct alias for the native_*() helper defined in this header.
 */
263 #ifdef CONFIG_PARAVIRT
264 #include <asm/paravirt.h>
265 #else
266 #define read_cr0()      (native_read_cr0())
267 #define write_cr0(x)    (native_write_cr0(x))
268 #define read_cr2()      (native_read_cr2())
269 #define write_cr2(x)    (native_write_cr2(x))
270 #define read_cr3()      (native_read_cr3())
271 #define write_cr3(x)    (native_write_cr3(x))
272 #define read_cr4()      (native_read_cr4())
273 #define read_cr4_safe() (native_read_cr4_safe())
274 #define write_cr4(x)    (native_write_cr4(x))
275 #define wbinvd()        (native_wbinvd())
/* The cr8 helpers only exist on 64-bit builds. */
276 #ifdef CONFIG_X86_64
277 #define read_cr8()      (native_read_cr8())
278 #define write_cr8(x)    (native_write_cr8(x))
279 #endif
280
281 /* Clear the 'TS' bit */
282 #define clts()          (native_clts())
283
284 #endif/* CONFIG_PARAVIRT */
285
/*
 * Read CR0, OR in the value 8 (bit 3) and write it back.
 * NOTE(review): presumably this is the 'TS' bit that clts() clears - confirm.
 */
286 #define stts() write_cr0(8 | read_cr0())
287
288 #endif /* __KERNEL__ */
289
/*
 * Execute CLFLUSH on the byte at __p. The "+m" operand tells the compiler
 * that the asm both reads and writes that location.
 */
290 static inline void clflush(volatile void *__p)
291 {
292         asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
293 }
294
/* Emit a single NOP instruction that the compiler will not remove (volatile asm). */
295 #define nop() __asm__ __volatile__ ("nop")
296
/*
 * Declarations only; all of these are defined outside this header.
 * NOTE(review): their behaviour cannot be seen from here - see the
 * definitions for their contracts.
 */
297 void disable_hlt(void);
298 void enable_hlt(void);
299
300 extern int es7000_plat;
301 void cpu_idle_wait(void);
302
303 extern unsigned long arch_align_stack(unsigned long sp);
304 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
305
306 void default_idle(void);
307
308 /*
309  * Force strict CPU ordering.
310  * And yes, this is required on UP too when we're talking
311  * to devices.
 *
 * mb()/rmb()/wmb() are the full, read and write barriers.
 * On CONFIG_X86_32 each one goes through alternative(): the first string
 * (a locked add of 0 to the top of the stack) is the default, and the
 * second (mfence/lfence/sfence) is tied to the named CPU feature flag.
 * Otherwise the fence instructions are emitted directly, each with a
 * "memory" clobber.
312  */
313 #ifdef CONFIG_X86_32
314 /*
315  * Some non-Intel clones support out of order store. wmb() ceases to be a
316  * nop for these.
317  */
318 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
319 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
320 #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
321 #else
322 #define mb()    asm volatile("mfence":::"memory")
323 #define rmb()   asm volatile("lfence":::"memory")
324 #define wmb()   asm volatile("sfence" ::: "memory")
325 #endif
326
327 /**
328  * read_barrier_depends - Flush all pending reads that subsequent reads
329  * depend on.
330  *
331  * No data-dependent reads from memory-like regions are ever reordered
332  * over this barrier.  All reads preceding this primitive are guaranteed
333  * to access memory (but not necessarily other CPUs' caches) before any
334  * reads following this primitive that depend on the data returned by
335  * any of the preceding reads.  This primitive is much lighter weight than
336  * rmb() on most CPUs, and is never heavier weight than
337  * rmb().
338  *
339  * These ordering constraints are respected by both the local CPU
340  * and the compiler.
341  *
342  * Ordering is not guaranteed by anything other than these primitives,
343  * not even by data dependencies.  See the documentation for
344  * memory_barrier() for examples and URLs to more information.
345  *
346  * For example, the following code would force ordering (the initial
347  * value of "a" is zero, "b" is one, and "p" is "&a"):
348  *
349  * <programlisting>
350  *      CPU 0                           CPU 1
351  *
352  *      b = 2;
353  *      memory_barrier();
354  *      p = &b;                         q = p;
355  *                                      read_barrier_depends();
356  *                                      d = *q;
357  * </programlisting>
358  *
359  * because the read of "*q" depends on the read of "p" and these
360  * two reads are separated by a read_barrier_depends().  However,
361  * the following code, with the same initial values for "a" and "b":
362  *
363  * <programlisting>
364  *      CPU 0                           CPU 1
365  *
366  *      a = 2;
367  *      memory_barrier();
368  *      b = 3;                          y = b;
369  *                                      read_barrier_depends();
370  *                                      x = a;
371  * </programlisting>
372  *
373  * does not enforce ordering, since there is no data dependency between
374  * the read of "a" and the read of "b".  Therefore, on some CPUs, such
375  * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
376  * in cases like this where there are no data dependencies.
377  **/
378
/* In this header the primitive expands to an empty statement. */
379 #define read_barrier_depends()  do { } while (0)
380
/*
 * SMP variants of the barriers.
 *
 * With CONFIG_SMP: smp_mb() is mb(); smp_rmb() is rmb() only under
 * CONFIG_X86_PPRO_FENCE and smp_wmb() is wmb() only under
 * CONFIG_X86_OOSTORE, otherwise both are barrier(); set_mb() performs the
 * store with xchg().
 *
 * Without CONFIG_SMP: all three reduce to barrier(),
 * smp_read_barrier_depends() is empty, and set_mb() is a plain assignment
 * followed by barrier().
 */
381 #ifdef CONFIG_SMP
382 #define smp_mb()        mb()
383 #ifdef CONFIG_X86_PPRO_FENCE
384 # define smp_rmb()      rmb()
385 #else
386 # define smp_rmb()      barrier()
387 #endif
388 #ifdef CONFIG_X86_OOSTORE
389 # define smp_wmb()      wmb()
390 #else
391 # define smp_wmb()      barrier()
392 #endif
393 #define smp_read_barrier_depends()      read_barrier_depends()
394 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
395 #else
396 #define smp_mb()        barrier()
397 #define smp_rmb()       barrier()
398 #define smp_wmb()       barrier()
399 #define smp_read_barrier_depends()      do { } while (0)
400 #define set_mb(var, value) do { var = value; barrier(); } while (0)
401 #endif
402
403 /*
404  * Stop RDTSC speculation. This is needed when you need to use RDTSC
405  * (or get_cycles or vread that possibly accesses the TSC) in a defined
406  * code region.
407  *
408  * (Could use an alternative three way for this if there was one.)
 *
 * Implemented as two separate alternative() sites, each defaulting to
 * ASM_NOP3: the first is tied to X86_FEATURE_MFENCE_RDTSC with "mfence"
 * as its replacement, the second to X86_FEATURE_LFENCE_RDTSC with "lfence".
409  */
410 static inline void rdtsc_barrier(void)
411 {
412         alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
413         alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
414 }
415
416 #endif