Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / arch / x86 / xen / setup.c
1 /*
2  * Machine specific setup for xen
3  *
4  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
5  */
6
7 #include <linux/module.h>
8 #include <linux/sched.h>
9 #include <linux/mm.h>
10 #include <linux/pm.h>
11
12 #include <asm/elf.h>
13 #include <asm/vdso.h>
14 #include <asm/e820.h>
15 #include <asm/setup.h>
16 #include <asm/acpi.h>
17 #include <asm/xen/hypervisor.h>
18 #include <asm/xen/hypercall.h>
19
20 #include <xen/page.h>
21 #include <xen/interface/callback.h>
22 #include <xen/interface/physdev.h>
23 #include <xen/features.h>
24
25 #include "xen-ops.h"
26 #include "vdso.h"
27
/*
 * These are code, but not functions.  Defined in entry.S.
 *
 * They are only ever used by address in this file: each one is handed to
 * register_callback() so the hypervisor can be told where to enter the
 * kernel for the corresponding callback type.  Nothing here calls them
 * directly.
 */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
34
35
36 /**
37  * machine_specific_memory_setup - Hook for machine specific memory setup.
38  **/
39
40 char * __init xen_memory_setup(void)
41 {
42         unsigned long max_pfn = xen_start_info->nr_pages;
43
44         max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
45
46         e820.nr_map = 0;
47
48         e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);
49
50         /*
51          * Even though this is normal, usable memory under Xen, reserve
52          * ISA memory anyway because too many things think they can poke
53          * about in there.
54          */
55         e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
56                         E820_RESERVED);
57
58         /*
59          * Reserve Xen bits:
60          *  - mfn_list
61          *  - xen_start_info
62          * See comment above "struct start_info" in <xen/interface/xen.h>
63          */
64         e820_add_region(__pa(xen_start_info->mfn_list),
65                         xen_start_info->pt_base - xen_start_info->mfn_list,
66                         E820_RESERVED);
67
68         sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
69
70         return "Xen";
71 }
72
73 static void xen_idle(void)
74 {
75         local_irq_disable();
76
77         if (need_resched())
78                 local_irq_enable();
79         else {
80                 current_thread_info()->status &= ~TS_POLLING;
81                 smp_mb__after_clear_bit();
82                 safe_halt();
83                 current_thread_info()->status |= TS_POLLING;
84         }
85 }
86
87 /*
88  * Set the bit indicating "nosegneg" library variants should be used.
89  * We only need to bother in pure 32-bit mode; compat 32-bit processes
90  * can have un-truncated segments, so wrapping around is allowed.
91  */
92 static void __init fiddle_vdso(void)
93 {
94 #ifdef CONFIG_X86_32
95         u32 *mask;
96         mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
97         *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
98         mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
99         *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
100 #endif
101 }
102
103 static __cpuinit int register_callback(unsigned type, const void *func)
104 {
105         struct callback_register callback = {
106                 .type = type,
107                 .address = XEN_CALLBACK(__KERNEL_CS, func),
108                 .flags = CALLBACKF_mask_events,
109         };
110
111         return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
112 }
113
114 void __cpuinit xen_enable_sysenter(void)
115 {
116         int ret;
117         unsigned sysenter_feature;
118
119 #ifdef CONFIG_X86_32
120         sysenter_feature = X86_FEATURE_SEP;
121 #else
122         sysenter_feature = X86_FEATURE_SYSENTER32;
123 #endif
124
125         if (!boot_cpu_has(sysenter_feature))
126                 return;
127
128         ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
129         if(ret != 0)
130                 setup_clear_cpu_cap(sysenter_feature);
131 }
132
133 void __cpuinit xen_enable_syscall(void)
134 {
135 #ifdef CONFIG_X86_64
136         int ret;
137
138         ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
139         if (ret != 0) {
140                 printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
141                 /* Pretty fatal; 64-bit userspace has no other
142                    mechanism for syscalls. */
143         }
144
145         if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
146                 ret = register_callback(CALLBACKTYPE_syscall32,
147                                         xen_syscall32_target);
148                 if (ret != 0)
149                         setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
150         }
151 #endif /* CONFIG_X86_64 */
152 }
153
154 void __init xen_arch_setup(void)
155 {
156         struct physdev_set_iopl set_iopl;
157         int rc;
158
159         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
160         HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
161
162         if (!xen_feature(XENFEAT_auto_translated_physmap))
163                 HYPERVISOR_vm_assist(VMASST_CMD_enable,
164                                      VMASST_TYPE_pae_extended_cr3);
165
166         if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
167             register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
168                 BUG();
169
170         xen_enable_sysenter();
171         xen_enable_syscall();
172
173         set_iopl.iopl = 1;
174         rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
175         if (rc != 0)
176                 printk(KERN_INFO "physdev_op failed %d\n", rc);
177
178 #ifdef CONFIG_ACPI
179         if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
180                 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
181                 disable_acpi();
182         }
183 #endif
184
185         memcpy(boot_command_line, xen_start_info->cmd_line,
186                MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
187                COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
188
189         pm_idle = xen_idle;
190
191         paravirt_disable_iospace();
192
193         fiddle_vdso();
194 }