2 * machine_kexec.c - handle transition of Linux booting another kernel
3 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
5 * This source code is licensed under the GNU General Public License,
6 * Version 2. See the file COPYING for more details.
10 #include <linux/kexec.h>
11 #include <linux/delay.h>
12 #include <linux/string.h>
13 #include <linux/reboot.h>
15 #include <asm/pgtable.h>
16 #include <asm/pgalloc.h>
17 #include <asm/tlbflush.h>
18 #include <asm/mmu_context.h>
21 #include <asm/cpufeature.h>
22 #include <asm/hw_irq.h>
/*
 * Address-range sizes used by the page-table setup code in this file,
 * all powers of two: 4 KiB, 2 MiB, 1 GiB, 512 GiB and 256 TiB.
 * LEVEL2_SIZE, LEVEL3_SIZE and LEVEL4_SIZE are the spans added to the
 * start address to compute end_addr in init_level2_page(),
 * init_level3_page() and init_level4_page() respectively.
 */
24 #define LEVEL0_SIZE (1UL << 12UL)
25 #define LEVEL1_SIZE (1UL << 21UL)
26 #define LEVEL2_SIZE (1UL << 30UL)
27 #define LEVEL3_SIZE (1UL << 39UL)
28 #define LEVEL4_SIZE (1UL << 48UL)
/*
 * Flag sets OR'd into page-table entries.  L1_ATTR is the only one that
 * includes _PAGE_PSE; it is used for the entries written into a level-2
 * table.  L2_ATTR is used for entries stored in a level-3 table and
 * L3_ATTR for entries stored in a level-4 table.
 */
30 #define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
31 #define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
32 #define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
33 #define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
/*
 * Fill the table at level2p with entries of the form (addr | L1_ATTR),
 * for addresses in the range [addr, addr + LEVEL2_SIZE).  Each entry
 * holds the address itself, i.e. the mapping is an identity mapping.
 * NOTE(review): the statement advancing addr inside the loop is not
 * visible; presumably it steps by LEVEL1_SIZE per entry -- confirm.
 */
35 static void init_level2_page(u64 *level2p, unsigned long addr)
37 unsigned long end_addr;
40 end_addr = addr + LEVEL2_SIZE;
41 while (addr < end_addr) {
42 *(level2p++) = addr | L1_ATTR;
/*
 * Populate the table at level3p for addresses starting at addr and
 * stopping at whichever comes first of last_addr and addr + LEVEL3_SIZE.
 *
 * For every slot a page is obtained with
 * kimage_alloc_control_pages(image, 0), filled by init_level2_page(),
 * and the page's physical address OR'd with L2_ATTR is stored in the
 * next level-3 slot.  A second loop then runs up to end_addr to deal
 * with the slots that were not used.
 *
 * NOTE(review): the allocation-failure handling, the per-iteration addr
 * advance and the return statement are not visible; presumably this
 * returns 0 on success and a negative errno when the page allocation
 * fails -- confirm.
 */
47 static int init_level3_page(struct kimage *image, u64 *level3p,
48 unsigned long addr, unsigned long last_addr)
50 unsigned long end_addr;
55 end_addr = addr + LEVEL3_SIZE;
56 while ((addr < last_addr) && (addr < end_addr)) {
60 page = kimage_alloc_control_pages(image, 0);
65 level2p = (u64 *)page_address(page);
66 init_level2_page(level2p, addr);
67 *(level3p++) = __pa(level2p) | L2_ATTR;
70 /* clear the unused entries */
71 while (addr < end_addr) {
/*
 * Populate the top-level table at level4p for addresses starting at addr
 * and stopping at whichever comes first of last_addr and
 * addr + LEVEL4_SIZE.
 *
 * For every slot a page is obtained with
 * kimage_alloc_control_pages(image, 0) and handed to init_level3_page()
 * together with the same last_addr; the page's physical address OR'd
 * with L3_ATTR is then stored in the next level-4 slot.  A second loop
 * runs up to end_addr to deal with the slots that were not used.
 *
 * NOTE(review): the checks on the allocation and on `result`, the addr
 * advance and the return statement are not visible; presumably a
 * failure from either step is propagated to the caller -- confirm.
 */
80 static int init_level4_page(struct kimage *image, u64 *level4p,
81 unsigned long addr, unsigned long last_addr)
83 unsigned long end_addr;
88 end_addr = addr + LEVEL4_SIZE;
89 while ((addr < last_addr) && (addr < end_addr)) {
93 page = kimage_alloc_control_pages(image, 0);
98 level3p = (u64 *)page_address(page);
99 result = init_level3_page(image, level3p, addr, last_addr);
103 *(level4p++) = __pa(level3p) | L3_ATTR;
106 /* clear the unused entries */
107 while (addr < end_addr) {
/*
 * Build the page table whose top-level table lives at physical address
 * start_pgtable.  The address is converted to a kernel virtual pointer
 * with __va() and init_level4_page() is asked to map addresses from 0 up
 * to end_pfn << PAGE_SHIFT.  Returns init_level4_page()'s result.
 */
116 static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
119 level4p = (u64 *)__va(start_pgtable);
120 return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
/*
 * Build a 10-byte descriptor-table operand in curidt -- the 16-bit
 * limit at offset 0 followed by the 64-bit address newidt at offset 2 --
 * and pass it to the inline assembly that follows.
 * NOTE(review): the asm body is not visible; presumably it executes
 * lidt on curidt -- confirm.
 */
123 static void set_idt(void *newidt, u16 limit)
125 unsigned char curidt[10];
127 /* x86-64 supports unaligned loads & stores */
128 (*(u16 *)(curidt)) = limit;
129 (*(u64 *)(curidt +2)) = (unsigned long)(newidt);
131 __asm__ __volatile__ (
/*
 * Build a 10-byte descriptor-table operand in curgdt -- the 16-bit
 * limit at offset 0 followed by the 64-bit address newgdt at offset 2 --
 * and pass it to the inline assembly that follows.
 * NOTE(review): the asm body is not visible; presumably it executes
 * lgdt on curgdt -- confirm.
 */
138 static void set_gdt(void *newgdt, u16 limit)
140 unsigned char curgdt[10];
142 /* x86-64 supports unaligned loads & stores */
143 (*(u16 *)(curgdt)) = limit;
144 (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt);
146 __asm__ __volatile__ (
/*
 * Inline-assembly helper whose first instruction loads the __KERNEL_DS
 * selector value into %eax.
 * NOTE(review): the remaining instructions are not visible; presumably
 * %eax is then copied into the data segment registers -- confirm.
 */
152 static void load_segments(void)
154 __asm__ __volatile__ (
155 "\tmovl $"STR(__KERNEL_DS)",%eax\n"
/*
 * Pointer type for the relocation routine: a function marked as not
 * returning (NORET_TYPE / ATTRIB_NORET) that takes four unsigned long
 * arguments -- indirection_page, control_code_buffer, start_address and
 * pgtable.
 */
166 typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
167 unsigned long control_code_buffer,
168 unsigned long start_address,
169 unsigned long pgtable) ATTRIB_NORET;
/*
 * The relocation routine as a byte array, and its length in bytes;
 * both are defined outside this file.  machine_kexec_prepare() copies
 * relocate_new_kernel_size bytes of it into the control code buffer.
 */
171 const extern unsigned char relocate_new_kernel[];
172 const extern unsigned long relocate_new_kernel_size;
/*
 * Prepare the control pages of a kexec image.
 *
 * The physical address of image->control_code_page becomes
 * start_pgtable, which init_pgtable() uses as the top-level page table;
 * the control code buffer starts 4096 bytes after it.  Once the page
 * table is built, relocate_new_kernel_size bytes of relocate_new_kernel
 * are copied into the control code buffer.
 *
 * NOTE(review): the check on `result` and the return statement are not
 * visible; presumably a non-zero result from init_pgtable() is returned
 * before the copy, and 0 is returned otherwise -- confirm.
 */
174 int machine_kexec_prepare(struct kimage *image)
176 unsigned long start_pgtable, control_code_buffer;
179 /* Calculate the offsets */
180 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
181 control_code_buffer = start_pgtable + 4096UL;
183 /* Setup the identity mapped 64bit page table */
184 result = init_pgtable(image, start_pgtable);
188 /* Place the code in the reboot code buffer */
189 memcpy(__va(control_code_buffer), relocate_new_kernel,
190 relocate_new_kernel_size);
/*
 * Cleanup hook taking the kexec image.
 * NOTE(review): no body statements are visible for this function;
 * presumably it is a no-op on this architecture -- confirm.
 */
195 void machine_kexec_cleanup(struct kimage *image)
/*
 * machine_kexec - hand control to the relocation code for this image.
 *
 * Steps visible in the body:
 *  - page_list is taken from image->head; start_pgtable and
 *    control_code_buffer are derived from image->control_code_page in
 *    the same way as in machine_kexec_prepare() (buffer = table + 4096).
 *  - The first PAGE_SIZE/2 bytes of the kexec page table are copied over
 *    the first half of the table that read_cr3() points at, so the low
 *    half of the address space uses the identity mapping while the high
 *    half keeps the kernel mappings.
 *  - set_gdt() and set_idt() are called with phys_to_virt(0) and a
 *    limit of 0, leaving both tables invalid.
 *  - control_code_buffer (a physical address) is cast to
 *    relocate_new_kernel_t and called with page_list,
 *    control_code_buffer, image->start and start_pgtable.
 *
 * The function is declared NORET_TYPE.
 * NOTE(review): the statements that disable interrupts and force-load
 * the segment registers are described by the comments in the body but
 * are not themselves visible -- confirm they are present.
 */
201 * Do not allocate memory (or fail in any way) in machine_kexec().
202 * We are past the point of no return, committed to rebooting now.
204 NORET_TYPE void machine_kexec(struct kimage *image)
206 unsigned long page_list;
207 unsigned long control_code_buffer;
208 unsigned long start_pgtable;
209 relocate_new_kernel_t rnk;
211 /* Interrupts aren't acceptable while we reboot */
214 /* Calculate the offsets */
215 page_list = image->head;
216 start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
217 control_code_buffer = start_pgtable + 4096UL;
219 /* Set the low half of the page table to my identity mapped
220 * page table for kexec. Leave the high half pointing at the
221 * kernel pages. Don't bother to flush the global pages
222 * as that will happen when I fully switch to my identity mapped
225 memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2);
229 /* The segment registers are funny things, they are
230 * automatically loaded from a table, in memory wherever you
231 * set them to a specific selector, but this table is never
232 * accessed again unless you set the segment to a different selector.
234 * The more common model are caches where the behind
235 * the scenes work is done, but is also dropped at arbitrary
238 * I take advantage of this here by force loading the
239 * segments, before I zap the gdt with an invalid value.
242 /* The gdt & idt are now invalid.
243 * If you want to load them you must set up your own idt & gdt.
245 set_gdt(phys_to_virt(0),0);
246 set_idt(phys_to_virt(0),0);
248 rnk = (relocate_new_kernel_t) control_code_buffer;
249 (*rnk)(page_list, control_code_buffer, image->start, start_pgtable);