2 * Hypervisor-assisted dump
4 * Linas Vepstas, Manish Ahuja 2008
5 * Copyright 2008 IBM Corp.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
14 #include <linux/init.h>
15 #include <linux/kobject.h>
18 #include <linux/pfn.h>
19 #include <linux/swap.h>
20 #include <linux/sysfs.h>
23 #include <asm/phyp_dump.h>
24 #include <asm/machdep.h>
28 /* Variables, used to communicate data between early boot and late boot */
/* Backing storage for the dump state; accessed through phyp_dump_info. */
29 static struct phyp_dump phyp_dump_vars;
/* Non-static pointer through which this file reads and writes the dump state. */
30 struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
/* RTAS token: assigned from rtas_token("ibm,configure-kernel-dump") in
 * phyp_dump_setup() and passed as the first argument of rtas_call(). */
32 static int ibm_configure_kernel_dump;
33 /* ------------------------------------------------- */
34 /* RTAS interfaces to declare the dump regions */
43 u64 destination_address;
/* Header describing the dump layout. A pointer to it is handed to
 * rtas_call() by register_dump_area() and invalidate_last_dump().
 * NOTE(review): only some of the members are visible in this excerpt. */
46 struct phyp_dump_header {
/* Set by init_dump_header() to offsetof(struct phyp_dump_header, cpu_data) */
51 u32 first_offset_section;
/* Set to 0 by init_dump_header() */
52 u32 dump_disk_section;
57 /* No dump disk path string used */
/* One descriptor per dumped region: CPU state, HPTE region, low kernel region */
59 struct dump_section cpu_data;
60 struct dump_section hpte_data;
61 struct dump_section kernel_data;
64 /* The dump header *must be* in low memory, so .bss it */
/* Filled in by init_dump_header(); passed to register_dump_area() and
 * invalidate_last_dump() from phyp_dump_setup() and track_freed_range(),
 * and read back by show_release_region(). */
65 static struct phyp_dump_header phdr;
/* Values init_dump_header() writes into the header's num_of_sections and
 * version fields. */
67 #define NUM_DUMP_SECTIONS 3
68 #define DUMP_HEADER_VERSION 0x1
/* Written to the dump_flags field of every section by init_dump_header(). */
69 #define DUMP_REQUEST_FLAG 0x1
/* source_type values for the cpu_data, hpte_data and kernel_data sections. */
70 #define DUMP_SOURCE_CPU 0x0001
71 #define DUMP_SOURCE_HPTE 0x0002
72 #define DUMP_SOURCE_RMO 0x0011
/* Tested against the header's status field in phyp_dump_setup(). */
73 #define DUMP_ERROR_FLAG 0x2000
/* NOTE(review): the next two are not referenced in the visible code;
 * presumably further status bits - confirm against the platform spec. */
74 #define DUMP_TRIGGERED 0x4000
75 #define DUMP_PERFORMED 0x8000
79 * init_dump_header() - initialize the header declaring a dump
80 * Returns: length of dump save area.
82 * When the hypervisor saves crashed state, it needs to put
83 * it somewhere. The dump header tells the hypervisor where
84 * the data can be saved.
86 static unsigned long init_dump_header(struct phyp_dump_header *ph)
/* Running offset of the next section's destination. Starts at 0, so the
 * destination addresses written below are relative; register_dump_area()
 * and invalidate_last_dump() add a base address to them later. */
88 unsigned long addr_offset = 0;
90 /* Set up the dump header */
91 ph->version = DUMP_HEADER_VERSION;
92 ph->num_of_sections = NUM_DUMP_SECTIONS;
/* The section descriptors start at the cpu_data member */
95 ph->first_offset_section =
96 (u32)offsetof(struct phyp_dump_header, cpu_data);
97 ph->dump_disk_section = 0;
99 ph->num_of_blocks_dd = 0;
102 ph->maxtime_to_auto = 0; /* disabled */
104 /* The first two sections are mandatory */
/* CPU state: placed first in the save area, sized from cpu_state_size */
105 ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
106 ph->cpu_data.source_type = DUMP_SOURCE_CPU;
107 ph->cpu_data.source_address = 0;
108 ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
109 ph->cpu_data.destination_address = addr_offset;
110 addr_offset += phyp_dump_info->cpu_state_size;
/* HPTE region: placed directly after the CPU state */
112 ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
113 ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
114 ph->hpte_data.source_address = 0;
115 ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
116 ph->hpte_data.destination_address = addr_offset;
117 addr_offset += phyp_dump_info->hpte_region_size;
119 /* This section describes the low kernel region */
120 ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
121 ph->kernel_data.source_type = DUMP_SOURCE_RMO;
122 ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
/* NOTE(review): the length is set to PHYP_DUMP_RMR_END, not END - START;
 * that is only the region size if PHYP_DUMP_RMR_START is 0 - confirm. */
123 ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
124 ph->kernel_data.destination_address = addr_offset;
/* addr_offset now equals the combined length of the three sections */
125 addr_offset += ph->kernel_data.source_length;
/* Log every visible field of a dump header at KERN_INFO level.
 * NOTE(review): phyp_dump_setup() may call this with a NULL pointer (its
 * dump_header local starts as NULL); no NULL guard is visible in this
 * excerpt - confirm one exists before the first dereference. */
130 static void print_dump_header(const struct phyp_dump_header *ph)
136 printk(KERN_INFO "dump header:\n");
137 /* Top-level header fields */
138 printk(KERN_INFO "version = %d\n", ph->version);
139 printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
140 printk(KERN_INFO "Status = 0x%x\n", ph->status);
142 /* No ph->disk, so all should be set to 0 */
143 printk(KERN_INFO "Offset to first section 0x%x\n",
144 ph->first_offset_section);
145 printk(KERN_INFO "dump disk sections should be zero\n");
146 printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
147 printk(KERN_INFO "block num = %ld\n", ph->block_num_dd);
148 printk(KERN_INFO "number of blocks = %ld\n", ph->num_of_blocks_dd);
149 printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
150 printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);
152 /* Per-section details: CPU state, HPTE region, then the kernel (SRSD) region */
153 printk(KERN_INFO " CPU AREA \n");
154 printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);
155 printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);
156 printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);
157 printk(KERN_INFO "cpu source_address =%lx\n",
158 ph->cpu_data.source_address);
159 printk(KERN_INFO "cpu source_length =%lx\n",
160 ph->cpu_data.source_length);
161 printk(KERN_INFO "cpu length_copied =%lx\n",
162 ph->cpu_data.length_copied);
164 printk(KERN_INFO " HPTE AREA \n");
165 printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);
166 printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);
167 printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);
168 printk(KERN_INFO "HPTE source_address =%lx\n",
169 ph->hpte_data.source_address);
170 printk(KERN_INFO "HPTE source_length =%lx\n",
171 ph->hpte_data.source_length);
172 printk(KERN_INFO "HPTE length_copied =%lx\n",
173 ph->hpte_data.length_copied);
175 printk(KERN_INFO " SRSD AREA \n");
176 printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);
177 printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);
178 printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);
179 printk(KERN_INFO "SRSD source_address =%lx\n",
180 ph->kernel_data.source_address);
181 printk(KERN_INFO "SRSD source_length =%lx\n",
182 ph->kernel_data.source_length);
183 printk(KERN_INFO "SRSD length_copied =%lx\n",
184 ph->kernel_data.length_copied);
/* sysfs show handler: formats phyp_dump_at_boot as hex plus a newline into
 * buf and returns the sprintf() result. kobj and attr are unused in the
 * visible code. */
188 static ssize_t show_phyp_dump_active(struct kobject *kobj,
189 struct kobj_attribute *attr, char *buf)
192 /* create filesystem entry so kdump is phyp-dump aware */
193 return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
/* Attribute "phyp_dump_active", mode 0600, read through
 * show_phyp_dump_active(); added to kernel_kobj by register_dump_area().
 * NOTE(review): the store argument of __ATTR is not visible in this excerpt. */
196 static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
197 show_phyp_dump_active,
/* Hand the dump header to the hypervisor through the
 * ibm_configure_kernel_dump RTAS call (first input argument 1), then
 * create the phyp_dump_active sysfs file on kernel_kobj.
 * @ph:   dump header to register
 * @addr: base address added to the section destinations if still unset */
200 static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
204 /* Add addr value if not initialized before */
/* A zero CPU-section destination is taken to mean that the relative
 * offsets from init_dump_header() have not been rebased yet. */
205 if (ph->cpu_data.destination_address == 0) {
206 ph->cpu_data.destination_address += addr;
207 ph->hpte_data.destination_address += addr;
208 ph->kernel_data.destination_address += addr;
211 /* ToDo Invalidate kdump and free memory range. */
/* Repeat the call for as long as rtas_busy_delay() returns nonzero */
214 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
215 1, ph, sizeof(struct phyp_dump_header));
216 } while (rtas_busy_delay(rc));
/* Error reporting: log the return code and print the header that was sent */
219 printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
221 print_dump_header(ph);
/* Expose the phyp_dump_active attribute under kernel_kobj */
225 rc = sysfs_create_file(kernel_kobj, &pdl.attr);
227 printk(KERN_ERR "phyp-dump: unable to create sysfs"
/* Issue the ibm_configure_kernel_dump RTAS call with first input argument 2
 * (register_dump_area() uses 1) for the given header.
 * @ph:   dump header to pass to the hypervisor
 * @addr: base address added to the section destinations if still unset */
232 void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
236 /* Add addr value if not initialized before */
237 if (ph->cpu_data.destination_address == 0) {
238 ph->cpu_data.destination_address += addr;
239 ph->hpte_data.destination_address += addr;
240 ph->kernel_data.destination_address += addr;
/* Repeat the call for as long as rtas_busy_delay() returns nonzero */
244 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
245 2, ph, sizeof(struct phyp_dump_header));
246 } while (rtas_busy_delay(rc));
/* Error reporting: log the return code and print the header that was sent */
249 printk(KERN_ERR "phyp-dump: unexpected error (%d) "
250 "on invalidate\n", rc);
251 print_dump_header(ph);
255 /* ------------------------------------------------- */
257 * release_memory_range -- release memory previously lmb_reserved
258 * @start_pfn: starting physical frame number
259 * @nr_pages: number of pages to free.
261 * This routine will release memory that had been previously
262 * lmb_reserved in early boot. The released memory becomes
263 * available for general use.
265 static void release_memory_range(unsigned long start_pfn,
266 unsigned long nr_pages)
269 unsigned long end_pfn;
/* One past the last frame of the requested range */
272 end_pfn = start_pfn + nr_pages;
/* NOTE(review): the inclusive bound (<=) visits nr_pages + 1 frames, i.e.
 * one frame beyond the requested range; a strict "<" looks intended - confirm. */
274 for (i = start_pfn; i <= end_pfn; i++) {
275 rpage = pfn_to_page(i);
/* Only frames still marked reserved are touched */
276 if (PageReserved(rpage)) {
277 ClearPageReserved(rpage);
278 init_page_count(rpage);
286 * track_freed_range -- Counts the range being freed.
287 * Once the counter goes to zero, it re-registers dump for
291 track_freed_range(unsigned long addr, unsigned long length)
/* Running totals of released bytes; static, so they persist across calls */
293 static unsigned long scratch_area_size, reserved_area_size;
/* Test for addresses below the start of the reserved region; the guarded
 * statement is not visible in this excerpt. */
295 if (addr < phyp_dump_info->init_reserve_start)
/* Only the start address is range-checked; length is added as given.
 * NOTE(review): the upper bound is inclusive (<=), so an address exactly
 * at the end of the region is counted too - confirm this is intended. */
298 if ((addr >= phyp_dump_info->init_reserve_start) &&
299 (addr <= phyp_dump_info->init_reserve_start +
300 phyp_dump_info->init_reserve_size))
301 reserved_area_size += length;
/* Same accounting for the scratch area holding the previous dump */
303 if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
304 (addr <= phyp_dump_info->reserved_scratch_addr +
305 phyp_dump_info->reserved_scratch_size))
306 scratch_area_size += length;
/* When both totals match the region sizes exactly, invalidate the old
 * dump and register the scratch area again.
 * NOTE(review): exact equality is never reached if the released lengths
 * overshoot or overlap - confirm callers release exact sizes. */
308 if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
309 (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
311 invalidate_last_dump(&phdr,
312 phyp_dump_info->reserved_scratch_addr);
313 register_dump_area(&phdr,
314 phyp_dump_info->reserved_scratch_addr);
318 /* ------------------------------------------------- */
320 * sysfs_release_region -- sysfs interface to release memory range.
323 * "echo <start addr> <length> > /sys/kernel/release_region"
326 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
328 * will release 256MB starting at 1GB.
330 static ssize_t store_release_region(struct kobject *kobj,
331 struct kobj_attribute *attr,
332 const char *buf, size_t count)
334 unsigned long start_addr, length, end_addr;
335 unsigned long start_pfn, nr_pages;
/* Input is two hexadecimal numbers: start address and length */
338 ret = sscanf(buf, "%lx %lx", &start_addr, &length);
/* Accounting uses the values as written by the user, before the clamping below */
342 track_freed_range(start_addr, length);
344 /* Range-check - don't free any reserved memory that
345 * wasn't reserved for phyp-dump */
346 if (start_addr < phyp_dump_info->init_reserve_start)
347 start_addr = phyp_dump_info->init_reserve_start;
/* NOTE(review): if start_addr lies beyond end_addr, the subtraction below
 * wraps around (unsigned) - confirm such input is rejected elsewhere. */
349 end_addr = phyp_dump_info->init_reserve_start +
350 phyp_dump_info->init_reserve_size;
351 if (start_addr+length > end_addr)
352 length = end_addr - start_addr;
354 /* Release the region of memory passed in by the user */
/* Both conversions use PFN_DOWN, so a partial trailing page is not released */
355 start_pfn = PFN_DOWN(start_addr);
356 nr_pages = PFN_DOWN(length);
357 release_memory_range(start_pfn, nr_pages);
/* sysfs show handler for release_region: prints, in hex, the destination
 * address and length_copied of each section of phdr, followed by values
 * describing the reserved region. Note that each "0x..-0x.." pair for a
 * section is (address, copied length), not (start, end). */
362 static ssize_t show_release_region(struct kobject *kobj,
363 struct kobj_attribute *attr, char *buf)
365 u64 second_addr_range;
367 /* total reserved size minus the size of the scratch area */
368 second_addr_range = phyp_dump_info->init_reserve_size -
369 phyp_dump_info->reserved_scratch_size;
/* NOTE(review): the last arguments of this call (presumably including
 * second_addr_range) are not visible in this excerpt. */
370 return sprintf(buf, "CPU:0x%lx-0x%lx: HPTE:0x%lx-0x%lx:"
371 " DUMP:0x%lx-0x%lx, 0x%lx-0x%lx:\n",
372 phdr.cpu_data.destination_address,
373 phdr.cpu_data.length_copied,
374 phdr.hpte_data.destination_address,
375 phdr.hpte_data.length_copied,
376 phdr.kernel_data.destination_address,
377 phdr.kernel_data.length_copied,
378 phyp_dump_info->init_reserve_start,
/* Attribute "release_region", mode 0600, written through
 * store_release_region(); added to kernel_kobj by phyp_dump_setup().
 * NOTE(review): the show argument of __ATTR is not visible in this excerpt. */
382 static struct kobj_attribute rr = __ATTR(release_region, 0600,
384 store_release_region);
/* Late-boot setup, run through machine_subsys_initcall() for pseries.
 * Visible steps: check that memory was reserved and that phyp dump is
 * configured; read the "ibm,kernel-dump" property of the /rtas node;
 * fetch the RTAS token; initialise phdr; register a new dump area when no
 * dump header was found; invalidate and re-register when the old header
 * carries DUMP_ERROR_FLAG; otherwise record where the existing dump lives
 * and create the release_region sysfs file.
 * NOTE(review): the return statements and some braces are not visible in
 * this excerpt, so the exact control flow between these steps is unconfirmed.
 * NOTE(review): print_dump_header() is called with dump_header before the
 * NULL test on it - confirm print_dump_header() tolerates NULL. */
386 static int __init phyp_dump_setup(void)
388 struct device_node *rtas;
389 const struct phyp_dump_header *dump_header = NULL;
390 unsigned long dump_area_start;
391 unsigned long dump_area_length;
395 /* If no memory was reserved in early boot, there is nothing to do */
396 if (phyp_dump_info->init_reserve_size == 0)
399 /* Return if phyp dump not supported */
400 if (!phyp_dump_info->phyp_dump_configured)
403 /* Is there dump data waiting for us? If there isn't,
404 * then register a new dump area, and release all of
405 * the rest of the reserved ram.
407 * The /rtas/ibm,kernel-dump rtas node is present only
408 * if there is dump data waiting for us.
410 rtas = of_find_node_by_path("/rtas");
412 dump_header = of_get_property(rtas, "ibm,kernel-dump",
417 ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
419 print_dump_header(dump_header);
420 dump_area_length = init_dump_header(&phdr);
422 dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
424 if (dump_header == NULL) {
425 register_dump_area(&phdr, dump_area_start);
429 /* re-register the dump area, if old dump was invalid */
430 if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
431 invalidate_last_dump(&phdr, dump_area_start);
432 register_dump_area(&phdr, dump_area_start);
/* Record the existing dump: it starts at the CPU section's destination
 * and spans the source lengths of all three sections.
 * NOTE(review): dump_header is dereferenced here; the guard for the NULL
 * case is not visible in this excerpt. */
437 phyp_dump_info->reserved_scratch_addr =
438 dump_header->cpu_data.destination_address;
439 phyp_dump_info->reserved_scratch_size =
440 dump_header->cpu_data.source_length +
441 dump_header->hpte_data.source_length +
442 dump_header->kernel_data.source_length;
445 /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
446 rc = sysfs_create_file(kernel_kobj, &rr.attr);
448 printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
451 /* ToDo: re-register the dump area, for next time. */
/* Register phyp_dump_setup() as a subsys-level initcall for the pseries machine */
454 machine_subsys_initcall(pseries, phyp_dump_setup);
/* Flattened device-tree scan callback. Resets the configured/active flags,
 * then, for the depth-1 node named "rtas", sets them from the presence of
 * RTAS properties and reads the dump region sizes. */
456 int __init early_init_dt_scan_phyp_dump(unsigned long node,
457 const char *uname, int depth, void *data)
459 const unsigned int *sizes;
/* The flags are cleared on every invocation, before the node is examined */
461 phyp_dump_info->phyp_dump_configured = 0;
462 phyp_dump_info->phyp_dump_is_active = 0;
/* Only the node called "rtas" at depth 1 is of interest */
464 if (depth != 1 || strcmp(uname, "rtas") != 0)
467 if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
468 phyp_dump_info->phyp_dump_configured++;
/* NOTE(review): this checks "ibm,dump-kernel", while phyp_dump_setup()
 * reads "ibm,kernel-dump" - confirm which spelling is correct. */
470 if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
471 phyp_dump_info->phyp_dump_is_active++;
473 sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
/* The sizes are read as unsigned long values starting at cell indices 1
 * and 4 of the property.
 * NOTE(review): casting unsigned int * to unsigned long * assumes suitable
 * alignment and layout of the property data - confirm. */
479 phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
482 phyp_dump_info->hpte_region_size =
483 *((unsigned long *)&sizes[4]);
487 /* Look for phyp_dump= cmdline option */
/* Handler for the "phyp_dump" early parameter: sets phyp_dump_at_boot. */
488 static int __init early_phyp_dump_enabled(char *p)
/* Default to enabled whenever the parameter is present */
490 phyp_dump_info->phyp_dump_at_boot = 1;
/* Only the first character of the value is compared: "1" enables, "0" disables */
495 if (strncmp(p, "1", 1) == 0)
496 phyp_dump_info->phyp_dump_at_boot = 1;
497 else if (strncmp(p, "0", 1) == 0)
498 phyp_dump_info->phyp_dump_at_boot = 0;
502 early_param("phyp_dump", early_phyp_dump_enabled);
504 /* Look for phyp_dump_reserve_size= cmdline option */
/* Handler for the "phyp_dump_reserve_size" early parameter. */
505 static int __init early_phyp_dump_reserve_size(char *p)
/* memparse() converts the value; the result is kept in reserve_bootvar.
 * NOTE(review): any guard on p and the return statement are not visible
 * in this excerpt. */
508 phyp_dump_info->reserve_bootvar = memparse(p, &p);
512 early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);