2  * Hypervisor-assisted dump
 
   4  * Linas Vepstas, Manish Ahuja 2008
 
   5  * Copyright 2008 IBM Corp.
 
   7  *      This program is free software; you can redistribute it and/or
 
   8  *      modify it under the terms of the GNU General Public License
 
   9  *      as published by the Free Software Foundation; either version
 
  10  *      2 of the License, or (at your option) any later version.
 
  14 #include <linux/init.h>
 
  15 #include <linux/kobject.h>
 
  18 #include <linux/pfn.h>
 
  19 #include <linux/swap.h>
 
  20 #include <linux/sysfs.h>
 
  23 #include <asm/phyp_dump.h>
 
  24 #include <asm/machdep.h>
 
  28 /* Variables, used to communicate data between early boot and late boot */
 
  /* Backing storage for the dump state; only reached through phyp_dump_info. */
  29 static struct phyp_dump phyp_dump_vars;

  /* Non-static pointer to phyp_dump_vars; every access in this file goes
   * through this pointer. */
  30 struct phyp_dump *phyp_dump_info = &phyp_dump_vars;

  /* RTAS token: assigned from rtas_token("ibm,configure-kernel-dump") in
   * phyp_dump_setup() and passed as the first argument of rtas_call() in
   * register_dump_area() and invalidate_last_dump(). */
  32 static int ibm_configure_kernel_dump;
 
  33 /* ------------------------------------------------- */
 
  34 /* RTAS interfaces to declare the dump regions */
 
  43         u64 destination_address;
 
  /*
   * Header handed to rtas_call() (together with its sizeof) by
   * register_dump_area() and invalidate_last_dump().
   * NOTE(review): other members used elsewhere in this file (version,
   * num_of_sections, status, block_num_dd, num_of_blocks_dd, offset_dd,
   * maxtime_to_auto) are not visible in this view of the declaration.
   */
  46 struct phyp_dump_header {

  /* init_dump_header() stores offsetof(struct phyp_dump_header, cpu_data) here. */
  51         u32 first_offset_section;

  /* init_dump_header() sets this to 0. */
  52         u32 dump_disk_section;

  57         /* No dump disk path string used */

  /* Section descriptors; init_dump_header() tags them DUMP_SOURCE_CPU,
   * DUMP_SOURCE_HPTE and DUMP_SOURCE_RMO respectively. */
  59         struct dump_section cpu_data;

  60         struct dump_section hpte_data;

  61         struct dump_section kernel_data;
 
  64 /* The dump header *must be* in low memory, so .bss it */
 
  /* The single header instance used for registration and invalidation. */
  65 static struct phyp_dump_header phdr;

  /* Stored in ph->num_of_sections by init_dump_header(). */
  67 #define NUM_DUMP_SECTIONS       3

  /* Stored in ph->version by init_dump_header(). */
  68 #define DUMP_HEADER_VERSION     0x1

  /* Written to the dump_flags member of all three sections. */
  69 #define DUMP_REQUEST_FLAG       0x1

  /* source_type values for the cpu_data, hpte_data and kernel_data sections. */
  70 #define DUMP_SOURCE_CPU         0x0001

  71 #define DUMP_SOURCE_HPTE        0x0002

  72 #define DUMP_SOURCE_RMO         0x0011

  /* Tested against dump_header->status in phyp_dump_setup(). */
  73 #define DUMP_ERROR_FLAG         0x2000

  /* NOTE(review): no use of the next two values is visible in this view;
   * presumably further status bits - confirm. */
  74 #define DUMP_TRIGGERED          0x4000

  75 #define DUMP_PERFORMED          0x8000
 
  79  * init_dump_header() - initialize the header declaring a dump
 
  80  * Returns: length of dump save area.
 
  82  * When the hypervisor saves crashed state, it needs to put
 
  83  * it somewhere. The dump header tells the hypervisor where
 
  84  * the data can be saved.
 
  /*
   * Fill in *ph with the three section descriptors (CPU state, HPTE, kernel
   * region). Destination addresses are written as running offsets starting
   * at 0; register_dump_area()/invalidate_last_dump() later add the real
   * base address while cpu_data.destination_address is still 0.
   * NOTE(review): the return statement is not visible in this view;
   * presumably addr_offset (the accumulated length) is returned - confirm.
   */
  86 static unsigned long init_dump_header(struct phyp_dump_header *ph)

  /* Running offset of the next section inside the save area. */
  88         unsigned long addr_offset = 0;

  90         /* Set up the dump header */

  91         ph->version = DUMP_HEADER_VERSION;

  92         ph->num_of_sections = NUM_DUMP_SECTIONS;

  /* Byte offset of the first section descriptor inside the header. */
  95         ph->first_offset_section =

  96                 (u32)offsetof(struct phyp_dump_header, cpu_data);

  97         ph->dump_disk_section = 0;

  99         ph->num_of_blocks_dd = 0;

 102         ph->maxtime_to_auto = 0; /* disabled */

 104         /* The first two sections are mandatory */

 105         ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;

 106         ph->cpu_data.source_type = DUMP_SOURCE_CPU;

 107         ph->cpu_data.source_address = 0;

 108         ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;

 109         ph->cpu_data.destination_address = addr_offset;

 110         addr_offset += phyp_dump_info->cpu_state_size;

 112         ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;

 113         ph->hpte_data.source_type = DUMP_SOURCE_HPTE;

 114         ph->hpte_data.source_address = 0;

 115         ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;

 116         ph->hpte_data.destination_address = addr_offset;

 117         addr_offset += phyp_dump_info->hpte_region_size;

 119         /* This section describes the low kernel region */

 120         ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;

 121         ph->kernel_data.source_type = DUMP_SOURCE_RMO;

 122         ph->kernel_data.source_address = PHYP_DUMP_RMR_START;

 /* NOTE(review): the length is set to PHYP_DUMP_RMR_END, not END - START;
  * that is only the region size if PHYP_DUMP_RMR_START is 0 - confirm. */
 123         ph->kernel_data.source_length = PHYP_DUMP_RMR_END;

 124         ph->kernel_data.destination_address = addr_offset;

 125         addr_offset += ph->kernel_data.source_length;
 
 /*
  * Log every field of *ph at KERN_INFO: the general header fields, the dump
  * disk fields, then the three section descriptors. Nothing is modified.
  * Called after the error printk in register_dump_area() and
  * invalidate_last_dump(), and from phyp_dump_setup().
  * NOTE(review): phyp_dump_setup() initialises the pointer it passes to NULL;
  * a NULL guard is not visible in this view - confirm one exists.
  * NOTE(review): first_offset_section and dump_disk_section are declared u32
  * but dump_disk_section is printed with %d; the types of the other header
  * fields are not visible here - verify the format specifiers.
  */
 130 static void print_dump_header(const struct phyp_dump_header *ph)

 136         printk(KERN_INFO "dump header:\n");

 137         /* General header fields */

 138         printk(KERN_INFO "version = %d\n", ph->version);

 139         printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);

 140         printk(KERN_INFO "Status = 0x%x\n", ph->status);

 142         /* No ph->disk, so all should be set to 0 */

 143         printk(KERN_INFO "Offset to first section 0x%x\n",

 144                 ph->first_offset_section);

 145         printk(KERN_INFO "dump disk sections should be zero\n");

 146         printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);

 147         printk(KERN_INFO "block num = %lld\n", ph->block_num_dd);

 148         printk(KERN_INFO "number of blocks = %lld\n", ph->num_of_blocks_dd);

 149         printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);

 150         printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

 152         /* Per-section descriptors: CPU state, HPTE, then kernel data */

 153         printk(KERN_INFO " CPU AREA \n");

 154         printk(KERN_INFO "cpu dump_flags =%d\n", ph->cpu_data.dump_flags);

 155         printk(KERN_INFO "cpu source_type =%d\n", ph->cpu_data.source_type);

 156         printk(KERN_INFO "cpu error_flags =%d\n", ph->cpu_data.error_flags);

 157         printk(KERN_INFO "cpu source_address =%llx\n",

 158                 ph->cpu_data.source_address);

 159         printk(KERN_INFO "cpu source_length =%llx\n",

 160                 ph->cpu_data.source_length);

 161         printk(KERN_INFO "cpu length_copied =%llx\n",

 162                 ph->cpu_data.length_copied);

 164         printk(KERN_INFO " HPTE AREA \n");

 165         printk(KERN_INFO "HPTE dump_flags =%d\n", ph->hpte_data.dump_flags);

 166         printk(KERN_INFO "HPTE source_type =%d\n", ph->hpte_data.source_type);

 167         printk(KERN_INFO "HPTE error_flags =%d\n", ph->hpte_data.error_flags);

 168         printk(KERN_INFO "HPTE source_address =%llx\n",

 169                 ph->hpte_data.source_address);

 170         printk(KERN_INFO "HPTE source_length =%llx\n",

 171                 ph->hpte_data.source_length);

 172         printk(KERN_INFO "HPTE length_copied =%llx\n",

 173                 ph->hpte_data.length_copied);

 /* The "SRSD" lines print the kernel_data section. */
 175         printk(KERN_INFO " SRSD AREA \n");

 176         printk(KERN_INFO "SRSD dump_flags =%d\n", ph->kernel_data.dump_flags);

 177         printk(KERN_INFO "SRSD source_type =%d\n", ph->kernel_data.source_type);

 178         printk(KERN_INFO "SRSD error_flags =%d\n", ph->kernel_data.error_flags);

 179         printk(KERN_INFO "SRSD source_address =%llx\n",

 180                 ph->kernel_data.source_address);

 181         printk(KERN_INFO "SRSD source_length =%llx\n",

 182                 ph->kernel_data.source_length);

 183         printk(KERN_INFO "SRSD length_copied =%llx\n",

 184                 ph->kernel_data.length_copied);
 
 /*
  * sysfs show handler for the phyp_dump_active attribute: formats
  * phyp_dump_info->phyp_dump_at_boot as hex plus a newline into buf and
  * returns the sprintf() character count. kobj and attr are not used in the
  * visible lines.
  */
 188 static ssize_t show_phyp_dump_active(struct kobject *kobj,

 189                         struct kobj_attribute *attr, char *buf)

 192         /* create filesystem entry so kdump is phyp-dump aware */

 193         return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
 
 /*
  * sysfs attribute "phyp_dump_active", mode 0600, read through
  * show_phyp_dump_active(). It is added to kernel_kobj by
  * register_dump_area(). The remaining __ATTR argument is not visible in
  * this view.
  */
 196 static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,

 197                                         show_phyp_dump_active,
 
 /*
  * Register the dump save area described by *ph with firmware.
  *
  * @ph:   dump header; its three destination addresses are rebased by addr
  *        when cpu_data.destination_address is still 0.
  * @addr: base address of the save area.
  *
  * Issues rtas_call() with the ibm_configure_kernel_dump token and 1 as the
  * first input argument, repeating while rtas_busy_delay(rc) is true. On the
  * error path it logs the code and the header, then adds the
  * phyp_dump_active sysfs file to kernel_kobj.
  * NOTE(review): sysfs_create_file() runs on every call, and this function is
  * called from both phyp_dump_setup() and track_freed_range(); a second call
  * would presumably fail with a duplicate-file error - confirm.
  */
 200 static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)

 204         /* Add addr value if not initialized before */

 /* A zero cpu_data destination is treated as "offsets not yet rebased". */
 205         if (ph->cpu_data.destination_address == 0) {

 206                 ph->cpu_data.destination_address += addr;

 207                 ph->hpte_data.destination_address += addr;

 208                 ph->kernel_data.destination_address += addr;

 211         /* ToDo Invalidate kdump and free memory range. */

 214                 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,

 215                                 1, ph, sizeof(struct phyp_dump_header));

 216         } while (rtas_busy_delay(rc));

 219                 printk(KERN_ERR "phyp-dump: unexpected error (%d) on "

 221                 print_dump_header(ph);

 225         rc = sysfs_create_file(kernel_kobj, &pdl.attr);

 227                 printk(KERN_ERR "phyp-dump: unable to create sysfs"
 
 /*
  * Ask firmware to invalidate the previously registered dump.
  *
  * @ph:   dump header; its three destination addresses are rebased by addr
  *        when cpu_data.destination_address is still 0.
  * @addr: base address of the save area.
  *
  * Same call sequence as register_dump_area() but with 2 as the first RTAS
  * input argument; the call is repeated while rtas_busy_delay(rc) is true.
  * On the error path the code and the header are logged.
  * This function is not declared static.
  */
 232 void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)

 236         /* Add addr value if not initialized before */

 237         if (ph->cpu_data.destination_address == 0) {

 238                 ph->cpu_data.destination_address += addr;

 239                 ph->hpte_data.destination_address += addr;

 240                 ph->kernel_data.destination_address += addr;

 244                 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,

 245                                 2, ph, sizeof(struct phyp_dump_header));

 246         } while (rtas_busy_delay(rc));

 249                 printk(KERN_ERR "phyp-dump: unexpected error (%d) "

 250                                                 "on invalidate\n", rc);

 251                 print_dump_header(ph);
 
 255 /* ------------------------------------------------- */
 
 257  * release_memory_range -- release memory previously lmb_reserved
 
 258  * @start_pfn: starting physical frame number
 
 259  * @nr_pages: number of pages to free.
 
 261  * This routine will release memory that had been previously
 
 262  * lmb_reserved in early boot. The released memory becomes
 
 263  * available for general use.
 
 265 static void release_memory_range(unsigned long start_pfn,
 
 266                         unsigned long nr_pages)
 
 269         unsigned long end_pfn;
 
 272         end_pfn = start_pfn + nr_pages;
 
 274         for (i = start_pfn; i <= end_pfn; i++) {
 
 275                 rpage = pfn_to_page(i);
 
 276                 if (PageReserved(rpage)) {
 
 277                         ClearPageReserved(rpage);
 
 278                         init_page_count(rpage);
 
 286  * track_freed_range -- Counts the range being freed.
 
 287  * Once the counter goes to zero, it re-registers dump for
 
 /*
  * Account for a released range and re-arm the dump once everything has
  * been released.
  *
  * @addr:   start address of the range being released.
  * @length: length of that range.
  *
  * Two function-static counters accumulate length across calls: one for
  * ranges starting inside the initial reservation and one for ranges
  * starting inside the scratch area. When both counters equal the respective
  * region sizes, the old dump is invalidated and the area is registered
  * again at reserved_scratch_addr.
  * NOTE(review): only addr is range-checked, and the upper bounds are
  * inclusive (<=), so a range starting exactly at the end of a region is
  * still counted - confirm this is intended.
  * NOTE(review): the trigger uses exact equality, so overlapping or repeated
  * releases that overshoot a region size never match.
  */
 291 track_freed_range(unsigned long addr, unsigned long length)

 /* Persist across calls (static storage). */
 293         static unsigned long scratch_area_size, reserved_area_size;

 /* Addresses below the reservation are rejected; the statement taken
  * here is not visible in this view (presumably an early return). */
 295         if (addr < phyp_dump_info->init_reserve_start)

 298         if ((addr >= phyp_dump_info->init_reserve_start) &&

 299             (addr <= phyp_dump_info->init_reserve_start +

 300              phyp_dump_info->init_reserve_size))

 301                 reserved_area_size += length;

 303         if ((addr >= phyp_dump_info->reserved_scratch_addr) &&

 304             (addr <= phyp_dump_info->reserved_scratch_addr +

 305              phyp_dump_info->reserved_scratch_size))

 306                 scratch_area_size += length;

 /* Both regions fully accounted for: drop the old dump, register anew. */
 308         if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&

 309             (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {

 311                 invalidate_last_dump(&phdr,

 312                                 phyp_dump_info->reserved_scratch_addr);

 313                 register_dump_area(&phdr,

 314                                 phyp_dump_info->reserved_scratch_addr);
 
 318 /* ------------------------------------------------- */
 
 320  * sysfs_release_region -- sysfs interface to release memory range.
 
 323  *   "echo <start addr> <length> > /sys/kernel/release_region"
 
 326  *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
 
 328  * will release 256MB starting at 1GB.
 
 /*
  * sysfs store handler for release_region: parses "<start> <length>" (both
  * hex) from buf, reports the range to track_freed_range(), clamps it to the
  * initial reservation and releases the pages.
  *
  * NOTE(review): track_freed_range() is called with the raw user values,
  * before the clamping below.
  * NOTE(review): if start_addr lies beyond end_addr, "end_addr - start_addr"
  * wraps (unsigned) and yields a huge length; no guard is visible here.
  * NOTE(review): handling of the sscanf() result and the return value are
  * not visible in this view.
  */
 330 static ssize_t store_release_region(struct kobject *kobj,

 331                                 struct kobj_attribute *attr,

 332                                 const char *buf, size_t count)

 334         unsigned long start_addr, length, end_addr;

 335         unsigned long start_pfn, nr_pages;

 338         ret = sscanf(buf, "%lx %lx", &start_addr, &length);

 342         track_freed_range(start_addr, length);

 344         /* Range-check - don't free any reserved memory that

 345          * wasn't reserved for phyp-dump */

 346         if (start_addr < phyp_dump_info->init_reserve_start)

 347                 start_addr = phyp_dump_info->init_reserve_start;

 349         end_addr = phyp_dump_info->init_reserve_start +

 350                         phyp_dump_info->init_reserve_size;

 351         if (start_addr+length > end_addr)

 352                 length = end_addr - start_addr;

 354         /* Release the region of memory passed in by user */

 /* Both conversions round down to whole pages. */
 355         start_pfn = PFN_DOWN(start_addr);

 356         nr_pages = PFN_DOWN(length);

 357         release_memory_range(start_pfn, nr_pages);
 
 /*
  * sysfs show handler for release_region: prints, for the CPU, HPTE and
  * kernel ("DUMP") sections of phdr, the destination address followed by the
  * length copied, then init_reserve_start and one further value.
  * NOTE(review): each "a-b" pair is address-length, not start-end.
  * NOTE(review): the final sprintf() argument is not visible in this view;
  * presumably second_addr_range - confirm.
  */
 362 static ssize_t show_release_region(struct kobject *kobj,

 363                         struct kobj_attribute *attr, char *buf)

 365         u64 second_addr_range;

 367         /* total reserved size minus the size of the scratch area */

 368         second_addr_range = phyp_dump_info->init_reserve_size -

 369                                 phyp_dump_info->reserved_scratch_size;

 370         return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"

 371                             " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",

 372                 phdr.cpu_data.destination_address,

 373                 phdr.cpu_data.length_copied,

 374                 phdr.hpte_data.destination_address,

 375                 phdr.hpte_data.length_copied,

 376                 phdr.kernel_data.destination_address,

 377                 phdr.kernel_data.length_copied,

 378                 phyp_dump_info->init_reserve_start,
 
 /*
  * sysfs attribute "release_region", mode 0600, written through
  * store_release_region(). It is added to kernel_kobj by phyp_dump_setup().
  * The show argument of __ATTR is not visible in this view.
  */
 382 static struct kobj_attribute rr = __ATTR(release_region, 0600,

 384                                         store_release_region);
 
 /*
  * Late-boot setup, run as a pseries machine subsys initcall.
  *
  * Visible steps, in order:
  *  - check init_reserve_size and phyp_dump_configured (the statement taken
  *    on each check is not visible; the comments say "nothing to do" /
  *    "Return");
  *  - look up the /rtas node and read its "ibm,kernel-dump" property into
  *    dump_header;
  *  - fetch the "ibm,configure-kernel-dump" RTAS token;
  *  - print dump_header, initialise phdr, and compute the page-masked start
  *    of the reserved area;
  *  - with no dump_header, register the dump area; with a dump_header whose
  *    status has DUMP_ERROR_FLAG set, invalidate and register again;
  *  - record the scratch address and size from dump_header (the guarding
  *    condition is not visible);
  *  - add the release_region sysfs file to kernel_kobj.
  *
  * NOTE(review): print_dump_header() is called with dump_header, which is
  * NULL when no dump is waiting - confirm that callee tolerates NULL.
  * NOTE(review): dump_area_length is assigned but no use is visible.
  * NOTE(review): no of_node_put(rtas) is visible in this view.
  * NOTE(review): this reads "ibm,kernel-dump" while
  * early_init_dt_scan_phyp_dump() tests "ibm,dump-kernel" - confirm both
  * names are intended.
  */
 386 static int __init phyp_dump_setup(void)

 388         struct device_node *rtas;

 389         const struct phyp_dump_header *dump_header = NULL;

 390         unsigned long dump_area_start;

 391         unsigned long dump_area_length;

 395         /* If no memory was reserved in early boot, there is nothing to do */

 396         if (phyp_dump_info->init_reserve_size == 0)

 399         /* Return if phyp dump not supported */

 400         if (!phyp_dump_info->phyp_dump_configured)

 403         /* Is there dump data waiting for us? If there isn't,

 404          * then register a new dump area, and release all of

 405          * the rest of the reserved ram.

 407          * The /rtas/ibm,kernel-dump rtas node is present only

 408          * if there is dump data waiting for us.

 410         rtas = of_find_node_by_path("/rtas");

 412                 dump_header = of_get_property(rtas, "ibm,kernel-dump",

 417         ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");

 419         print_dump_header(dump_header);

 420         dump_area_length = init_dump_header(&phdr);

 422         dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;

 424         if (dump_header == NULL) {

 425                 register_dump_area(&phdr, dump_area_start);

 429         /* re-register the dump area, if old dump was invalid */

 430         if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {

 431                 invalidate_last_dump(&phdr, dump_area_start);

 432                 register_dump_area(&phdr, dump_area_start);

 /* Scratch area = where firmware placed the CPU section; its size is the
  * sum of the three section source lengths. */
 437                 phyp_dump_info->reserved_scratch_addr =

 438                                 dump_header->cpu_data.destination_address;

 439                 phyp_dump_info->reserved_scratch_size =

 440                                 dump_header->cpu_data.source_length +

 441                                 dump_header->hpte_data.source_length +

 442                                 dump_header->kernel_data.source_length;

 445         /* Should we create a dump_subsys, analogous to s390/ipl.c ? */

 446         rc = sysfs_create_file(kernel_kobj, &rr.attr);

 448                 printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",

 451         /* ToDo: re-register the dump area, for next time. */

 454 machine_subsys_initcall(pseries, phyp_dump_setup);
 
 /*
  * Flat device-tree scan callback: on the depth-1 node named "rtas", record
  * whether "ibm,configure-kernel-dump" and "ibm,dump-kernel" are present and
  * read the CPU-state and HPTE region sizes from
  * "ibm,configure-kernel-dump-sizes".
  *
  * @node:  flat device-tree node being visited.
  * @uname: name of that node.
  * @depth: depth of that node.
  * @data:  not used in the visible lines.
  *
  * NOTE(review): both flags are cleared at the top for every node visited,
  * before the depth/name check; the result relies on how the (not visible)
  * return values steer the scan - confirm.
  * NOTE(review): sizes is an unsigned int pointer, and &sizes[1] / &sizes[4]
  * are read through unsigned long pointers; this depends on the platform's
  * alignment and aliasing behaviour - confirm it is acceptable here.
  */
 456 int __init early_init_dt_scan_phyp_dump(unsigned long node,

 457                 const char *uname, int depth, void *data)

 459         const unsigned int *sizes;

 461         phyp_dump_info->phyp_dump_configured = 0;

 462         phyp_dump_info->phyp_dump_is_active = 0;

 /* Only the depth-1 "rtas" node is of interest. */
 464         if (depth != 1 || strcmp(uname, "rtas") != 0)

 467         if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))

 468                 phyp_dump_info->phyp_dump_configured++;

 470         if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))

 471                 phyp_dump_info->phyp_dump_is_active++;

 473         sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",

 479                 phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);

 482                 phyp_dump_info->hpte_region_size =

 483                                                 *((unsigned long *)&sizes[4]);
 
 487 /* Look for phyp_dump= cmdline option */
 
 /*
  * Early handler for the "phyp_dump" command-line parameter.
  *
  * phyp_dump_at_boot is first set to 1; a value starting with "1" keeps it
  * at 1 and a value starting with "0" clears it. Only the first character
  * of p is compared. Any other value leaves the flag at 1.
  * NOTE(review): handling of a NULL/empty p and the return value are not
  * visible in this view.
  */
 488 static int __init early_phyp_dump_enabled(char *p)

 490         phyp_dump_info->phyp_dump_at_boot = 1;

 495         if (strncmp(p, "1", 1) == 0)

 496                 phyp_dump_info->phyp_dump_at_boot = 1;

 497         else if (strncmp(p, "0", 1) == 0)

 498                 phyp_dump_info->phyp_dump_at_boot = 0;

 502 early_param("phyp_dump", early_phyp_dump_enabled);
 
 504 /* Look for phyp_dump_reserve_size= cmdline option */
 
 /*
  * Early handler for the "phyp_dump_reserve_size" command-line parameter:
  * stores the memparse() result of p in phyp_dump_info->reserve_bootvar.
  * NOTE(review): the condition guarding the assignment and the return value
  * are not visible in this view.
  */
 505 static int __init early_phyp_dump_reserve_size(char *p)

 508                 phyp_dump_info->reserve_bootvar = memparse(p, &p);

 512 early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);