Merge git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-next
[linux-2.6] / arch / powerpc / platforms / pseries / phyp_dump.c
1 /*
2  * Hypervisor-assisted dump
3  *
4  * Linas Vepstas, Manish Ahuja 2008
5  * Copyright 2008 IBM Corp.
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  */
13
14 #include <linux/init.h>
15 #include <linux/kobject.h>
16 #include <linux/mm.h>
17 #include <linux/of.h>
18 #include <linux/pfn.h>
19 #include <linux/swap.h>
20 #include <linux/sysfs.h>
21
22 #include <asm/page.h>
23 #include <asm/phyp_dump.h>
24 #include <asm/machdep.h>
25 #include <asm/prom.h>
26 #include <asm/rtas.h>
27
28 /* Variables, used to communicate data between early boot and late boot */
29 static struct phyp_dump phyp_dump_vars;
/* Non-static pointer to the state above; the code in this file accesses it only through this pointer */
30 struct phyp_dump *phyp_dump_info = &phyp_dump_vars;
31
/* RTAS token for "ibm,configure-kernel-dump"; assigned in phyp_dump_setup() */
32 static int ibm_configure_kernel_dump;
33 /* ------------------------------------------------- */
34 /* RTAS interfaces to declare the dump regions */
35
/*
 * Description of one section of the dump. Three of these are embedded in
 * struct phyp_dump_header (cpu_data, hpte_data, kernel_data).
 * NOTE(review): the field order looks like a firmware-defined layout -
 * confirm against the platform documentation before reordering anything.
 */
36 struct dump_section {
37         u32 dump_flags;                 /* set to DUMP_REQUEST_FLAG by init_dump_header() */
38         u16 source_type;                /* one of the DUMP_SOURCE_* values */
39         u16 error_flags;                /* never written in this file; only printed */
40         u64 source_address;             /* start of the data to be saved */
41         u64 source_length;              /* number of bytes to be saved */
42         u64 length_copied;              /* never written in this file; presumably filled in by firmware */
43         u64 destination_address;        /* where the saved data is to be placed */
44 };
45
/*
 * Header passed to the "ibm,configure-kernel-dump" RTAS call when a dump
 * area is registered or invalidated. The same structure type is used to
 * read the "ibm,kernel-dump" property in phyp_dump_setup().
 */
46 struct phyp_dump_header {
47         u32 version;                    /* DUMP_HEADER_VERSION */
48         u16 num_of_sections;            /* NUM_DUMP_SECTIONS */
49         u16 status;                     /* tested against DUMP_ERROR_FLAG in phyp_dump_setup() */
50
51         u32 first_offset_section;       /* offsetof(struct phyp_dump_header, cpu_data) */
        /* The dump-disk fields below are all set to zero by init_dump_header() */
52         u32 dump_disk_section;
53         u64 block_num_dd;
54         u64 num_of_blocks_dd;
55         u32 offset_dd;
56         u32 maxtime_to_auto;            /* 0 means disabled */
57         /* No dump disk path string used */
58
59         struct dump_section cpu_data;   /* source type DUMP_SOURCE_CPU */
60         struct dump_section hpte_data;  /* source type DUMP_SOURCE_HPTE */
61         struct dump_section kernel_data;        /* source type DUMP_SOURCE_RMO */
62 };
63
64 /* The dump header *must be* in low memory, so .bss it */
65 static struct phyp_dump_header phdr;
66
/* Number of struct dump_section entries in struct phyp_dump_header */
67 #define NUM_DUMP_SECTIONS       3
/* Value stored in phyp_dump_header.version */
68 #define DUMP_HEADER_VERSION     0x1
/* Value stored in dump_section.dump_flags for every section */
69 #define DUMP_REQUEST_FLAG       0x1
/* Values for dump_section.source_type */
70 #define DUMP_SOURCE_CPU         0x0001
71 #define DUMP_SOURCE_HPTE        0x0002
72 #define DUMP_SOURCE_RMO         0x0011
/*
 * DUMP_ERROR_FLAG is tested against phyp_dump_header.status in
 * phyp_dump_setup(). DUMP_TRIGGERED and DUMP_PERFORMED are not referenced
 * elsewhere in this file; presumably they are further status bits.
 */
73 #define DUMP_ERROR_FLAG         0x2000
74 #define DUMP_TRIGGERED          0x4000
75 #define DUMP_PERFORMED          0x8000
76
77
78 /**
79  * init_dump_header() - initialize the header declaring a dump
80  * Returns: length of dump save area.
81  *
82  * When the hypervisor saves crashed state, it needs to put
83  * it somewhere. The dump header tells the hypervisor where
84  * the data can be saved.
85  */
86 static unsigned long init_dump_header(struct phyp_dump_header *ph)
87 {
88         unsigned long addr_offset = 0;
89
90         /* Set up the dump header */
91         ph->version = DUMP_HEADER_VERSION;
92         ph->num_of_sections = NUM_DUMP_SECTIONS;
93         ph->status = 0;
94
95         ph->first_offset_section =
96                 (u32)offsetof(struct phyp_dump_header, cpu_data);
97         ph->dump_disk_section = 0;
98         ph->block_num_dd = 0;
99         ph->num_of_blocks_dd = 0;
100         ph->offset_dd = 0;
101
102         ph->maxtime_to_auto = 0; /* disabled */
103
104         /* The first two sections are mandatory */
105         ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
106         ph->cpu_data.source_type = DUMP_SOURCE_CPU;
107         ph->cpu_data.source_address = 0;
108         ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
109         ph->cpu_data.destination_address = addr_offset;
110         addr_offset += phyp_dump_info->cpu_state_size;
111
112         ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
113         ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
114         ph->hpte_data.source_address = 0;
115         ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
116         ph->hpte_data.destination_address = addr_offset;
117         addr_offset += phyp_dump_info->hpte_region_size;
118
119         /* This section describes the low kernel region */
120         ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
121         ph->kernel_data.source_type = DUMP_SOURCE_RMO;
122         ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
123         ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
124         ph->kernel_data.destination_address = addr_offset;
125         addr_offset += ph->kernel_data.source_length;
126
127         return addr_offset;
128 }
129
/*
 * Debug helper: print every field of a dump header to the kernel log.
 * Compiles to an empty function unless DEBUG is defined. A NULL header
 * is silently ignored.
 */
static void print_dump_header(const struct phyp_dump_header *ph)
{
#ifdef DEBUG
        const struct dump_section *cpu;
        const struct dump_section *hpte;
        const struct dump_section *srsd;

        if (!ph)
                return;

        cpu = &ph->cpu_data;
        hpte = &ph->hpte_data;
        srsd = &ph->kernel_data;

        /* Fixed part of the header */
        printk(KERN_INFO "dump header:\n");
        printk(KERN_INFO "version = %d\n", ph->version);
        printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
        printk(KERN_INFO "Status = 0x%x\n", ph->status);

        /* No dump disk is used, so the dump-disk fields should all be 0 */
        printk(KERN_INFO "Offset to first section 0x%x\n",
                ph->first_offset_section);
        printk(KERN_INFO "dump disk sections should be zero\n");
        printk(KERN_INFO "dump disk section = %d\n", ph->dump_disk_section);
        printk(KERN_INFO "block num = %ld\n", ph->block_num_dd);
        printk(KERN_INFO "number of blocks = %ld\n", ph->num_of_blocks_dd);
        printk(KERN_INFO "dump disk offset = %d\n", ph->offset_dd);
        printk(KERN_INFO "Max auto time= %d\n", ph->maxtime_to_auto);

        /* CPU state section */
        printk(KERN_INFO " CPU AREA \n");
        printk(KERN_INFO "cpu dump_flags =%d\n", cpu->dump_flags);
        printk(KERN_INFO "cpu source_type =%d\n", cpu->source_type);
        printk(KERN_INFO "cpu error_flags =%d\n", cpu->error_flags);
        printk(KERN_INFO "cpu source_address =%lx\n", cpu->source_address);
        printk(KERN_INFO "cpu source_length =%lx\n", cpu->source_length);
        printk(KERN_INFO "cpu length_copied =%lx\n", cpu->length_copied);

        /* HPTE section */
        printk(KERN_INFO " HPTE AREA \n");
        printk(KERN_INFO "HPTE dump_flags =%d\n", hpte->dump_flags);
        printk(KERN_INFO "HPTE source_type =%d\n", hpte->source_type);
        printk(KERN_INFO "HPTE error_flags =%d\n", hpte->error_flags);
        printk(KERN_INFO "HPTE source_address =%lx\n", hpte->source_address);
        printk(KERN_INFO "HPTE source_length =%lx\n", hpte->source_length);
        printk(KERN_INFO "HPTE length_copied =%lx\n", hpte->length_copied);

        /* Low kernel region section */
        printk(KERN_INFO " SRSD AREA \n");
        printk(KERN_INFO "SRSD dump_flags =%d\n", srsd->dump_flags);
        printk(KERN_INFO "SRSD source_type =%d\n", srsd->source_type);
        printk(KERN_INFO "SRSD error_flags =%d\n", srsd->error_flags);
        printk(KERN_INFO "SRSD source_address =%lx\n", srsd->source_address);
        printk(KERN_INFO "SRSD source_length =%lx\n", srsd->source_length);
        printk(KERN_INFO "SRSD length_copied =%lx\n", srsd->length_copied);
#endif
}
187
188 static ssize_t show_phyp_dump_active(struct kobject *kobj,
189                         struct kobj_attribute *attr, char *buf)
190 {
191
192         /* create filesystem entry so kdump is phyp-dump aware */
193         return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
194 }
195
196 static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600,
197                                         show_phyp_dump_active,
198                                         NULL);
199
200 static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
201 {
202         int rc;
203
204         /* Add addr value if not initialized before */
205         if (ph->cpu_data.destination_address == 0) {
206                 ph->cpu_data.destination_address += addr;
207                 ph->hpte_data.destination_address += addr;
208                 ph->kernel_data.destination_address += addr;
209         }
210
211         /* ToDo Invalidate kdump and free memory range. */
212
213         do {
214                 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
215                                 1, ph, sizeof(struct phyp_dump_header));
216         } while (rtas_busy_delay(rc));
217
218         if (rc) {
219                 printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
220                                                 "register\n", rc);
221                 print_dump_header(ph);
222                 return;
223         }
224
225         rc = sysfs_create_file(kernel_kobj, &pdl.attr);
226         if (rc)
227                 printk(KERN_ERR "phyp-dump: unable to create sysfs"
228                                 " file (%d)\n", rc);
229 }
230
231 static
232 void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
233 {
234         int rc;
235
236         /* Add addr value if not initialized before */
237         if (ph->cpu_data.destination_address == 0) {
238                 ph->cpu_data.destination_address += addr;
239                 ph->hpte_data.destination_address += addr;
240                 ph->kernel_data.destination_address += addr;
241         }
242
243         do {
244                 rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
245                                 2, ph, sizeof(struct phyp_dump_header));
246         } while (rtas_busy_delay(rc));
247
248         if (rc) {
249                 printk(KERN_ERR "phyp-dump: unexpected error (%d) "
250                                                 "on invalidate\n", rc);
251                 print_dump_header(ph);
252         }
253 }
254
255 /* ------------------------------------------------- */
256 /**
257  * release_memory_range -- release memory previously lmb_reserved
258  * @start_pfn: starting physical frame number
259  * @nr_pages: number of pages to free.
260  *
261  * This routine will release memory that had been previously
262  * lmb_reserved in early boot. The released memory becomes
263  * available for genreal use.
264  */
265 static void release_memory_range(unsigned long start_pfn,
266                         unsigned long nr_pages)
267 {
268         struct page *rpage;
269         unsigned long end_pfn;
270         long i;
271
272         end_pfn = start_pfn + nr_pages;
273
274         for (i = start_pfn; i <= end_pfn; i++) {
275                 rpage = pfn_to_page(i);
276                 if (PageReserved(rpage)) {
277                         ClearPageReserved(rpage);
278                         init_page_count(rpage);
279                         __free_page(rpage);
280                         totalram_pages++;
281                 }
282         }
283 }
284
285 /**
286  * track_freed_range -- Counts the range being freed.
287  * Once the counter goes to zero, it re-registers dump for
288  * future use.
289  */
290 static void
291 track_freed_range(unsigned long addr, unsigned long length)
292 {
293         static unsigned long scratch_area_size, reserved_area_size;
294
295         if (addr < phyp_dump_info->init_reserve_start)
296                 return;
297
298         if ((addr >= phyp_dump_info->init_reserve_start) &&
299             (addr <= phyp_dump_info->init_reserve_start +
300              phyp_dump_info->init_reserve_size))
301                 reserved_area_size += length;
302
303         if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
304             (addr <= phyp_dump_info->reserved_scratch_addr +
305              phyp_dump_info->reserved_scratch_size))
306                 scratch_area_size += length;
307
308         if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
309             (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
310
311                 invalidate_last_dump(&phdr,
312                                 phyp_dump_info->reserved_scratch_addr);
313                 register_dump_area(&phdr,
314                                 phyp_dump_info->reserved_scratch_addr);
315         }
316 }
317
318 /* ------------------------------------------------- */
319 /**
320  * sysfs_release_region -- sysfs interface to release memory range.
321  *
322  * Usage:
323  *   "echo <start addr> <length> > /sys/kernel/release_region"
324  *
325  * Example:
326  *   "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
327  *
328  * will release 256MB starting at 1GB.
329  */
330 static ssize_t store_release_region(struct kobject *kobj,
331                                 struct kobj_attribute *attr,
332                                 const char *buf, size_t count)
333 {
334         unsigned long start_addr, length, end_addr;
335         unsigned long start_pfn, nr_pages;
336         ssize_t ret;
337
338         ret = sscanf(buf, "%lx %lx", &start_addr, &length);
339         if (ret != 2)
340                 return -EINVAL;
341
342         track_freed_range(start_addr, length);
343
344         /* Range-check - don't free any reserved memory that
345          * wasn't reserved for phyp-dump */
346         if (start_addr < phyp_dump_info->init_reserve_start)
347                 start_addr = phyp_dump_info->init_reserve_start;
348
349         end_addr = phyp_dump_info->init_reserve_start +
350                         phyp_dump_info->init_reserve_size;
351         if (start_addr+length > end_addr)
352                 length = end_addr - start_addr;
353
354         /* Release the region of memory assed in by user */
355         start_pfn = PFN_DOWN(start_addr);
356         nr_pages = PFN_DOWN(length);
357         release_memory_range(start_pfn, nr_pages);
358
359         return count;
360 }
361
362 static ssize_t show_release_region(struct kobject *kobj,
363                         struct kobj_attribute *attr, char *buf)
364 {
365         u64 second_addr_range;
366
367         /* total reserved size - start of scratch area */
368         second_addr_range = phyp_dump_info->init_reserve_size -
369                                 phyp_dump_info->reserved_scratch_size;
370         return sprintf(buf, "CPU:0x%lx-0x%lx: HPTE:0x%lx-0x%lx:"
371                             " DUMP:0x%lx-0x%lx, 0x%lx-0x%lx:\n",
372                 phdr.cpu_data.destination_address,
373                 phdr.cpu_data.length_copied,
374                 phdr.hpte_data.destination_address,
375                 phdr.hpte_data.length_copied,
376                 phdr.kernel_data.destination_address,
377                 phdr.kernel_data.length_copied,
378                 phyp_dump_info->init_reserve_start,
379                 second_addr_range);
380 }
381
382 static struct kobj_attribute rr = __ATTR(release_region, 0600,
383                                         show_release_region,
384                                         store_release_region);
385
386 static int __init phyp_dump_setup(void)
387 {
388         struct device_node *rtas;
389         const struct phyp_dump_header *dump_header = NULL;
390         unsigned long dump_area_start;
391         unsigned long dump_area_length;
392         int header_len = 0;
393         int rc;
394
395         /* If no memory was reserved in early boot, there is nothing to do */
396         if (phyp_dump_info->init_reserve_size == 0)
397                 return 0;
398
399         /* Return if phyp dump not supported */
400         if (!phyp_dump_info->phyp_dump_configured)
401                 return -ENOSYS;
402
403         /* Is there dump data waiting for us? If there isn't,
404          * then register a new dump area, and release all of
405          * the rest of the reserved ram.
406          *
407          * The /rtas/ibm,kernel-dump rtas node is present only
408          * if there is dump data waiting for us.
409          */
410         rtas = of_find_node_by_path("/rtas");
411         if (rtas) {
412                 dump_header = of_get_property(rtas, "ibm,kernel-dump",
413                                                 &header_len);
414                 of_node_put(rtas);
415         }
416
417         ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
418
419         print_dump_header(dump_header);
420         dump_area_length = init_dump_header(&phdr);
421         /* align down */
422         dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
423
424         if (dump_header == NULL) {
425                 register_dump_area(&phdr, dump_area_start);
426                 return 0;
427         }
428
429         /* re-register the dump area, if old dump was invalid */
430         if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
431                 invalidate_last_dump(&phdr, dump_area_start);
432                 register_dump_area(&phdr, dump_area_start);
433                 return 0;
434         }
435
436         if (dump_header) {
437                 phyp_dump_info->reserved_scratch_addr =
438                                 dump_header->cpu_data.destination_address;
439                 phyp_dump_info->reserved_scratch_size =
440                                 dump_header->cpu_data.source_length +
441                                 dump_header->hpte_data.source_length +
442                                 dump_header->kernel_data.source_length;
443         }
444
445         /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
446         rc = sysfs_create_file(kernel_kobj, &rr.attr);
447         if (rc)
448                 printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
449                                                                         rc);
450
451         /* ToDo: re-register the dump area, for next time. */
452         return 0;
453 }
454 machine_subsys_initcall(pseries, phyp_dump_setup);
455
456 int __init early_init_dt_scan_phyp_dump(unsigned long node,
457                 const char *uname, int depth, void *data)
458 {
459         const unsigned int *sizes;
460
461         phyp_dump_info->phyp_dump_configured = 0;
462         phyp_dump_info->phyp_dump_is_active = 0;
463
464         if (depth != 1 || strcmp(uname, "rtas") != 0)
465                 return 0;
466
467         if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
468                 phyp_dump_info->phyp_dump_configured++;
469
470         if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
471                 phyp_dump_info->phyp_dump_is_active++;
472
473         sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
474                                     NULL);
475         if (!sizes)
476                 return 0;
477
478         if (sizes[0] == 1)
479                 phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
480
481         if (sizes[3] == 2)
482                 phyp_dump_info->hpte_region_size =
483                                                 *((unsigned long *)&sizes[4]);
484         return 1;
485 }
486
487 /* Look for phyp_dump= cmdline option */
488 static int __init early_phyp_dump_enabled(char *p)
489 {
490         phyp_dump_info->phyp_dump_at_boot = 1;
491
492         if (!p)
493                 return 0;
494
495         if (strncmp(p, "1", 1) == 0)
496                 phyp_dump_info->phyp_dump_at_boot = 1;
497         else if (strncmp(p, "0", 1) == 0)
498                 phyp_dump_info->phyp_dump_at_boot = 0;
499
500         return 0;
501 }
502 early_param("phyp_dump", early_phyp_dump_enabled);
503
504 /* Look for phyp_dump_reserve_size= cmdline option */
505 static int __init early_phyp_dump_reserve_size(char *p)
506 {
507         if (p)
508                 phyp_dump_info->reserve_bootvar = memparse(p, &p);
509
510         return 0;
511 }
512 early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);