Merge branch 'master'
[linux-2.6] / kernel / power / snapshot.c
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  *
8  * This file is released under the GPLv2, and is based on swsusp.c.
9  *
10  */
11
12
13 #include <linux/module.h>
14 #include <linux/mm.h>
15 #include <linux/suspend.h>
16 #include <linux/smp_lock.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/bootmem.h>
24 #include <linux/syscalls.h>
25 #include <linux/console.h>
26 #include <linux/highmem.h>
27
28 #include <asm/uaccess.h>
29 #include <asm/mmu_context.h>
30 #include <asm/pgtable.h>
31 #include <asm/tlbflush.h>
32 #include <asm/io.h>
33
34 #include "power.h"
35
36 #ifdef CONFIG_HIGHMEM
37 struct highmem_page {               /* one saved highmem page; nodes are chained through ->next */
38         char *data;                 /* page-sized buffer holding the copy of the page contents */
39         struct page *page;          /* the highmem page the copy was taken from */
40         struct highmem_page *next;  /* previously saved entry, NULL at the end of the list */
41 };
42
43 static struct highmem_page *highmem_copy;   /* head of the list of saved highmem pages */
44
45 static int save_highmem_zone(struct zone *zone)
46 {
47         unsigned long zone_pfn;
48         mark_free_pages(zone);
49         for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
50                 struct page *page;
51                 struct highmem_page *save;
52                 void *kaddr;
53                 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
54
55                 if (!(pfn%1000))
56                         printk(".");
57                 if (!pfn_valid(pfn))
58                         continue;
59                 page = pfn_to_page(pfn);
60                 /*
61                  * This condition results from rvmalloc() sans vmalloc_32()
62                  * and architectural memory reservations. This should be
63                  * corrected eventually when the cases giving rise to this
64                  * are better understood.
65                  */
66                 if (PageReserved(page)) {
67                         printk("highmem reserved page?!\n");
68                         continue;
69                 }
70                 BUG_ON(PageNosave(page));
71                 if (PageNosaveFree(page))
72                         continue;
73                 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
74                 if (!save)
75                         return -ENOMEM;
76                 save->next = highmem_copy;
77                 save->page = page;
78                 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
79                 if (!save->data) {
80                         kfree(save);
81                         return -ENOMEM;
82                 }
83                 kaddr = kmap_atomic(page, KM_USER0);
84                 memcpy(save->data, kaddr, PAGE_SIZE);
85                 kunmap_atomic(kaddr, KM_USER0);
86                 highmem_copy = save;
87         }
88         return 0;
89 }
90
91
92 static int save_highmem(void)
93 {
94         struct zone *zone;
95         int res = 0;
96
97         pr_debug("swsusp: Saving Highmem\n");
98         for_each_zone (zone) {
99                 if (is_highmem(zone))
100                         res = save_highmem_zone(zone);
101                 if (res)
102                         return res;
103         }
104         return 0;
105 }
106
107 int restore_highmem(void)
108 {
109         printk("swsusp: Restoring Highmem\n");
110         while (highmem_copy) {
111                 struct highmem_page *save = highmem_copy;
112                 void *kaddr;
113                 highmem_copy = save->next;
114
115                 kaddr = kmap_atomic(save->page, KM_USER0);
116                 memcpy(kaddr, save->data, PAGE_SIZE);
117                 kunmap_atomic(kaddr, KM_USER0);
118                 free_page((long) save->data);
119                 kfree(save);
120         }
121         return 0;
122 }
123 #else
/* Without CONFIG_HIGHMEM there is nothing to save or restore; both report success. */
static int save_highmem(void)
{
        return 0;
}

int restore_highmem(void)
{
        return 0;
}
126 #endif /* CONFIG_HIGHMEM */
127
128
129 static int pfn_is_nosave(unsigned long pfn)
130 {
131         unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
132         unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
133         return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
134 }
135
136 /**
137  *      saveable - Determine whether a page should be cloned or not.
138  *      @pfn:   The page
139  *
140  *      We save a page if it's Reserved, and not in the range of pages
141  *      statically defined as 'unsaveable', or if it isn't reserved, and
142  *      isn't part of a free chunk of pages.
143  */
144
145 static int saveable(struct zone *zone, unsigned long *zone_pfn)
146 {
147         unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
148         struct page *page;
149
150         if (!pfn_valid(pfn))
151                 return 0;
152
153         page = pfn_to_page(pfn);
154         BUG_ON(PageReserved(page) && PageNosave(page));
155         if (PageNosave(page))
156                 return 0;
157         if (PageReserved(page) && pfn_is_nosave(pfn)) {
158                 pr_debug("[nosave pfn 0x%lx]", pfn);
159                 return 0;
160         }
161         if (PageNosaveFree(page))
162                 return 0;
163
164         return 1;
165 }
166
167 static unsigned count_data_pages(void)
168 {
169         struct zone *zone;
170         unsigned long zone_pfn;
171         unsigned n;
172
173         n = 0;
174         for_each_zone (zone) {
175                 if (is_highmem(zone))
176                         continue;
177                 mark_free_pages(zone);
178                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
179                         n += saveable(zone, &zone_pfn);
180         }
181         return n;
182 }
183
184 static void copy_data_pages(struct pbe *pblist)
185 {
186         struct zone *zone;
187         unsigned long zone_pfn;
188         struct pbe *pbe, *p;
189
190         pbe = pblist;
191         for_each_zone (zone) {
192                 if (is_highmem(zone))
193                         continue;
194                 mark_free_pages(zone);
195                 /* This is necessary for swsusp_free() */
196                 for_each_pb_page (p, pblist)
197                         SetPageNosaveFree(virt_to_page(p));
198                 for_each_pbe (p, pblist)
199                         SetPageNosaveFree(virt_to_page(p->address));
200                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
201                         if (saveable(zone, &zone_pfn)) {
202                                 struct page *page;
203                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
204                                 BUG_ON(!pbe);
205                                 pbe->orig_address = (unsigned long)page_address(page);
206                                 /* copy_page is not usable for copying task structs. */
207                                 memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
208                                 pbe = pbe->next;
209                         }
210                 }
211         }
212         BUG_ON(pbe);
213 }
214
215
216 /**
217  *      free_pagedir - free pages allocated with alloc_pagedir()
218  */
219
220 static void free_pagedir(struct pbe *pblist)
221 {
222         struct pbe *pbe;
223
224         while (pblist) {
225                 pbe = (pblist + PB_PAGE_SKIP)->next;
226                 ClearPageNosave(virt_to_page(pblist));
227                 ClearPageNosaveFree(virt_to_page(pblist));
228                 free_page((unsigned long)pblist);
229                 pblist = pbe;
230         }
231 }
232
233 /**
234  *      fill_pb_page - Create a list of PBEs on a given memory page
235  */
236
237 static inline void fill_pb_page(struct pbe *pbpage)
238 {
239         struct pbe *p;
240
241         p = pbpage;
242         pbpage += PB_PAGE_SKIP;
243         do
244                 p->next = p + 1;
245         while (++p < pbpage);
246 }
247
248 /**
249  *      create_pbe_list - Create a list of PBEs on top of a given chain
250  *      of memory pages allocated with alloc_pagedir()
251  */
252
253 void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
254 {
255         struct pbe *pbpage, *p;
256         unsigned num = PBES_PER_PAGE;
257
258         for_each_pb_page (pbpage, pblist) {
259                 if (num >= nr_pages)
260                         break;
261
262                 fill_pb_page(pbpage);
263                 num += PBES_PER_PAGE;
264         }
265         if (pbpage) {
266                 for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
267                         p->next = p + 1;
268                 p->next = NULL;
269         }
270         pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
271 }
272
273 static void *alloc_image_page(void)
274 {
275         void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
276         if (res) {
277                 SetPageNosave(virt_to_page(res));
278                 SetPageNosaveFree(virt_to_page(res));
279         }
280         return res;
281 }
282
283 /**
284  *      alloc_pagedir - Allocate the page directory.
285  *
286  *      First, determine exactly how many pages we need and
287  *      allocate them.
288  *
289  *      We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
290  *      struct pbe elements (pbes) and the last element in the page points
291  *      to the next page.
292  *
293  *      On each page we set up a list of struct_pbe elements.
294  */
295
296 struct pbe *alloc_pagedir(unsigned nr_pages)
297 {
298         unsigned num;
299         struct pbe *pblist, *pbe;
300
301         if (!nr_pages)
302                 return NULL;
303
304         pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
305         pblist = alloc_image_page();
306         /* FIXME: rewrite this ugly loop */
307         for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
308                         pbe = pbe->next, num += PBES_PER_PAGE) {
309                 pbe += PB_PAGE_SKIP;
310                 pbe->next = alloc_image_page();
311         }
312         if (!pbe) { /* get_zeroed_page() failed */
313                 free_pagedir(pblist);
314                 pblist = NULL;
315         }
316         return pblist;
317 }
318
319 /**
320  * Free pages we allocated for suspend. Suspend pages are alocated
321  * before atomic copy, so we need to free them after resume.
322  */
323
324 void swsusp_free(void)
325 {
326         struct zone *zone;
327         unsigned long zone_pfn;
328
329         for_each_zone(zone) {
330                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
331                         if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
332                                 struct page * page;
333                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
334                                 if (PageNosave(page) && PageNosaveFree(page)) {
335                                         ClearPageNosave(page);
336                                         ClearPageNosaveFree(page);
337                                         free_page((long) page_address(page));
338                                 }
339                         }
340         }
341 }
342
343
344 /**
345  *      enough_free_mem - Make sure we enough free memory to snapshot.
346  *
347  *      Returns TRUE or FALSE after checking the number of available
348  *      free pages.
349  */
350
351 static int enough_free_mem(unsigned nr_pages)
352 {
353         pr_debug("swsusp: available memory: %u pages\n", nr_free_pages());
354         return nr_free_pages() > (nr_pages + PAGES_FOR_IO +
355                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
356 }
357
358
359 static struct pbe *swsusp_alloc(unsigned nr_pages)
360 {
361         struct pbe *pblist, *p;
362
363         if (!(pblist = alloc_pagedir(nr_pages))) {
364                 printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
365                 return NULL;
366         }
367         create_pbe_list(pblist, nr_pages);
368
369         for_each_pbe (p, pblist) {
370                 p->address = (unsigned long)alloc_image_page();
371                 if (!p->address) {
372                         printk(KERN_ERR "suspend: Allocating image pages failed.\n");
373                         swsusp_free();
374                         return NULL;
375                 }
376         }
377
378         return pblist;
379 }
380
381 asmlinkage int swsusp_save(void)
382 {
383         unsigned nr_pages;
384
385         pr_debug("swsusp: critical section: \n");
386         if (save_highmem()) {
387                 printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n");
388                 restore_highmem();
389                 return -ENOMEM;
390         }
391
392         drain_local_pages();
393         nr_pages = count_data_pages();
394         printk("swsusp: Need to copy %u pages\n", nr_pages);
395
396         pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
397                  nr_pages,
398                  (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
399                  PAGES_FOR_IO, nr_free_pages());
400
401         /* This is needed because of the fixed size of swsusp_info */
402         if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE)
403                 return -ENOSPC;
404
405         if (!enough_free_mem(nr_pages)) {
406                 printk(KERN_ERR "swsusp: Not enough free memory\n");
407                 return -ENOMEM;
408         }
409
410         if (!enough_swap(nr_pages)) {
411                 printk(KERN_ERR "swsusp: Not enough free swap\n");
412                 return -ENOSPC;
413         }
414
415         pagedir_nosave = swsusp_alloc(nr_pages);
416         if (!pagedir_nosave)
417                 return -ENOMEM;
418
419         /* During allocating of suspend pagedir, new cold pages may appear.
420          * Kill them.
421          */
422         drain_local_pages();
423         copy_data_pages(pagedir_nosave);
424
425         /*
426          * End of critical section. From now on, we can write to memory,
427          * but we should not touch disk. This specially means we must _not_
428          * touch swap space! Except we must write out our image of course.
429          */
430
431         nr_copy_pages = nr_pages;
432
433         printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
434         return 0;
435 }