perf_counter tools: Move perfstat supporting code into perfcounters.h
[linux-2.6] / kernel / power / snapshot.c
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality for swsusp.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8  *
9  * This file is released under the GPLv2.
10  *
11  */
12
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
28 #include <linux/list.h>
29
30 #include <asm/uaccess.h>
31 #include <asm/mmu_context.h>
32 #include <asm/pgtable.h>
33 #include <asm/tlbflush.h>
34 #include <asm/io.h>
35
36 #include "power.h"
37
/* Page-marking helpers defined further down in this file. */
static int swsusp_page_is_free(struct page *page);
static void swsusp_set_page_forbidden(struct page *page);
static void swsusp_unset_page_forbidden(struct page *page);

/*
 * List of PBEs used on resume for pages that were allocated before the
 * suspend and are part of the suspend image, but whose page frames have
 * also been allocated by the "resume" kernel.  The contents of such pages
 * cannot be written directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* Auxiliary buffer, one page in size */
static void *buffer;
51
52 /**
53  *      @safe_needed - on resume, for storing the PBE list and the image,
54  *      we can only use memory pages that do not conflict with the pages
55  *      used before suspend.  The unsafe pages have PageNosaveFree set
56  *      and we count them using unsafe_pages.
57  *
58  *      Each allocated image page is marked as PageNosave and PageNosaveFree
59  *      so that swsusp_free() can release it.
60  */
61
62 #define PG_ANY          0
63 #define PG_SAFE         1
64 #define PG_UNSAFE_CLEAR 1
65 #define PG_UNSAFE_KEEP  0
66
67 static unsigned int allocated_unsafe_pages;
68
69 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
70 {
71         void *res;
72
73         res = (void *)get_zeroed_page(gfp_mask);
74         if (safe_needed)
75                 while (res && swsusp_page_is_free(virt_to_page(res))) {
76                         /* The page is unsafe, mark it for swsusp_free() */
77                         swsusp_set_page_forbidden(virt_to_page(res));
78                         allocated_unsafe_pages++;
79                         res = (void *)get_zeroed_page(gfp_mask);
80                 }
81         if (res) {
82                 swsusp_set_page_forbidden(virt_to_page(res));
83                 swsusp_set_page_free(virt_to_page(res));
84         }
85         return res;
86 }
87
88 unsigned long get_safe_page(gfp_t gfp_mask)
89 {
90         return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
91 }
92
93 static struct page *alloc_image_page(gfp_t gfp_mask)
94 {
95         struct page *page;
96
97         page = alloc_page(gfp_mask);
98         if (page) {
99                 swsusp_set_page_forbidden(page);
100                 swsusp_set_page_free(page);
101         }
102         return page;
103 }
104
/**
 *      free_image_page - release a page obtained from get_image_page()
 *      @addr - kernel virtual address of the page (must be valid)
 *      @clear_nosave_free - if set, the "free" mark of the page is cleared
 *      in addition to the "forbidden" one, which is always cleared
 */

static inline void free_image_page(void *addr, int clear_nosave_free)
{
        struct page *pg;

        BUG_ON(!virt_addr_valid(addr));

        pg = virt_to_page(addr);

        /* Undo the marks set at allocation time before freeing the page */
        swsusp_unset_page_forbidden(pg);
        if (clear_nosave_free)
                swsusp_unset_page_free(pg);

        __free_page(pg);
}
124
125 /* struct linked_page is used to build chains of pages */
126
127 #define LINKED_PAGE_DATA_SIZE   (PAGE_SIZE - sizeof(void *))
128
129 struct linked_page {
130         struct linked_page *next;
131         char data[LINKED_PAGE_DATA_SIZE];
132 } __attribute__((packed));
133
134 static inline void
135 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
136 {
137         while (list) {
138                 struct linked_page *lp = list->next;
139
140                 free_image_page(list, clear_page_nosave);
141                 list = lp;
142         }
143 }
144
145 /**
146   *     struct chain_allocator is used for allocating small objects out of
147   *     a linked list of pages called 'the chain'.
148   *
149   *     The chain grows each time when there is no room for a new object in
150   *     the current page.  The allocated objects cannot be freed individually.
151   *     It is only possible to free them all at once, by freeing the entire
152   *     chain.
153   *
154   *     NOTE: The chain allocator may be inefficient if the allocated objects
155   *     are not much smaller than PAGE_SIZE.
156   */
157
158 struct chain_allocator {
159         struct linked_page *chain;      /* the chain */
160         unsigned int used_space;        /* total size of objects allocated out
161                                          * of the current page
162                                          */
163         gfp_t gfp_mask;         /* mask for allocating pages */
164         int safe_needed;        /* if set, only "safe" pages are allocated */
165 };
166
167 static void
168 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
169 {
170         ca->chain = NULL;
171         ca->used_space = LINKED_PAGE_DATA_SIZE;
172         ca->gfp_mask = gfp_mask;
173         ca->safe_needed = safe_needed;
174 }
175
176 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
177 {
178         void *ret;
179
180         if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
181                 struct linked_page *lp;
182
183                 lp = get_image_page(ca->gfp_mask, ca->safe_needed);
184                 if (!lp)
185                         return NULL;
186
187                 lp->next = ca->chain;
188                 ca->chain = lp;
189                 ca->used_space = 0;
190         }
191         ret = ca->chain->data + ca->used_space;
192         ca->used_space += size;
193         return ret;
194 }
195
/**
 *      Data types related to memory bitmaps.
 *
 *      Memory bitmap is a structure consisting of many linked lists of
 *      objects.  The main list's elements are of type struct zone_bitmap
 *      and each of them corresponds to one zone.  For each zone bitmap
 *      object there is a list of objects of type struct bm_block that
 *      represent the blocks of the bitmap in which information is stored.
 *
 *      struct memory_bitmap contains a pointer to the main list of zone
 *      bitmap objects, a struct bm_position used for browsing the bitmap,
 *      and a pointer to the list of pages used for allocating all of the
 *      zone bitmap objects and bitmap block objects.
 *
 *      NOTE: It has to be possible to lay out the bitmap in memory
 *      using only allocations of order 0.  Additionally, the bitmap is
 *      designed to work with arbitrary number of zones (this is over the
 *      top for now, but let's avoid making unnecessary assumptions ;-).
 *
 *      struct zone_bitmap contains a pointer to a list of bitmap block
 *      objects and a pointer to the bitmap block object that has been
 *      most recently used for setting bits.  Additionally, it contains the
 *      pfns that correspond to the start and end of the represented zone.
 *
 *      struct bm_block contains a pointer to the memory page in which
 *      information is stored (in the form of a block of bitmap).
 *      It also contains the pfns that correspond to the start and end of
 *      the represented memory area.
 */
225
226 #define BM_END_OF_MAP   (~0UL)
227
228 #define BM_BITS_PER_BLOCK       (PAGE_SIZE << 3)
229
230 struct bm_block {
231         struct list_head hook;  /* hook into a list of bitmap blocks */
232         unsigned long start_pfn;        /* pfn represented by the first bit */
233         unsigned long end_pfn;  /* pfn represented by the last bit plus 1 */
234         unsigned long *data;    /* bitmap representing pages */
235 };
236
237 static inline unsigned long bm_block_bits(struct bm_block *bb)
238 {
239         return bb->end_pfn - bb->start_pfn;
240 }
241
242 /* strcut bm_position is used for browsing memory bitmaps */
243
244 struct bm_position {
245         struct bm_block *block;
246         int bit;
247 };
248
249 struct memory_bitmap {
250         struct list_head blocks;        /* list of bitmap blocks */
251         struct linked_page *p_list;     /* list of pages used to store zone
252                                          * bitmap objects and bitmap block
253                                          * objects
254                                          */
255         struct bm_position cur; /* most recently used bit position */
256 };
257
258 /* Functions that operate on memory bitmaps */
259
260 static void memory_bm_position_reset(struct memory_bitmap *bm)
261 {
262         bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
263         bm->cur.bit = 0;
264 }
265
266 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
267
268 /**
269  *      create_bm_block_list - create a list of block bitmap objects
270  *      @nr_blocks - number of blocks to allocate
271  *      @list - list to put the allocated blocks into
272  *      @ca - chain allocator to be used for allocating memory
273  */
274 static int create_bm_block_list(unsigned long pages,
275                                 struct list_head *list,
276                                 struct chain_allocator *ca)
277 {
278         unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
279
280         while (nr_blocks-- > 0) {
281                 struct bm_block *bb;
282
283                 bb = chain_alloc(ca, sizeof(struct bm_block));
284                 if (!bb)
285                         return -ENOMEM;
286                 list_add(&bb->hook, list);
287         }
288
289         return 0;
290 }
291
292 struct mem_extent {
293         struct list_head hook;
294         unsigned long start;
295         unsigned long end;
296 };
297
298 /**
299  *      free_mem_extents - free a list of memory extents
300  *      @list - list of extents to empty
301  */
302 static void free_mem_extents(struct list_head *list)
303 {
304         struct mem_extent *ext, *aux;
305
306         list_for_each_entry_safe(ext, aux, list, hook) {
307                 list_del(&ext->hook);
308                 kfree(ext);
309         }
310 }
311
312 /**
313  *      create_mem_extents - create a list of memory extents representing
314  *                           contiguous ranges of PFNs
315  *      @list - list to put the extents into
316  *      @gfp_mask - mask to use for memory allocations
317  */
318 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
319 {
320         struct zone *zone;
321
322         INIT_LIST_HEAD(list);
323
324         for_each_populated_zone(zone) {
325                 unsigned long zone_start, zone_end;
326                 struct mem_extent *ext, *cur, *aux;
327
328                 zone_start = zone->zone_start_pfn;
329                 zone_end = zone->zone_start_pfn + zone->spanned_pages;
330
331                 list_for_each_entry(ext, list, hook)
332                         if (zone_start <= ext->end)
333                                 break;
334
335                 if (&ext->hook == list || zone_end < ext->start) {
336                         /* New extent is necessary */
337                         struct mem_extent *new_ext;
338
339                         new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
340                         if (!new_ext) {
341                                 free_mem_extents(list);
342                                 return -ENOMEM;
343                         }
344                         new_ext->start = zone_start;
345                         new_ext->end = zone_end;
346                         list_add_tail(&new_ext->hook, &ext->hook);
347                         continue;
348                 }
349
350                 /* Merge this zone's range of PFNs with the existing one */
351                 if (zone_start < ext->start)
352                         ext->start = zone_start;
353                 if (zone_end > ext->end)
354                         ext->end = zone_end;
355
356                 /* More merging may be possible */
357                 cur = ext;
358                 list_for_each_entry_safe_continue(cur, aux, list, hook) {
359                         if (zone_end < cur->start)
360                                 break;
361                         if (zone_end < cur->end)
362                                 ext->end = cur->end;
363                         list_del(&cur->hook);
364                         kfree(cur);
365                 }
366         }
367
368         return 0;
369 }
370
371 /**
372   *     memory_bm_create - allocate memory for a memory bitmap
373   */
374 static int
375 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
376 {
377         struct chain_allocator ca;
378         struct list_head mem_extents;
379         struct mem_extent *ext;
380         int error;
381
382         chain_init(&ca, gfp_mask, safe_needed);
383         INIT_LIST_HEAD(&bm->blocks);
384
385         error = create_mem_extents(&mem_extents, gfp_mask);
386         if (error)
387                 return error;
388
389         list_for_each_entry(ext, &mem_extents, hook) {
390                 struct bm_block *bb;
391                 unsigned long pfn = ext->start;
392                 unsigned long pages = ext->end - ext->start;
393
394                 bb = list_entry(bm->blocks.prev, struct bm_block, hook);
395
396                 error = create_bm_block_list(pages, bm->blocks.prev, &ca);
397                 if (error)
398                         goto Error;
399
400                 list_for_each_entry_continue(bb, &bm->blocks, hook) {
401                         bb->data = get_image_page(gfp_mask, safe_needed);
402                         if (!bb->data) {
403                                 error = -ENOMEM;
404                                 goto Error;
405                         }
406
407                         bb->start_pfn = pfn;
408                         if (pages >= BM_BITS_PER_BLOCK) {
409                                 pfn += BM_BITS_PER_BLOCK;
410                                 pages -= BM_BITS_PER_BLOCK;
411                         } else {
412                                 /* This is executed only once in the loop */
413                                 pfn += pages;
414                         }
415                         bb->end_pfn = pfn;
416                 }
417         }
418
419         bm->p_list = ca.chain;
420         memory_bm_position_reset(bm);
421  Exit:
422         free_mem_extents(&mem_extents);
423         return error;
424
425  Error:
426         bm->p_list = ca.chain;
427         memory_bm_free(bm, PG_UNSAFE_CLEAR);
428         goto Exit;
429 }
430
431 /**
432   *     memory_bm_free - free memory occupied by the memory bitmap @bm
433   */
434 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
435 {
436         struct bm_block *bb;
437
438         list_for_each_entry(bb, &bm->blocks, hook)
439                 if (bb->data)
440                         free_image_page(bb->data, clear_nosave_free);
441
442         free_list_of_pages(bm->p_list, clear_nosave_free);
443
444         INIT_LIST_HEAD(&bm->blocks);
445 }
446
447 /**
448  *      memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
449  *      to given pfn.  The cur_zone_bm member of @bm and the cur_block member
450  *      of @bm->cur_zone_bm are updated.
451  */
452 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
453                                 void **addr, unsigned int *bit_nr)
454 {
455         struct bm_block *bb;
456
457         /*
458          * Check if the pfn corresponds to the current bitmap block and find
459          * the block where it fits if this is not the case.
460          */
461         bb = bm->cur.block;
462         if (pfn < bb->start_pfn)
463                 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
464                         if (pfn >= bb->start_pfn)
465                                 break;
466
467         if (pfn >= bb->end_pfn)
468                 list_for_each_entry_continue(bb, &bm->blocks, hook)
469                         if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
470                                 break;
471
472         if (&bb->hook == &bm->blocks)
473                 return -EFAULT;
474
475         /* The block has been found */
476         bm->cur.block = bb;
477         pfn -= bb->start_pfn;
478         bm->cur.bit = pfn + 1;
479         *bit_nr = pfn;
480         *addr = bb->data;
481         return 0;
482 }
483
/*
 * memory_bm_set_bit - set the bit of @bm that represents @pfn.  It is a bug
 * to call this for a pfn that is not covered by the bitmap.
 */
static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        unsigned int nr;
        void *data;
        int error = memory_bm_find_bit(bm, pfn, &data, &nr);

        BUG_ON(error);
        set_bit(nr, data);
}
494
/*
 * mem_bm_set_bit_check - set the bit of @bm that represents @pfn if the
 * bitmap covers it.  Returns 0 on success or the error reported by
 * memory_bm_find_bit() otherwise.
 */
static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
{
        unsigned int nr;
        void *data;
        int error = memory_bm_find_bit(bm, pfn, &data, &nr);

        if (error)
                return error;

        set_bit(nr, data);
        return 0;
}
506
/*
 * memory_bm_clear_bit - clear the bit of @bm that represents @pfn.  It is a
 * bug to call this for a pfn that is not covered by the bitmap.
 */
static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        unsigned int nr;
        void *data;
        int error = memory_bm_find_bit(bm, pfn, &data, &nr);

        BUG_ON(error);
        clear_bit(nr, data);
}
517
/*
 * memory_bm_test_bit - return the result of test_bit() for the bit of @bm
 * that represents @pfn.  It is a bug to call this for a pfn that is not
 * covered by the bitmap.
 */
static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
        unsigned int nr;
        void *data;
        int error = memory_bm_find_bit(bm, pfn, &data, &nr);

        BUG_ON(error);
        return test_bit(nr, data);
}
528
529 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
530 {
531         void *addr;
532         unsigned int bit;
533
534         return !memory_bm_find_bit(bm, pfn, &addr, &bit);
535 }
536
537 /**
538  *      memory_bm_next_pfn - find the pfn that corresponds to the next set bit
539  *      in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
540  *      returned.
541  *
542  *      It is required to run memory_bm_position_reset() before the first call to
543  *      this function.
544  */
545
546 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
547 {
548         struct bm_block *bb;
549         int bit;
550
551         bb = bm->cur.block;
552         do {
553                 bit = bm->cur.bit;
554                 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
555                 if (bit < bm_block_bits(bb))
556                         goto Return_pfn;
557
558                 bb = list_entry(bb->hook.next, struct bm_block, hook);
559                 bm->cur.block = bb;
560                 bm->cur.bit = 0;
561         } while (&bb->hook != &bm->blocks);
562
563         memory_bm_position_reset(bm);
564         return BM_END_OF_MAP;
565
566  Return_pfn:
567         bm->cur.bit = bit + 1;
568         return bb->start_pfn + bit;
569 }
570
571 /**
572  *      This structure represents a range of page frames the contents of which
573  *      should not be saved during the suspend.
574  */
575
576 struct nosave_region {
577         struct list_head list;
578         unsigned long start_pfn;
579         unsigned long end_pfn;
580 };
581
582 static LIST_HEAD(nosave_regions);
583
584 /**
585  *      register_nosave_region - register a range of page frames the contents
586  *      of which should not be saved during the suspend (to be used in the early
587  *      initialization code)
588  */
589
590 void __init
591 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
592                          int use_kmalloc)
593 {
594         struct nosave_region *region;
595
596         if (start_pfn >= end_pfn)
597                 return;
598
599         if (!list_empty(&nosave_regions)) {
600                 /* Try to extend the previous region (they should be sorted) */
601                 region = list_entry(nosave_regions.prev,
602                                         struct nosave_region, list);
603                 if (region->end_pfn == start_pfn) {
604                         region->end_pfn = end_pfn;
605                         goto Report;
606                 }
607         }
608         if (use_kmalloc) {
609                 /* during init, this shouldn't fail */
610                 region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
611                 BUG_ON(!region);
612         } else
613                 /* This allocation cannot fail */
614                 region = alloc_bootmem_low(sizeof(struct nosave_region));
615         region->start_pfn = start_pfn;
616         region->end_pfn = end_pfn;
617         list_add_tail(&region->list, &nosave_regions);
618  Report:
619         printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
620                 start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
621 }
622
/*
 * Bitmap in which set bits correspond to the page frames the contents of
 * which should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Bitmap in which set bits correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 */

/* Mark @page in free_pages_map; does nothing if the map does not exist */
void swsusp_set_page_free(struct page *page)
{
        if (!free_pages_map)
                return;

        memory_bm_set_bit(free_pages_map, page_to_pfn(page));
}
642
643 static int swsusp_page_is_free(struct page *page)
644 {
645         return free_pages_map ?
646                 memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
647 }
648
649 void swsusp_unset_page_free(struct page *page)
650 {
651         if (free_pages_map)
652                 memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
653 }
654
655 static void swsusp_set_page_forbidden(struct page *page)
656 {
657         if (forbidden_pages_map)
658                 memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
659 }
660
661 int swsusp_page_is_forbidden(struct page *page)
662 {
663         return forbidden_pages_map ?
664                 memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
665 }
666
667 static void swsusp_unset_page_forbidden(struct page *page)
668 {
669         if (forbidden_pages_map)
670                 memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
671 }
672
673 /**
674  *      mark_nosave_pages - set bits corresponding to the page frames the
675  *      contents of which should not be saved in a given bitmap.
676  */
677
678 static void mark_nosave_pages(struct memory_bitmap *bm)
679 {
680         struct nosave_region *region;
681
682         if (list_empty(&nosave_regions))
683                 return;
684
685         list_for_each_entry(region, &nosave_regions, list) {
686                 unsigned long pfn;
687
688                 pr_debug("PM: Marking nosave pages: %016lx - %016lx\n",
689                                 region->start_pfn << PAGE_SHIFT,
690                                 region->end_pfn << PAGE_SHIFT);
691
692                 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
693                         if (pfn_valid(pfn)) {
694                                 /*
695                                  * It is safe to ignore the result of
696                                  * mem_bm_set_bit_check() here, since we won't
697                                  * touch the PFNs for which the error is
698                                  * returned anyway.
699                                  */
700                                 mem_bm_set_bit_check(bm, pfn);
701                         }
702         }
703 }
704
705 /**
706  *      create_basic_memory_bitmaps - create bitmaps needed for marking page
707  *      frames that should not be saved and free page frames.  The pointers
708  *      forbidden_pages_map and free_pages_map are only modified if everything
709  *      goes well, because we don't want the bits to be used before both bitmaps
710  *      are set up.
711  */
712
713 int create_basic_memory_bitmaps(void)
714 {
715         struct memory_bitmap *bm1, *bm2;
716         int error = 0;
717
718         BUG_ON(forbidden_pages_map || free_pages_map);
719
720         bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
721         if (!bm1)
722                 return -ENOMEM;
723
724         error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
725         if (error)
726                 goto Free_first_object;
727
728         bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
729         if (!bm2)
730                 goto Free_first_bitmap;
731
732         error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
733         if (error)
734                 goto Free_second_object;
735
736         forbidden_pages_map = bm1;
737         free_pages_map = bm2;
738         mark_nosave_pages(forbidden_pages_map);
739
740         pr_debug("PM: Basic memory bitmaps created\n");
741
742         return 0;
743
744  Free_second_object:
745         kfree(bm2);
746  Free_first_bitmap:
747         memory_bm_free(bm1, PG_UNSAFE_CLEAR);
748  Free_first_object:
749         kfree(bm1);
750         return -ENOMEM;
751 }
752
753 /**
754  *      free_basic_memory_bitmaps - free memory bitmaps allocated by
755  *      create_basic_memory_bitmaps().  The auxiliary pointers are necessary
756  *      so that the bitmaps themselves are not referred to while they are being
757  *      freed.
758  */
759
760 void free_basic_memory_bitmaps(void)
761 {
762         struct memory_bitmap *bm1, *bm2;
763
764         BUG_ON(!(forbidden_pages_map && free_pages_map));
765
766         bm1 = forbidden_pages_map;
767         bm2 = free_pages_map;
768         forbidden_pages_map = NULL;
769         free_pages_map = NULL;
770         memory_bm_free(bm1, PG_UNSAFE_CLEAR);
771         kfree(bm1);
772         memory_bm_free(bm2, PG_UNSAFE_CLEAR);
773         kfree(bm2);
774
775         pr_debug("PM: Basic memory bitmaps freed\n");
776 }
777
778 /**
779  *      snapshot_additional_pages - estimate the number of additional pages
780  *      be needed for setting up the suspend image data structures for given
781  *      zone (usually the returned value is greater than the exact number)
782  */
783
784 unsigned int snapshot_additional_pages(struct zone *zone)
785 {
786         unsigned int res;
787
788         res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
789         res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
790         return 2 * res;
791 }
792
793 #ifdef CONFIG_HIGHMEM
794 /**
795  *      count_free_highmem_pages - compute the total number of free highmem
796  *      pages, system-wide.
797  */
798
799 static unsigned int count_free_highmem_pages(void)
800 {
801         struct zone *zone;
802         unsigned int cnt = 0;
803
804         for_each_populated_zone(zone)
805                 if (is_highmem(zone))
806                         cnt += zone_page_state(zone, NR_FREE_PAGES);
807
808         return cnt;
809 }
810
811 /**
812  *      saveable_highmem_page - Determine whether a highmem page should be
813  *      included in the suspend image.
814  *
815  *      We should save the page if it isn't Nosave or NosaveFree, or Reserved,
816  *      and it isn't a part of a free chunk of pages.
817  */
818 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
819 {
820         struct page *page;
821
822         if (!pfn_valid(pfn))
823                 return NULL;
824
825         page = pfn_to_page(pfn);
826         if (page_zone(page) != zone)
827                 return NULL;
828
829         BUG_ON(!PageHighMem(page));
830
831         if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
832             PageReserved(page))
833                 return NULL;
834
835         return page;
836 }
837
838 /**
839  *      count_highmem_pages - compute the total number of saveable highmem
840  *      pages.
841  */
842
843 unsigned int count_highmem_pages(void)
844 {
845         struct zone *zone;
846         unsigned int n = 0;
847
848         for_each_zone(zone) {
849                 unsigned long pfn, max_zone_pfn;
850
851                 if (!is_highmem(zone))
852                         continue;
853
854                 mark_free_pages(zone);
855                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
856                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
857                         if (saveable_highmem_page(zone, pfn))
858                                 n++;
859         }
860         return n;
861 }
862 #else
863 static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
864 {
865         return NULL;
866 }
867 #endif /* CONFIG_HIGHMEM */
868
869 /**
870  *      saveable_page - Determine whether a non-highmem page should be included
871  *      in the suspend image.
872  *
873  *      We should save the page if it isn't Nosave, and is not in the range
874  *      of pages statically defined as 'unsaveable', and it isn't a part of
875  *      a free chunk of pages.
876  */
877 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
878 {
879         struct page *page;
880
881         if (!pfn_valid(pfn))
882                 return NULL;
883
884         page = pfn_to_page(pfn);
885         if (page_zone(page) != zone)
886                 return NULL;
887
888         BUG_ON(PageHighMem(page));
889
890         if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
891                 return NULL;
892
893         if (PageReserved(page)
894             && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
895                 return NULL;
896
897         return page;
898 }
899
900 /**
901  *      count_data_pages - compute the total number of saveable non-highmem
902  *      pages.
903  */
904
905 unsigned int count_data_pages(void)
906 {
907         struct zone *zone;
908         unsigned long pfn, max_zone_pfn;
909         unsigned int n = 0;
910
911         for_each_zone(zone) {
912                 if (is_highmem(zone))
913                         continue;
914
915                 mark_free_pages(zone);
916                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
917                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
918                         if (saveable_page(zone, pfn))
919                                 n++;
920         }
921         return n;
922 }
923
924 /* This is needed, because copy_page and memcpy are not usable for copying
925  * task structs.
926  */
927 static inline void do_copy_page(long *dst, long *src)
928 {
929         int n;
930
931         for (n = PAGE_SIZE / sizeof(long); n; n--)
932                 *dst++ = *src++;
933 }
934
935
/**
 *      safe_copy_page - copy the contents of @s_page to @dst, making sure
 *              the page is present in the kernel page tables while it is
 *              being read (this always is the case if
 *              CONFIG_DEBUG_PAGEALLOC is not set and in that case
 *              kernel_page_present() always returns 'true').
 *
 *      A page that is not present is mapped for the duration of the copy
 *      and unmapped again afterwards.
 */
static void safe_copy_page(void *dst, struct page *s_page)
{
        int need_map = !kernel_page_present(s_page);

        if (need_map)
                kernel_map_pages(s_page, 1, 1);

        do_copy_page(dst, page_address(s_page));

        if (need_map)
                kernel_map_pages(s_page, 1, 0);
}
952
953
954 #ifdef CONFIG_HIGHMEM
/* Dispatch to the highmem or non-highmem saveability check for @zone */
static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
	if (is_highmem(zone))
		return saveable_highmem_page(zone, pfn);

	return saveable_page(zone, pfn);
}
961
962 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
963 {
964         struct page *s_page, *d_page;
965         void *src, *dst;
966
967         s_page = pfn_to_page(src_pfn);
968         d_page = pfn_to_page(dst_pfn);
969         if (PageHighMem(s_page)) {
970                 src = kmap_atomic(s_page, KM_USER0);
971                 dst = kmap_atomic(d_page, KM_USER1);
972                 do_copy_page(dst, src);
973                 kunmap_atomic(src, KM_USER0);
974                 kunmap_atomic(dst, KM_USER1);
975         } else {
976                 if (PageHighMem(d_page)) {
977                         /* Page pointed to by src may contain some kernel
978                          * data modified by kmap_atomic()
979                          */
980                         safe_copy_page(buffer, s_page);
981                         dst = kmap_atomic(d_page, KM_USER0);
982                         memcpy(dst, buffer, PAGE_SIZE);
983                         kunmap_atomic(dst, KM_USER0);
984                 } else {
985                         safe_copy_page(page_address(d_page), s_page);
986                 }
987         }
988 }
989 #else
/* Without highmem every candidate frame is checked by saveable_page() */
#define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)

/* Copy the frame @src_pfn into the frame @dst_pfn (no highmem involved) */
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	struct page *src_page = pfn_to_page(src_pfn);
	void *dst_addr = page_address(pfn_to_page(dst_pfn));

	safe_copy_page(dst_addr, src_page);
}
997 #endif /* CONFIG_HIGHMEM */
998
999 static void
1000 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1001 {
1002         struct zone *zone;
1003         unsigned long pfn;
1004
1005         for_each_zone(zone) {
1006                 unsigned long max_zone_pfn;
1007
1008                 mark_free_pages(zone);
1009                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1010                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1011                         if (page_is_saveable(zone, pfn))
1012                                 memory_bm_set_bit(orig_bm, pfn);
1013         }
1014         memory_bm_position_reset(orig_bm);
1015         memory_bm_position_reset(copy_bm);
1016         for(;;) {
1017                 pfn = memory_bm_next_pfn(orig_bm);
1018                 if (unlikely(pfn == BM_END_OF_MAP))
1019                         break;
1020                 copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1021         }
1022 }
1023
1024 /* Total number of image pages */
1025 static unsigned int nr_copy_pages;
1026 /* Number of pages needed for saving the original pfns of the image pages */
1027 static unsigned int nr_meta_pages;
1028
1029 /**
1030  *      swsusp_free - free pages allocated for the suspend.
1031  *
1032  *      Suspend pages are alocated before the atomic copy is made, so we
1033  *      need to release them after the resume.
1034  */
1035
1036 void swsusp_free(void)
1037 {
1038         struct zone *zone;
1039         unsigned long pfn, max_zone_pfn;
1040
1041         for_each_zone(zone) {
1042                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1043                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1044                         if (pfn_valid(pfn)) {
1045                                 struct page *page = pfn_to_page(pfn);
1046
1047                                 if (swsusp_page_is_forbidden(page) &&
1048                                     swsusp_page_is_free(page)) {
1049                                         swsusp_unset_page_forbidden(page);
1050                                         swsusp_unset_page_free(page);
1051                                         __free_page(page);
1052                                 }
1053                         }
1054         }
1055         nr_copy_pages = 0;
1056         nr_meta_pages = 0;
1057         restore_pblist = NULL;
1058         buffer = NULL;
1059 }
1060
1061 #ifdef CONFIG_HIGHMEM
/**
 *	count_pages_for_highmem - compute the number of non-highmem pages
 *	that will be necessary for creating copies of highmem pages, i.e.
 *	the part of @nr_highmem that is not covered by the free highmem
 *	pages (0 if all of it is covered).
 */

static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	unsigned int avail = count_free_highmem_pages();

	return (avail >= nr_highmem) ? 0 : nr_highmem - avail;
}
1078 #else
/* !CONFIG_HIGHMEM variant: no normal pages are ever needed for highmem */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	return 0;
}
1081 #endif /* CONFIG_HIGHMEM */
1082
1083 /**
1084  *      enough_free_mem - Make sure we have enough free memory for the
1085  *      snapshot image.
1086  */
1087
1088 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1089 {
1090         struct zone *zone;
1091         unsigned int free = 0, meta = 0;
1092
1093         for_each_zone(zone) {
1094                 meta += snapshot_additional_pages(zone);
1095                 if (!is_highmem(zone))
1096                         free += zone_page_state(zone, NR_FREE_PAGES);
1097         }
1098
1099         nr_pages += count_pages_for_highmem(nr_highmem);
1100         pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n",
1101                 nr_pages, PAGES_FOR_IO, meta, free);
1102
1103         return free > nr_pages + PAGES_FOR_IO + meta;
1104 }
1105
1106 #ifdef CONFIG_HIGHMEM
1107 /**
1108  *      get_highmem_buffer - if there are some highmem pages in the suspend
1109  *      image, we may need the buffer to copy them and/or load their data.
1110  */
1111
1112 static inline int get_highmem_buffer(int safe_needed)
1113 {
1114         buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1115         return buffer ? 0 : -ENOMEM;
1116 }
1117
1118 /**
1119  *      alloc_highmem_image_pages - allocate some highmem pages for the image.
1120  *      Try to allocate as many pages as needed, but if the number of free
1121  *      highmem pages is lesser than that, allocate them all.
1122  */
1123
1124 static inline unsigned int
1125 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1126 {
1127         unsigned int to_alloc = count_free_highmem_pages();
1128
1129         if (to_alloc > nr_highmem)
1130                 to_alloc = nr_highmem;
1131
1132         nr_highmem -= to_alloc;
1133         while (to_alloc-- > 0) {
1134                 struct page *page;
1135
1136                 page = alloc_image_page(__GFP_HIGHMEM);
1137                 memory_bm_set_bit(bm, page_to_pfn(page));
1138         }
1139         return nr_highmem;
1140 }
1141 #else
/* !CONFIG_HIGHMEM variant: no auxiliary buffer is needed */
static inline int get_highmem_buffer(int safe_needed)
{
	return 0;
}

/* !CONFIG_HIGHMEM variant: nothing is allocated and nothing is left over */
static inline unsigned int
alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n)
{
	return 0;
}
1146 #endif /* CONFIG_HIGHMEM */
1147
1148 /**
1149  *      swsusp_alloc - allocate memory for the suspend image
1150  *
1151  *      We first try to allocate as many highmem pages as there are
1152  *      saveable highmem pages in the system.  If that fails, we allocate
1153  *      non-highmem pages for the copies of the remaining highmem ones.
1154  *
1155  *      In this approach it is likely that the copies of highmem pages will
1156  *      also be located in the high memory, because of the way in which
1157  *      copy_data_pages() works.
1158  */
1159
1160 static int
1161 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1162                 unsigned int nr_pages, unsigned int nr_highmem)
1163 {
1164         int error;
1165
1166         error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1167         if (error)
1168                 goto Free;
1169
1170         error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1171         if (error)
1172                 goto Free;
1173
1174         if (nr_highmem > 0) {
1175                 error = get_highmem_buffer(PG_ANY);
1176                 if (error)
1177                         goto Free;
1178
1179                 nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
1180         }
1181         while (nr_pages-- > 0) {
1182                 struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1183
1184                 if (!page)
1185                         goto Free;
1186
1187                 memory_bm_set_bit(copy_bm, page_to_pfn(page));
1188         }
1189         return 0;
1190
1191  Free:
1192         swsusp_free();
1193         return -ENOMEM;
1194 }
1195
1196 /* Memory bitmap used for marking saveable pages (during suspend) or the
1197  * suspend image pages (during resume)
1198  */
1199 static struct memory_bitmap orig_bm;
1200 /* Memory bitmap used on suspend for marking allocated pages that will contain
1201  * the copies of saveable pages.  During resume it is initially used for
1202  * marking the suspend image pages, but then its set bits are duplicated in
1203  * @orig_bm and it is released.  Next, on systems with high memory, it may be
1204  * used for marking "safe" highmem pages, but it has to be reinitialized for
1205  * this purpose.
1206  */
1207 static struct memory_bitmap copy_bm;
1208
1209 asmlinkage int swsusp_save(void)
1210 {
1211         unsigned int nr_pages, nr_highmem;
1212
1213         printk(KERN_INFO "PM: Creating hibernation image: \n");
1214
1215         drain_local_pages(NULL);
1216         nr_pages = count_data_pages();
1217         nr_highmem = count_highmem_pages();
1218         printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1219
1220         if (!enough_free_mem(nr_pages, nr_highmem)) {
1221                 printk(KERN_ERR "PM: Not enough free memory\n");
1222                 return -ENOMEM;
1223         }
1224
1225         if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1226                 printk(KERN_ERR "PM: Memory allocation failed\n");
1227                 return -ENOMEM;
1228         }
1229
1230         /* During allocating of suspend pagedir, new cold pages may appear.
1231          * Kill them.
1232          */
1233         drain_local_pages(NULL);
1234         copy_data_pages(&copy_bm, &orig_bm);
1235
1236         /*
1237          * End of critical section. From now on, we can write to memory,
1238          * but we should not touch disk. This specially means we must _not_
1239          * touch swap space! Except we must write out our image of course.
1240          */
1241
1242         nr_pages += nr_highmem;
1243         nr_copy_pages = nr_pages;
1244         nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1245
1246         printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1247                 nr_pages);
1248
1249         return 0;
1250 }
1251
1252 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
1253 static int init_header_complete(struct swsusp_info *info)
1254 {
1255         memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1256         info->version_code = LINUX_VERSION_CODE;
1257         return 0;
1258 }
1259
1260 static char *check_image_kernel(struct swsusp_info *info)
1261 {
1262         if (info->version_code != LINUX_VERSION_CODE)
1263                 return "kernel version";
1264         if (strcmp(info->uts.sysname,init_utsname()->sysname))
1265                 return "system type";
1266         if (strcmp(info->uts.release,init_utsname()->release))
1267                 return "kernel release";
1268         if (strcmp(info->uts.version,init_utsname()->version))
1269                 return "version";
1270         if (strcmp(info->uts.machine,init_utsname()->machine))
1271                 return "machine";
1272         return NULL;
1273 }
1274 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1275
1276 unsigned long snapshot_get_image_size(void)
1277 {
1278         return nr_copy_pages + nr_meta_pages + 1;
1279 }
1280
1281 static int init_header(struct swsusp_info *info)
1282 {
1283         memset(info, 0, sizeof(struct swsusp_info));
1284         info->num_physpages = num_physpages;
1285         info->image_pages = nr_copy_pages;
1286         info->pages = snapshot_get_image_size();
1287         info->size = info->pages;
1288         info->size <<= PAGE_SHIFT;
1289         return init_header_complete(info);
1290 }
1291
1292 /**
1293  *      pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1294  *      are stored in the array @buf[] (1 page at a time)
1295  */
1296
1297 static inline void
1298 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1299 {
1300         int j;
1301
1302         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1303                 buf[j] = memory_bm_next_pfn(bm);
1304                 if (unlikely(buf[j] == BM_END_OF_MAP))
1305                         break;
1306         }
1307 }
1308
1309 /**
1310  *      snapshot_read_next - used for reading the system memory snapshot.
1311  *
1312  *      On the first call to it @handle should point to a zeroed
1313  *      snapshot_handle structure.  The structure gets updated and a pointer
1314  *      to it should be passed to this function every next time.
1315  *
1316  *      The @count parameter should contain the number of bytes the caller
1317  *      wants to read from the snapshot.  It must not be zero.
1318  *
1319  *      On success the function returns a positive number.  Then, the caller
1320  *      is allowed to read up to the returned number of bytes from the memory
1321  *      location computed by the data_of() macro.  The number returned
1322  *      may be smaller than @count, but this only happens if the read would
1323  *      cross a page boundary otherwise.
1324  *
1325  *      The function returns 0 to indicate the end of data stream condition,
1326  *      and a negative number is returned on error.  In such cases the
1327  *      structure pointed to by @handle is not updated and should not be used
1328  *      any more.
1329  */
1330
1331 int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1332 {
1333         if (handle->cur > nr_meta_pages + nr_copy_pages)
1334                 return 0;
1335
1336         if (!buffer) {
1337                 /* This makes the buffer be freed by swsusp_free() */
1338                 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1339                 if (!buffer)
1340                         return -ENOMEM;
1341         }
1342         if (!handle->offset) {
1343                 int error;
1344
1345                 error = init_header((struct swsusp_info *)buffer);
1346                 if (error)
1347                         return error;
1348                 handle->buffer = buffer;
1349                 memory_bm_position_reset(&orig_bm);
1350                 memory_bm_position_reset(&copy_bm);
1351         }
1352         if (handle->prev < handle->cur) {
1353                 if (handle->cur <= nr_meta_pages) {
1354                         memset(buffer, 0, PAGE_SIZE);
1355                         pack_pfns(buffer, &orig_bm);
1356                 } else {
1357                         struct page *page;
1358
1359                         page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1360                         if (PageHighMem(page)) {
1361                                 /* Highmem pages are copied to the buffer,
1362                                  * because we can't return with a kmapped
1363                                  * highmem page (we may not be called again).
1364                                  */
1365                                 void *kaddr;
1366
1367                                 kaddr = kmap_atomic(page, KM_USER0);
1368                                 memcpy(buffer, kaddr, PAGE_SIZE);
1369                                 kunmap_atomic(kaddr, KM_USER0);
1370                                 handle->buffer = buffer;
1371                         } else {
1372                                 handle->buffer = page_address(page);
1373                         }
1374                 }
1375                 handle->prev = handle->cur;
1376         }
1377         handle->buf_offset = handle->cur_offset;
1378         if (handle->cur_offset + count >= PAGE_SIZE) {
1379                 count = PAGE_SIZE - handle->cur_offset;
1380                 handle->cur_offset = 0;
1381                 handle->cur++;
1382         } else {
1383                 handle->cur_offset += count;
1384         }
1385         handle->offset += count;
1386         return count;
1387 }
1388
1389 /**
1390  *      mark_unsafe_pages - mark the pages that cannot be used for storing
1391  *      the image during resume, because they conflict with the pages that
1392  *      had been used before suspend
1393  */
1394
1395 static int mark_unsafe_pages(struct memory_bitmap *bm)
1396 {
1397         struct zone *zone;
1398         unsigned long pfn, max_zone_pfn;
1399
1400         /* Clear page flags */
1401         for_each_zone(zone) {
1402                 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1403                 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1404                         if (pfn_valid(pfn))
1405                                 swsusp_unset_page_free(pfn_to_page(pfn));
1406         }
1407
1408         /* Mark pages that correspond to the "original" pfns as "unsafe" */
1409         memory_bm_position_reset(bm);
1410         do {
1411                 pfn = memory_bm_next_pfn(bm);
1412                 if (likely(pfn != BM_END_OF_MAP)) {
1413                         if (likely(pfn_valid(pfn)))
1414                                 swsusp_set_page_free(pfn_to_page(pfn));
1415                         else
1416                                 return -EFAULT;
1417                 }
1418         } while (pfn != BM_END_OF_MAP);
1419
1420         allocated_unsafe_pages = 0;
1421
1422         return 0;
1423 }
1424
1425 static void
1426 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1427 {
1428         unsigned long pfn;
1429
1430         memory_bm_position_reset(src);
1431         pfn = memory_bm_next_pfn(src);
1432         while (pfn != BM_END_OF_MAP) {
1433                 memory_bm_set_bit(dst, pfn);
1434                 pfn = memory_bm_next_pfn(src);
1435         }
1436 }
1437
1438 static int check_header(struct swsusp_info *info)
1439 {
1440         char *reason;
1441
1442         reason = check_image_kernel(info);
1443         if (!reason && info->num_physpages != num_physpages)
1444                 reason = "memory size";
1445         if (reason) {
1446                 printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1447                 return -EPERM;
1448         }
1449         return 0;
1450 }
1451
1452 /**
1453  *      load header - check the image header and copy data from it
1454  */
1455
1456 static int
1457 load_header(struct swsusp_info *info)
1458 {
1459         int error;
1460
1461         restore_pblist = NULL;
1462         error = check_header(info);
1463         if (!error) {
1464                 nr_copy_pages = info->image_pages;
1465                 nr_meta_pages = info->pages - info->image_pages - 1;
1466         }
1467         return error;
1468 }
1469
1470 /**
1471  *      unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1472  *      the corresponding bit in the memory bitmap @bm
1473  */
1474 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1475 {
1476         int j;
1477
1478         for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1479                 if (unlikely(buf[j] == BM_END_OF_MAP))
1480                         break;
1481
1482                 if (memory_bm_pfn_present(bm, buf[j]))
1483                         memory_bm_set_bit(bm, buf[j]);
1484                 else
1485                         return -EFAULT;
1486         }
1487
1488         return 0;
1489 }
1490
1491 /* List of "safe" pages that may be used to store data loaded from the suspend
1492  * image
1493  */
1494 static struct linked_page *safe_pages_list;
1495
1496 #ifdef CONFIG_HIGHMEM
/* struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the page
 * frames they have occupied before the suspend are in use.
 */
struct highmem_pbe {
	/* data is here now */
	struct page *copy_page;
	/* data was here before the suspend */
	struct page *orig_page;
	/* next entry of the singly linked list */
	struct highmem_pbe *next;
};
1506
1507 /* List of highmem PBEs needed for restoring the highmem pages that were
1508  * allocated before the suspend and included in the suspend image, but have
1509  * also been allocated by the "resume" kernel, so their contents cannot be
1510  * written directly to their "original" page frames.
1511  */
1512 static struct highmem_pbe *highmem_pblist;
1513
1514 /**
1515  *      count_highmem_image_pages - compute the number of highmem pages in the
1516  *      suspend image.  The bits in the memory bitmap @bm that correspond to the
1517  *      image pages are assumed to be set.
1518  */
1519
1520 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1521 {
1522         unsigned long pfn;
1523         unsigned int cnt = 0;
1524
1525         memory_bm_position_reset(bm);
1526         pfn = memory_bm_next_pfn(bm);
1527         while (pfn != BM_END_OF_MAP) {
1528                 if (PageHighMem(pfn_to_page(pfn)))
1529                         cnt++;
1530
1531                 pfn = memory_bm_next_pfn(bm);
1532         }
1533         return cnt;
1534 }
1535
1536 /**
1537  *      prepare_highmem_image - try to allocate as many highmem pages as
1538  *      there are highmem image pages (@nr_highmem_p points to the variable
1539  *      containing the number of highmem image pages).  The pages that are
1540  *      "safe" (ie. will not be overwritten when the suspend image is
1541  *      restored) have the corresponding bits set in @bm (it must be
1542  *      unitialized).
1543  *
1544  *      NOTE: This function should not be called if there are no highmem
1545  *      image pages.
1546  */
1547
1548 static unsigned int safe_highmem_pages;
1549
1550 static struct memory_bitmap *safe_highmem_bm;
1551
1552 static int
1553 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1554 {
1555         unsigned int to_alloc;
1556
1557         if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1558                 return -ENOMEM;
1559
1560         if (get_highmem_buffer(PG_SAFE))
1561                 return -ENOMEM;
1562
1563         to_alloc = count_free_highmem_pages();
1564         if (to_alloc > *nr_highmem_p)
1565                 to_alloc = *nr_highmem_p;
1566         else
1567                 *nr_highmem_p = to_alloc;
1568
1569         safe_highmem_pages = 0;
1570         while (to_alloc-- > 0) {
1571                 struct page *page;
1572
1573                 page = alloc_page(__GFP_HIGHMEM);
1574                 if (!swsusp_page_is_free(page)) {
1575                         /* The page is "safe", set its bit the bitmap */
1576                         memory_bm_set_bit(bm, page_to_pfn(page));
1577                         safe_highmem_pages++;
1578                 }
1579                 /* Mark the page as allocated */
1580                 swsusp_set_page_forbidden(page);
1581                 swsusp_set_page_free(page);
1582         }
1583         memory_bm_position_reset(bm);
1584         safe_highmem_bm = bm;
1585         return 0;
1586 }
1587
1588 /**
1589  *      get_highmem_page_buffer - for given highmem image page find the buffer
1590  *      that suspend_write_next() should set for its caller to write to.
1591  *
1592  *      If the page is to be saved to its "original" page frame or a copy of
1593  *      the page is to be made in the highmem, @buffer is returned.  Otherwise,
1594  *      the copy of the page is to be made in normal memory, so the address of
1595  *      the copy is returned.
1596  *
1597  *      If @buffer is returned, the caller of suspend_write_next() will write
1598  *      the page's contents to @buffer, so they will have to be copied to the
1599  *      right location on the next call to suspend_write_next() and it is done
1600  *      with the help of copy_last_highmem_page().  For this purpose, if
1601  *      @buffer is returned, @last_highmem page is set to the page to which
1602  *      the data will have to be copied from @buffer.
1603  */
1604
1605 static struct page *last_highmem_page;
1606
1607 static void *
1608 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1609 {
1610         struct highmem_pbe *pbe;
1611         void *kaddr;
1612
1613         if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1614                 /* We have allocated the "original" page frame and we can
1615                  * use it directly to store the loaded page.
1616                  */
1617                 last_highmem_page = page;
1618                 return buffer;
1619         }
1620         /* The "original" page frame has not been allocated and we have to
1621          * use a "safe" page frame to store the loaded page.
1622          */
1623         pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1624         if (!pbe) {
1625                 swsusp_free();
1626                 return ERR_PTR(-ENOMEM);
1627         }
1628         pbe->orig_page = page;
1629         if (safe_highmem_pages > 0) {
1630                 struct page *tmp;
1631
1632                 /* Copy of the page will be stored in high memory */
1633                 kaddr = buffer;
1634                 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1635                 safe_highmem_pages--;
1636                 last_highmem_page = tmp;
1637                 pbe->copy_page = tmp;
1638         } else {
1639                 /* Copy of the page will be stored in normal memory */
1640                 kaddr = safe_pages_list;
1641                 safe_pages_list = safe_pages_list->next;
1642                 pbe->copy_page = virt_to_page(kaddr);
1643         }
1644         pbe->next = highmem_pblist;
1645         highmem_pblist = pbe;
1646         return kaddr;
1647 }
1648
1649 /**
1650  *      copy_last_highmem_page - copy the contents of a highmem image from
1651  *      @buffer, where the caller of snapshot_write_next() has place them,
1652  *      to the right location represented by @last_highmem_page .
1653  */
1654
1655 static void copy_last_highmem_page(void)
1656 {
1657         if (last_highmem_page) {
1658                 void *dst;
1659
1660                 dst = kmap_atomic(last_highmem_page, KM_USER0);
1661                 memcpy(dst, buffer, PAGE_SIZE);
1662                 kunmap_atomic(dst, KM_USER0);
1663                 last_highmem_page = NULL;
1664         }
1665 }
1666
1667 static inline int last_highmem_page_copied(void)
1668 {
1669         return !last_highmem_page;
1670 }
1671
1672 static inline void free_highmem_data(void)
1673 {
1674         if (safe_highmem_bm)
1675                 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1676
1677         if (buffer)
1678                 free_image_page(buffer, PG_UNSAFE_CLEAR);
1679 }
1680 #else
1681 static inline int get_safe_write_buffer(void) { return 0; }
1682
1683 static unsigned int
1684 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1685
1686 static inline int
1687 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1688 {
1689         return 0;
1690 }
1691
1692 static inline void *
1693 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1694 {
1695         return ERR_PTR(-EINVAL);
1696 }
1697
1698 static inline void copy_last_highmem_page(void) {}
1699 static inline int last_highmem_page_copied(void) { return 1; }
1700 static inline void free_highmem_data(void) {}
1701 #endif /* CONFIG_HIGHMEM */
1702
1703 /**
1704  *      prepare_image - use the memory bitmap @bm to mark the pages that will
1705  *      be overwritten in the process of restoring the system memory state
1706  *      from the suspend image ("unsafe" pages) and allocate memory for the
1707  *      image.
1708  *
1709  *      The idea is to allocate a new memory bitmap first and then allocate
1710  *      as many pages as needed for the image data, but not to assign these
1711  *      pages to specific tasks initially.  Instead, we just mark them as
1712  *      allocated and create a lists of "safe" pages that will be used
1713  *      later.  On systems with high memory a list of "safe" highmem pages is
1714  *      also created.
1715  */
1716
1717 #define PBES_PER_LINKED_PAGE    (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
1718
1719 static int
1720 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1721 {
1722         unsigned int nr_pages, nr_highmem;
1723         struct linked_page *sp_list, *lp;
1724         int error;
1725
1726         /* If there is no highmem, the buffer will not be necessary */
1727         free_image_page(buffer, PG_UNSAFE_CLEAR);
1728         buffer = NULL;
1729
1730         nr_highmem = count_highmem_image_pages(bm);
1731         error = mark_unsafe_pages(bm);
1732         if (error)
1733                 goto Free;
1734
1735         error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
1736         if (error)
1737                 goto Free;
1738
1739         duplicate_memory_bitmap(new_bm, bm);
1740         memory_bm_free(bm, PG_UNSAFE_KEEP);
1741         if (nr_highmem > 0) {
1742                 error = prepare_highmem_image(bm, &nr_highmem);
1743                 if (error)
1744                         goto Free;
1745         }
1746         /* Reserve some safe pages for potential later use.
1747          *
1748          * NOTE: This way we make sure there will be enough safe pages for the
1749          * chain_alloc() in get_buffer().  It is a bit wasteful, but
1750          * nr_copy_pages cannot be greater than 50% of the memory anyway.
1751          */
1752         sp_list = NULL;
1753         /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
1754         nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1755         nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1756         while (nr_pages > 0) {
1757                 lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1758                 if (!lp) {
1759                         error = -ENOMEM;
1760                         goto Free;
1761                 }
1762                 lp->next = sp_list;
1763                 sp_list = lp;
1764                 nr_pages--;
1765         }
1766         /* Preallocate memory for the image */
1767         safe_pages_list = NULL;
1768         nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1769         while (nr_pages > 0) {
1770                 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1771                 if (!lp) {
1772                         error = -ENOMEM;
1773                         goto Free;
1774                 }
1775                 if (!swsusp_page_is_free(virt_to_page(lp))) {
1776                         /* The page is "safe", add it to the list */
1777                         lp->next = safe_pages_list;
1778                         safe_pages_list = lp;
1779                 }
1780                 /* Mark the page as allocated */
1781                 swsusp_set_page_forbidden(virt_to_page(lp));
1782                 swsusp_set_page_free(virt_to_page(lp));
1783                 nr_pages--;
1784         }
1785         /* Free the reserved safe pages so that chain_alloc() can use them */
1786         while (sp_list) {
1787                 lp = sp_list->next;
1788                 free_image_page(sp_list, PG_UNSAFE_CLEAR);
1789                 sp_list = lp;
1790         }
1791         return 0;
1792
1793  Free:
1794         swsusp_free();
1795         return error;
1796 }
1797
1798 /**
1799  *      get_buffer - compute the address that snapshot_write_next() should
1800  *      set for its caller to write to.
1801  */
1802
1803 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1804 {
1805         struct pbe *pbe;
1806         struct page *page;
1807         unsigned long pfn = memory_bm_next_pfn(bm);
1808
1809         if (pfn == BM_END_OF_MAP)
1810                 return ERR_PTR(-EFAULT);
1811
1812         page = pfn_to_page(pfn);
1813         if (PageHighMem(page))
1814                 return get_highmem_page_buffer(page, ca);
1815
1816         if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
1817                 /* We have allocated the "original" page frame and we can
1818                  * use it directly to store the loaded page.
1819                  */
1820                 return page_address(page);
1821
1822         /* The "original" page frame has not been allocated and we have to
1823          * use a "safe" page frame to store the loaded page.
1824          */
1825         pbe = chain_alloc(ca, sizeof(struct pbe));
1826         if (!pbe) {
1827                 swsusp_free();
1828                 return ERR_PTR(-ENOMEM);
1829         }
1830         pbe->orig_address = page_address(page);
1831         pbe->address = safe_pages_list;
1832         safe_pages_list = safe_pages_list->next;
1833         pbe->next = restore_pblist;
1834         restore_pblist = pbe;
1835         return pbe->address;
1836 }
1837
1838 /**
1839  *      snapshot_write_next - used for writing the system memory snapshot.
1840  *
1841  *      On the first call to it @handle should point to a zeroed
1842  *      snapshot_handle structure.  The structure gets updated and a pointer
1843  *      to it should be passed to this function every next time.
1844  *
1845  *      The @count parameter should contain the number of bytes the caller
1846  *      wants to write to the image.  It must not be zero.
1847  *
1848  *      On success the function returns a positive number.  Then, the caller
1849  *      is allowed to write up to the returned number of bytes to the memory
1850  *      location computed by the data_of() macro.  The number returned
1851  *      may be smaller than @count, but this only happens if the write would
1852  *      cross a page boundary otherwise.
1853  *
1854  *      The function returns 0 to indicate the "end of file" condition,
1855  *      and a negative number is returned on error.  In such cases the
1856  *      structure pointed to by @handle is not updated and should not be used
1857  *      any more.
1858  */
1859
1860 int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1861 {
1862         static struct chain_allocator ca;
1863         int error = 0;
1864
1865         /* Check if we have already loaded the entire image */
1866         if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1867                 return 0;
1868
1869         if (handle->offset == 0) {
1870                 if (!buffer)
1871                         /* This makes the buffer be freed by swsusp_free() */
1872                         buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1873
1874                 if (!buffer)
1875                         return -ENOMEM;
1876
1877                 handle->buffer = buffer;
1878         }
1879         handle->sync_read = 1;
1880         if (handle->prev < handle->cur) {
1881                 if (handle->prev == 0) {
1882                         error = load_header(buffer);
1883                         if (error)
1884                                 return error;
1885
1886                         error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1887                         if (error)
1888                                 return error;
1889
1890                 } else if (handle->prev <= nr_meta_pages) {
1891                         error = unpack_orig_pfns(buffer, &copy_bm);
1892                         if (error)
1893                                 return error;
1894
1895                         if (handle->prev == nr_meta_pages) {
1896                                 error = prepare_image(&orig_bm, &copy_bm);
1897                                 if (error)
1898                                         return error;
1899
1900                                 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
1901                                 memory_bm_position_reset(&orig_bm);
1902                                 restore_pblist = NULL;
1903                                 handle->buffer = get_buffer(&orig_bm, &ca);
1904                                 handle->sync_read = 0;
1905                                 if (IS_ERR(handle->buffer))
1906                                         return PTR_ERR(handle->buffer);
1907                         }
1908                 } else {
1909                         copy_last_highmem_page();
1910                         handle->buffer = get_buffer(&orig_bm, &ca);
1911                         if (IS_ERR(handle->buffer))
1912                                 return PTR_ERR(handle->buffer);
1913                         if (handle->buffer != buffer)
1914                                 handle->sync_read = 0;
1915                 }
1916                 handle->prev = handle->cur;
1917         }
1918         handle->buf_offset = handle->cur_offset;
1919         if (handle->cur_offset + count >= PAGE_SIZE) {
1920                 count = PAGE_SIZE - handle->cur_offset;
1921                 handle->cur_offset = 0;
1922                 handle->cur++;
1923         } else {
1924                 handle->cur_offset += count;
1925         }
1926         handle->offset += count;
1927         return count;
1928 }
1929
1930 /**
1931  *      snapshot_write_finalize - must be called after the last call to
1932  *      snapshot_write_next() in case the last page in the image happens
1933  *      to be a highmem page and its contents should be stored in the
1934  *      highmem.  Additionally, it releases the memory that will not be
1935  *      used any more.
1936  */
1937
1938 void snapshot_write_finalize(struct snapshot_handle *handle)
1939 {
1940         copy_last_highmem_page();
1941         /* Free only if we have loaded the image entirely */
1942         if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1943                 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1944                 free_highmem_data();
1945         }
1946 }
1947
1948 int snapshot_image_loaded(struct snapshot_handle *handle)
1949 {
1950         return !(!nr_copy_pages || !last_highmem_page_copied() ||
1951                         handle->cur <= nr_meta_pages + nr_copy_pages);
1952 }
1953
1954 #ifdef CONFIG_HIGHMEM
1955 /* Assumes that @buf is ready and points to a "safe" page */
1956 static inline void
1957 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1958 {
1959         void *kaddr1, *kaddr2;
1960
1961         kaddr1 = kmap_atomic(p1, KM_USER0);
1962         kaddr2 = kmap_atomic(p2, KM_USER1);
1963         memcpy(buf, kaddr1, PAGE_SIZE);
1964         memcpy(kaddr1, kaddr2, PAGE_SIZE);
1965         memcpy(kaddr2, buf, PAGE_SIZE);
1966         kunmap_atomic(kaddr1, KM_USER0);
1967         kunmap_atomic(kaddr2, KM_USER1);
1968 }
1969
1970 /**
1971  *      restore_highmem - for each highmem page that was allocated before
1972  *      the suspend and included in the suspend image, and also has been
1973  *      allocated by the "resume" kernel swap its current (ie. "before
1974  *      resume") contents with the previous (ie. "before suspend") one.
1975  *
1976  *      If the resume eventually fails, we can call this function once
1977  *      again and restore the "before resume" highmem state.
1978  */
1979
1980 int restore_highmem(void)
1981 {
1982         struct highmem_pbe *pbe = highmem_pblist;
1983         void *buf;
1984
1985         if (!pbe)
1986                 return 0;
1987
1988         buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1989         if (!buf)
1990                 return -ENOMEM;
1991
1992         while (pbe) {
1993                 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1994                 pbe = pbe->next;
1995         }
1996         free_image_page(buf, PG_UNSAFE_CLEAR);
1997         return 0;
1998 }
1999 #endif /* CONFIG_HIGHMEM */