Btrfs: update space balancing code
[linux-2.6] / kernel / power / swap.c
1 /*
2  * linux/kernel/power/swap.c
3  *
4  * This file provides functions for reading the suspend image from
5  * and writing it to a swap partition.
6  *
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  *
10  * This file is released under the GPLv2.
11  *
12  */
13
14 #include <linux/module.h>
15 #include <linux/file.h>
16 #include <linux/utsname.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/genhd.h>
20 #include <linux/device.h>
21 #include <linux/buffer_head.h>
22 #include <linux/bio.h>
23 #include <linux/blkdev.h>
24 #include <linux/swap.h>
25 #include <linux/swapops.h>
26 #include <linux/pm.h>
27
28 #include "power.h"
29
30 #define SWSUSP_SIG      "S1SUSPEND"
31
32 struct swsusp_header {
33         char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
34         sector_t image;
35         unsigned int flags;     /* Flags to pass to the "boot" kernel */
36         char    orig_sig[10];
37         char    sig[10];
38 } __attribute__((packed));
39
40 static struct swsusp_header *swsusp_header;
41
/*
 * General things
 */

/* Swap type returned by swap_type_of(); set in swsusp_swap_check(). */
static unsigned short root_swap = 0xffff;

/* Block device that all image reads and writes in this file go to. */
static struct block_device *resume_bdev;
48
49 /**
50  *      submit - submit BIO request.
51  *      @rw:    READ or WRITE.
52  *      @off    physical offset of page.
53  *      @page:  page we're reading or writing.
54  *      @bio_chain: list of pending biod (for async reading)
55  *
56  *      Straight from the textbook - allocate and initialize the bio.
57  *      If we're reading, make sure the page is marked as dirty.
58  *      Then submit it and, if @bio_chain == NULL, wait.
59  */
60 static int submit(int rw, pgoff_t page_off, struct page *page,
61                         struct bio **bio_chain)
62 {
63         struct bio *bio;
64
65         bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
66         if (!bio)
67                 return -ENOMEM;
68         bio->bi_sector = page_off * (PAGE_SIZE >> 9);
69         bio->bi_bdev = resume_bdev;
70         bio->bi_end_io = end_swap_bio_read;
71
72         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
73                 printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
74                         page_off);
75                 bio_put(bio);
76                 return -EFAULT;
77         }
78
79         lock_page(page);
80         bio_get(bio);
81
82         if (bio_chain == NULL) {
83                 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
84                 wait_on_page_locked(page);
85                 if (rw == READ)
86                         bio_set_pages_dirty(bio);
87                 bio_put(bio);
88         } else {
89                 if (rw == READ)
90                         get_page(page); /* These pages are freed later */
91                 bio->bi_private = *bio_chain;
92                 *bio_chain = bio;
93                 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
94         }
95         return 0;
96 }
97
98 static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
99 {
100         return submit(READ, page_off, virt_to_page(addr), bio_chain);
101 }
102
103 static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
104 {
105         return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
106 }
107
108 static int wait_on_bio_chain(struct bio **bio_chain)
109 {
110         struct bio *bio;
111         struct bio *next_bio;
112         int ret = 0;
113
114         if (bio_chain == NULL)
115                 return 0;
116
117         bio = *bio_chain;
118         if (bio == NULL)
119                 return 0;
120         while (bio) {
121                 struct page *page;
122
123                 next_bio = bio->bi_private;
124                 page = bio->bi_io_vec[0].bv_page;
125                 wait_on_page_locked(page);
126                 if (!PageUptodate(page) || PageError(page))
127                         ret = -EIO;
128                 put_page(page);
129                 bio_put(bio);
130                 bio = next_bio;
131         }
132         *bio_chain = NULL;
133         return ret;
134 }
135
136 /*
137  * Saving part
138  */
139
140 static int mark_swapfiles(sector_t start, unsigned int flags)
141 {
142         int error;
143
144         bio_read_page(swsusp_resume_block, swsusp_header, NULL);
145         if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
146             !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
147                 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
148                 memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
149                 swsusp_header->image = start;
150                 swsusp_header->flags = flags;
151                 error = bio_write_page(swsusp_resume_block,
152                                         swsusp_header, NULL);
153         } else {
154                 printk(KERN_ERR "PM: Swap header not found!\n");
155                 error = -ENODEV;
156         }
157         return error;
158 }
159
160 /**
161  *      swsusp_swap_check - check if the resume device is a swap device
162  *      and get its index (if so)
163  */
164
165 static int swsusp_swap_check(void) /* This is called before saving image */
166 {
167         int res;
168
169         res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
170                         &resume_bdev);
171         if (res < 0)
172                 return res;
173
174         root_swap = res;
175         res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
176         if (res)
177                 return res;
178
179         res = set_blocksize(resume_bdev, PAGE_SIZE);
180         if (res < 0)
181                 blkdev_put(resume_bdev);
182
183         return res;
184 }
185
186 /**
187  *      write_page - Write one page to given swap location.
188  *      @buf:           Address we're writing.
189  *      @offset:        Offset of the swap page we're writing to.
190  *      @bio_chain:     Link the next write BIO here
191  */
192
193 static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
194 {
195         void *src;
196
197         if (!offset)
198                 return -ENOSPC;
199
200         if (bio_chain) {
201                 src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
202                 if (src) {
203                         memcpy(src, buf, PAGE_SIZE);
204                 } else {
205                         WARN_ON_ONCE(1);
206                         bio_chain = NULL;       /* Go synchronous */
207                         src = buf;
208                 }
209         } else {
210                 src = buf;
211         }
212         return bio_write_page(offset, src, bio_chain);
213 }
214
215 /*
216  *      The swap map is a data structure used for keeping track of each page
217  *      written to a swap partition.  It consists of many swap_map_page
218  *      structures that contain each an array of MAP_PAGE_SIZE swap entries.
219  *      These structures are stored on the swap and linked together with the
220  *      help of the .next_swap member.
221  *
222  *      The swap map is created during suspend.  The swap map pages are
223  *      allocated and populated one at a time, so we only need one memory
224  *      page to set up the entire structure.
225  *
226  *      During resume we also only need to use one swap_map_page structure
227  *      at a time.
228  */
229
230 #define MAP_PAGE_ENTRIES        (PAGE_SIZE / sizeof(sector_t) - 1)
231
232 struct swap_map_page {
233         sector_t entries[MAP_PAGE_ENTRIES];
234         sector_t next_swap;
235 };
236
237 /**
238  *      The swap_map_handle structure is used for handling swap in
239  *      a file-alike way
240  */
241
242 struct swap_map_handle {
243         struct swap_map_page *cur;
244         sector_t cur_swap;
245         unsigned int k;
246 };
247
248 static void release_swap_writer(struct swap_map_handle *handle)
249 {
250         if (handle->cur)
251                 free_page((unsigned long)handle->cur);
252         handle->cur = NULL;
253 }
254
255 static int get_swap_writer(struct swap_map_handle *handle)
256 {
257         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
258         if (!handle->cur)
259                 return -ENOMEM;
260         handle->cur_swap = alloc_swapdev_block(root_swap);
261         if (!handle->cur_swap) {
262                 release_swap_writer(handle);
263                 return -ENOSPC;
264         }
265         handle->k = 0;
266         return 0;
267 }
268
269 static int swap_write_page(struct swap_map_handle *handle, void *buf,
270                                 struct bio **bio_chain)
271 {
272         int error = 0;
273         sector_t offset;
274
275         if (!handle->cur)
276                 return -EINVAL;
277         offset = alloc_swapdev_block(root_swap);
278         error = write_page(buf, offset, bio_chain);
279         if (error)
280                 return error;
281         handle->cur->entries[handle->k++] = offset;
282         if (handle->k >= MAP_PAGE_ENTRIES) {
283                 error = wait_on_bio_chain(bio_chain);
284                 if (error)
285                         goto out;
286                 offset = alloc_swapdev_block(root_swap);
287                 if (!offset)
288                         return -ENOSPC;
289                 handle->cur->next_swap = offset;
290                 error = write_page(handle->cur, handle->cur_swap, NULL);
291                 if (error)
292                         goto out;
293                 memset(handle->cur, 0, PAGE_SIZE);
294                 handle->cur_swap = offset;
295                 handle->k = 0;
296         }
297  out:
298         return error;
299 }
300
301 static int flush_swap_writer(struct swap_map_handle *handle)
302 {
303         if (handle->cur && handle->cur_swap)
304                 return write_page(handle->cur, handle->cur_swap, NULL);
305         else
306                 return -EINVAL;
307 }
308
309 /**
310  *      save_image - save the suspend image data
311  */
312
313 static int save_image(struct swap_map_handle *handle,
314                       struct snapshot_handle *snapshot,
315                       unsigned int nr_to_write)
316 {
317         unsigned int m;
318         int ret;
319         int error = 0;
320         int nr_pages;
321         int err2;
322         struct bio *bio;
323         struct timeval start;
324         struct timeval stop;
325
326         printk(KERN_INFO "PM: Saving image data pages (%u pages) ...     ",
327                 nr_to_write);
328         m = nr_to_write / 100;
329         if (!m)
330                 m = 1;
331         nr_pages = 0;
332         bio = NULL;
333         do_gettimeofday(&start);
334         do {
335                 ret = snapshot_read_next(snapshot, PAGE_SIZE);
336                 if (ret > 0) {
337                         error = swap_write_page(handle, data_of(*snapshot),
338                                                 &bio);
339                         if (error)
340                                 break;
341                         if (!(nr_pages % m))
342                                 printk("\b\b\b\b%3d%%", nr_pages / m);
343                         nr_pages++;
344                 }
345         } while (ret > 0);
346         err2 = wait_on_bio_chain(&bio);
347         do_gettimeofday(&stop);
348         if (!error)
349                 error = err2;
350         if (!error)
351                 printk("\b\b\b\bdone\n");
352         swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
353         return error;
354 }
355
356 /**
357  *      enough_swap - Make sure we have enough swap to save the image.
358  *
359  *      Returns TRUE or FALSE after checking the total amount of swap
360  *      space avaiable from the resume partition.
361  */
362
363 static int enough_swap(unsigned int nr_pages)
364 {
365         unsigned int free_swap = count_swap_pages(root_swap, 1);
366
367         pr_debug("PM: Free swap pages: %u\n", free_swap);
368         return free_swap > nr_pages + PAGES_FOR_IO;
369 }
370
371 /**
372  *      swsusp_write - Write entire image and metadata.
373  *      @flags: flags to pass to the "boot" kernel in the image header
374  *
375  *      It is important _NOT_ to umount filesystems at this point. We want
376  *      them synced (in case something goes wrong) but we DO not want to mark
377  *      filesystem clean: it is not. (And it does not matter, if we resume
378  *      correctly, we'll mark system clean, anyway.)
379  */
380
381 int swsusp_write(unsigned int flags)
382 {
383         struct swap_map_handle handle;
384         struct snapshot_handle snapshot;
385         struct swsusp_info *header;
386         int error;
387
388         error = swsusp_swap_check();
389         if (error) {
390                 printk(KERN_ERR "PM: Cannot find swap device, try "
391                                 "swapon -a.\n");
392                 return error;
393         }
394         memset(&snapshot, 0, sizeof(struct snapshot_handle));
395         error = snapshot_read_next(&snapshot, PAGE_SIZE);
396         if (error < PAGE_SIZE) {
397                 if (error >= 0)
398                         error = -EFAULT;
399
400                 goto out;
401         }
402         header = (struct swsusp_info *)data_of(snapshot);
403         if (!enough_swap(header->pages)) {
404                 printk(KERN_ERR "PM: Not enough free swap\n");
405                 error = -ENOSPC;
406                 goto out;
407         }
408         error = get_swap_writer(&handle);
409         if (!error) {
410                 sector_t start = handle.cur_swap;
411
412                 error = swap_write_page(&handle, header, NULL);
413                 if (!error)
414                         error = save_image(&handle, &snapshot,
415                                         header->pages - 1);
416
417                 if (!error) {
418                         flush_swap_writer(&handle);
419                         printk(KERN_INFO "PM: S");
420                         error = mark_swapfiles(start, flags);
421                         printk("|\n");
422                 }
423         }
424         if (error)
425                 free_all_swap_pages(root_swap);
426
427         release_swap_writer(&handle);
428  out:
429         swsusp_close();
430         return error;
431 }
432
433 /**
434  *      The following functions allow us to read data using a swap map
435  *      in a file-alike way
436  */
437
438 static void release_swap_reader(struct swap_map_handle *handle)
439 {
440         if (handle->cur)
441                 free_page((unsigned long)handle->cur);
442         handle->cur = NULL;
443 }
444
445 static int get_swap_reader(struct swap_map_handle *handle, sector_t start)
446 {
447         int error;
448
449         if (!start)
450                 return -EINVAL;
451
452         handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
453         if (!handle->cur)
454                 return -ENOMEM;
455
456         error = bio_read_page(start, handle->cur, NULL);
457         if (error) {
458                 release_swap_reader(handle);
459                 return error;
460         }
461         handle->k = 0;
462         return 0;
463 }
464
465 static int swap_read_page(struct swap_map_handle *handle, void *buf,
466                                 struct bio **bio_chain)
467 {
468         sector_t offset;
469         int error;
470
471         if (!handle->cur)
472                 return -EINVAL;
473         offset = handle->cur->entries[handle->k];
474         if (!offset)
475                 return -EFAULT;
476         error = bio_read_page(offset, buf, bio_chain);
477         if (error)
478                 return error;
479         if (++handle->k >= MAP_PAGE_ENTRIES) {
480                 error = wait_on_bio_chain(bio_chain);
481                 handle->k = 0;
482                 offset = handle->cur->next_swap;
483                 if (!offset)
484                         release_swap_reader(handle);
485                 else if (!error)
486                         error = bio_read_page(offset, handle->cur, NULL);
487         }
488         return error;
489 }
490
491 /**
492  *      load_image - load the image using the swap map handle
493  *      @handle and the snapshot handle @snapshot
494  *      (assume there are @nr_pages pages to load)
495  */
496
497 static int load_image(struct swap_map_handle *handle,
498                       struct snapshot_handle *snapshot,
499                       unsigned int nr_to_read)
500 {
501         unsigned int m;
502         int error = 0;
503         struct timeval start;
504         struct timeval stop;
505         struct bio *bio;
506         int err2;
507         unsigned nr_pages;
508
509         printk(KERN_INFO "PM: Loading image data pages (%u pages) ...     ",
510                 nr_to_read);
511         m = nr_to_read / 100;
512         if (!m)
513                 m = 1;
514         nr_pages = 0;
515         bio = NULL;
516         do_gettimeofday(&start);
517         for ( ; ; ) {
518                 error = snapshot_write_next(snapshot, PAGE_SIZE);
519                 if (error <= 0)
520                         break;
521                 error = swap_read_page(handle, data_of(*snapshot), &bio);
522                 if (error)
523                         break;
524                 if (snapshot->sync_read)
525                         error = wait_on_bio_chain(&bio);
526                 if (error)
527                         break;
528                 if (!(nr_pages % m))
529                         printk("\b\b\b\b%3d%%", nr_pages / m);
530                 nr_pages++;
531         }
532         err2 = wait_on_bio_chain(&bio);
533         do_gettimeofday(&stop);
534         if (!error)
535                 error = err2;
536         if (!error) {
537                 printk("\b\b\b\bdone\n");
538                 snapshot_write_finalize(snapshot);
539                 if (!snapshot_image_loaded(snapshot))
540                         error = -ENODATA;
541         }
542         swsusp_show_speed(&start, &stop, nr_to_read, "Read");
543         return error;
544 }
545
546 /**
547  *      swsusp_read - read the hibernation image.
548  *      @flags_p: flags passed by the "frozen" kernel in the image header should
549  *                be written into this memeory location
550  */
551
552 int swsusp_read(unsigned int *flags_p)
553 {
554         int error;
555         struct swap_map_handle handle;
556         struct snapshot_handle snapshot;
557         struct swsusp_info *header;
558
559         *flags_p = swsusp_header->flags;
560         if (IS_ERR(resume_bdev)) {
561                 pr_debug("PM: Image device not initialised\n");
562                 return PTR_ERR(resume_bdev);
563         }
564
565         memset(&snapshot, 0, sizeof(struct snapshot_handle));
566         error = snapshot_write_next(&snapshot, PAGE_SIZE);
567         if (error < PAGE_SIZE)
568                 return error < 0 ? error : -EFAULT;
569         header = (struct swsusp_info *)data_of(snapshot);
570         error = get_swap_reader(&handle, swsusp_header->image);
571         if (!error)
572                 error = swap_read_page(&handle, header, NULL);
573         if (!error)
574                 error = load_image(&handle, &snapshot, header->pages - 1);
575         release_swap_reader(&handle);
576
577         blkdev_put(resume_bdev);
578
579         if (!error)
580                 pr_debug("PM: Image successfully loaded\n");
581         else
582                 pr_debug("PM: Error %d resuming\n", error);
583         return error;
584 }
585
586 /**
587  *      swsusp_check - Check for swsusp signature in the resume device
588  */
589
590 int swsusp_check(void)
591 {
592         int error;
593
594         resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
595         if (!IS_ERR(resume_bdev)) {
596                 set_blocksize(resume_bdev, PAGE_SIZE);
597                 memset(swsusp_header, 0, PAGE_SIZE);
598                 error = bio_read_page(swsusp_resume_block,
599                                         swsusp_header, NULL);
600                 if (error)
601                         return error;
602
603                 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
604                         memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
605                         /* Reset swap signature now */
606                         error = bio_write_page(swsusp_resume_block,
607                                                 swsusp_header, NULL);
608                 } else {
609                         return -EINVAL;
610                 }
611                 if (error)
612                         blkdev_put(resume_bdev);
613                 else
614                         pr_debug("PM: Signature found, resuming\n");
615         } else {
616                 error = PTR_ERR(resume_bdev);
617         }
618
619         if (error)
620                 pr_debug("PM: Error %d checking image file\n", error);
621
622         return error;
623 }
624
625 /**
626  *      swsusp_close - close swap device.
627  */
628
629 void swsusp_close(void)
630 {
631         if (IS_ERR(resume_bdev)) {
632                 pr_debug("PM: Image device not initialised\n");
633                 return;
634         }
635
636         blkdev_put(resume_bdev);
637 }
638
639 static int swsusp_header_init(void)
640 {
641         swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
642         if (!swsusp_header)
643                 panic("Could not allocate memory for swsusp_header\n");
644         return 0;
645 }
646
647 core_initcall(swsusp_header_init);