Merge git://oss.sgi.com:8090/oss/git/rc-fixes-xfs-2.6
[linux-2.6] / kernel / power / swsusp.c
1 /*
2  * linux/kernel/power/swsusp.c
3  *
4  * This file provides code to write suspend image to swap and read it back.
5  *
6  * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  *
9  * This file is released under the GPLv2.
10  *
11  * I'd like to thank the following people for their work:
12  *
13  * Pavel Machek <pavel@ucw.cz>:
14  * Modifications, defectiveness pointing, being with me at the very beginning,
15  * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
16  *
17  * Steve Doddi <dirk@loth.demon.co.uk>:
18  * Support the possibility of hardware state restoring.
19  *
20  * Raph <grey.havens@earthling.net>:
21  * Support for preserving states of network devices and virtual console
22  * (including X and svgatextmode)
23  *
24  * Kurt Garloff <garloff@suse.de>:
25  * Straightened the critical function in order to prevent compilers from
26  * playing tricks with local variables.
27  *
28  * Andreas Mohr <a.mohr@mailto.de>
29  *
30  * Alex Badea <vampire@go.ro>:
31  * Fixed runaway init
32  *
33  * Rafael J. Wysocki <rjw@sisk.pl>
34  * Added the swap map data structure and reworked the handling of swap
35  *
36  * More state savers are welcome. Especially for the scsi layer...
37  *
38  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39  */
40
41 #include <linux/module.h>
42 #include <linux/mm.h>
43 #include <linux/suspend.h>
44 #include <linux/smp_lock.h>
45 #include <linux/file.h>
46 #include <linux/utsname.h>
47 #include <linux/version.h>
48 #include <linux/delay.h>
49 #include <linux/bitops.h>
50 #include <linux/spinlock.h>
51 #include <linux/genhd.h>
52 #include <linux/kernel.h>
53 #include <linux/major.h>
54 #include <linux/swap.h>
55 #include <linux/pm.h>
56 #include <linux/device.h>
57 #include <linux/buffer_head.h>
58 #include <linux/swapops.h>
59 #include <linux/bootmem.h>
60 #include <linux/syscalls.h>
61 #include <linux/highmem.h>
62 #include <linux/bio.h>
63
64 #include <asm/uaccess.h>
65 #include <asm/mmu_context.h>
66 #include <asm/pgtable.h>
67 #include <asm/tlbflush.h>
68 #include <asm/io.h>
69
70 #include "power.h"
71
/*
 * Preferred size of the suspend image, in bytes (tunable via
 * /sys/power/image_size).  swsusp_shrink_memory() keeps freeing memory
 * while its size estimate is above image_size / PAGE_SIZE pages; if the
 * target cannot be reached, the smallest image possible is created.
 */
unsigned long image_size = 500UL * 1024 * 1024;
79
/*
 * Highmem helpers.  Without CONFIG_HIGHMEM there is nothing to count,
 * save or restore, so trivial stubs returning 0 are used.  With it, the
 * functions are only declared here and must be defined elsewhere.
 */
#ifndef CONFIG_HIGHMEM
static unsigned int count_highmem_pages(void) { return 0; }
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
#else
unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#endif
89
90 extern char resume_file[];
91
92 #define SWSUSP_SIG      "S1SUSPEND"
93
94 static struct swsusp_header {
95         char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
96         swp_entry_t image;
97         char    orig_sig[10];
98         char    sig[10];
99 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
100
101 static struct swsusp_info swsusp_info;
102
/*
 * Saving part...
 */

/*
 * Index into swap_info[] of the swap area that receives the image.
 * Holds 0xffff until swsusp_swap_check() finds the resume device.
 */
static unsigned short root_swap = 0xffff;
108
109 static int mark_swapfiles(swp_entry_t start)
110 {
111         int error;
112
113         rw_swap_page_sync(READ,
114                           swp_entry(root_swap, 0),
115                           virt_to_page((unsigned long)&swsusp_header));
116         if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
117             !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
118                 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
119                 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
120                 swsusp_header.image = start;
121                 error = rw_swap_page_sync(WRITE,
122                                           swp_entry(root_swap, 0),
123                                           virt_to_page((unsigned long)
124                                                        &swsusp_header));
125         } else {
126                 pr_debug("swsusp: Partition is not swap space.\n");
127                 error = -ENODEV;
128         }
129         return error;
130 }
131
132 /*
133  * Check whether the swap device is the specified resume
134  * device, irrespective of whether they are specified by
135  * identical names.
136  *
137  * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
138  * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
139  * and they'll be considered the same device.  This is *necessary* for
140  * devfs, since the resume code can only recognize the form /dev/hda4,
141  * but the suspend code would see the long name.)
142  */
143 static inline int is_resume_device(const struct swap_info_struct *swap_info)
144 {
145         struct file *file = swap_info->swap_file;
146         struct inode *inode = file->f_dentry->d_inode;
147
148         return S_ISBLK(inode->i_mode) &&
149                 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
150 }
151
152 static int swsusp_swap_check(void) /* This is called before saving image */
153 {
154         int i;
155
156         if (!swsusp_resume_device)
157                 return -ENODEV;
158         spin_lock(&swap_lock);
159         for (i = 0; i < MAX_SWAPFILES; i++) {
160                 if (!(swap_info[i].flags & SWP_WRITEOK))
161                         continue;
162                 if (is_resume_device(swap_info + i)) {
163                         spin_unlock(&swap_lock);
164                         root_swap = i;
165                         return 0;
166                 }
167         }
168         spin_unlock(&swap_lock);
169         return -ENODEV;
170 }
171
172 /**
173  *      write_page - Write one page to a fresh swap location.
174  *      @addr:  Address we're writing.
175  *      @loc:   Place to store the entry we used.
176  *
177  *      Allocate a new swap entry and 'sync' it. Note we discard -EIO
178  *      errors. That is an artifact left over from swsusp. It did not
179  *      check the return of rw_swap_page_sync() at all, since most pages
180  *      written back to swap would return -EIO.
181  *      This is a partial improvement, since we will at least return other
182  *      errors, though we need to eventually fix the damn code.
183  */
184 static int write_page(unsigned long addr, swp_entry_t *loc)
185 {
186         swp_entry_t entry;
187         int error = -ENOSPC;
188
189         entry = get_swap_page_of_type(root_swap);
190         if (swp_offset(entry)) {
191                 error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
192                 if (!error || error == -EIO)
193                         *loc = entry;
194         }
195         return error;
196 }
197
198 /**
199  *      Swap map-handling functions
200  *
201  *      The swap map is a data structure used for keeping track of each page
202  *      written to the swap.  It consists of many swap_map_page structures
203  *      that contain each an array of MAP_PAGE_SIZE swap entries.
204  *      These structures are linked together with the help of either the
205  *      .next (in memory) or the .next_swap (in swap) member.
206  *
207  *      The swap map is created during suspend.  At that time we need to keep
208  *      it in memory, because we have to free all of the allocated swap
209  *      entries if an error occurs.  The memory needed is preallocated
210  *      so that we know in advance if there's enough of it.
211  *
212  *      The first swap_map_page structure is filled with the swap entries that
213  *      correspond to the first MAP_PAGE_SIZE data pages written to swap and
214  *      so on.  After the all of the data pages have been written, the order
215  *      of the swap_map_page structures in the map is reversed so that they
216  *      can be read from swap in the original order.  This causes the data
217  *      pages to be loaded in exactly the same order in which they have been
218  *      saved.
219  *
220  *      During resume we only need to use one swap_map_page structure
221  *      at a time, which means that we only need to use two memory pages for
222  *      reading the image - one for reading the swap_map_page structures
223  *      and the second for reading the data pages from swap.
224  */
225
226 #define MAP_PAGE_SIZE   ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
227                         / sizeof(swp_entry_t))
228
229 struct swap_map_page {
230         swp_entry_t             entries[MAP_PAGE_SIZE];
231         swp_entry_t             next_swap;
232         struct swap_map_page    *next;
233 };
234
235 static inline void free_swap_map(struct swap_map_page *swap_map)
236 {
237         struct swap_map_page *swp;
238
239         while (swap_map) {
240                 swp = swap_map->next;
241                 free_page((unsigned long)swap_map);
242                 swap_map = swp;
243         }
244 }
245
246 static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
247 {
248         struct swap_map_page *swap_map, *swp;
249         unsigned n = 0;
250
251         if (!nr_pages)
252                 return NULL;
253
254         pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
255         swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
256         swp = swap_map;
257         for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
258                 swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
259                 swp = swp->next;
260                 if (!swp) {
261                         free_swap_map(swap_map);
262                         return NULL;
263                 }
264         }
265         return swap_map;
266 }
267
268 /**
269  *      reverse_swap_map - reverse the order of pages in the swap map
270  *      @swap_map
271  */
272
273 static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
274 {
275         struct swap_map_page *prev, *next;
276
277         prev = NULL;
278         while (swap_map) {
279                 next = swap_map->next;
280                 swap_map->next = prev;
281                 prev = swap_map;
282                 swap_map = next;
283         }
284         return prev;
285 }
286
287 /**
288  *      free_swap_map_entries - free the swap entries allocated to store
289  *      the swap map @swap_map (this is only called in case of an error)
290  */
291 static inline void free_swap_map_entries(struct swap_map_page *swap_map)
292 {
293         while (swap_map) {
294                 if (swap_map->next_swap.val)
295                         swap_free(swap_map->next_swap);
296                 swap_map = swap_map->next;
297         }
298 }
299
300 /**
301  *      save_swap_map - save the swap map used for tracing the data pages
302  *      stored in the swap
303  */
304
305 static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
306 {
307         swp_entry_t entry = (swp_entry_t){0};
308         int error;
309
310         while (swap_map) {
311                 swap_map->next_swap = entry;
312                 if ((error = write_page((unsigned long)swap_map, &entry)))
313                         return error;
314                 swap_map = swap_map->next;
315         }
316         *start = entry;
317         return 0;
318 }
319
320 /**
321  *      free_image_entries - free the swap entries allocated to store
322  *      the image data pages (this is only called in case of an error)
323  */
324
325 static inline void free_image_entries(struct swap_map_page *swp)
326 {
327         unsigned k;
328
329         while (swp) {
330                 for (k = 0; k < MAP_PAGE_SIZE; k++)
331                         if (swp->entries[k].val)
332                                 swap_free(swp->entries[k]);
333                 swp = swp->next;
334         }
335 }
336
/**
 *	The swap_map_handle structure is a cursor into the swap map that
 *	lets the map be used in a file-alike way: @cur is the map page
 *	currently in use and @k the index of the next slot in its
 *	.entries array.
 */

struct swap_map_handle {
	struct swap_map_page *cur;
	unsigned int k;
};
346
347 static inline void init_swap_map_handle(struct swap_map_handle *handle,
348                                         struct swap_map_page *map)
349 {
350         handle->cur = map;
351         handle->k = 0;
352 }
353
354 static inline int swap_map_write_page(struct swap_map_handle *handle,
355                                       unsigned long addr)
356 {
357         int error;
358
359         error = write_page(addr, handle->cur->entries + handle->k);
360         if (error)
361                 return error;
362         if (++handle->k >= MAP_PAGE_SIZE) {
363                 handle->cur = handle->cur->next;
364                 handle->k = 0;
365         }
366         return 0;
367 }
368
369 /**
370  *      save_image_data - save the data pages pointed to by the PBEs
371  *      from the list @pblist using the swap map handle @handle
372  *      (assume there are @nr_pages data pages to save)
373  */
374
375 static int save_image_data(struct pbe *pblist,
376                            struct swap_map_handle *handle,
377                            unsigned int nr_pages)
378 {
379         unsigned int m;
380         struct pbe *p;
381         int error = 0;
382
383         printk("Saving image data pages (%u pages) ...     ", nr_pages);
384         m = nr_pages / 100;
385         if (!m)
386                 m = 1;
387         nr_pages = 0;
388         for_each_pbe (p, pblist) {
389                 error = swap_map_write_page(handle, p->address);
390                 if (error)
391                         break;
392                 if (!(nr_pages % m))
393                         printk("\b\b\b\b%3d%%", nr_pages / m);
394                 nr_pages++;
395         }
396         if (!error)
397                 printk("\b\b\b\bdone\n");
398         return error;
399 }
400
401 static void dump_info(void)
402 {
403         pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
404         pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
405         pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
406         pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
407         pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
408         pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
409         pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
410         pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
411         pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
412         pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
413         pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
414 }
415
416 static void init_header(unsigned int nr_pages)
417 {
418         memset(&swsusp_info, 0, sizeof(swsusp_info));
419         swsusp_info.version_code = LINUX_VERSION_CODE;
420         swsusp_info.num_physpages = num_physpages;
421         memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
422
423         swsusp_info.cpus = num_online_cpus();
424         swsusp_info.image_pages = nr_pages;
425         swsusp_info.pages = nr_pages +
426                 ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
427 }
428
429 /**
430  *      pack_orig_addresses - the .orig_address fields of the PBEs from the
431  *      list starting at @pbe are stored in the array @buf[] (1 page)
432  */
433
434 static inline struct pbe *pack_orig_addresses(unsigned long *buf,
435                                               struct pbe *pbe)
436 {
437         int j;
438
439         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
440                 buf[j] = pbe->orig_address;
441                 pbe = pbe->next;
442         }
443         if (!pbe)
444                 for (; j < PAGE_SIZE / sizeof(long); j++)
445                         buf[j] = 0;
446         return pbe;
447 }
448
449 /**
450  *      save_image_metadata - save the .orig_address fields of the PBEs
451  *      from the list @pblist using the swap map handle @handle
452  */
453
454 static int save_image_metadata(struct pbe *pblist,
455                                struct swap_map_handle *handle)
456 {
457         unsigned long *buf;
458         unsigned int n = 0;
459         struct pbe *p;
460         int error = 0;
461
462         printk("Saving image metadata ... ");
463         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
464         if (!buf)
465                 return -ENOMEM;
466         p = pblist;
467         while (p) {
468                 p = pack_orig_addresses(buf, p);
469                 error = swap_map_write_page(handle, (unsigned long)buf);
470                 if (error)
471                         break;
472                 n++;
473         }
474         free_page((unsigned long)buf);
475         if (!error)
476                 printk("done (%u pages saved)\n", n);
477         return error;
478 }
479
480 /**
481  *      enough_swap - Make sure we have enough swap to save the image.
482  *
483  *      Returns TRUE or FALSE after checking the total amount of swap
484  *      space avaiable from the resume partition.
485  */
486
487 static int enough_swap(unsigned int nr_pages)
488 {
489         unsigned int free_swap = swap_info[root_swap].pages -
490                 swap_info[root_swap].inuse_pages;
491
492         pr_debug("swsusp: free swap pages: %u\n", free_swap);
493         return free_swap > (nr_pages + PAGES_FOR_IO +
494                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
495 }
496
497 /**
498  *      swsusp_write - Write entire image and metadata.
499  *
500  *      It is important _NOT_ to umount filesystems at this point. We want
501  *      them synced (in case something goes wrong) but we DO not want to mark
502  *      filesystem clean: it is not. (And it does not matter, if we resume
503  *      correctly, we'll mark system clean, anyway.)
504  */
505
506 int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
507 {
508         struct swap_map_page *swap_map;
509         struct swap_map_handle handle;
510         swp_entry_t start;
511         int error;
512
513         if ((error = swsusp_swap_check())) {
514                 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
515                 return error;
516         }
517         if (!enough_swap(nr_pages)) {
518                 printk(KERN_ERR "swsusp: Not enough free swap\n");
519                 return -ENOSPC;
520         }
521
522         init_header(nr_pages);
523         swap_map = alloc_swap_map(swsusp_info.pages);
524         if (!swap_map)
525                 return -ENOMEM;
526         init_swap_map_handle(&handle, swap_map);
527
528         error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
529         if (!error)
530                 error = save_image_metadata(pblist, &handle);
531         if (!error)
532                 error = save_image_data(pblist, &handle, nr_pages);
533         if (error)
534                 goto Free_image_entries;
535
536         swap_map = reverse_swap_map(swap_map);
537         error = save_swap_map(swap_map, &start);
538         if (error)
539                 goto Free_map_entries;
540
541         dump_info();
542         printk( "S" );
543         error = mark_swapfiles(start);
544         printk( "|\n" );
545         if (error)
546                 goto Free_map_entries;
547
548 Free_swap_map:
549         free_swap_map(swap_map);
550         return error;
551
552 Free_map_entries:
553         free_swap_map_entries(swap_map);
554 Free_image_entries:
555         free_image_entries(swap_map);
556         goto Free_swap_map;
557 }
558
559 /**
560  *      swsusp_shrink_memory -  Try to free as much memory as needed
561  *
562  *      ... but do not OOM-kill anyone
563  *
564  *      Notice: all userland should be stopped before it is called, or
565  *      livelock is possible.
566  */
567
568 #define SHRINK_BITE     10000
569
570 int swsusp_shrink_memory(void)
571 {
572         long size, tmp;
573         struct zone *zone;
574         unsigned long pages = 0;
575         unsigned int i = 0;
576         char *p = "-\\|/";
577
578         printk("Shrinking memory...  ");
579         do {
580                 size = 2 * count_highmem_pages();
581                 size += size / 50 + count_data_pages();
582                 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
583                         PAGES_FOR_IO;
584                 tmp = size;
585                 for_each_zone (zone)
586                         if (!is_highmem(zone))
587                                 tmp -= zone->free_pages;
588                 if (tmp > 0) {
589                         tmp = shrink_all_memory(SHRINK_BITE);
590                         if (!tmp)
591                                 return -ENOMEM;
592                         pages += tmp;
593                 } else if (size > image_size / PAGE_SIZE) {
594                         tmp = shrink_all_memory(SHRINK_BITE);
595                         pages += tmp;
596                 }
597                 printk("\b%c", p[i++%4]);
598         } while (tmp > 0);
599         printk("\bdone (%lu pages freed)\n", pages);
600
601         return 0;
602 }
603
604 int swsusp_suspend(void)
605 {
606         int error;
607
608         if ((error = arch_prepare_suspend()))
609                 return error;
610         local_irq_disable();
611         /* At this point, device_suspend() has been called, but *not*
612          * device_power_down(). We *must* device_power_down() now.
613          * Otherwise, drivers for some devices (e.g. interrupt controllers)
614          * become desynchronized with the actual state of the hardware
615          * at resume time, and evil weirdness ensues.
616          */
617         if ((error = device_power_down(PMSG_FREEZE))) {
618                 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
619                 goto Enable_irqs;
620         }
621
622         if ((error = save_highmem())) {
623                 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
624                 goto Restore_highmem;
625         }
626
627         save_processor_state();
628         if ((error = swsusp_arch_suspend()))
629                 printk(KERN_ERR "Error %d suspending\n", error);
630         /* Restore control flow magically appears here */
631         restore_processor_state();
632 Restore_highmem:
633         restore_highmem();
634         device_power_up();
635 Enable_irqs:
636         local_irq_enable();
637         return error;
638 }
639
640 int swsusp_resume(void)
641 {
642         int error;
643         local_irq_disable();
644         if (device_power_down(PMSG_FREEZE))
645                 printk(KERN_ERR "Some devices failed to power down, very bad\n");
646         /* We'll ignore saved state, but this gets preempt count (etc) right */
647         save_processor_state();
648         error = swsusp_arch_resume();
649         /* Code below is only ever reached in case of failure. Otherwise
650          * execution continues at place where swsusp_arch_suspend was called
651          */
652         BUG_ON(!error);
653         /* The only reason why swsusp_arch_resume() can fail is memory being
654          * very tight, so we have to free it as soon as we can to avoid
655          * subsequent failures
656          */
657         swsusp_free();
658         restore_processor_state();
659         restore_highmem();
660         touch_softlockup_watchdog();
661         device_power_up();
662         local_irq_enable();
663         return error;
664 }
665
666 /**
667  *      mark_unsafe_pages - mark the pages that cannot be used for storing
668  *      the image during resume, because they conflict with the pages that
669  *      had been used before suspend
670  */
671
672 static void mark_unsafe_pages(struct pbe *pblist)
673 {
674         struct zone *zone;
675         unsigned long zone_pfn;
676         struct pbe *p;
677
678         if (!pblist) /* a sanity check */
679                 return;
680
681         /* Clear page flags */
682         for_each_zone (zone) {
683                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
684                         if (pfn_valid(zone_pfn + zone->zone_start_pfn))
685                                 ClearPageNosaveFree(pfn_to_page(zone_pfn +
686                                         zone->zone_start_pfn));
687         }
688
689         /* Mark orig addresses */
690         for_each_pbe (p, pblist)
691                 SetPageNosaveFree(virt_to_page(p->orig_address));
692
693 }
694
695 static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
696 {
697         /* We assume both lists contain the same number of elements */
698         while (src) {
699                 dst->orig_address = src->orig_address;
700                 dst = dst->next;
701                 src = src->next;
702         }
703 }
704
705 /*
706  *      Using bio to read from swap.
707  *      This code requires a bit more work than just using buffer heads
708  *      but, it is the recommended way for 2.5/2.6.
709  *      The following are to signal the beginning and end of I/O. Bios
710  *      finish asynchronously, while we want them to happen synchronously.
711  *      A simple atomic_t, and a wait loop take care of this problem.
712  */
713
714 static atomic_t io_done = ATOMIC_INIT(0);
715
716 static int end_io(struct bio *bio, unsigned int num, int err)
717 {
718         if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
719                 panic("I/O error reading memory image");
720         atomic_set(&io_done, 0);
721         return 0;
722 }
723
724 static struct block_device *resume_bdev;
725
726 /**
727  *      submit - submit BIO request.
728  *      @rw:    READ or WRITE.
729  *      @off    physical offset of page.
730  *      @page:  page we're reading or writing.
731  *
732  *      Straight from the textbook - allocate and initialize the bio.
733  *      If we're writing, make sure the page is marked as dirty.
734  *      Then submit it and wait.
735  */
736
737 static int submit(int rw, pgoff_t page_off, void *page)
738 {
739         int error = 0;
740         struct bio *bio;
741
742         bio = bio_alloc(GFP_ATOMIC, 1);
743         if (!bio)
744                 return -ENOMEM;
745         bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746         bio->bi_bdev = resume_bdev;
747         bio->bi_end_io = end_io;
748
749         if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
750                 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
751                 error = -EFAULT;
752                 goto Done;
753         }
754
755
756         atomic_set(&io_done, 1);
757         submit_bio(rw | (1 << BIO_RW_SYNC), bio);
758         while (atomic_read(&io_done))
759                 yield();
760         if (rw == READ)
761                 bio_set_pages_dirty(bio);
762  Done:
763         bio_put(bio);
764         return error;
765 }
766
767 static int bio_read_page(pgoff_t page_off, void *page)
768 {
769         return submit(READ, page_off, page);
770 }
771
772 static int bio_write_page(pgoff_t page_off, void *page)
773 {
774         return submit(WRITE, page_off, page);
775 }
776
777 /**
778  *      The following functions allow us to read data using a swap map
779  *      in a file-alike way
780  */
781
782 static inline void release_swap_map_reader(struct swap_map_handle *handle)
783 {
784         if (handle->cur)
785                 free_page((unsigned long)handle->cur);
786         handle->cur = NULL;
787 }
788
789 static inline int get_swap_map_reader(struct swap_map_handle *handle,
790                                       swp_entry_t start)
791 {
792         int error;
793
794         if (!swp_offset(start))
795                 return -EINVAL;
796         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
797         if (!handle->cur)
798                 return -ENOMEM;
799         error = bio_read_page(swp_offset(start), handle->cur);
800         if (error) {
801                 release_swap_map_reader(handle);
802                 return error;
803         }
804         handle->k = 0;
805         return 0;
806 }
807
808 static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
809 {
810         unsigned long offset;
811         int error;
812
813         if (!handle->cur)
814                 return -EINVAL;
815         offset = swp_offset(handle->cur->entries[handle->k]);
816         if (!offset)
817                 return -EINVAL;
818         error = bio_read_page(offset, buf);
819         if (error)
820                 return error;
821         if (++handle->k >= MAP_PAGE_SIZE) {
822                 handle->k = 0;
823                 offset = swp_offset(handle->cur->next_swap);
824                 if (!offset)
825                         release_swap_map_reader(handle);
826                 else
827                         error = bio_read_page(offset, handle->cur);
828         }
829         return error;
830 }
831
832 static int check_header(void)
833 {
834         char *reason = NULL;
835
836         dump_info();
837         if (swsusp_info.version_code != LINUX_VERSION_CODE)
838                 reason = "kernel version";
839         if (swsusp_info.num_physpages != num_physpages)
840                 reason = "memory size";
841         if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
842                 reason = "system type";
843         if (strcmp(swsusp_info.uts.release,system_utsname.release))
844                 reason = "kernel release";
845         if (strcmp(swsusp_info.uts.version,system_utsname.version))
846                 reason = "version";
847         if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
848                 reason = "machine";
849         if (reason) {
850                 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
851                 return -EPERM;
852         }
853         return 0;
854 }
855
856 /**
857  *      load_image_data - load the image data using the swap map handle
858  *      @handle and store them using the page backup list @pblist
859  *      (assume there are @nr_pages pages to load)
860  */
861
862 static int load_image_data(struct pbe *pblist,
863                            struct swap_map_handle *handle,
864                            unsigned int nr_pages)
865 {
866         int error;
867         unsigned int m;
868         struct pbe *p;
869
870         if (!pblist)
871                 return -EINVAL;
872         printk("Loading image data pages (%u pages) ...     ", nr_pages);
873         m = nr_pages / 100;
874         if (!m)
875                 m = 1;
876         nr_pages = 0;
877         p = pblist;
878         while (p) {
879                 error = swap_map_read_page(handle, (void *)p->address);
880                 if (error)
881                         break;
882                 p = p->next;
883                 if (!(nr_pages % m))
884                         printk("\b\b\b\b%3d%%", nr_pages / m);
885                 nr_pages++;
886         }
887         if (!error)
888                 printk("\b\b\b\bdone\n");
889         return error;
890 }
891
892 /**
893  *      unpack_orig_addresses - copy the elements of @buf[] (1 page) to
894  *      the PBEs in the list starting at @pbe
895  */
896
897 static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
898                                                 struct pbe *pbe)
899 {
900         int j;
901
902         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
903                 pbe->orig_address = buf[j];
904                 pbe = pbe->next;
905         }
906         return pbe;
907 }
908
909 /**
910  *      load_image_metadata - load the image metadata using the swap map
911  *      handle @handle and put them into the PBEs in the list @pblist
912  */
913
914 static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
915 {
916         struct pbe *p;
917         unsigned long *buf;
918         unsigned int n = 0;
919         int error = 0;
920
921         printk("Loading image metadata ... ");
922         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
923         if (!buf)
924                 return -ENOMEM;
925         p = pblist;
926         while (p) {
927                 error = swap_map_read_page(handle, buf);
928                 if (error)
929                         break;
930                 p = unpack_orig_addresses(buf, p);
931                 n++;
932         }
933         free_page((unsigned long)buf);
934         if (!error)
935                 printk("done (%u pages loaded)\n", n);
936         return error;
937 }
938
939 int swsusp_read(struct pbe **pblist_ptr)
940 {
941         int error;
942         struct pbe *p, *pblist;
943         struct swap_map_handle handle;
944         unsigned int nr_pages;
945
946         if (IS_ERR(resume_bdev)) {
947                 pr_debug("swsusp: block device not initialised\n");
948                 return PTR_ERR(resume_bdev);
949         }
950
951         error = get_swap_map_reader(&handle, swsusp_header.image);
952         if (!error)
953                 error = swap_map_read_page(&handle, &swsusp_info);
954         if (!error)
955                 error = check_header();
956         if (error)
957                 return error;
958         nr_pages = swsusp_info.image_pages;
959         p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
960         if (!p)
961                 return -ENOMEM;
962         error = load_image_metadata(p, &handle);
963         if (!error) {
964                 mark_unsafe_pages(p);
965                 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
966                 if (pblist)
967                         copy_page_backup_list(pblist, p);
968                 free_pagedir(p);
969                 if (!pblist)
970                         error = -ENOMEM;
971
972                 /* Allocate memory for the image and read the data from swap */
973                 if (!error)
974                         error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
975                 if (!error) {
976                         release_eaten_pages();
977                         error = load_image_data(pblist, &handle, nr_pages);
978                 }
979                 if (!error)
980                         *pblist_ptr = pblist;
981         }
982         release_swap_map_reader(&handle);
983
984         blkdev_put(resume_bdev);
985
986         if (!error)
987                 pr_debug("swsusp: Reading resume file was successful\n");
988         else
989                 pr_debug("swsusp: Error %d resuming\n", error);
990         return error;
991 }
992
993 /**
994  *      swsusp_check - Check for swsusp signature in the resume device
995  */
996
997 int swsusp_check(void)
998 {
999         int error;
1000
1001         resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
1002         if (!IS_ERR(resume_bdev)) {
1003                 set_blocksize(resume_bdev, PAGE_SIZE);
1004                 memset(&swsusp_header, 0, sizeof(swsusp_header));
1005                 if ((error = bio_read_page(0, &swsusp_header)))
1006                         return error;
1007                 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
1008                         memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
1009                         /* Reset swap signature now */
1010                         error = bio_write_page(0, &swsusp_header);
1011                 } else {
1012                         return -EINVAL;
1013                 }
1014                 if (error)
1015                         blkdev_put(resume_bdev);
1016                 else
1017                         pr_debug("swsusp: Signature found, resuming\n");
1018         } else {
1019                 error = PTR_ERR(resume_bdev);
1020         }
1021
1022         if (error)
1023                 pr_debug("swsusp: Error %d check for resume file\n", error);
1024
1025         return error;
1026 }
1027
1028 /**
1029  *      swsusp_close - close swap device.
1030  */
1031
1032 void swsusp_close(void)
1033 {
1034         if (IS_ERR(resume_bdev)) {
1035                 pr_debug("swsusp: block device not initialised\n");
1036                 return;
1037         }
1038
1039         blkdev_put(resume_bdev);
1040 }