Merge by hand (whitespace conflicts in libata.h)
[linux-2.6] / kernel / power / swsusp.c
1 /*
2  * linux/kernel/power/swsusp.c
3  *
4  * This file provides code to write suspend image to swap and read it back.
5  *
6  * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  *
9  * This file is released under the GPLv2.
10  *
11  * I'd like to thank the following people for their work:
12  *
13  * Pavel Machek <pavel@ucw.cz>:
14  * Modifications, defectiveness pointing, being with me at the very beginning,
15  * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
16  *
17  * Steve Doddi <dirk@loth.demon.co.uk>:
18  * Support the possibility of hardware state restoring.
19  *
20  * Raph <grey.havens@earthling.net>:
21  * Support for preserving states of network devices and virtual console
22  * (including X and svgatextmode)
23  *
24  * Kurt Garloff <garloff@suse.de>:
25  * Straightened the critical function in order to prevent compilers from
26  * playing tricks with local variables.
27  *
28  * Andreas Mohr <a.mohr@mailto.de>
29  *
30  * Alex Badea <vampire@go.ro>:
31  * Fixed runaway init
32  *
33  * Andreas Steinmetz <ast@domdv.de>:
34  * Added encrypted suspend option
35  *
36  * More state savers are welcome. Especially for the scsi layer...
37  *
38  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39  */
40
41 #include <linux/module.h>
42 #include <linux/mm.h>
43 #include <linux/suspend.h>
44 #include <linux/smp_lock.h>
45 #include <linux/file.h>
46 #include <linux/utsname.h>
47 #include <linux/version.h>
48 #include <linux/delay.h>
49 #include <linux/bitops.h>
50 #include <linux/spinlock.h>
51 #include <linux/genhd.h>
52 #include <linux/kernel.h>
53 #include <linux/major.h>
54 #include <linux/swap.h>
55 #include <linux/pm.h>
56 #include <linux/device.h>
57 #include <linux/buffer_head.h>
58 #include <linux/swapops.h>
59 #include <linux/bootmem.h>
60 #include <linux/syscalls.h>
61 #include <linux/highmem.h>
62 #include <linux/bio.h>
63
64 #include <asm/uaccess.h>
65 #include <asm/mmu_context.h>
66 #include <asm/pgtable.h>
67 #include <asm/tlbflush.h>
68 #include <asm/io.h>
69
70 #include <linux/random.h>
71 #include <linux/crypto.h>
72 #include <asm/scatterlist.h>
73
74 #include "power.h"
75
/*
 * Highmem save/restore hooks.  With CONFIG_HIGHMEM only the prototypes
 * are given here; without it both hooks collapse to local stubs that
 * report success.
 */
#ifdef CONFIG_HIGHMEM
int save_highmem(void);
int restore_highmem(void);
#else
static int save_highmem(void)
{
	return 0;
}

static int restore_highmem(void)
{
	return 0;
}
#endif
83
84 #define CIPHER "aes"
85 #define MAXKEY 32
86 #define MAXIV  32
87
88 extern char resume_file[];
89
90 /* Local variables that should not be affected by save */
91 unsigned int nr_copy_pages __nosavedata = 0;
92
93 /* Suspend pagedir is allocated before final copy, therefore it
94    must be freed after resume
95
96    Warning: this is even more evil than it seems. Pagedirs this file
97    talks about are completely different from page directories used by
98    MMU hardware.
99  */
100 suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
101
102 #define SWSUSP_SIG      "S1SUSPEND"
103
104 static struct swsusp_header {
105         char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)];
106         u8 key_iv[MAXKEY+MAXIV];
107         swp_entry_t swsusp_info;
108         char    orig_sig[10];
109         char    sig[10];
110 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
111
112 static struct swsusp_info swsusp_info;
113
114 /*
115  * Saving part...
116  */
117
118 /* We memorize in swapfile_used what swap devices are used for suspension */
119 #define SWAPFILE_UNUSED    0
120 #define SWAPFILE_SUSPEND   1    /* This is the suspending device */
121 #define SWAPFILE_IGNORED   2    /* Those are other swap devices ignored for suspension */
122
123 static unsigned short swapfile_used[MAX_SWAPFILES];
124 static unsigned short root_swap;
125
126 static int write_page(unsigned long addr, swp_entry_t *loc);
127 static int bio_read_page(pgoff_t page_off, void *page);
128
129 static u8 key_iv[MAXKEY+MAXIV];
130
131 #ifdef CONFIG_SWSUSP_ENCRYPT
132
133 static int crypto_init(int mode, void **mem)
134 {
135         int error = 0;
136         int len;
137         char *modemsg;
138         struct crypto_tfm *tfm;
139
140         modemsg = mode ? "suspend not possible" : "resume not possible";
141
142         tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC);
143         if(!tfm) {
144                 printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg);
145                 error = -EINVAL;
146                 goto out;
147         }
148
149         if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) {
150                 printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg);
151                 error = -ENOKEY;
152                 goto fail;
153         }
154
155         if (mode)
156                 get_random_bytes(key_iv, MAXKEY+MAXIV);
157
158         len = crypto_tfm_alg_max_keysize(tfm);
159         if (len > MAXKEY)
160                 len = MAXKEY;
161
162         if (crypto_cipher_setkey(tfm, key_iv, len)) {
163                 printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg);
164                 error = -EKEYREJECTED;
165                 goto fail;
166         }
167
168         len = crypto_tfm_alg_ivsize(tfm);
169
170         if (MAXIV < len) {
171                 printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg);
172                 error = -EOVERFLOW;
173                 goto fail;
174         }
175
176         crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len);
177
178         *mem=(void *)tfm;
179
180         goto out;
181
182 fail:   crypto_free_tfm(tfm);
183 out:    return error;
184 }
185
/* Release the transform handed out by crypto_init(). */
static __inline__ void crypto_exit(void *mem)
{
	struct crypto_tfm *tfm = (struct crypto_tfm *)mem;

	crypto_free_tfm(tfm);
}
190
191 static __inline__ int crypto_write(struct pbe *p, void *mem)
192 {
193         int error = 0;
194         struct scatterlist src, dst;
195
196         src.page   = virt_to_page(p->address);
197         src.offset = 0;
198         src.length = PAGE_SIZE;
199         dst.page   = virt_to_page((void *)&swsusp_header);
200         dst.offset = 0;
201         dst.length = PAGE_SIZE;
202
203         error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src,
204                                         PAGE_SIZE);
205
206         if (!error)
207                 error = write_page((unsigned long)&swsusp_header,
208                                 &(p->swap_address));
209         return error;
210 }
211
212 static __inline__ int crypto_read(struct pbe *p, void *mem)
213 {
214         int error = 0;
215         struct scatterlist src, dst;
216
217         error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
218         if (!error) {
219                 src.offset = 0;
220                 src.length = PAGE_SIZE;
221                 dst.offset = 0;
222                 dst.length = PAGE_SIZE;
223                 src.page = dst.page = virt_to_page((void *)p->address);
224
225                 error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst,
226                                                 &src, PAGE_SIZE);
227         }
228         return error;
229 }
230 #else
231 static __inline__ int crypto_init(int mode, void *mem)
232 {
233         return 0;
234 }
235
236 static __inline__ void crypto_exit(void *mem)
237 {
238 }
239
240 static __inline__ int crypto_write(struct pbe *p, void *mem)
241 {
242         return write_page(p->address, &(p->swap_address));
243 }
244
245 static __inline__ int crypto_read(struct pbe *p, void *mem)
246 {
247         return bio_read_page(swp_offset(p->swap_address), (void *)p->address);
248 }
249 #endif
250
251 static int mark_swapfiles(swp_entry_t prev)
252 {
253         int error;
254
255         rw_swap_page_sync(READ,
256                           swp_entry(root_swap, 0),
257                           virt_to_page((unsigned long)&swsusp_header));
258         if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
259             !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
260                 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
261                 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
262                 memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV);
263                 swsusp_header.swsusp_info = prev;
264                 error = rw_swap_page_sync(WRITE,
265                                           swp_entry(root_swap, 0),
266                                           virt_to_page((unsigned long)
267                                                        &swsusp_header));
268         } else {
269                 pr_debug("swsusp: Partition is not swap space.\n");
270                 error = -ENODEV;
271         }
272         return error;
273 }
274
275 /*
276  * Check whether the swap device is the specified resume
277  * device, irrespective of whether they are specified by
278  * identical names.
279  *
280  * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
281  * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
282  * and they'll be considered the same device.  This is *necessary* for
283  * devfs, since the resume code can only recognize the form /dev/hda4,
284  * but the suspend code would see the long name.)
285  */
286 static int is_resume_device(const struct swap_info_struct *swap_info)
287 {
288         struct file *file = swap_info->swap_file;
289         struct inode *inode = file->f_dentry->d_inode;
290
291         return S_ISBLK(inode->i_mode) &&
292                 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
293 }
294
295 static int swsusp_swap_check(void) /* This is called before saving image */
296 {
297         int i, len;
298
299         len=strlen(resume_file);
300         root_swap = 0xFFFF;
301
302         spin_lock(&swap_lock);
303         for (i=0; i<MAX_SWAPFILES; i++) {
304                 if (!(swap_info[i].flags & SWP_WRITEOK)) {
305                         swapfile_used[i]=SWAPFILE_UNUSED;
306                 } else {
307                         if (!len) {
308                                 printk(KERN_WARNING "resume= option should be used to set suspend device" );
309                                 if (root_swap == 0xFFFF) {
310                                         swapfile_used[i] = SWAPFILE_SUSPEND;
311                                         root_swap = i;
312                                 } else
313                                         swapfile_used[i] = SWAPFILE_IGNORED;
314                         } else {
315                                 /* we ignore all swap devices that are not the resume_file */
316                                 if (is_resume_device(&swap_info[i])) {
317                                         swapfile_used[i] = SWAPFILE_SUSPEND;
318                                         root_swap = i;
319                                 } else {
320                                         swapfile_used[i] = SWAPFILE_IGNORED;
321                                 }
322                         }
323                 }
324         }
325         spin_unlock(&swap_lock);
326         return (root_swap != 0xffff) ? 0 : -ENODEV;
327 }
328
329 /**
330  * This is called after saving image so modification
331  * will be lost after resume... and that's what we want.
332  * we make the device unusable. A new call to
333  * lock_swapdevices can unlock the devices.
334  */
335 static void lock_swapdevices(void)
336 {
337         int i;
338
339         spin_lock(&swap_lock);
340         for (i = 0; i< MAX_SWAPFILES; i++)
341                 if (swapfile_used[i] == SWAPFILE_IGNORED) {
342                         swap_info[i].flags ^= SWP_WRITEOK;
343                 }
344         spin_unlock(&swap_lock);
345 }
346
347 /**
348  *      write_page - Write one page to a fresh swap location.
349  *      @addr:  Address we're writing.
350  *      @loc:   Place to store the entry we used.
351  *
352  *      Allocate a new swap entry and 'sync' it. Note we discard -EIO
353  *      errors. That is an artifact left over from swsusp. It did not
354  *      check the return of rw_swap_page_sync() at all, since most pages
355  *      written back to swap would return -EIO.
356  *      This is a partial improvement, since we will at least return other
357  *      errors, though we need to eventually fix the damn code.
358  */
359 static int write_page(unsigned long addr, swp_entry_t *loc)
360 {
361         swp_entry_t entry;
362         int error = 0;
363
364         entry = get_swap_page();
365         if (swp_offset(entry) &&
366             swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
367                 error = rw_swap_page_sync(WRITE, entry,
368                                           virt_to_page(addr));
369                 if (error == -EIO)
370                         error = 0;
371                 if (!error)
372                         *loc = entry;
373         } else
374                 error = -ENOSPC;
375         return error;
376 }
377
378 /**
379  *      data_free - Free the swap entries used by the saved image.
380  *
381  *      Walk the list of used swap entries and free each one.
382  *      This is only used for cleanup when suspend fails.
383  */
384 static void data_free(void)
385 {
386         swp_entry_t entry;
387         struct pbe *p;
388
389         for_each_pbe (p, pagedir_nosave) {
390                 entry = p->swap_address;
391                 if (entry.val)
392                         swap_free(entry);
393                 else
394                         break;
395         }
396 }
397
398 /**
399  *      data_write - Write saved image to swap.
400  *
401  *      Walk the list of pages in the image and sync each one to swap.
402  */
403 static int data_write(void)
404 {
405         int error = 0, i = 0;
406         unsigned int mod = nr_copy_pages / 100;
407         struct pbe *p;
408         void *tfm;
409
410         if ((error = crypto_init(1, &tfm)))
411                 return error;
412
413         if (!mod)
414                 mod = 1;
415
416         printk( "Writing data to swap (%d pages)...     ", nr_copy_pages );
417         for_each_pbe (p, pagedir_nosave) {
418                 if (!(i%mod))
419                         printk( "\b\b\b\b%3d%%", i / mod );
420                 if ((error = crypto_write(p, tfm))) {
421                         crypto_exit(tfm);
422                         return error;
423                 }
424                 i++;
425         }
426         printk("\b\b\b\bdone\n");
427         crypto_exit(tfm);
428         return error;
429 }
430
431 static void dump_info(void)
432 {
433         pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
434         pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
435         pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
436         pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
437         pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
438         pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
439         pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
440         pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
441         pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
442         pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
443         pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
444 }
445
446 static void init_header(void)
447 {
448         memset(&swsusp_info, 0, sizeof(swsusp_info));
449         swsusp_info.version_code = LINUX_VERSION_CODE;
450         swsusp_info.num_physpages = num_physpages;
451         memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
452
453         swsusp_info.suspend_pagedir = pagedir_nosave;
454         swsusp_info.cpus = num_online_cpus();
455         swsusp_info.image_pages = nr_copy_pages;
456 }
457
458 static int close_swap(void)
459 {
460         swp_entry_t entry;
461         int error;
462
463         dump_info();
464         error = write_page((unsigned long)&swsusp_info, &entry);
465         if (!error) {
466                 printk( "S" );
467                 error = mark_swapfiles(entry);
468                 printk( "|\n" );
469         }
470         return error;
471 }
472
473 /**
474  *      free_pagedir_entries - Free pages used by the page directory.
475  *
476  *      This is used during suspend for error recovery.
477  */
478
479 static void free_pagedir_entries(void)
480 {
481         int i;
482
483         for (i = 0; i < swsusp_info.pagedir_pages; i++)
484                 swap_free(swsusp_info.pagedir[i]);
485 }
486
487
488 /**
489  *      write_pagedir - Write the array of pages holding the page directory.
490  *      @last:  Last swap entry we write (needed for header).
491  */
492
493 static int write_pagedir(void)
494 {
495         int error = 0;
496         unsigned int n = 0;
497         struct pbe *pbe;
498
499         printk( "Writing pagedir...");
500         for_each_pb_page (pbe, pagedir_nosave) {
501                 if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
502                         return error;
503         }
504
505         swsusp_info.pagedir_pages = n;
506         printk("done (%u pages)\n", n);
507         return error;
508 }
509
510 /**
511  *      enough_swap - Make sure we have enough swap to save the image.
512  *
513  *      Returns TRUE or FALSE after checking the total amount of swap
514  *      space avaiable.
515  *
516  *      FIXME: si_swapinfo(&i) returns all swap devices information.
517  *      We should only consider resume_device.
518  */
519
520 static int enough_swap(unsigned int nr_pages)
521 {
522         struct sysinfo i;
523
524         si_swapinfo(&i);
525         pr_debug("swsusp: available swap: %lu pages\n", i.freeswap);
526         return i.freeswap > (nr_pages + PAGES_FOR_IO +
527                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
528 }
529
530 /**
531  *      write_suspend_image - Write entire image and metadata.
532  *
533  */
534 static int write_suspend_image(void)
535 {
536         int error;
537
538         if (!enough_swap(nr_copy_pages)) {
539                 printk(KERN_ERR "swsusp: Not enough free swap\n");
540                 return -ENOSPC;
541         }
542
543         init_header();
544         if ((error = data_write()))
545                 goto FreeData;
546
547         if ((error = write_pagedir()))
548                 goto FreePagedir;
549
550         if ((error = close_swap()))
551                 goto FreePagedir;
552  Done:
553         memset(key_iv, 0, MAXKEY+MAXIV);
554         return error;
555  FreePagedir:
556         free_pagedir_entries();
557  FreeData:
558         data_free();
559         goto Done;
560 }
561
562 /* It is important _NOT_ to umount filesystems at this point. We want
563  * them synced (in case something goes wrong) but we DO not want to mark
564  * filesystem clean: it is not. (And it does not matter, if we resume
565  * correctly, we'll mark system clean, anyway.)
566  */
567 int swsusp_write(void)
568 {
569         int error;
570
571         if ((error = swsusp_swap_check())) {
572                 printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n");
573                 return error;
574         }
575         lock_swapdevices();
576         error = write_suspend_image();
577         /* This will unlock ignored swap devices since writing is finished */
578         lock_swapdevices();
579         return error;
580 }
581
582
583
584 int swsusp_suspend(void)
585 {
586         int error;
587
588         if ((error = arch_prepare_suspend()))
589                 return error;
590         local_irq_disable();
591         /* At this point, device_suspend() has been called, but *not*
592          * device_power_down(). We *must* device_power_down() now.
593          * Otherwise, drivers for some devices (e.g. interrupt controllers)
594          * become desynchronized with the actual state of the hardware
595          * at resume time, and evil weirdness ensues.
596          */
597         if ((error = device_power_down(PMSG_FREEZE))) {
598                 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
599                 goto Enable_irqs;
600         }
601
602         if ((error = save_highmem())) {
603                 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
604                 goto Restore_highmem;
605         }
606
607         save_processor_state();
608         if ((error = swsusp_arch_suspend()))
609                 printk(KERN_ERR "Error %d suspending\n", error);
610         /* Restore control flow magically appears here */
611         restore_processor_state();
612 Restore_highmem:
613         restore_highmem();
614         device_power_up();
615 Enable_irqs:
616         local_irq_enable();
617         return error;
618 }
619
620 int swsusp_resume(void)
621 {
622         int error;
623         local_irq_disable();
624         if (device_power_down(PMSG_FREEZE))
625                 printk(KERN_ERR "Some devices failed to power down, very bad\n");
626         /* We'll ignore saved state, but this gets preempt count (etc) right */
627         save_processor_state();
628         error = swsusp_arch_resume();
629         /* Code below is only ever reached in case of failure. Otherwise
630          * execution continues at place where swsusp_arch_suspend was called
631          */
632         BUG_ON(!error);
633         /* The only reason why swsusp_arch_resume() can fail is memory being
634          * very tight, so we have to free it as soon as we can to avoid
635          * subsequent failures
636          */
637         swsusp_free();
638         restore_processor_state();
639         restore_highmem();
640         touch_softlockup_watchdog();
641         device_power_up();
642         local_irq_enable();
643         return error;
644 }
645
646 /**
647  *      mark_unsafe_pages - mark the pages that cannot be used for storing
648  *      the image during resume, because they conflict with the pages that
649  *      had been used before suspend
650  */
651
652 static void mark_unsafe_pages(struct pbe *pblist)
653 {
654         struct zone *zone;
655         unsigned long zone_pfn;
656         struct pbe *p;
657
658         if (!pblist) /* a sanity check */
659                 return;
660
661         /* Clear page flags */
662         for_each_zone (zone) {
663                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
664                         if (pfn_valid(zone_pfn + zone->zone_start_pfn))
665                                 ClearPageNosaveFree(pfn_to_page(zone_pfn +
666                                         zone->zone_start_pfn));
667         }
668
669         /* Mark orig addresses */
670         for_each_pbe (p, pblist)
671                 SetPageNosaveFree(virt_to_page(p->orig_address));
672
673 }
674
675 static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
676 {
677         /* We assume both lists contain the same number of elements */
678         while (src) {
679                 dst->orig_address = src->orig_address;
680                 dst->swap_address = src->swap_address;
681                 dst = dst->next;
682                 src = src->next;
683         }
684 }
685
686 /*
687  *      Using bio to read from swap.
688  *      This code requires a bit more work than just using buffer heads
689  *      but, it is the recommended way for 2.5/2.6.
690  *      The following are to signal the beginning and end of I/O. Bios
691  *      finish asynchronously, while we want them to happen synchronously.
692  *      A simple atomic_t, and a wait loop take care of this problem.
693  */
694
695 static atomic_t io_done = ATOMIC_INIT(0);
696
697 static int end_io(struct bio *bio, unsigned int num, int err)
698 {
699         if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
700                 panic("I/O error reading memory image");
701         atomic_set(&io_done, 0);
702         return 0;
703 }
704
705 static struct block_device *resume_bdev;
706
707 /**
708  *      submit - submit BIO request.
709  *      @rw:    READ or WRITE.
710  *      @off    physical offset of page.
711  *      @page:  page we're reading or writing.
712  *
713  *      Straight from the textbook - allocate and initialize the bio.
714  *      If we're writing, make sure the page is marked as dirty.
715  *      Then submit it and wait.
716  */
717
718 static int submit(int rw, pgoff_t page_off, void *page)
719 {
720         int error = 0;
721         struct bio *bio;
722
723         bio = bio_alloc(GFP_ATOMIC, 1);
724         if (!bio)
725                 return -ENOMEM;
726         bio->bi_sector = page_off * (PAGE_SIZE >> 9);
727         bio_get(bio);
728         bio->bi_bdev = resume_bdev;
729         bio->bi_end_io = end_io;
730
731         if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
732                 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
733                 error = -EFAULT;
734                 goto Done;
735         }
736
737         if (rw == WRITE)
738                 bio_set_pages_dirty(bio);
739
740         atomic_set(&io_done, 1);
741         submit_bio(rw | (1 << BIO_RW_SYNC), bio);
742         while (atomic_read(&io_done))
743                 yield();
744
745  Done:
746         bio_put(bio);
747         return error;
748 }
749
750 static int bio_read_page(pgoff_t page_off, void *page)
751 {
752         return submit(READ, page_off, page);
753 }
754
755 static int bio_write_page(pgoff_t page_off, void *page)
756 {
757         return submit(WRITE, page_off, page);
758 }
759
760 /*
761  * Sanity check if this image makes sense with this kernel/swap context
762  * I really don't think that it's foolproof but more than nothing..
763  */
764
765 static const char *sanity_check(void)
766 {
767         dump_info();
768         if (swsusp_info.version_code != LINUX_VERSION_CODE)
769                 return "kernel version";
770         if (swsusp_info.num_physpages != num_physpages)
771                 return "memory size";
772         if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
773                 return "system type";
774         if (strcmp(swsusp_info.uts.release,system_utsname.release))
775                 return "kernel release";
776         if (strcmp(swsusp_info.uts.version,system_utsname.version))
777                 return "version";
778         if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
779                 return "machine";
780 #if 0
781         /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */
782         if (swsusp_info.cpus != num_possible_cpus())
783                 return "number of cpus";
784 #endif
785         return NULL;
786 }
787
788
789 static int check_header(void)
790 {
791         const char *reason = NULL;
792         int error;
793
794         if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info)))
795                 return error;
796
797         /* Is this same machine? */
798         if ((reason = sanity_check())) {
799                 printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
800                 return -EPERM;
801         }
802         nr_copy_pages = swsusp_info.image_pages;
803         return error;
804 }
805
806 static int check_sig(void)
807 {
808         int error;
809
810         memset(&swsusp_header, 0, sizeof(swsusp_header));
811         if ((error = bio_read_page(0, &swsusp_header)))
812                 return error;
813         if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
814                 memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
815                 memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV);
816                 memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV);
817
818                 /*
819                  * Reset swap signature now.
820                  */
821                 error = bio_write_page(0, &swsusp_header);
822         } else {
823                 return -EINVAL;
824         }
825         if (!error)
826                 pr_debug("swsusp: Signature found, resuming\n");
827         return error;
828 }
829
830 /**
831  *      data_read - Read image pages from swap.
832  *
833  *      You do not need to check for overlaps, check_pagedir()
834  *      already did that.
835  */
836
837 static int data_read(struct pbe *pblist)
838 {
839         struct pbe *p;
840         int error = 0;
841         int i = 0;
842         int mod = swsusp_info.image_pages / 100;
843         void *tfm;
844
845         if ((error = crypto_init(0, &tfm)))
846                 return error;
847
848         if (!mod)
849                 mod = 1;
850
851         printk("swsusp: Reading image data (%lu pages):     ",
852                         swsusp_info.image_pages);
853
854         for_each_pbe (p, pblist) {
855                 if (!(i % mod))
856                         printk("\b\b\b\b%3d%%", i / mod);
857
858                 if ((error = crypto_read(p, tfm))) {
859                         crypto_exit(tfm);
860                         return error;
861                 }
862
863                 i++;
864         }
865         printk("\b\b\b\bdone\n");
866         crypto_exit(tfm);
867         return error;
868 }
869
870 /**
871  *      read_pagedir - Read page backup list pages from swap
872  */
873
874 static int read_pagedir(struct pbe *pblist)
875 {
876         struct pbe *pbpage, *p;
877         unsigned int i = 0;
878         int error;
879
880         if (!pblist)
881                 return -EFAULT;
882
883         printk("swsusp: Reading pagedir (%lu pages)\n",
884                         swsusp_info.pagedir_pages);
885
886         for_each_pb_page (pbpage, pblist) {
887                 unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);
888
889                 error = -EFAULT;
890                 if (offset) {
891                         p = (pbpage + PB_PAGE_SKIP)->next;
892                         error = bio_read_page(offset, (void *)pbpage);
893                         (pbpage + PB_PAGE_SKIP)->next = p;
894                 }
895                 if (error)
896                         break;
897         }
898
899         if (!error)
900                 BUG_ON(i != swsusp_info.pagedir_pages);
901
902         return error;
903 }
904
905
/*
 * Check the swap signature and then the image header.
 * Returns 0 if both are fine, otherwise the first error.
 */
static int check_suspend_image(void)
{
	int error = check_sig();

	if (!error)
		error = check_header();

	return error;
}
918
919 static int read_suspend_image(void)
920 {
921         int error = 0;
922         struct pbe *p;
923
924         if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0)))
925                 return -ENOMEM;
926
927         if ((error = read_pagedir(p)))
928                 return error;
929         create_pbe_list(p, nr_copy_pages);
930         mark_unsafe_pages(p);
931         pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
932         if (pagedir_nosave) {
933                 create_pbe_list(pagedir_nosave, nr_copy_pages);
934                 copy_page_backup_list(pagedir_nosave, p);
935         }
936         free_pagedir(p);
937         if (!pagedir_nosave)
938                 return -ENOMEM;
939
940         /* Allocate memory for the image and read the data from swap */
941
942         error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1);
943
944         if (!error)
945                 error = data_read(pagedir_nosave);
946
947         return error;
948 }
949
950 /**
951  *      swsusp_check - Check for saved image in swap
952  */
953
954 int swsusp_check(void)
955 {
956         int error;
957
958         resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
959         if (!IS_ERR(resume_bdev)) {
960                 set_blocksize(resume_bdev, PAGE_SIZE);
961                 error = check_suspend_image();
962                 if (error)
963                     blkdev_put(resume_bdev);
964         } else
965                 error = PTR_ERR(resume_bdev);
966
967         if (!error)
968                 pr_debug("swsusp: resume file found\n");
969         else
970                 pr_debug("swsusp: Error %d check for resume file\n", error);
971         return error;
972 }
973
974 /**
975  *      swsusp_read - Read saved image from swap.
976  */
977
978 int swsusp_read(void)
979 {
980         int error;
981
982         if (IS_ERR(resume_bdev)) {
983                 pr_debug("swsusp: block device not initialised\n");
984                 return PTR_ERR(resume_bdev);
985         }
986
987         error = read_suspend_image();
988         blkdev_put(resume_bdev);
989         memset(key_iv, 0, MAXKEY+MAXIV);
990
991         if (!error)
992                 pr_debug("swsusp: Reading resume file was successful\n");
993         else
994                 pr_debug("swsusp: Error %d resuming\n", error);
995         return error;
996 }
997
998 /**
999  *      swsusp_close - close swap device.
1000  */
1001
1002 void swsusp_close(void)
1003 {
1004         if (IS_ERR(resume_bdev)) {
1005                 pr_debug("swsusp: block device not initialised\n");
1006                 return;
1007         }
1008
1009         blkdev_put(resume_bdev);
1010 }