2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <linux/bio.h>
23 #include <linux/init.h>
24 #include <linux/buffer_head.h>
25 #include <linux/mempool.h>
26 #include "jfs_incore.h"
27 #include "jfs_superblock.h"
28 #include "jfs_filsys.h"
29 #include "jfs_metapage.h"
30 #include "jfs_txnmgr.h"
31 #include "jfs_debug.h"
/*
 * Metapage statistics counters, present only when CONFIG_JFS_STATISTICS
 * is defined.  They are bumped elsewhere in this file through
 * INCREMENT(mpStat.<field>).
 * NOTE(review): the embedded numbering skips lines here, so the
 * declaration that wraps these fields is not visible in this listing.
 */
33 #ifdef CONFIG_JFS_STATISTICS
35 uint pagealloc; /* # of page allocations */
36 uint pagefree; /* # of page frees */
37 uint lockwait; /* # of sleeping lock_metapage() calls */
/* Non-zero when the META_locked bit is currently set in mp->flag. */
41 #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
/*
 * Atomically set META_locked and yield the bit's previous value:
 * zero means the caller has just taken the lock, non-zero means it was
 * already held by someone else.
 */
42 #define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
/*
 * Release the metapage lock by clearing META_locked in mp->flag.
 * NOTE(review): the embedded numbering jumps from 46 to 50, so the rest
 * of this body is not visible in this listing - confirm against the
 * complete source.
 */
44 static inline void unlock_metapage(struct metapage *mp)
46 clear_bit(META_locked, &mp->flag);
/*
 * Contended path for taking the metapage lock.
 *
 * The current task is queued exclusively on mp->wait, the event is counted
 * in mpStat.lockwait, and trylock_metapage() is retried in a loop until it
 * reports that the lock was obtained.  On each pass the task state is set
 * to TASK_UNINTERRUPTIBLE before the lock bit is re-tested; while the
 * metapage is still locked the page lock is dropped with unlock_page().
 * On exit the task is set back to TASK_RUNNING and removed from the queue.
 *
 * NOTE(review): lines 55 and 59-61 of the original are missing from this
 * listing; presumably they open the do-loop, sleep, and retake the page
 * lock - confirm against the complete source.
 */
50 static inline void __lock_metapage(struct metapage *mp)
52 DECLARE_WAITQUEUE(wait, current);
53 INCREMENT(mpStat.lockwait);
54 add_wait_queue_exclusive(&mp->wait, &wait);
56 set_current_state(TASK_UNINTERRUPTIBLE);
57 if (metapage_locked(mp)) {
58 unlock_page(mp->page);
62 } while (trylock_metapage(mp));
63 __set_current_state(TASK_RUNNING);
64 remove_wait_queue(&mp->wait, &wait);
68 * Must have mp->page locked
/*
 * Take the metapage lock.  The fast path is a single trylock_metapage();
 * a non-zero result means the lock was already held.
 * NOTE(review): the statement controlled by the `if` is not visible in
 * this listing - presumably the __lock_metapage() slow path; confirm.
 */
70 static inline void lock_metapage(struct metapage *mp)
72 if (trylock_metapage(mp))
/* Minimum element count handed to mempool_create() in metapage_init(). */
76 #define METAPOOL_MIN_PAGES 32
/* Slab cache ("jfs_mp") that backs struct metapage allocations. */
77 static kmem_cache_t *metapage_cache;
/* Mempool built on metapage_cache; see alloc_metapage()/free_metapage(). */
78 static mempool_t *metapage_mempool;
/*
 * Number of metapages held by one page-cache page
 * (PAGE_CACHE_SIZE divided by 2^L2PSIZE).
 */
80 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
/*
 * Table of metapage pointers, one slot per metapage in the page.
 * NOTE(review): this is a member of struct meta_anchor; the rest of the
 * struct (mp_count and io_count are referenced later in this file) and
 * the surrounding preprocessor conditional are not visible in this
 * listing.
 */
87 struct metapage *mp[MPS_PER_PAGE];
/* The anchor pointer is kept in the page's private field. */
89 #define mp_anchor(page) ((struct meta_anchor *)page_private(page))
/*
 * Anchor-based lookup: return the metapage that covers byte `offset` of
 * `page`, taken from slot (offset >> L2PSIZE) of the page's meta_anchor.
 * The PagePrivate() test guards pages that have no anchor attached.
 * NOTE(review): the statement controlled by that test is not visible in
 * this listing - presumably it returns NULL; confirm.
 */
91 static inline struct metapage *page_to_mp(struct page *page, uint offset)
93 if (!PagePrivate(page))
95 return mp_anchor(page)->mp[offset >> L2PSIZE];
/*
 * Anchor-based insert: record `mp` in the meta_anchor of `page`.
 *
 * When the page has no private data yet, a meta_anchor is allocated with
 * GFP_NOFS, zeroed, and attached with set_page_private()/SetPagePrivate().
 * The slot index is derived from mp->index: it is shifted right by
 * (L2PSIZE - i_blkbits), i.e. converted from blocks to metapages, and
 * masked with (MPS_PER_PAGE - 1).
 *
 * __get_metapage() treats a non-zero return value as failure.
 *
 * NOTE(review): the allocation-failure check, the branch structure around
 * PagePrivate(), the store into the slot and the return statements are
 * not visible in this listing - confirm against the complete source.
 */
98 static inline int insert_metapage(struct page *page, struct metapage *mp)
100 struct meta_anchor *a;
102 int l2mp_blocks; /* log2 blocks per metapage */
104 if (PagePrivate(page))
107 a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
110 memset(a, 0, sizeof(struct meta_anchor));
111 set_page_private(page, (unsigned long)a);
112 SetPagePrivate(page);
117 l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
118 index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
/*
 * Anchor-based remove: detach `mp` from the meta_anchor of `page`.
 *
 * The slot index is computed the same way as in insert_metapage(), and
 * BUG_ON() fires if that slot does not currently hold `mp`.  When the
 * anchor's mp_count reaches zero the page's private pointer is reset to 0
 * and PagePrivate is cleared.
 *
 * NOTE(review): lines are missing from this listing between the visible
 * statements (presumably the slot is cleared and the anchor is freed) -
 * confirm against the complete source.
 */
126 static inline void remove_metapage(struct page *page, struct metapage *mp)
128 struct meta_anchor *a = mp_anchor(page);
129 int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
132 index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
134 BUG_ON(a->mp[index] != mp);
137 if (--a->mp_count == 0) {
139 set_page_private(page, 0);
140 ClearPagePrivate(page);
/* Account for one more outstanding I/O in the page's anchor (io_count). */
145 static inline void inc_io(struct page *page)
147 atomic_inc(&mp_anchor(page)->io_count);
/*
 * Drop one outstanding I/O from the page's anchor.  The test is true only
 * for the decrement that brings io_count to zero.
 * NOTE(review): the statement it controls is not visible in this listing -
 * presumably handler(page), matching the macro form of dec_io; confirm.
 */
150 static inline void dec_io(struct page *page, void (*handler) (struct page *))
152 if (atomic_dec_and_test(&mp_anchor(page)->io_count))
/*
 * Direct variant: the page's private field holds the metapage pointer
 * itself.  Returns it when PagePrivate is set, NULL otherwise; `offset`
 * is not used.
 * NOTE(review): the preprocessor line selecting this variant is not
 * visible here - presumably the one-metapage-per-page case; confirm.
 */
157 static inline struct metapage *page_to_mp(struct page *page, uint offset)
159 return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
/*
 * Direct variant: store `mp` in the page's private field and set
 * PagePrivate.
 * NOTE(review): lines 163-164 and the return statement are not visible in
 * this listing - confirm against the complete source.
 */
162 static inline int insert_metapage(struct page *page, struct metapage *mp)
165 set_page_private(page, (unsigned long)mp);
166 SetPagePrivate(page);
/*
 * Direct variant: reset the page's private field to 0 and clear
 * PagePrivate.  `mp` is not referenced by the visible statements.
 */
172 static inline void remove_metapage(struct page *page, struct metapage *mp)
174 set_page_private(page, 0);
175 ClearPagePrivate(page);
/*
 * Macro forms used when no per-page I/O counter is kept: inc_io() does
 * nothing and dec_io() invokes the completion handler straight away.
 */
179 #define inc_io(page) do {} while(0)
180 #define dec_io(page, handler) handler(page)
/*
 * Slab constructor for struct metapage objects (passed to
 * kmem_cache_create() in metapage_init()).
 *
 * Initialization happens only when `flags` has SLAB_CTOR_CONSTRUCTOR set
 * and SLAB_CTOR_VERIFY clear.  The visible initialization marks the object
 * META_free and sets up its wait queue head.
 * NOTE(review): original lines 190-195 are not visible in this listing.
 */
184 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
186 struct metapage *mp = (struct metapage *)foo;
188 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
189 SLAB_CTOR_CONSTRUCTOR) {
196 set_bit(META_free, &mp->flag);
197 init_waitqueue_head(&mp->wait);
/* Obtain a struct metapage from metapage_mempool using `gfp_mask`. */
201 static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
203 return mempool_alloc(metapage_mempool, gfp_mask);
/*
 * Return `mp` to metapage_mempool, first marking it META_free (the same
 * state init_once() gives a freshly constructed object).
 * NOTE(review): original lines 208 and 210 are not visible in this listing.
 */
206 static inline void free_metapage(struct metapage *mp)
209 set_bit(META_free, &mp->flag);
211 mempool_free(mp, metapage_mempool);
/*
 * Module-init-time setup of the metapage allocator: creates the "jfs_mp"
 * slab cache (constructor init_once) and a mempool on top of it with
 * METAPOOL_MIN_PAGES elements.  If the mempool cannot be created the slab
 * cache is destroyed again.
 * NOTE(review): the return statements are not visible in this listing, so
 * the exact error codes must be confirmed against the complete source.
 */
214 int __init metapage_init(void)
217 * Allocate the metapage structures
219 metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
220 0, 0, init_once, NULL);
221 if (metapage_cache == NULL)
224 metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab,
225 mempool_free_slab, metapage_cache);
227 if (metapage_mempool == NULL) {
228 kmem_cache_destroy(metapage_cache);
/*
 * Tear down what metapage_init() created: the mempool first, then the
 * slab cache it draws from.
 */
235 void metapage_exit(void)
237 mempool_destroy(metapage_mempool);
238 kmem_cache_destroy(metapage_cache);
/*
 * Detach a metapage from its page if nothing is using it any more.
 *
 * The guard is true when the metapage is still referenced (count), has
 * nohomeok set, is META_dirty, or has META_io set.  Otherwise the metapage
 * is removed from the page and mpStat.pagefree is bumped.
 * NOTE(review): the statement controlled by the guard (presumably an
 * early return) and the final line of the body are not visible in this
 * listing - confirm against the complete source.
 */
241 static inline void drop_metapage(struct page *page, struct metapage *mp)
243 if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
244 test_bit(META_io, &mp->flag))
246 remove_metapage(page, mp);
247 INCREMENT(mpStat.pagefree);
252 * Metapage address space operations
/*
 * Map logical block `lblock` of `inode` for metapage I/O.  `*len` is the
 * number of blocks requested and is updated in place.
 *
 * file_blocks is derived from i_size rounded up by (i_blksize - 1).  A
 * request starting at or beyond file_blocks takes a separate branch, and
 * a request extending past file_blocks has *len trimmed to end there.
 * xtLookup() is then called with *len as the requested length and `len`
 * as the output length; when it returns 0 with a non-zero length, lblock
 * is replaced by the looked-up address xaddr.
 *
 * NOTE(review): the rest of the parameter list, the shift applied to
 * file_blocks, the condition selecting the xtLookup() path and all return
 * statements are not visible in this listing - confirm against the
 * complete source.
 */
255 static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
261 sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
264 if (lblock >= file_blocks)
266 if (lblock + *len > file_blocks)
267 *len = file_blocks - lblock;
270 rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
271 if ((rc == 0) && *len)
272 lblock = (sector_t)xaddr;
275 } /* else no mapping */
/*
 * Completion handler passed to dec_io() on the read path: marks the page
 * uptodate unless PageError is set.
 * NOTE(review): original line 284 is not visible in this listing.
 */
280 static void last_read_complete(struct page *page)
282 if (!PageError(page))
283 SetPageUptodate(page);
/*
 * bi_end_io callback for read bios built by metapage_readpage().  The
 * page is recovered from bio->bi_private.  A bio without BIO_UPTODATE is
 * logged as an I/O error, and dec_io() then runs last_read_complete()
 * once no I/O is outstanding on the page.
 * NOTE(review): the remaining parameters, the early checks and the rest
 * of the error branch are not visible in this listing.
 */
287 static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
290 struct page *page = bio->bi_private;
295 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
296 printk(KERN_ERR "metapage_read_end_io: I/O error\n");
300 dec_io(page, last_read_complete);
/*
 * Take `mp` off its log's synclist.  The list_del() is performed with the
 * log's LOGSYNC lock held (LOGSYNC_LOCK/LOGSYNC_UNLOCK with saved flags).
 * NOTE(review): the NULL-log check and the lsn re-check described by the
 * comment in the body are not visible in this listing - confirm against
 * the complete source.
 */
306 static void remove_from_logsync(struct metapage *mp)
308 struct jfs_log *log = mp->log;
311 * This can race. Recheck that log hasn't been set to null, and after
312 * acquiring logsync lock, recheck lsn
317 LOGSYNC_LOCK(log, flags);
323 list_del(&mp->synclist);
325 LOGSYNC_UNLOCK(log, flags);
/*
 * Completion handler passed to dec_io() on the write path.  Walks every
 * PSIZE-sized slot of the page; each metapage that has META_io set is
 * taken off the logsync list and has META_io cleared.  Finally writeback
 * on the page is ended with end_page_writeback().
 * NOTE(review): original line 336 (between the test and
 * remove_from_logsync()) is not visible in this listing.
 */
328 static void last_write_complete(struct page *page)
333 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
334 mp = page_to_mp(page, offset);
335 if (mp && test_bit(META_io, &mp->flag)) {
337 remove_from_logsync(mp);
338 clear_bit(META_io, &mp->flag);
341 * I'd like to call drop_metapage here, but I don't think it's
342 * safe unless I have the page locked
345 end_page_writeback(page);
/*
 * bi_end_io callback for write bios built by metapage_writepage().  The
 * page is recovered from bio->bi_private and must have private data
 * attached (BUG_ON otherwise).  A bio without BIO_UPTODATE is logged as
 * an I/O error, and dec_io() then runs last_write_complete() once no I/O
 * is outstanding on the page.
 * NOTE(review): the remaining parameters, the early checks and the rest
 * of the error branch are not visible in this listing.
 */
348 static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
351 struct page *page = bio->bi_private;
353 BUG_ON(!PagePrivate(page));
358 if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
359 printk(KERN_ERR "metapage_write_end_io: I/O error\n");
362 dec_io(page, last_write_complete);
/*
 * ->writepage for the metapage address space: write out the dirty
 * metapages of a page.  The page must be locked and must not already be
 * under writeback (both are asserted with BUG_ON).
 *
 * For each PSIZE-sized slot of the page:
 *   - a slot with no metapage, or whose metapage is not META_dirty, takes
 *     the branch at the first test in the loop;
 *   - a metapage with nohomeok set and META_forcewrite clear takes a
 *     separate branch; there, if it has a log whose cflag lacks
 *     logGC_PAGEOUT, the journal is pushed with jfs_flush_journal(log, 0);
 *   - otherwise META_dirty is cleared and the slot's logical block is
 *     computed from page_start plus the block offset within the page.
 *     If a bio is in progress and this block equals next_block (contiguous
 *     in memory and on disk) the bio's byte count is extended and META_io
 *     is set.  If not, the block is mapped with metapage_get_blocks(),
 *     META_io is set, and a new one-vec bio is allocated (GFP_NOFS) with
 *     metapage_write_end_io as its completion callback; bio_add_page() is
 *     deferred so later contiguous slots can be merged into the same vec.
 * A finished bio is attached to the page with bio_add_page() and issued
 * with submit_bio(WRITE, ...).  The page can be redirtied with
 * redirty_page_for_writepage(), and the tail of the function reports an
 * unexpected bio_add_page() failure and calls dec_io().
 *
 * NOTE(review): many lines are absent from this listing (see the gaps in
 * the embedded numbering), including branch targets, labels, inc_io()
 * calls and return statements - the exact control flow must be confirmed
 * against the complete source.
 */
367 static int metapage_writepage(struct page *page, struct writeback_control *wbc)
369 struct bio *bio = NULL;
370 unsigned int block_offset; /* block offset of mp within page */
371 struct inode *inode = page->mapping->host;
372 unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
379 sector_t next_block = 0;
381 unsigned long bio_bytes = 0;
382 unsigned long bio_offset = 0;
385 page_start = (sector_t)page->index <<
386 (PAGE_CACHE_SHIFT - inode->i_blkbits);
387 BUG_ON(!PageLocked(page));
388 BUG_ON(PageWriteback(page));
390 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
391 mp = page_to_mp(page, offset);
393 if (!mp || !test_bit(META_dirty, &mp->flag))
396 if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
399 * Make sure this page isn't blocked indefinitely.
400 * If the journal isn't undergoing I/O, push it
402 if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
403 jfs_flush_journal(mp->log, 0);
407 clear_bit(META_dirty, &mp->flag);
408 block_offset = offset >> inode->i_blkbits;
409 lblock = page_start + block_offset;
411 if (xlen && lblock == next_block) {
412 /* Contiguous, in memory & on disk */
413 len = min(xlen, blocks_per_mp);
415 bio_bytes += len << inode->i_blkbits;
416 set_bit(META_io, &mp->flag);
420 if (bio_add_page(bio, page, bio_bytes, bio_offset) <
424 * Increment counter before submitting i/o to keep
425 * count from hitting zero before we're through
430 submit_bio(WRITE, bio);
433 set_page_writeback(page);
436 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
437 pblock = metapage_get_blocks(inode, lblock, &xlen);
439 /* Need better error handling */
440 printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
441 dec_io(page, last_write_complete);
444 set_bit(META_io, &mp->flag);
445 len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
447 bio = bio_alloc(GFP_NOFS, 1);
448 bio->bi_bdev = inode->i_sb->s_bdev;
449 bio->bi_sector = pblock << (inode->i_blkbits - 9);
450 bio->bi_end_io = metapage_write_end_io;
451 bio->bi_private = page;
453 /* Don't call bio_add_page yet, we may add to this vec */
455 bio_bytes = len << inode->i_blkbits;
458 next_block = lblock + len;
461 if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
466 submit_bio(WRITE, bio);
469 redirty_page_for_writepage(wbc, page);
475 /* We should never reach here, since we're only adding one vec */
476 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
479 dump_mem("bio", bio, sizeof(*bio));
483 dec_io(page, last_write_complete);
/*
 * ->readpage for the metapage address space.  The page must be locked
 * (BUG_ON otherwise).
 *
 * The page is walked in file-system blocks.  For each step the remaining
 * blocks of the page are offered to metapage_get_blocks(), which maps the
 * block at page_start + block_offset and updates xlen.  For a mapped
 * extent, private data is attached with insert_metapage(page, NULL) if the
 * page has none yet, any bio already built is submitted, and a new one-vec
 * READ bio (GFP_NOFS, completion metapage_read_end_io) is set up covering
 * xlen blocks at the matching byte offset of the page.  block_offset then
 * advances by xlen.  The last bio is submitted after the loop, and the
 * tail of the function reports an unexpected bio_add_page() failure and
 * calls dec_io().
 *
 * NOTE(review): the return value of insert_metapage() is not examined on
 * the visible lines.  Several lines (conditions, else-branches, inc_io()
 * calls, labels, returns) are missing from this listing - confirm the
 * control flow against the complete source.
 */
488 static int metapage_readpage(struct file *fp, struct page *page)
490 struct inode *inode = page->mapping->host;
491 struct bio *bio = NULL;
492 unsigned int block_offset;
493 unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
494 sector_t page_start; /* address of page in fs blocks */
500 BUG_ON(!PageLocked(page));
501 page_start = (sector_t)page->index <<
502 (PAGE_CACHE_SHIFT - inode->i_blkbits);
505 while (block_offset < blocks_per_page) {
506 xlen = blocks_per_page - block_offset;
507 pblock = metapage_get_blocks(inode, page_start + block_offset,
510 if (!PagePrivate(page))
511 insert_metapage(page, NULL);
514 submit_bio(READ, bio);
516 bio = bio_alloc(GFP_NOFS, 1);
517 bio->bi_bdev = inode->i_sb->s_bdev;
518 bio->bi_sector = pblock << (inode->i_blkbits - 9);
519 bio->bi_end_io = metapage_read_end_io;
520 bio->bi_private = page;
521 len = xlen << inode->i_blkbits;
522 offset = block_offset << inode->i_blkbits;
523 if (bio_add_page(bio, page, len, offset) < len)
525 block_offset += xlen;
530 submit_bio(READ, bio);
537 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
539 dec_io(page, last_read_complete);
/*
 * ->releasepage for the metapage address space: walk every PSIZE-sized
 * slot of the page and detach the metapages that can be let go.
 *
 * A metapage with a non-zero count or nohomeok set is only logged via
 * jfs_info().  For the others the function waits for page writeback to
 * finish, dumps the metapage and page if META_dirty is unexpectedly set,
 * removes the metapage from the logsync list and from the page, and bumps
 * mpStat.pagefree.
 *
 * NOTE(review): the NULL-slot check, the handling after the "busy" log
 * message, the call that frees the metapage and the return value are not
 * visible in this listing - confirm against the complete source.
 */
543 static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
549 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
550 mp = page_to_mp(page, offset);
555 jfs_info("metapage_releasepage: mp = 0x%p", mp);
556 if (mp->count || mp->nohomeok) {
557 jfs_info("count = %ld, nohomeok = %d", mp->count,
562 wait_on_page_writeback(page);
563 //WARN_ON(test_bit(META_dirty, &mp->flag));
564 if (test_bit(META_dirty, &mp->flag)) {
565 dump_mem("dirty mp in metapage_releasepage", mp,
566 sizeof(struct metapage));
567 dump_mem("page", page, sizeof(struct page));
571 remove_from_logsync(mp);
572 remove_metapage(page, mp);
573 INCREMENT(mpStat.pagefree);
/*
 * ->invalidatepage for the metapage address space.  Pages under writeback
 * take a separate branch; otherwise the work is delegated to
 * metapage_releasepage() with a gfp mask of 0.
 * NOTE(review): original lines 583-585 and the statement controlled by the
 * PageWriteback() test are not visible in this listing.
 */
582 static int metapage_invalidatepage(struct page *page, unsigned long offset)
586 if (PageWriteback(page))
589 return metapage_releasepage(page, 0);
/*
 * Address-space operations for mappings that hold JFS metapages.
 * readpage, writepage, releasepage and invalidatepage are the metapage_*
 * functions defined in this file; sync_page and set_page_dirty use the
 * generic block_sync_page and __set_page_dirty_nobuffers helpers.
 */
592 struct address_space_operations jfs_metapage_aops = {
593 .readpage = metapage_readpage,
594 .writepage = metapage_writepage,
595 .sync_page = block_sync_page,
596 .releasepage = metapage_releasepage,
597 .invalidatepage = metapage_invalidatepage,
598 .set_page_dirty = __set_page_dirty_nobuffers,
/*
 * Find or create the metapage describing `size` bytes at logical block
 * `lblock` of `inode`, and return it.
 *
 * Steps visible in this listing:
 *   - page_index and page_offset are derived from lblock and i_blkbits;
 *     a request with page_offset + size beyond PAGE_CACHE_SIZE is reported
 *     with jfs_err() ("MetaData crosses page boundary!!").
 *   - The mapping used is either that of the superblock's direct_inode or
 *     the inode's own i_mapping.  On the latter path, a block whose byte
 *     address is at or beyond i_size is rejected (see the NFS note in the
 *     body).
 *   - When `new` is set and a metapage fills a whole page
 *     (PSIZE == PAGE_CACHE_SIZE) the page is taken with grab_cache_page()
 *     and marked uptodate without reading; otherwise it is read with
 *     read_cache_page() through the mapping's readpage, and an error
 *     pointer or a non-uptodate page is reported as failure.
 *   - If the page already carries a metapage at page_offset, its
 *     logical_size must equal `size`, and the META_discard flag is checked:
 *     one path reports "using a discarded metapage" and calls
 *     discard_metapage(), the other clears the flag.
 *   - Otherwise a metapage is allocated with GFP_NOFS (counted in
 *     mpStat.pagealloc), given xflag COMMIT_PAGE, logical_size and a data
 *     pointer of page_address(page) + page_offset, and attached with
 *     insert_metapage(); a non-zero result from that call is treated as
 *     failure.
 *   - PSIZE bytes at mp->data may be zeroed before returning.
 *
 * NOTE(review): the tail of the parameter list, the conditions selecting
 * the mapping and the zeroing, the error-unwind code and the return
 * statements are not visible in this listing - confirm against the
 * complete source.
 */
601 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
602 unsigned int size, int absolute,
607 struct address_space *mapping;
608 struct metapage *mp = NULL;
610 unsigned long page_index;
611 unsigned long page_offset;
613 jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
614 inode->i_ino, lblock, absolute);
616 l2bsize = inode->i_blkbits;
617 l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
618 page_index = lblock >> l2BlocksPerPage;
619 page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
620 if ((page_offset + size) > PAGE_CACHE_SIZE) {
621 jfs_err("MetaData crosses page boundary!!");
622 jfs_err("lblock = %lx, size = %d", lblock, size);
627 mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
630 * If an nfs client tries to read an inode that is larger
631 * than any existing inodes, we may try to read past the
632 * end of the inode map
634 if ((lblock << inode->i_blkbits) >= inode->i_size)
636 mapping = inode->i_mapping;
639 if (new && (PSIZE == PAGE_CACHE_SIZE)) {
640 page = grab_cache_page(mapping, page_index);
642 jfs_err("grab_cache_page failed!");
645 SetPageUptodate(page);
647 page = read_cache_page(mapping, page_index,
648 (filler_t *)mapping->a_ops->readpage, NULL);
649 if (IS_ERR(page) || !PageUptodate(page)) {
650 jfs_err("read_cache_page failed!");
656 mp = page_to_mp(page, page_offset);
658 if (mp->logical_size != size) {
659 jfs_error(inode->i_sb,
660 "__get_metapage: mp->logical_size != size");
661 jfs_err("logical_size = %d, size = %d",
662 mp->logical_size, size);
668 if (test_bit(META_discard, &mp->flag)) {
670 jfs_error(inode->i_sb,
671 "__get_metapage: using a "
672 "discarded metapage");
673 discard_metapage(mp);
676 clear_bit(META_discard, &mp->flag);
679 INCREMENT(mpStat.pagealloc);
680 mp = alloc_metapage(GFP_NOFS);
683 mp->xflag = COMMIT_PAGE;
686 mp->logical_size = size;
687 mp->data = page_address(page) + page_offset;
689 if (unlikely(insert_metapage(page, mp))) {
697 jfs_info("zeroing mp = 0x%p", mp);
698 memset(mp->data, 0, PSIZE);
702 jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
/*
 * Take an additional hold on a metapage: logs the call, takes a reference
 * on the backing page with page_cache_get(), and finishes by unlocking the
 * page.
 * NOTE(review): original lines 714-716 are not visible in this listing
 * (presumably they lock the page and metapage and bump the use count) -
 * confirm against the complete source.
 */
710 void grab_metapage(struct metapage * mp)
712 jfs_info("grab_metapage: mp = 0x%p", mp);
713 page_cache_get(mp->page);
717 unlock_page(mp->page);
/*
 * Write a metapage's page out now.
 *
 * META_forcewrite is set for the duration of the write so that
 * metapage_writepage() does not take its nohomeok branch for this
 * metapage, and META_sync is cleared.  A page reference is held across
 * set_page_dirty() and write_one_page(page, 1); afterwards
 * META_forcewrite is cleared and the reference is dropped.
 * NOTE(review): original line 727 (between page_cache_get() and
 * set_page_dirty()) is not visible in this listing.
 */
720 void force_metapage(struct metapage *mp)
722 struct page *page = mp->page;
723 jfs_info("force_metapage: mp = 0x%p", mp);
724 set_bit(META_forcewrite, &mp->flag);
725 clear_bit(META_sync, &mp->flag);
726 page_cache_get(page);
728 set_page_dirty(page);
729 write_one_page(page, 1);
730 clear_bit(META_forcewrite, &mp->flag);
731 page_cache_release(page);
/*
 * NOTE(review): only the signature of hold_metapage() survives in this
 * listing (original lines 735-738 are missing), so its behaviour cannot be
 * documented from here - see the complete source.
 */
734 void hold_metapage(struct metapage *mp)
/*
 * Counterpart used when the caller is done with a held metapage.
 *
 * If the metapage is still referenced (count) or has nohomeok set, someone
 * else will release it and only the page is unlocked.  Otherwise a page
 * reference is taken, the page is unlocked, and release_metapage() does
 * the actual release.
 * NOTE(review): original lines 744-745 and 747-748 are not visible in this
 * listing (presumably an early return and a count adjustment) - confirm
 * against the complete source.
 */
739 void put_metapage(struct metapage *mp)
741 if (mp->count || mp->nohomeok) {
742 /* Someone else will release this */
743 unlock_page(mp->page);
746 page_cache_get(mp->page);
749 unlock_page(mp->page);
750 release_metapage(mp);
/*
 * Drop one reference on `mp`.
 *
 * The use count is decremented first.  If the metapage is still referenced
 * afterwards, or nohomeok is set, only the page reference is released.
 * Otherwise:
 *   - a META_dirty metapage marks its page dirty; if META_sync is also
 *     set, that flag is cleared, the page is written with
 *     write_one_page(page, 1), and the page is locked again because
 *     write_one_page() unlocks it;
 *   - a clean metapage that still has an lsn is taken off the logsync
 *     list;
 *   - when MPS_PER_PAGE == 1 and the metapage is flagged META_discard with
 *     no references, the page's dirty and uptodate state are cleared;
 *   - drop_metapage() then detaches the metapage if nothing else needs it,
 *     and the page reference is released.
 *
 * NOTE(review): original lines 757-763, the early-return path after the
 * first page_cache_release() and the statements around the #if block are
 * only partly visible in this listing - confirm against the complete
 * source.
 */
753 void release_metapage(struct metapage * mp)
755 struct page *page = mp->page;
756 jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
764 if (--mp->count || mp->nohomeok) {
766 page_cache_release(page);
770 if (test_bit(META_dirty, &mp->flag)) {
771 set_page_dirty(page);
772 if (test_bit(META_sync, &mp->flag)) {
773 clear_bit(META_sync, &mp->flag);
774 write_one_page(page, 1);
775 lock_page(page); /* write_one_page unlocks the page */
777 } else if (mp->lsn) /* discard_metapage doesn't remove it */
778 remove_from_logsync(mp);
780 #if MPS_PER_PAGE == 1
782 * If we know this is the only thing in the page, we can throw
783 * the page out of the page cache. If pages are larger, we
784 * don't want to do this.
787 /* Retest mp->count since we may have released page lock */
788 if (test_bit(META_discard, &mp->flag) && !mp->count) {
789 clear_page_dirty(page);
790 ClearPageUptodate(page);
793 /* Try to keep metapages from using up too much memory */
794 drop_metapage(page, mp);
797 page_cache_release(page);
/*
 * Mark the metapages covering blocks [addr, addr + len) as discarded so
 * that they are not written back.
 *
 * The range is walked one page-cache page at a time, starting at `addr`
 * rounded down to a page boundary in blocks, using the mapping of the
 * superblock's direct_inode.  Each page is looked up and locked with
 * find_lock_page(); within it every PSIZE-sized slot is examined, and
 * metapages whose index lies inside the range have META_dirty cleared,
 * META_discard set, and are taken off the logsync list.  The page
 * reference is then dropped.
 *
 * NOTE(review): the handling of a missing page or empty slot, the
 * statements controlled by the two range checks, and the page unlock are
 * not visible in this listing - confirm against the complete source.
 */
800 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
803 int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
804 int BlocksPerPage = 1 << l2BlocksPerPage;
805 /* All callers are interested in block device's mapping */
806 struct address_space *mapping =
807 JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
813 * Mark metapages to discard. They will eventually be
814 * released, but should not be written.
816 for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
817 lblock += BlocksPerPage) {
818 page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
821 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
822 mp = page_to_mp(page, offset);
825 if (mp->index < addr)
827 if (mp->index >= addr + len)
830 clear_bit(META_dirty, &mp->flag);
831 set_bit(META_discard, &mp->flag);
833 remove_from_logsync(mp);
836 page_cache_release(page);
840 #ifdef CONFIG_JFS_STATISTICS
841 int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
842 int *eof, void *data)
847 len += sprintf(buffer,
848 "JFS Metapage statistics\n"
849 "=======================\n"
850 "page allocations = %d\n"
858 *start = buffer + begin;