[GFS2] Journaled file write/unstuff bug
fs/gfs2/ops_address.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "ops_file.h"
#include "super.h"
#include "util.h"
#include "glops.h"

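/**
 * gfs2_page_add_databufs - Add a page's buffers to the current transaction
 * @ip: The inode
 * @page: The (locked) page, which must already have buffer heads
 * @from: Start of the byte range within the page
 * @to: End of the byte range within the page
 *
 * Every buffer head overlapping the range [@from, @to) is added to the
 * current transaction as a data buffer. For journaled data the buffer is
 * marked uptodate first, since its contents are about to be logged.
 */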
static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
				   unsigned int from, unsigned int to)
{
	struct buffer_head *head = page_buffers(page);
	unsigned int bsize = head->b_size;
	struct buffer_head *bh;
	unsigned int start, end;

	for (bh = head, start = 0; bh != head || !start;
	     bh = bh->b_this_page, start = end) {
		end = start + bsize;
		if (end <= from || start >= to)
			continue;
		if (gfs2_is_jdata(ip))
			set_buffer_uptodate(bh);
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
	}
}

/**
 * gfs2_get_block - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

int gfs2_get_block(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, create, bh_result);
}

/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Returns: errno
 */

static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
				  struct buffer_head *bh_result, int create)
{
	int error;

	error = gfs2_block_map(inode, lblock, 0, bh_result);
	if (error)
		return error;
	if (bh_result->b_blocknr == 0)
		return -EIO;
	return 0;
}

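/*
 * Like gfs2_get_block_noalloc(), but for O_DIRECT: no allocation is done
 * (create is ignored) and a hole is reported by leaving the buffer
 * unmapped rather than by returning an error.
 */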
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	return gfs2_block_map(inode, lblock, 0, bh_result);
}

/**
 * gfs2_writepage - Write complete page
 * @page: Page to write
 *
 * Returns: errno
 *
 * Some of this is copied from block_write_full_page() although we still
 * call it to do most of the work.
 */

static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t i_size = i_size_read(inode);
	pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
	unsigned offset;
	int error;
	int done_trans = 0;

	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
		unlock_page(page);
		return -EIO;
	}
	if (current->journal_info)
		goto out_ignore;

	/* Is the page fully outside i_size? (truncate in progress) */
	offset = i_size & (PAGE_CACHE_SIZE-1);
	if (page->index > end_index || (page->index == end_index && !offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);
		unlock_page(page);
		return 0; /* don't care */
	}

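	/*
	 * Ordered and journaled-data pages are flagged Checked by
	 * gfs2_set_page_dirty(). Such a page must have its buffers added
	 * to the current transaction before writeback, so open a
	 * transaction here to cover the block_write_full_page() call
	 * below.
	 */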
	if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
	    PageChecked(page)) {
		ClearPageChecked(page);
		error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
		if (error)
			goto out_ignore;
		if (!page_has_buffers(page)) {
			create_empty_buffers(page, inode->i_sb->s_blocksize,
					     (1 << BH_Dirty)|(1 << BH_Uptodate));
		}
		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
		done_trans = 1;
	}
	error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
	if (done_trans)
		gfs2_trans_end(sdp);
	gfs2_meta_cache_flush(ip);
	return error;

out_ignore:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

/**
 * gfs2_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * For journaled files and/or ordered writes, this just falls back to the
 * kernel's default writepages path for now. We will probably want to change
 * that eventually (i.e. when we look at allocate on flush).
 *
 * For the data=writeback case, though, we can already ignore buffer heads
 * and write whole extents at once. This is a big reduction in the
 * number of I/O requests we send and the bmap calls we make in this case.
 */
static int gfs2_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
		return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);

	return generic_writepages(mapping, wbc);
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *dibh;
	void *kaddr;
	int error;

	/*
	 * Due to the order of unstuffing files and ->nopage(), we can be
	 * asked for a zero page in the case of a stuffed file being extended,
	 * so we need to supply one here. It doesn't happen often.
	 */
	if (unlikely(page->index)) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr, 0, PAGE_CACHE_SIZE);
		kunmap_atomic(kaddr, KM_USER0);
		flush_dcache_page(page);
		SetPageUptodate(page);
		return 0;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	kaddr = kmap_atomic(page, KM_USER0);
	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
	       ip->i_di.di_size);
	memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
	kunmap_atomic(kaddr, KM_USER0);
	flush_dcache_page(page);
	brelse(dibh);
	SetPageUptodate(page);

	return 0;
}

/**
 * gfs2_readpage - readpage with locking
 * @file: The file to read a page for. N.B. This may be NULL if we are
 * reading an internal file.
 * @page: The page to read
 *
 * Returns: errno
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	struct gfs2_file *gf = NULL;
	struct gfs2_holder gh;
	int error;
	int do_unlock = 0;

	if (likely(file != &gfs2_internal_file_sentinel)) {
		if (file) {
			gf = file->private_data;
			if (test_bit(GFF_EXLOCK, &gf->f_flags))
				/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
				goto skip_lock;
		}
		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
				 GL_ATIME|LM_FLAG_TRY_1CB, &gh);
		do_unlock = 1;
		error = gfs2_glock_nq_atime(&gh);
		if (unlikely(error))
			goto out_unlock;
	}

skip_lock:
	if (gfs2_is_stuffed(ip)) {
		error = stuffed_readpage(ip, page);
		unlock_page(page);
	} else
		error = mpage_readpage(page, gfs2_get_block);

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		error = -EIO;

	if (do_unlock) {
		gfs2_glock_dq_m(1, &gh);
		gfs2_holder_uninit(&gh);
	}
out:
	return error;
out_unlock:
	unlock_page(page);
	if (error == GLR_TRYFAILED) {
		error = AOP_TRUNCATED_PAGE;
		yield();
	}
	if (do_unlock)
		gfs2_holder_uninit(&gh);
	goto out;
}

/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: The file to read from (may be NULL for an internal file)
 * @mapping: The address space being read from
 * @pages: The list of pages to read
 * @nr_pages: The number of pages
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore anything which
 *    is slightly inconvenient (such as locking conflicts between the
 *    page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
 * 5. We use LM_FLAG_TRY_1CB here, so we effectively have lock-ahead as
 *    well as read-ahead.
 */
static int gfs2_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_holder gh;
	int ret = 0;
	int do_unlock = 0;

	if (likely(file != &gfs2_internal_file_sentinel)) {
		if (file) {
			struct gfs2_file *gf = file->private_data;
			if (test_bit(GFF_EXLOCK, &gf->f_flags))
				goto skip_lock;
		}
		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
		do_unlock = 1;
		ret = gfs2_glock_nq_atime(&gh);
		if (ret == GLR_TRYFAILED)
			goto out_noerror;
		if (unlikely(ret))
			goto out_unlock;
	}
skip_lock:
	if (!gfs2_is_stuffed(ip))
		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);

	if (do_unlock) {
		gfs2_glock_dq_m(1, &gh);
		gfs2_holder_uninit(&gh);
	}
out:
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		ret = -EIO;
	return ret;
out_noerror:
	ret = 0;
out_unlock:
	if (do_unlock)
		gfs2_holder_uninit(&gh);
	goto out;
}

/**
 * gfs2_prepare_write - Prepare to write a page to a file
 * @file: The file to write to
 * @page: The page which is to be prepared for writing
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Returns: errno
 */

static int gfs2_prepare_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	unsigned int data_blocks, ind_blocks, rblocks;
	int alloc_required;
	int error = 0;
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
	loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
	struct gfs2_alloc *al;
	unsigned int write_len = to - from;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB,
			 &ip->i_gh);
	error = gfs2_glock_nq_atime(&ip->i_gh);
	if (unlikely(error)) {
		if (error == GLR_TRYFAILED) {
			unlock_page(page);
			error = AOP_TRUNCATED_PAGE;
			yield();
		}
		goto out_uninit;
	}

	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);

	error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required);
	if (error)
		goto out_unlock;

	ip->i_alloc.al_requested = 0;
	if (alloc_required) {
		al = gfs2_alloc_get(ip);

		error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc_put;

		error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
		if (error)
			goto out_qunlock;

		al->al_requested = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;

	error = gfs2_trans_begin(sdp, rblocks, 0);
	if (error)
		goto out_trans_fail;

	if (gfs2_is_stuffed(ip)) {
		if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
			error = gfs2_unstuff_dinode(ip, page);
			if (error == 0)
				goto prepare_write;
		} else if (!PageUptodate(page))
			error = stuffed_readpage(ip, page);
		goto out;
	}

prepare_write:
	error = block_prepare_write(page, from, to, gfs2_get_block);

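	/*
	 * Error unwind: the labels below sit inside the if (error) branch
	 * so that a failure at any stage above falls through the remaining
	 * cleanup steps in reverse order of acquisition. On success we
	 * deliberately return with the transaction open and the glock
	 * held; both are released in gfs2_commit_write().
	 */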
out:
	if (error) {
		gfs2_trans_end(sdp);
out_trans_fail:
		if (alloc_required) {
			gfs2_inplace_release(ip);
out_qunlock:
			gfs2_quota_unlock(ip);
out_alloc_put:
			gfs2_alloc_put(ip);
		}
out_unlock:
		gfs2_glock_dq_m(1, &ip->i_gh);
out_uninit:
		gfs2_holder_uninit(&ip->i_gh);
	}

	return error;
}

/**
 * adjust_fs_space - Adjusts the free space available due to gfs2_grow
 * @inode: the rindex inode
 */
static void adjust_fs_space(struct inode *inode)
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	u64 fs_total, new_free;

	/* Total up the file system space, according to the latest rindex. */
	fs_total = gfs2_ri_total(sdp);

	spin_lock(&sdp->sd_statfs_spin);
	if (fs_total > (m_sc->sc_total + l_sc->sc_total))
		new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
	else
		new_free = 0;
	spin_unlock(&sdp->sd_statfs_spin);
	fs_warn(sdp, "File system extended by %llu blocks.\n",
		(unsigned long long)new_free);
	gfs2_statfs_change(sdp, new_free, new_free, 0);
}

/**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
 * @page: The page containing the data
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Returns: errno
 */

static int gfs2_commit_write(struct file *file, struct page *page,
			     unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error = -EOPNOTSUPP;
	struct buffer_head *dibh;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_dinode *di;

	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
		goto fail_nounlock;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto fail_endtrans;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	di = (struct gfs2_dinode *)dibh->b_data;

	if (gfs2_is_stuffed(ip)) {
		u64 file_size;
		void *kaddr;

		file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to;

		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
		       kaddr + from, to - from);
		kunmap_atomic(kaddr, KM_USER0);

		SetPageUptodate(page);

		if (inode->i_size < file_size) {
			i_size_write(inode, file_size);
			mark_inode_dirty(inode);
		}
	} else {
		if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
		    gfs2_is_jdata(ip))
			gfs2_page_add_databufs(ip, page, from, to);
		error = generic_commit_write(file, page, from, to);
		if (error)
			goto fail;
	}

	if (ip->i_di.di_size < inode->i_size) {
		ip->i_di.di_size = inode->i_size;
		di->di_size = cpu_to_be64(inode->i_size);
	}

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	brelse(dibh);
	gfs2_trans_end(sdp);
	if (al->al_requested) {
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
		gfs2_alloc_put(ip);
	}
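	/*
	 * The page lock is dropped before the glock (presumably to
	 * preserve a glock-before-page-lock ordering), but the caller
	 * expects the page locked on return, so re-take it once the
	 * glock has been dequeued.
	 */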
	unlock_page(page);
	gfs2_glock_dq_m(1, &ip->i_gh);
	lock_page(page);
	gfs2_holder_uninit(&ip->i_gh);
	return 0;

fail:
	brelse(dibh);
fail_endtrans:
	gfs2_trans_end(sdp);
	if (al->al_requested) {
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
		gfs2_alloc_put(ip);
	}
	unlock_page(page);
	gfs2_glock_dq_m(1, &ip->i_gh);
	lock_page(page);
	gfs2_holder_uninit(&ip->i_gh);
fail_nounlock:
	ClearPageUptodate(page);
	return error;
}

/**
 * gfs2_set_page_dirty - Page dirtying function
 * @page: The page to dirty
 *
 * Returns: 1 if it dirtied the page, or 0 otherwise
 */

static int gfs2_set_page_dirty(struct page *page)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);

	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
		SetPageChecked(page);
	return __set_page_dirty_buffers(page);
}

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder i_gh;
	sector_t dblock = 0;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (error)
		return 0;

	if (!gfs2_is_stuffed(ip))
		dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);

	gfs2_glock_dq_uninit(&i_gh);

	return dblock;
}

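/*
 * Detach a buffer from its gfs2_bufdata, freeing the bufdata if it is no
 * longer on the AIL or in the log, and clear the buffer's state so that
 * the page it belongs to can be invalidated.
 */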
static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	gfs2_log_lock(sdp);
	bd = bh->b_private;
	if (bd) {
		bd->bd_bh = NULL;
		bh->b_private = NULL;
		if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
			kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
	gfs2_log_unlock(sdp);

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	unlock_buffer(bh);
}

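/**
 * gfs2_invalidatepage - Invalidate part or all of a page
 * @page: The page to invalidate
 * @offset: Offset within the page at which the invalidation starts
 *
 * Discards every buffer which starts at or after @offset. When the whole
 * page is being invalidated (@offset == 0) the Checked flag is cleared
 * and an attempt is made to release the page's remaining buffers.
 */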
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;

	BUG_ON(!PageLocked(page));
	if (offset == 0)
		ClearPageChecked(page);
	if (!page_has_buffers(page))
		return;

	bh = head = page_buffers(page);
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		if (offset <= curr_off)
			discard_buffer(sdp, bh);

		curr_off = next_off;
		bh = next;
	} while (bh != head);

	if (!offset)
		try_to_release_page(page, 0);
}

/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @rw: READ or WRITE
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */
static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
	/*
	 * Should we return an error here? I can't see that O_DIRECT for
	 * a journaled file makes any sense. For now we'll silently fall
	 * back to buffered I/O, likewise we do the same for stuffed
	 * files since they are (a) small and (b) unaligned.
	 */
	if (gfs2_is_jdata(ip))
		return 0;

	if (gfs2_is_stuffed(ip))
		return 0;

	if (offset > i_size_read(&ip->i_inode))
		return 0;
	return 1;
}

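/**
 * gfs2_direct_IO - Perform direct I/O on a file
 * @rw: READ or WRITE
 * @iocb: The I/O control block
 * @iov: The I/O vector(s)
 * @offset: The file offset
 * @nr_segs: The number of entries in @iov
 *
 * Takes the glock in the deferred state (see the comment below) and, if
 * gfs2_ok_for_dio() allows it, hands the request off to
 * blockdev_direct_IO_no_locking(). A return of 0 without an error means
 * the caller falls back to buffered I/O.
 *
 * Returns: the number of bytes transferred, or an errno
 */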
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	/*
	 * Deferred lock, even if it's a write, since we do no allocation
	 * on this path. All we need to change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like
	 * the VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
	rv = gfs2_glock_nq_atime(&gh);
	if (rv)
		goto out_uninit;
	rv = gfs2_ok_for_dio(ip, rw, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
					   iov, offset, nr_segs,
					   gfs2_get_block_direct, NULL);
out:
	gfs2_glock_dq_m(1, &gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return rv;
}

/**
 * stuck_releasepage - We're stuck in gfs2_releasepage().  Print stuff out.
 * @bh: the buffer we're stuck on
 *
 */

static void stuck_releasepage(struct buffer_head *bh)
{
	struct inode *inode = bh->b_page->mapping->host;
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_bufdata *bd = bh->b_private;
	struct gfs2_glock *gl;
	static unsigned limit = 0;

	if (limit > 3)
		return;
	limit++;

	fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
	fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
		(unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
	fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
	fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");

	if (!bd)
		return;

	gl = bd->bd_gl;

	fs_warn(sdp, "gl = (%u, %llu)\n",
		gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);

	fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
		(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
		(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");

	if (gl->gl_ops == &gfs2_inode_glops) {
		struct gfs2_inode *ip = gl->gl_object;
		unsigned int x;

		if (!ip)
			return;

		fs_warn(sdp, "ip = %llu %llu\n",
			(unsigned long long)ip->i_no_formal_ino,
			(unsigned long long)ip->i_no_addr);

		for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
			fs_warn(sdp, "ip->i_cache[%u] = %s\n",
				x, (ip->i_cache[x]) ? "!NULL" : "NULL");
	}
}

/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 1 if the buffers were released, 0 otherwise
 */

int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct inode *aspace = page->mapping->host;
	struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;
	unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;

	if (!page_has_buffers(page))
		goto out;

	head = bh = page_buffers(page);
	do {
		while (atomic_read(&bh->b_count)) {
			if (!atomic_read(&aspace->i_writecount))
				return 0;

			if (!(gfp_mask & __GFP_WAIT))
				return 0;

			if (time_after_eq(jiffies, t)) {
				stuck_releasepage(bh);
				/* should we withdraw here? */
				return 0;
			}

			yield();
		}

		gfs2_assert_warn(sdp, !buffer_pinned(bh));
		gfs2_assert_warn(sdp, !buffer_dirty(bh));

		gfs2_log_lock(sdp);
		bd = bh->b_private;
		if (bd) {
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
			gfs2_assert_warn(sdp, !bd->bd_ail);
			bd->bd_bh = NULL;
			if (!list_empty(&bd->bd_le.le_list))
				bd = NULL;
			bh->b_private = NULL;
		}
		gfs2_log_unlock(sdp);
		if (bd)
			kmem_cache_free(gfs2_bufdata_cachep, bd);

		bh = bh->b_this_page;
	} while (bh != head);

out:
	return try_to_free_buffers(page);
}

const struct address_space_operations gfs2_file_aops = {
	.writepage = gfs2_writepage,
	.writepages = gfs2_writepages,
	.readpage = gfs2_readpage,
	.readpages = gfs2_readpages,
	.sync_page = block_sync_page,
	.prepare_write = gfs2_prepare_write,
	.commit_write = gfs2_commit_write,
	.set_page_dirty = gfs2_set_page_dirty,
	.bmap = gfs2_bmap,
	.invalidatepage = gfs2_invalidatepage,
	.releasepage = gfs2_releasepage,
	.direct_IO = gfs2_direct_IO,
};