NFS: Cleanup: add common helper nfs_page_length()
[linux-2.6] / fs / nfs / write.c
1 /*
2  * linux/fs/nfs/write.c
3  *
4  * Writing file data over NFS.
5  *
6  * We do it like this: When a (user) process wishes to write data to an
7  * NFS file, a write request is allocated that contains the RPC task data
8  * plus some info on the page to be written, and added to the inode's
9  * write chain. If the process writes past the end of the page, an async
10  * RPC call to write the page is scheduled immediately; otherwise, the call
11  * is delayed for a few seconds.
12  *
13  * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
14  *
15  * Write requests are kept on the inode's writeback list. Each entry in
16  * that list references the page (portion) to be written. When the
17  * cache timeout has expired, the RPC task is woken up, and tries to
18  * lock the page. As soon as it manages to do so, the request is moved
19  * from the writeback list to the writelock list.
20  *
21  * Note: we must make sure never to confuse the inode passed in the
22  * write_page request with the one in page->inode. As far as I understand
23  * it, these are different when doing a swap-out.
24  *
25  * To understand everything that goes on here and in the NFS read code,
26  * one should be aware that a page is locked in exactly one of the following
27  * cases:
28  *
29  *  -   A write request is in progress.
30  *  -   A user process is in generic_file_write/nfs_update_page
31  *  -   A user process is in generic_file_read
32  *
33  * Also note that because of the way pages are invalidated in
34  * nfs_revalidate_inode, the following assertions hold:
35  *
36  *  -   If a page is dirty, there will be no read requests (a page will
37  *      not be re-read unless invalidated by nfs_revalidate_inode).
38  *  -   If the page is not uptodate, there will be no pending write
39  *      requests, and no process will be in nfs_update_page.
40  *
41  * FIXME: Interaction with the vmscan routines is not optimal yet.
42  * Either vmscan must be made nfs-savvy, or we need a different page
43  * reclaim concept that supports something like FS-independent
44  * buffer_heads with a b_ops-> field.
45  *
46  * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
47  */
48
49 #include <linux/types.h>
50 #include <linux/slab.h>
51 #include <linux/mm.h>
52 #include <linux/pagemap.h>
53 #include <linux/file.h>
54 #include <linux/writeback.h>
55
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_page.h>
60 #include <linux/backing-dev.h>
61
62 #include <asm/uaccess.h>
63 #include <linux/smp_lock.h>
64
65 #include "delegation.h"
66 #include "internal.h"
67 #include "iostat.h"
68
69 #define NFSDBG_FACILITY         NFSDBG_PAGECACHE
70
71 #define MIN_POOL_WRITE          (32)
72 #define MIN_POOL_COMMIT         (4)
73
74 /*
75  * Local function declarations
76  */
77 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
78                                             struct inode *,
79                                             struct page *,
80                                             unsigned int, unsigned int);
81 static int nfs_wait_on_write_congestion(struct address_space *, int);
82 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
83 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
84 static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how);
85 static const struct rpc_call_ops nfs_write_partial_ops;
86 static const struct rpc_call_ops nfs_write_full_ops;
87 static const struct rpc_call_ops nfs_commit_ops;
88
89 static kmem_cache_t *nfs_wdata_cachep;
90 static mempool_t *nfs_wdata_mempool;
91 static mempool_t *nfs_commit_mempool;
92
93 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
94
95 struct nfs_write_data *nfs_commit_alloc(void)
96 {
97         struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
98
99         if (p) {
100                 memset(p, 0, sizeof(*p));
101                 INIT_LIST_HEAD(&p->pages);
102         }
103         return p;
104 }
105
/*
 * RCU callback: free a COMMIT data structure once the grace period has
 * elapsed (the embedded rpc_task may be referenced until then).
 */
void nfs_commit_rcu_free(struct rcu_head *head)
{
	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
	/* pagevec was kmalloc()ed only when it doesn't point at the inline array */
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}
113
/*
 * Defer freeing of a COMMIT data structure until after an RCU-bh grace
 * period, via nfs_commit_rcu_free().
 */
void nfs_commit_free(struct nfs_write_data *wdata)
{
	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
}
118
119 struct nfs_write_data *nfs_writedata_alloc(size_t len)
120 {
121         unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
122         struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
123
124         if (p) {
125                 memset(p, 0, sizeof(*p));
126                 INIT_LIST_HEAD(&p->pages);
127                 p->npages = pagecount;
128                 if (pagecount <= ARRAY_SIZE(p->page_array))
129                         p->pagevec = p->page_array;
130                 else {
131                         p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
132                         if (!p->pagevec) {
133                                 mempool_free(p, nfs_wdata_mempool);
134                                 p = NULL;
135                         }
136                 }
137         }
138         return p;
139 }
140
/* RCU callback: free a WRITE data structure after the grace period */
static void nfs_writedata_rcu_free(struct rcu_head *head)
{
	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
	/* a separately kmalloc()ed page vector must be freed; the inline array must not */
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}
148
/* Defer freeing of WRITE data until after an RCU-bh grace period */
static void nfs_writedata_free(struct nfs_write_data *wdata)
{
	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
}
153
/* void * wrapper around nfs_writedata_free() — presumably so it can be
 * used as an RPC release callback; verify against the rpc_call_ops below */
void nfs_writedata_release(void *wdata)
{
	nfs_writedata_free(wdata);
}
158
/*
 * Look up the nfs_page request attached to @page via page_private.
 * Takes a reference on the returned request (wb_count).
 * Caller must hold the inode's req_lock.
 */
static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			atomic_inc(&req->wb_count);
	}
	return req;
}
170
171 static struct nfs_page *nfs_page_find_request(struct page *page)
172 {
173         struct nfs_page *req = NULL;
174         spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
175
176         spin_lock(req_lock);
177         req = nfs_page_find_request_locked(page);
178         spin_unlock(req_lock);
179         return req;
180 }
181
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size = i_size_read(inode);
	unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;

	/* Pages strictly before the current EOF page can never extend the file */
	if (i_size > 0 && page->index < end_index)
		return;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		return;
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
	i_size_write(inode, end);
}
197
/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	/* must cover everything up to EOF within this page... */
	if (count != nfs_page_length(page))
		return;
	/* ...and the tail beyond EOF must read back as zeroes */
	if (count != PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
	SetPageUptodate(page);
}
213
/*
 * Write a page synchronously.
 * Offset is the data offset within the page.
 *
 * Sends @count bytes starting at @offset straight to the server in
 * wsize-sized chunks with NFS_FILE_SYNC stability (no COMMIT needed).
 * Returns the number of bytes written, or a negative errno if nothing
 * at all was written.
 */
static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page, unsigned int offset, unsigned int count,
		int how)
{
	unsigned int	wsize = NFS_SERVER(inode)->wsize;
	int		result, written = 0;
	struct nfs_write_data *wdata;

	wdata = nfs_writedata_alloc(wsize);
	if (!wdata)
		return -ENOMEM;

	wdata->flags = how;
	wdata->cred = ctx->cred;
	wdata->inode = inode;
	wdata->args.fh = NFS_FH(inode);
	wdata->args.context = ctx;
	wdata->args.pages = &page;
	wdata->args.stable = NFS_FILE_SYNC;
	wdata->args.pgbase = offset;
	wdata->args.count = wsize;
	wdata->res.fattr = &wdata->fattr;
	wdata->res.verf = &wdata->verf;

	dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count, (long long)(page_offset(page) + offset));

	set_page_writeback(page);
	nfs_begin_data_update(inode);
	do {
		/* final chunk may be smaller than wsize */
		if (count < wsize)
			wdata->args.count = count;
		wdata->args.offset = page_offset(page) + wdata->args.pgbase;

		result = NFS_PROTO(inode)->write(wdata);

		if (result < 0) {
			/* Must mark the page invalid after I/O error */
			ClearPageUptodate(page);
			goto io_error;
		}
		if (result < wdata->args.count)
			printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
					wdata->args.count, result);

		/* Advance past what the server accepted and retry the rest.
		 * NOTE(review): a result of 0 would spin forever here --
		 * presumably ->write() never returns 0 without an error. */
		wdata->args.offset += result;
		wdata->args.pgbase += result;
		written += result;
		count -= result;
		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
	} while (count);
	/* Update file length */
	nfs_grow_file(page, offset, written);
	/* Set the PG_uptodate flag? */
	nfs_mark_uptodate(page, offset, written);

	if (PageError(page))
		ClearPageError(page);

io_error:
	nfs_end_data_update(inode);
	end_page_writeback(page);
	nfs_writedata_release(wdata);
	return written ? written : result;
}
285
/*
 * Queue an asynchronous write for [offset, offset+count) of @page:
 * create or merge into the page's nfs_page request, grow i_size if the
 * write extends the file, and possibly mark the page uptodate.
 * Returns 0 or a negative errno from nfs_update_request().
 */
static int nfs_writepage_async(struct nfs_open_context *ctx,
		struct inode *inode, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_update_request(ctx, inode, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	/* Set the PG_uptodate flag? */
	nfs_mark_uptodate(page, offset, count);
	nfs_unlock_request(req);
	return 0;
}
302
303 static int wb_priority(struct writeback_control *wbc)
304 {
305         if (wbc->for_reclaim)
306                 return FLUSH_HIGHPRI;
307         if (wbc->for_kupdate)
308                 return FLUSH_LOWPRI;
309         return 0;
310 }
311
/*
 * Write an mmapped page to the server.
 *
 * Called with the page locked; unlocks it before returning.  Flushes
 * any pre-existing request for the page, then writes the valid part of
 * the page (see nfs_page_length()) either asynchronously or — for
 * IS_SYNC inodes, or when igrab() failed — synchronously.
 */
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	unsigned offset;
	int inode_referenced = 0;
	int priority = wb_priority(wbc);
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	/*
	 * Note: We need to ensure that we have a reference to the inode
	 *       if we are to do asynchronous writes. If not, waiting
	 *       in nfs_wait_on_request() may deadlock with clear_inode().
	 *
	 *       If igrab() fails here, then it is in any case safe to
	 *       call nfs_wb_page(), since there will be no pending writes.
	 */
	if (igrab(inode) != 0)
		inode_referenced = 1;

	/* Ensure we've flushed out any previous writes */
	nfs_wb_page_priority(inode, page, priority);

	err = 0;
	offset = nfs_page_length(page);	/* valid bytes of this page within i_size */
	if (!offset)
		goto out;	/* page lies entirely beyond EOF: nothing to write */

	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
	if (ctx == NULL) {
		err = -EBADF;
		goto out;
	}
	lock_kernel();
	if (!IS_SYNC(inode) && inode_referenced) {
		err = nfs_writepage_async(ctx, inode, page, 0, offset);
		if (!wbc->for_writepages)
			nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
	} else {
		err = nfs_writepage_sync(ctx, inode, page, 0,
						offset, priority);
		if (err >= 0) {
			/* short sync write: leave the page dirty for retry */
			if (err != offset)
				redirty_page_for_writepage(wbc, page);
			err = 0;
		}
	}
	unlock_kernel();
	put_nfs_open_context(ctx);
out:
	unlock_page(page);
	if (inode_referenced)
		iput(inode);
	return err;
}
373
/*
 * Note: causes nfs_update_request() to block on the assumption
 *       that the writeback is generated due to memory pressure.
 *
 * Flushes dirty requests for the whole mapping, optionally waits for
 * them (WB_SYNC_ALL), then issues a COMMIT for unstable writes.
 */
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	struct inode *inode = mapping->host;
	int err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	err = generic_writepages(mapping, wbc);
	if (err)
		return err;
	/* serialize flushers: only the holder of BDI_write_congested proceeds */
	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
		if (wbc->nonblocking)
			return 0;
		nfs_wait_on_write_congestion(mapping, 0);
	}
	err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
	if (err < 0)
		goto out;
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
		err = nfs_wait_on_requests(inode, 0, 0);
		if (err < 0)
			goto out;
	}
	err = nfs_commit_inode(inode, wb_priority(wbc));
	if (err > 0)
		err = 0;	/* positive return is a request count, not an error */
out:
	clear_bit(BDI_write_congested, &bdi->state);
	wake_up_all(&nfs_write_congestion);
	congestion_end(WRITE);
	return err;
}
412
/*
 * Insert a write request into an inode
 *
 * Adds @req to the inode's radix tree and links it to its page via
 * page_private.  The first request pinned to an inode takes an inode
 * reference and opens a data update; both are dropped when the last
 * request goes away in nfs_inode_remove_request().
 * Caller must hold nfsi->req_lock.
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error == -EEXIST);	/* at most one request per page index */
	if (error)
		return error;
	if (!nfsi->npages) {
		/* NOTE(review): igrab() result is unchecked -- presumably the
		 * caller's reference keeps the inode from being freed here */
		igrab(inode);
		nfs_begin_data_update(inode);
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	atomic_inc(&req->wb_count);
	return 0;
}
437
/*
 * Remove a write request from an inode
 * (comment fixed: this is the removal path, not insertion)
 *
 * Unhooks the page<->request link and the radix tree entry; when the
 * last request disappears, ends the data update and releases the inode
 * reference taken in nfs_inode_add_request().
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON (!NFS_WBACK_BUSY(req));

	spin_lock(&nfsi->req_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		/* drop the spinlock before nfs_end_data_update()/iput() */
		spin_unlock(&nfsi->req_lock);
		nfs_end_data_update(inode);
		iput(inode);
	} else
		spin_unlock(&nfsi->req_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}
462
/*
 * Add a request to the inode's dirty list.
 *
 * Also tags the request's index in the radix tree so flushers can find
 * it, and accounts the page as file-dirty.
 */
static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&nfsi->req_lock);
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index, NFS_PAGE_TAG_DIRTY);
	nfs_list_add_request(req, &nfsi->dirty);
	nfsi->ndirty++;
	spin_unlock(&nfsi->req_lock);
	inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
	mark_inode_dirty(inode);
}
481
482 /*
483  * Check if a request is dirty
484  */
485 static inline int
486 nfs_dirty_request(struct nfs_page *req)
487 {
488         struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
489         return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
490 }
491
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Add a request to the inode's commit list.
 *
 * NFSv3/v4 only: the page is accounted as "unstable" (NR_UNSTABLE_NFS)
 * until a COMMIT makes the server's copy durable; the accounting is
 * dropped in nfs_cancel_commit_list() or on commit completion.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&nfsi->req_lock);
	nfs_list_add_request(req, &nfsi->commit);
	nfsi->ncommit++;
	spin_unlock(&nfsi->req_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	mark_inode_dirty(inode);
}
#endif
510
/*
 * Wait for a request to complete.
 *
 * Walks all requests tagged NFS_PAGE_TAG_WRITEBACK in index range
 * [idx_start, idx_start + npages) — the whole file when npages == 0 —
 * waiting on each in turn.  Returns the number of requests waited for,
 * or a negative errno.  Called and returns with req_lock held, but
 * drops it around each sleep.
 *
 * Interruptible by signals only if mounted with intr flag.
 */
static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	unsigned long		idx_end, next;
	unsigned int		res = 0;
	int			error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		/* pin the request, then drop the lock while we sleep */
		atomic_inc(&req->wb_count);
		spin_unlock(&nfsi->req_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		spin_lock(&nfsi->req_lock);
		if (error < 0)
			return error;
		res++;
	}
	return res;
}
548
549 static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
550 {
551         struct nfs_inode *nfsi = NFS_I(inode);
552         int ret;
553
554         spin_lock(&nfsi->req_lock);
555         ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
556         spin_unlock(&nfsi->req_lock);
557         return ret;
558 }
559
/*
 * Tear down every request on @head without writing it: unlink each one
 * from its inode and clear its writeback state so waiters can proceed.
 */
static void nfs_cancel_dirty_list(struct list_head *head)
{
	struct nfs_page *req;
	while(!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_inode_remove_request(req);
		nfs_clear_page_writeback(req);
	}
}
570
/*
 * Discard every request on a commit list: undo the unstable-page
 * accounting taken in nfs_mark_request_commit(), then unlink and
 * unlock each request.
 */
static void nfs_cancel_commit_list(struct list_head *head)
{
	struct nfs_page *req;

	while(!list_empty(head)) {
		req = nfs_list_entry(head->next);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		nfs_list_remove_request(req);
		nfs_inode_remove_request(req);
		nfs_unlock_request(req);
	}
}
583
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 * Returns the number of requests moved.  Presumably runs under
 * req_lock (ncommit is otherwise unprotected) — verify against callers.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int res = 0;

	if (nfsi->ncommit != 0) {
		res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
		nfsi->ncommit -= res;
		/* the counter and the list must agree on emptiness */
		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
	}
	return res;
}
#else
/* NFSv2 writes are stable; nothing is ever queued for COMMIT */
static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
	return 0;
}
#endif
615
/*
 * Block until the backing device stops reporting write congestion.
 * With @intr the sleep is interruptible (under the RPC client's signal
 * mask) and returns -ERESTARTSYS if a signal is pending; otherwise 0.
 */
static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
{
	struct backing_dev_info *bdi = mapping->backing_dev_info;
	DEFINE_WAIT(wait);
	int ret = 0;

	might_sleep();

	if (!bdi_write_congested(bdi))
		return 0;

	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);

	if (intr) {
		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
		sigset_t oldset;

		rpc_clnt_sigmask(clnt, &oldset);
		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
		/* re-check after queueing on the waitqueue to avoid a missed wakeup */
		if (bdi_write_congested(bdi)) {
			if (signalled())
				ret = -ERESTARTSYS;
			else
				schedule();
		}
		rpc_clnt_sigunmask(clnt, &oldset);
	} else {
		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
		if (bdi_write_congested(bdi))
			schedule();
	}
	finish_wait(&nfs_write_congestion, &wait);
	return ret;
}
650
651
/*
 * Try to update any existing write request, or create one if there is none.
 * In order to match, the request's credentials must match those of
 * the calling process.
 *
 * Note: Should always be called with the Page Lock held!
 *
 * Returns the locked request covering [offset, offset+bytes), or an
 * ERR_PTR on failure (-EBUSY if an incompatible request must be
 * flushed first).
 */
static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
		struct inode *inode, struct page *page,
		unsigned int offset, unsigned int bytes)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page		*req, *new = NULL;
	unsigned long		rqend, end;

	end = offset + bytes;

	/* throttle under congestion; interruptible only on "intr" mounts */
	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
		return ERR_PTR(-ERESTARTSYS);
	for (;;) {
		/* Loop over all inode entries and see if we find
		 * A request for the page we wish to update
		 */
		spin_lock(&nfsi->req_lock);
		req = nfs_page_find_request_locked(page);
		if (req) {
			if (!nfs_lock_request_dontget(req)) {
				int error;

				/* request busy: wait outside the lock, then
				 * restart the lookup from scratch */
				spin_unlock(&nfsi->req_lock);
				error = nfs_wait_on_request(req);
				nfs_release_request(req);
				if (error < 0) {
					if (new)
						nfs_release_request(new);
					return ERR_PTR(error);
				}
				continue;
			}
			spin_unlock(&nfsi->req_lock);
			/* got an existing request: discard any speculatively
			 * allocated new one */
			if (new)
				nfs_release_request(new);
			break;
		}

		if (new) {
			int error;
			nfs_lock_request_dontget(new);
			error = nfs_inode_add_request(inode, new);
			if (error) {
				spin_unlock(&nfsi->req_lock);
				nfs_unlock_request(new);
				return ERR_PTR(error);
			}
			spin_unlock(&nfsi->req_lock);
			nfs_mark_request_dirty(new);
			return new;
		}
		spin_unlock(&nfsi->req_lock);

		/* no request yet: allocate one outside the lock and loop to
		 * re-check — another task may have raced one in meanwhile */
		new = nfs_create_request(ctx, inode, page, offset, bytes);
		if (IS_ERR(new))
			return new;
	}

	/* We have a request for our page.
	 * If the creds don't match, or the
	 * page addresses don't match,
	 * tell the caller to wait on the conflicting
	 * request.
	 */
	rqend = req->wb_offset + req->wb_bytes;
	if (req->wb_context != ctx
	    || req->wb_page != page
	    || !nfs_dirty_request(req)
	    || offset > rqend || end < req->wb_offset) {
		nfs_unlock_request(req);
		return ERR_PTR(-EBUSY);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
		req->wb_bytes = rqend - req->wb_offset;
	}

	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;

	return req;
}
745
/*
 * Flush the page's pending write request if it belongs to a different
 * open context (or to a dropped page), so data written under another
 * credential is on the wire before we modify the page.
 * Returns 0 or a negative errno from the flush.
 */
int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
	struct nfs_page *req;
	int		status = 0;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	req = nfs_page_find_request(page);
	if (req != NULL) {
		int do_flush = req->wb_page != page || req->wb_context != ctx;

		nfs_release_request(req);
		if (do_flush)
			status = nfs_wb_page(page->mapping->host, page);
	}
	return (status < 0) ? status : 0;
}
769
770 /*
771  * Update and possibly write a cached page of an NFS file.
772  *
773  * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
774  * things with a page scheduled for an RPC call (e.g. invalidate it).
775  */
776 int nfs_updatepage(struct file *file, struct page *page,
777                 unsigned int offset, unsigned int count)
778 {
779         struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
780         struct inode    *inode = page->mapping->host;
781         struct nfs_page *req;
782         int             status = 0;
783
784         nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
785
786         dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
787                 file->f_dentry->d_parent->d_name.name,
788                 file->f_dentry->d_name.name, count,
789                 (long long)(page_offset(page) +offset));
790
791         if (IS_SYNC(inode)) {
792                 status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
793                 if (status > 0) {
794                         if (offset == 0 && status == PAGE_CACHE_SIZE)
795                                 SetPageUptodate(page);
796                         return 0;
797                 }
798                 return status;
799         }
800
801         /* If we're not using byte range locks, and we know the page
802          * is entirely in cache, it may be more efficient to avoid
803          * fragmenting write requests.
804          */
805         if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
806                 count = max(count + offset, nfs_page_length(page));
807                 offset = 0;
808         }
809
810         /*
811          * Try to find an NFS request corresponding to this page
812          * and update it.
813          * If the existing request cannot be updated, we must flush
814          * it out now.
815          */
816         do {
817                 req = nfs_update_request(ctx, inode, page, offset, count);
818                 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
819                 if (status != -EBUSY)
820                         break;
821                 /* Request could not be updated. Flush it out and try again */
822                 status = nfs_wb_page(inode, page);
823         } while (status >= 0);
824         if (status < 0)
825                 goto done;
826
827         status = 0;
828
829         /* Update file length */
830         nfs_grow_file(page, offset, count);
831         /* Set the PG_uptodate flag? */
832         nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
833         nfs_unlock_request(req);
834 done:
835         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
836                         status, (long long)i_size_read(inode));
837         if (status < 0)
838                 ClearPageUptodate(page);
839         return status;
840 }
841
/*
 * Finish off a request after its WRITE RPC: end page writeback, then
 * either requeue it (needs-reschedule -> dirty list, needs-commit ->
 * commit list) or remove it from the inode entirely.
 */
static void nfs_writepage_release(struct nfs_page *req)
{
	end_page_writeback(req->wb_page);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (!PageError(req->wb_page)) {
		if (NFS_NEED_RESCHED(req)) {
			nfs_mark_request_dirty(req);
			goto out;
		} else if (NFS_NEED_COMMIT(req)) {
			nfs_mark_request_commit(req);
			goto out;
		}
	}
	nfs_inode_remove_request(req);

out:
	nfs_clear_commit(req);
	nfs_clear_reschedule(req);
#else
	/* NFSv2: no COMMIT, nothing to requeue */
	nfs_inode_remove_request(req);
#endif
	nfs_clear_page_writeback(req);
}
866
867 static inline int flush_task_priority(int how)
868 {
869         switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
870                 case FLUSH_HIGHPRI:
871                         return RPC_PRIORITY_HIGH;
872                 case FLUSH_LOWPRI:
873                         return RPC_PRIORITY_LOW;
874         }
875         return RPC_PRIORITY_NORMAL;
876 }
877
/*
 * Set up the argument/result storage required for the RPC call.
 *
 * Fills in @data for a WRITE of @count bytes starting @offset bytes
 * into @req, initializes the embedded RPC task (async unless
 * FLUSH_SYNC is set in @how) and lets the protocol-specific
 * ->write_setup() finish the job.  The caller still has to fire the
 * task via nfs_execute_write().
 */
static void nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode		*inode;
	int flags;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->dentry->d_inode;
	data->cred = req->wb_context->cred;

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = req->wb_context;

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct.  */
	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
	NFS_PROTO(inode)->write_setup(data, how);

	data->task.tk_priority = flush_task_priority(how);
	/* Cookie groups tasks of one inode on the same RPC queue */
	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);
}
924
/*
 * Run a prepared write/commit RPC task, with the client's signal
 * handling masked for the duration of the call
 * (rpc_clnt_sigmask/rpc_clnt_sigunmask bracket rpc_execute).
 */
static void nfs_execute_write(struct nfs_write_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_execute(&data->task);
	rpc_clnt_sigunmask(clnt, &oldset);
}
934
/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 *
 * Used when wsize < PAGE_CACHE_SIZE: the dirty span of the page is
 * split into wsize-sized WRITE calls.  All nfs_write_data structures
 * are allocated up front so that, once the page has been marked for
 * writeback, the submission loop cannot fail.  req->wb_complete
 * counts the outstanding sub-writes; the final completion releases
 * the request (see nfs_writeback_done_partial).
 */
static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	/* First pass: allocate one nfs_write_data per wsize chunk */
	nbytes = req->wb_bytes;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(len);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	/* Second pass: set up and fire one RPC per chunk */
	ClearPageError(page);
	set_page_writeback(page);
	offset = 0;
	nbytes = req->wb_bytes;
	do {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes > wsize) {
			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
					wsize, offset, how);
			offset += wsize;
			nbytes -= wsize;
		} else {
			/* Last (possibly short) chunk */
			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
					nbytes, offset, how);
			nbytes = 0;
		}
		nfs_execute_write(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	/* Allocation failed: free what we got and redirty the request */
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_release(data);
	}
	nfs_mark_request_dirty(req);
	nfs_clear_page_writeback(req);
	return -ENOMEM;
}
999
/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 *
 * All requests on @head are sent as a single WRITE; on allocation
 * failure they are re-dirtied and -ENOMEM is returned.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_write_data	*data;
	unsigned int		count;

	data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
	if (!data)
		goto out_bad;

	/* Gather the pages of the coalesced request list into the
	 * pagevec and total up the byte count */
	pages = data->pagevec;
	count = 0;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		set_page_writeback(req->wb_page);
		*pages++ = req->wb_page;
		count += req->wb_bytes;
	}
	req = nfs_list_entry(data->pages.next);

	/* Set up the argument struct */
	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);

	nfs_execute_write(data);
	return 0;
 out_bad:
	/* No memory: put every request back on the dirty list */
	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_clear_page_writeback(req);
	}
	return -ENOMEM;
}
1046
/*
 * Flush a list of dirty requests: coalesce them into batches of at
 * most wpages contiguous requests and hand each batch to
 * nfs_flush_one() — or nfs_flush_multi() when wsize is smaller than a
 * page.  When the whole dirty set fits in one RPC, FLUSH_STABLE is
 * added so the separate COMMIT round-trip can be skipped.  On error
 * the remaining requests are re-dirtied.
 */
static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
{
	LIST_HEAD(one_request);
	int (*flush_one)(struct inode *, struct list_head *, int);
	struct nfs_page *req;
	int wpages = NFS_SERVER(inode)->wpages;
	int wsize = NFS_SERVER(inode)->wsize;
	int error;

	flush_one = nfs_flush_one;
	if (wsize < PAGE_CACHE_SIZE)
		flush_one = nfs_flush_multi;
	/* For single writes, FLUSH_STABLE is more efficient */
	if (npages <= wpages && npages == NFS_I(inode)->npages
			&& nfs_list_entry(head->next)->wb_bytes <= wsize)
		how |= FLUSH_STABLE;

	do {
		nfs_coalesce_requests(head, &one_request, wpages);
		req = nfs_list_entry(one_request.next);
		error = flush_one(inode, &one_request, how);
		if (error < 0)
			goto out_err;
	} while (!list_empty(head));
	return 0;
out_err:
	/* Redirty whatever we did not manage to submit */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_clear_page_writeback(req);
	}
	return error;
}
1081
/*
 * Handle a write reply that flushed part of a page.
 *
 * Records errors on the page; on v3/v4 an unstable reply either
 * schedules a deferred COMMIT (first sub-write, verifier saved) or —
 * if the verifier no longer matches — flags the request for resending.
 * The request is only released once all sibling sub-writes have
 * completed (wb_complete drops to zero).
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req = data->req;
	struct page		*page = req->wb_page;

	dprintk("NFS: write (%s/%Ld %d@%Ld)",
		req->wb_context->dentry->d_inode->i_sb->s_id,
		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
		req->wb_bytes,
		(long long)req_offset(req));

	/* Nonzero means the call was restarted (e.g. short write);
	 * completion will be handled by the retried call */
	if (nfs_writeback_done(task, data) != 0)
		return;

	if (task->tk_status < 0) {
		/* WRITE failed: flag the page and record the error */
		ClearPageUptodate(page);
		SetPageError(page);
		req->wb_context->error = task->tk_status;
		dprintk(", error = %d\n", task->tk_status);
	} else {
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
		if (data->verf.committed < NFS_FILE_SYNC) {
			if (!NFS_NEED_COMMIT(req)) {
				/* First unstable reply: remember the
				 * verifier and defer a COMMIT */
				nfs_defer_commit(req);
				memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
				dprintk(" defer commit\n");
			} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
				/* Verifier changed between sub-writes:
				 * the data must be rewritten */
				nfs_defer_reschedule(req);
				dprintk(" server reboot detected\n");
			}
		} else
#endif
			dprintk(" OK\n");
	}

	/* Release the request once the last sub-write finishes */
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
}
1124
/* Completion callbacks for WRITEs that cover part of a page */
static const struct rpc_call_ops nfs_write_partial_ops = {
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writedata_release,
};
1129
/*
 * Handle a write reply that flushes a whole page.
 *
 * For each request in the completed batch: on error the page is
 * flagged and the request dropped; on success stable data is dropped
 * outright, while (on v3/v4) unstable data has its verifier saved and
 * is queued for a later COMMIT.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *        writebacks since the page->count is kept > 1 for as long
 *        as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req;
	struct page		*page;

	/* Nonzero means the call was restarted; nothing to do yet */
	if (nfs_writeback_done(task, data) != 0)
		return;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		page = req->wb_page;

		dprintk("NFS: write (%s/%Ld %d@%Ld)",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (task->tk_status < 0) {
			/* WRITE failed: flag the page, drop the request */
			ClearPageUptodate(page);
			SetPageError(page);
			req->wb_context->error = task->tk_status;
			end_page_writeback(page);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", task->tk_status);
			goto next;
		}
		end_page_writeback(page);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
			/* Data reached stable storage; we are done */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* Unstable reply: save the verifier, queue for COMMIT */
		memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
		nfs_mark_request_commit(req);
		dprintk(" marked for commit\n");
#else
		nfs_inode_remove_request(req);
#endif
	next:
		nfs_clear_page_writeback(req);
	}
}
1185
/* Completion callbacks for WRITEs that flush whole pages */
static const struct rpc_call_ops nfs_write_full_ops = {
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writedata_release,
};
1190
1191
/*
 * This function is called when the WRITE call is complete.
 *
 * Returns 0 when the reply has been fully consumed, -EAGAIN when the
 * call was restarted to finish a short write, or the nonzero status
 * of the protocol-specific ->write_done() handler.  A short write
 * with no progress at all is turned into -EIO.
 */
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs	*argp = &data->args;
	struct nfs_writeres	*resp = &data->res;
	int status;

	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients.  A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return status;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *       the server reports NFS_DATA_SYNC, but performs
		 *       NFS_FILE_SYNC. We therefore implement this checking
		 *       as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long	complain;

		/* Rate-limited to once every 300 seconds */
		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long	complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			rpc_restart_call(task);
			return -EAGAIN;
		}
		/* Rate-limited warning for a zero-byte "write" */
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
					argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return 0;
}
1271
1272
1273 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/* rpc_release callback for COMMIT: return the nfs_write_data to its pool */
void nfs_commit_release(void *wdata)
{
	nfs_commit_free(wdata);
}
1278
/*
 * Set up the argument/result storage required for the RPC call.
 *
 * Moves all requests from @head onto data->pages, takes inode and
 * credentials from the first request, and initializes the embedded
 * RPC task for a COMMIT of the entire inode (offset/count 0).
 */
static void nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data,
		int how)
{
	struct nfs_page		*first;
	struct inode		*inode;
	int flags;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);
	first = nfs_list_entry(data->pages.next);
	inode = first->wb_context->dentry->d_inode;

	data->inode	  = inode;
	data->cred	  = first->wb_context->cred;

	data->args.fh     = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count  = 0;
	data->res.count   = 0;
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct.  */
	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
	NFS_PROTO(inode)->commit_setup(data, how);

	data->task.tk_priority = flush_task_priority(how);
	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
}
1319
/*
 * Commit dirty pages
 *
 * Sends a single COMMIT covering all requests on @head.  If the
 * nfs_write_data cannot be allocated, the requests are put back on
 * the commit list and -ENOMEM is returned.
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data	*data;
	struct nfs_page		*req;

	data = nfs_commit_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	nfs_commit_rpcsetup(head, data, how);

	nfs_execute_write(data);
	return 0;
 out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		/* NOTE(review): dec appears to balance an inc done by
		 * nfs_mark_request_commit — confirm against nfs_page.c */
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		nfs_clear_page_writeback(req);
	}
	return -ENOMEM;
}
1349
/*
 * COMMIT call returned
 *
 * For each committed request, the verifier returned by the server is
 * compared with the one stored at WRITE time: a match means the data
 * reached stable storage and the request can be dropped; a mismatch
 * means the server rebooted in between, so the data is re-dirtied
 * and will be written again.
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data	*data = calldata;
	struct nfs_page		*req;

	dprintk("NFS: %4d nfs_commit_done (status %d)\n",
				task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
		return;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		/* Page is no longer accounted as unstable */
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);

		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (task->tk_status < 0) {
			/* COMMIT failed: record the error, drop the request */
			req->wb_context->error = task->tk_status;
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", task->tk_status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_clear_page_writeback(req);
	}
}
1397
/* Completion callbacks for COMMIT calls */
static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};
1402 #else
/* Without NFSv3/v4 there are no unstable writes, so nothing to commit */
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	return 0;
}
1407 #endif
1408
/*
 * Scan @mapping for dirty requests (under the inode's req_lock) and
 * write them out.  Returns the number of dirty requests found, or a
 * negative error from nfs_flush_list().
 */
static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	struct nfs_inode *nfsi = NFS_I(mapping->host);
	LIST_HEAD(head);
	long res;

	spin_lock(&nfsi->req_lock);
	res = nfs_scan_dirty(mapping, wbc, &head);
	spin_unlock(&nfsi->req_lock);
	if (res) {
		int error = nfs_flush_list(mapping->host, &head, res, how);
		if (error < 0)
			return error;
	}
	return res;
}
1425
1426 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Commit all outstanding unstable writes on the inode.  Returns the
 * number of requests scanned onto the commit list, or a negative
 * error from nfs_commit_list().
 */
int nfs_commit_inode(struct inode *inode, int how)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	LIST_HEAD(head);
	int res;

	spin_lock(&nfsi->req_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&nfsi->req_lock);
	if (res) {
		int error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
	}
	return res;
}
1443 #endif
1444
/*
 * Write out and/or commit all outstanding requests in the given page
 * range of @mapping and wait for them to complete.
 *
 * FLUSH_NOCOMMIT in @how skips the commit phase; FLUSH_INVALIDATE
 * cancels the requests instead of sending them.  The loop re-takes
 * the inode's req_lock around each scan and keeps going until the
 * range is clean or an error occurs.  Returns a negative error or
 * the (non-negative) result of the final flush/commit pass.
 */
long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	struct inode *inode = mapping->host;
	struct nfs_inode *nfsi = NFS_I(inode);
	unsigned long idx_start, idx_end;
	unsigned int npages = 0;
	LIST_HEAD(head);
	int nocommit = how & FLUSH_NOCOMMIT;
	long pages, ret;

	/* FIXME */
	if (wbc->range_cyclic)
		idx_start = 0;
	else {
		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (idx_end > idx_start) {
			unsigned long l_npages = 1 + idx_end - idx_start;
			npages = l_npages;
			/* If the page count overflows the narrower
			 * npages, fall back to npages == 0 (presumably
			 * "no upper bound" — as in the cyclic case) */
			if (sizeof(npages) != sizeof(l_npages) &&
					(unsigned long)npages != l_npages)
				npages = 0;
		}
	}
	how &= ~FLUSH_NOCOMMIT;
	spin_lock(&nfsi->req_lock);
	do {
		wbc->pages_skipped = 0;
		/* Wait for in-flight writebacks in the range first */
		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
		if (ret != 0)
			continue;
		/* Phase 1: flush any dirty requests */
		pages = nfs_scan_dirty(mapping, wbc, &head);
		if (pages != 0) {
			spin_unlock(&nfsi->req_lock);
			if (how & FLUSH_INVALIDATE) {
				nfs_cancel_dirty_list(&head);
				ret = pages;
			} else
				ret = nfs_flush_list(inode, &head, pages, how);
			spin_lock(&nfsi->req_lock);
			continue;
		}
		if (wbc->pages_skipped != 0)
			continue;
		if (nocommit)
			break;
		/* Phase 2: commit unstable requests in the range */
		pages = nfs_scan_commit(inode, &head, idx_start, npages);
		if (pages == 0) {
			if (wbc->pages_skipped != 0)
				continue;
			break;
		}
		if (how & FLUSH_INVALIDATE) {
			spin_unlock(&nfsi->req_lock);
			nfs_cancel_commit_list(&head);
			ret = pages;
			spin_lock(&nfsi->req_lock);
			continue;
		}
		/* COMMIT covers the whole file, so sweep up everything */
		pages += nfs_scan_commit(inode, &head, 0, 0);
		spin_unlock(&nfsi->req_lock);
		ret = nfs_commit_list(inode, &head, how);
		spin_lock(&nfsi->req_lock);
	} while (ret >= 0);
	spin_unlock(&nfsi->req_lock);
	return ret;
}
1512
1513 /*
1514  * flush the inode to disk.
1515  */
1516 int nfs_wb_all(struct inode *inode)
1517 {
1518         struct address_space *mapping = inode->i_mapping;
1519         struct writeback_control wbc = {
1520                 .bdi = mapping->backing_dev_info,
1521                 .sync_mode = WB_SYNC_ALL,
1522                 .nr_to_write = LONG_MAX,
1523                 .range_cyclic = 1,
1524         };
1525         int ret;
1526
1527         ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
1528         if (ret >= 0)
1529                 return 0;
1530         return ret;
1531 }
1532
1533 int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
1534 {
1535         struct writeback_control wbc = {
1536                 .bdi = mapping->backing_dev_info,
1537                 .sync_mode = WB_SYNC_ALL,
1538                 .nr_to_write = LONG_MAX,
1539                 .range_start = range_start,
1540                 .range_end = range_end,
1541         };
1542         int ret;
1543
1544         ret = nfs_sync_mapping_wait(mapping, &wbc, how);
1545         if (ret >= 0)
1546                 return 0;
1547         return ret;
1548 }
1549
1550 static int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
1551 {
1552         loff_t range_start = page_offset(page);
1553         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1554
1555         return nfs_sync_mapping_range(inode->i_mapping, range_start, range_end, how | FLUSH_STABLE);
1556 }
1557
/*
 * Write back all requests on one page - we do this before reading it.
 * Returns 0 on success or a negative error code.
 */
int nfs_wb_page(struct inode *inode, struct page* page)
{
	return nfs_wb_page_priority(inode, page, 0);
}
1565
1566
1567 int __init nfs_init_writepagecache(void)
1568 {
1569         nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1570                                              sizeof(struct nfs_write_data),
1571                                              0, SLAB_HWCACHE_ALIGN,
1572                                              NULL, NULL);
1573         if (nfs_wdata_cachep == NULL)
1574                 return -ENOMEM;
1575
1576         nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1577                                                      nfs_wdata_cachep);
1578         if (nfs_wdata_mempool == NULL)
1579                 return -ENOMEM;
1580
1581         nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1582                                                       nfs_wdata_cachep);
1583         if (nfs_commit_mempool == NULL)
1584                 return -ENOMEM;
1585
1586         return 0;
1587 }
1588
/*
 * Tear down the write/commit mempools and the backing slab cache,
 * in reverse order of creation.
 */
void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}
1595