4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2007
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <asm/div64.h>
37 #include "cifsproto.h"
38 #include "cifs_unicode.h"
39 #include "cifs_debug.h"
40 #include "cifs_fs_sb.h"
/*
 * Zero and populate a caller-allocated cifsFileInfo for a freshly opened
 * server handle (netfid), recording the owning pid and back-pointers to
 * the struct file and inode.
 * NOTE(review): this chunk is an elided dump (embedded line numbers have
 * gaps) — the opening brace and return statement are not visible here;
 * only comments were added, code is unchanged.
 */
42 static inline struct cifsFileInfo *cifs_init_private(
43 struct cifsFileInfo *private_data, struct inode *inode,
44 struct file *file, __u16 netfid)
46 memset(private_data, 0, sizeof(struct cifsFileInfo));
47 private_data->netfid = netfid;
48 private_data->pid = current->tgid;
/* fh_mutex guards handle (in)validation/reopen; lock_mutex guards llist */
49 mutex_init(&private_data->fh_mutex);
50 mutex_init(&private_data->lock_mutex);
51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */
53 private_data->pInode = inode;
54 private_data->invalidHandle = false;
55 private_data->closePend = false;
56 /* we have to track num writers to the inode, since writepages
57 does not tell us which handle the write is for so there can
58 be a close (overlapping with write) of the filehandle that
59 cifs_writepages chose to use */
60 atomic_set(&private_data->wrtPending, 0);
/*
 * Map POSIX open(2) access-mode flags to CIFS DesiredAccess bits for the
 * SMB open request.
 * NOTE(review): the return values of the O_RDONLY and O_WRONLY branches
 * are elided in this dump (presumably GENERIC_READ / GENERIC_WRITE —
 * confirm against the full source).
 */
65 static inline int cifs_convert_flags(unsigned int flags)
67 if ((flags & O_ACCMODE) == O_RDONLY)
69 else if ((flags & O_ACCMODE) == O_WRONLY)
71 else if ((flags & O_ACCMODE) == O_RDWR) {
72 /* GENERIC_ALL is too much permission to request
73 can cause unnecessary access denied on create */
74 /* return GENERIC_ALL; */
75 return (GENERIC_READ | GENERIC_WRITE);
/* fallback for unrecognized access modes: minimal attribute/data access */
78 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
79 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Map POSIX open(2) flags to the flag word passed to the SMB posix open
 * call (returned as fmode_t).
 * NOTE(review): OR-ing raw O_APPEND/O_SYNC/O_DIRECTORY/O_NOFOLLOW/O_DIRECT
 * values into an fmode_t looks type-suspect — the O_* constants are not
 * FMODE_* bits; verify against the protocol-side flag definitions before
 * relying on these bits.  The final return statement is elided in this dump.
 */
83 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
85 fmode_t posix_flags = 0;
87 if ((flags & O_ACCMODE) == O_RDONLY)
88 posix_flags = FMODE_READ;
89 else if ((flags & O_ACCMODE) == O_WRONLY)
90 posix_flags = FMODE_WRITE;
91 else if ((flags & O_ACCMODE) == O_RDWR) {
92 /* GENERIC_ALL is too much permission to request
93 can cause unnecessary access denied on create */
94 /* return GENERIC_ALL; */
95 posix_flags = FMODE_READ | FMODE_WRITE;
97 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
98 reopening a file. They had their effect on the original open */
100 posix_flags |= (fmode_t)O_APPEND;
102 posix_flags |= (fmode_t)O_SYNC;
103 if (flags & O_DIRECTORY)
104 posix_flags |= (fmode_t)O_DIRECTORY;
105 if (flags & O_NOFOLLOW)
106 posix_flags |= (fmode_t)O_NOFOLLOW;
107 if (flags & O_DIRECT)
108 posix_flags |= (fmode_t)O_DIRECT;
/*
 * Translate POSIX creation flags into the CIFS CreateDisposition value
 * (see the flag-mapping table further down in this file).
 * NOTE(review): the return lines for the O_CREAT|O_EXCL, plain O_CREAT,
 * and default cases are elided in this dump (expected FILE_CREATE,
 * FILE_OPEN_IF and FILE_OPEN respectively — confirm in the full source).
 */
113 static inline int cifs_get_disposition(unsigned int flags)
115 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
117 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
118 return FILE_OVERWRITE_IF;
119 else if ((flags & O_CREAT) == O_CREAT)
121 else if ((flags & O_TRUNC) == O_TRUNC)
122 return FILE_OVERWRITE;
/*
 * Post-processing after a successful SMB posix open: allocate and link the
 * cifsFileInfo onto the tcon and inode open-file lists (readable handles
 * first), invalidate stale cached pages unless an oplock lets us cache,
 * and record the granted oplock level on the inode.
 * NOTE(review): elided dump — error-return lines (e.g. after the kmalloc
 * NULL check) and the final return are not visible; comments only.
 */
127 /* all arguments to this function must be checked for validity in caller */
128 static inline int cifs_posix_open_inode_helper(struct inode *inode,
129 struct file *file, struct cifsInodeInfo *pCifsInode,
130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
132 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
133 /* struct timespec temp; */ /* BB REMOVEME BB */
135 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
136 if (file->private_data == NULL)
138 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
/* GlobalSMBSeslock protects the tcon and inode open-file lists */
139 write_lock(&GlobalSMBSeslock);
140 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
142 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
143 if (pCifsInode == NULL) {
144 write_unlock(&GlobalSMBSeslock);
148 /* want handles we can use to read with first
149 in the list so we do not have to walk the
150 list to search for one in write_begin */
151 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
152 list_add_tail(&pCifsFile->flist,
153 &pCifsInode->openFileList);
155 list_add(&pCifsFile->flist,
156 &pCifsInode->openFileList);
159 if (pCifsInode->clientCanCacheRead) {
160 /* we have the inode open somewhere else
161 no need to discard cache data */
162 goto psx_client_can_cache;
165 /* BB FIXME need to fix this check to move it earlier into posix_open
166 BB fIX following section BB FIXME */
168 /* if not oplocked, invalidate inode pages if mtime or file
170 /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
171 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
172 (file->f_path.dentry->d_inode->i_size ==
173 (loff_t)le64_to_cpu(buf->EndOfFile))) {
174 cFYI(1, ("inode unchanged on server"));
176 if (file->f_path.dentry->d_inode->i_mapping) {
177 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
179 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
181 cFYI(1, ("invalidating remote inode since open detected it "
183 invalidate_remote_inode(file->f_path.dentry->d_inode);
186 psx_client_can_cache:
/* low nibble of oplock carries the granted oplock level */
187 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
188 pCifsInode->clientCanCacheAll = true;
189 pCifsInode->clientCanCacheRead = true;
190 cFYI(1, ("Exclusive Oplock granted on inode %p",
191 file->f_path.dentry->d_inode));
192 } else if ((oplock & 0xF) == OPLOCK_READ)
193 pCifsInode->clientCanCacheRead = true;
195 /* will have to change the unlock if we reenable the
196 filemap_fdatawrite (which does not seem necessary */
197 write_unlock(&GlobalSMBSeslock);
/*
 * Post-processing after a successful (non-posix) SMB open: link the file
 * onto the inode's open-file list (readable handles first), invalidate
 * cached pages if the server copy changed and we hold no oplock, refresh
 * inode metadata from the open response, and record the oplock level.
 * NOTE(review): elided dump — caller is expected to hold GlobalSMBSeslock
 * on entry (only the unlock is visible here); several lines including the
 * return are missing; comments only.
 */
201 /* all arguments to this function must be checked for validity in caller */
202 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
203 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
204 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
205 char *full_path, int xid)
207 struct timespec temp;
210 /* want handles we can use to read with first
211 in the list so we do not have to walk the
212 list to search for one in write_begin */
213 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
214 list_add_tail(&pCifsFile->flist,
215 &pCifsInode->openFileList);
217 list_add(&pCifsFile->flist,
218 &pCifsInode->openFileList);
220 write_unlock(&GlobalSMBSeslock);
221 if (pCifsInode->clientCanCacheRead) {
222 /* we have the inode open somewhere else
223 no need to discard cache data */
224 goto client_can_cache;
227 /* BB need same check in cifs_create too? */
228 /* if not oplocked, invalidate inode pages if mtime or file
/* compare server mtime/size from the open response against our cache */
230 temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
231 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
232 (file->f_path.dentry->d_inode->i_size ==
233 (loff_t)le64_to_cpu(buf->EndOfFile))) {
234 cFYI(1, ("inode unchanged on server"));
236 if (file->f_path.dentry->d_inode->i_mapping) {
237 /* BB no need to lock inode until after invalidate
238 since namei code should already have it locked? */
239 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
241 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
243 cFYI(1, ("invalidating remote inode since open detected it "
245 invalidate_remote_inode(file->f_path.dentry->d_inode);
/* refresh inode: unix path vs generic path chosen by tcon->unix_ext
   (the selecting if/else lines are elided in this dump) */
250 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
251 full_path, inode->i_sb, xid);
253 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
254 full_path, buf, inode->i_sb, xid, NULL);
256 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
257 pCifsInode->clientCanCacheAll = true;
258 pCifsInode->clientCanCacheRead = true;
259 cFYI(1, ("Exclusive Oplock granted on inode %p",
260 file->f_path.dentry->d_inode));
261 } else if ((*oplock & 0xF) == OPLOCK_READ)
262 pCifsInode->clientCanCacheRead = true;
/*
 * VFS ->open handler.  First tries to adopt a cifsFileInfo pre-created by
 * cifs_create (same tgid, pfile still NULL); otherwise builds the path,
 * attempts an SMB posix open when the server advertises the capability,
 * and falls back to NT-style CIFSSMBOpen or legacy OpenX.  On success it
 * allocates private data, links it into the tcon/inode lists and lets
 * cifs_open_inode_helper refresh cache state, then pushes the mode to the
 * server for newly created files.
 * NOTE(review): elided dump — declarations of rc/xid/oplock/netfid/
 * desiredAccess/disposition, several error-handling returns and the
 * function's closing cleanup are not visible; comments only.
 */
267 int cifs_open(struct inode *inode, struct file *file)
271 struct cifs_sb_info *cifs_sb;
272 struct cifsTconInfo *tcon;
273 struct cifsFileInfo *pCifsFile;
274 struct cifsInodeInfo *pCifsInode;
275 struct list_head *tmp;
276 char *full_path = NULL;
280 FILE_ALL_INFO *buf = NULL;
284 cifs_sb = CIFS_SB(inode->i_sb);
285 tcon = cifs_sb->tcon;
287 /* search inode for this file and fill in file->private_data */
288 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
289 read_lock(&GlobalSMBSeslock);
290 list_for_each(tmp, &pCifsInode->openFileList) {
291 pCifsFile = list_entry(tmp, struct cifsFileInfo,
/* adopt a handle created for us by cifs_create (same thread group) */
293 if ((pCifsFile->pfile == NULL) &&
294 (pCifsFile->pid == current->tgid)) {
295 /* mode set in cifs_create */
297 /* needed for writepage */
298 pCifsFile->pfile = file;
300 file->private_data = pCifsFile;
304 read_unlock(&GlobalSMBSeslock);
306 if (file->private_data != NULL) {
310 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
311 cERROR(1, ("could not find file instance for "
312 "new file %p", file));
314 full_path = build_path_from_dentry(file->f_path.dentry);
315 if (full_path == NULL) {
320 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
321 inode, file->f_flags, full_path));
/* posix open path: only when server supports POSIX path operations and
   has not previously failed a posix open on this tcon */
328 if (!tcon->broken_posix_open && tcon->unix_ext &&
329 (tcon->ses->capabilities & CAP_UNIX) &&
330 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
331 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
332 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
333 /* can not refresh inode info since size could be stale */
334 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
335 cifs_sb->mnt_file_mode /* ignored */,
336 oflags, &oplock, &netfid, xid);
338 cFYI(1, ("posix open succeeded"));
339 /* no need for special case handling of setting mode
340 on read only files needed here */
342 cifs_posix_open_inode_helper(inode, file, pCifsInode,
343 pCifsFile, oplock, netfid);
345 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
346 if (tcon->ses->serverNOS)
347 cERROR(1, ("server %s of type %s returned"
348 " unexpected error on SMB posix open"
349 ", disabling posix open support."
350 " Check if server update available.",
351 tcon->ses->serverName,
352 tcon->ses->serverNOS));
353 tcon->broken_posix_open = true;
354 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
355 (rc != -EOPNOTSUPP)) /* path not found or net err */
357 /* else fallthrough to retry open the old way on network i/o
361 desiredAccess = cifs_convert_flags(file->f_flags);
363 /*********************************************************************
364 * open flag mapping table:
366 * POSIX Flag CIFS Disposition
367 * ---------- ----------------
368 * O_CREAT FILE_OPEN_IF
369 * O_CREAT | O_EXCL FILE_CREATE
370 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
371 * O_TRUNC FILE_OVERWRITE
372 * none of the above FILE_OPEN
374 * Note that there is not a direct match between disposition
375 * FILE_SUPERSEDE (ie create whether or not file exists although
376 * O_CREAT | O_TRUNC is similar but truncates the existing
377 * file rather than creating a new file as FILE_SUPERSEDE does
378 * (which uses the attributes / metadata passed in on open call)
380 *? O_SYNC is a reasonable match to CIFS writethrough flag
381 *? and the read write flags match reasonably. O_LARGEFILE
382 *? is irrelevant because largefile support is always used
383 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
384 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
385 *********************************************************************/
387 disposition = cifs_get_disposition(file->f_flags);
389 /* BB pass O_SYNC flag through on file attributes .. BB */
391 /* Also refresh inode by passing in file_info buf returned by SMBOpen
392 and calling get_inode_info with returned buf (at least helps
393 non-Unix server case) */
395 /* BB we can not do this if this is the second open of a file
396 and the first handle has writebehind data, we might be
397 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
398 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
404 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
405 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
406 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
407 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
408 & CIFS_MOUNT_MAP_SPECIAL_CHR);
410 rc = -EIO; /* no NT SMB support fall into legacy open below */
413 /* Old server, try legacy style OpenX */
414 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
415 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
416 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
417 & CIFS_MOUNT_MAP_SPECIAL_CHR);
420 cFYI(1, ("cifs_open returned 0x%x", rc));
/* assignment target of this kmalloc (file->private_data) is elided above */
424 kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
425 if (file->private_data == NULL) {
429 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
430 write_lock(&GlobalSMBSeslock);
431 list_add(&pCifsFile->tlist, &tcon->openFileList);
433 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
435 rc = cifs_open_inode_helper(inode, file, pCifsInode,
437 &oplock, buf, full_path, xid);
439 write_unlock(&GlobalSMBSeslock);
/* CIFS_CREATE_ACTION: server says the file was actually created; now it
   is safe to push the mode (could not be set earlier for read-only) */
442 if (oplock & CIFS_CREATE_ACTION) {
443 /* time to set mode which we can not set earlier due to
444 problems creating new read-only files */
445 if (tcon->unix_ext) {
446 struct cifs_unix_set_info_args args = {
447 .mode = inode->i_mode,
450 .ctime = NO_CHANGE_64,
451 .atime = NO_CHANGE_64,
452 .mtime = NO_CHANGE_64,
455 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
457 cifs_sb->mnt_cifs_flags &
458 CIFS_MOUNT_MAP_SPECIAL_CHR);
/*
 * Stub: reacquire byte-range locks dropped when the session to the server
 * was lost.  The body visible here only carries the BB to-do note; the
 * return statement is elided in this dump.
 */
469 /* Try to reacquire byte range locks that were released when session */
470 /* to server was lost */
471 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
475 /* BB list all locks open on this file and relock */
/*
 * Reopen a file whose server handle became invalid (e.g. after reconnect).
 * Tries SMB posix open when supported, else a plain CIFSSMBOpen with
 * FILE_OPEN disposition; on success stores the new netfid, optionally
 * flushes and re-fetches inode data (can_flush), restores oplock-derived
 * cache flags and re-establishes byte-range locks.
 * NOTE(review): elided dump — the fh_mutex lock/unlock pairing visible
 * here (unlock at entry, lock before early return) looks inverted; the
 * surrounding elided lines presumably complete the sequence — confirm
 * against the full source before reasoning about locking.
 */
480 static int cifs_reopen_file(struct file *file, bool can_flush)
484 struct cifs_sb_info *cifs_sb;
485 struct cifsTconInfo *tcon;
486 struct cifsFileInfo *pCifsFile;
487 struct cifsInodeInfo *pCifsInode;
489 char *full_path = NULL;
491 int disposition = FILE_OPEN;
494 if (file->private_data)
495 pCifsFile = (struct cifsFileInfo *)file->private_data;
500 mutex_unlock(&pCifsFile->fh_mutex);
501 if (!pCifsFile->invalidHandle) {
502 mutex_lock(&pCifsFile->fh_mutex);
507 if (file->f_path.dentry == NULL) {
508 cERROR(1, ("no valid name if dentry freed"));
511 goto reopen_error_exit;
514 inode = file->f_path.dentry->d_inode;
516 cERROR(1, ("inode not valid"));
519 goto reopen_error_exit;
522 cifs_sb = CIFS_SB(inode->i_sb);
523 tcon = cifs_sb->tcon;
525 /* can not grab rename sem here because various ops, including
526 those that already have the rename sem can end up causing writepage
527 to get called and if the server was down that means we end up here,
528 and we can never tell if the caller already has the rename_sem */
529 full_path = build_path_from_dentry(file->f_path.dentry);
530 if (full_path == NULL) {
533 mutex_lock(&pCifsFile->fh_mutex);
538 cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
539 inode, file->f_flags, full_path));
546 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
547 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
548 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
549 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
550 /* can not refresh inode info since size could be stale */
/* NULL pinode: do not update the inode from the posix open response */
551 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
552 cifs_sb->mnt_file_mode /* ignored */,
553 oflags, &oplock, &netfid, xid);
555 cFYI(1, ("posix reopen succeeded"));
558 /* fallthrough to retry open the old way on errors, especially
559 in the reconnect path it is important to retry hard */
562 desiredAccess = cifs_convert_flags(file->f_flags);
564 /* Can not refresh inode by passing in file_info buf to be returned
565 by SMBOpen and then calling get_inode_info with returned buf
566 since file might have write behind data that needs to be flushed
567 and server version of file size can be stale. If we knew for sure
568 that inode was not dirty locally we could do this */
570 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
571 CREATE_NOT_DIR, &netfid, &oplock, NULL,
572 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
573 CIFS_MOUNT_MAP_SPECIAL_CHR);
575 mutex_lock(&pCifsFile->fh_mutex);
576 cFYI(1, ("cifs_open returned 0x%x", rc));
577 cFYI(1, ("oplock: %d", oplock));
580 pCifsFile->netfid = netfid;
581 pCifsFile->invalidHandle = false;
582 mutex_lock(&pCifsFile->fh_mutex);
583 pCifsInode = CIFS_I(inode);
/* can_flush path: safe to write back dirty pages before refetching info */
586 rc = filemap_write_and_wait(inode->i_mapping);
588 CIFS_I(inode)->write_behind_rc = rc;
589 /* temporarily disable caching while we
590 go to server to get inode info */
591 pCifsInode->clientCanCacheAll = false;
592 pCifsInode->clientCanCacheRead = false;
594 rc = cifs_get_inode_info_unix(&inode,
595 full_path, inode->i_sb, xid);
597 rc = cifs_get_inode_info(&inode,
598 full_path, NULL, inode->i_sb,
600 } /* else we are writing out data to server already
601 and could deadlock if we tried to flush data, and
602 since we do not know if we have data that would
603 invalidate the current end of file on the server
604 we can not go to the server to get the new inod
606 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
607 pCifsInode->clientCanCacheAll = true;
608 pCifsInode->clientCanCacheRead = true;
609 cFYI(1, ("Exclusive Oplock granted on inode %p",
610 file->f_path.dentry->d_inode));
611 } else if ((oplock & 0xF) == OPLOCK_READ) {
612 pCifsInode->clientCanCacheRead = true;
613 pCifsInode->clientCanCacheAll = false;
615 pCifsInode->clientCanCacheRead = false;
616 pCifsInode->clientCanCacheAll = false;
618 cifs_relock_file(pCifsFile);
/*
 * VFS ->release handler.  Marks the handle closePend, waits (bounded
 * polling) for in-flight writes tracked via wrtPending to drain, sends
 * SMB close unless the tcon needs reconnect, frees stored byte-range
 * lock records, unlinks the handle from the inode/tcon lists, and clears
 * the inode cache flags when this was the last open instance.  Returns
 * any deferred write-behind error.
 * NOTE(review): elided dump — the timeout increment/sleep lines inside
 * the polling loops and several braces are not visible; comments only.
 */
626 int cifs_close(struct inode *inode, struct file *file)
630 struct cifs_sb_info *cifs_sb;
631 struct cifsTconInfo *pTcon;
632 struct cifsFileInfo *pSMBFile =
633 (struct cifsFileInfo *)file->private_data;
637 cifs_sb = CIFS_SB(inode->i_sb);
638 pTcon = cifs_sb->tcon;
640 struct cifsLockInfo *li, *tmp;
641 write_lock(&GlobalSMBSeslock);
642 pSMBFile->closePend = true;
644 /* no sense reconnecting to close a file that is
646 if (!pTcon->need_reconnect) {
647 write_unlock(&GlobalSMBSeslock);
/* bounded poll: give pending writes a head start before SMB close */
649 while ((atomic_read(&pSMBFile->wrtPending) != 0)
650 && (timeout <= 2048)) {
651 /* Give write a better chance to get to
652 server ahead of the close. We do not
653 want to add a wait_q here as it would
654 increase the memory utilization as
655 the struct would be in each open file,
656 but this should give enough time to
659 ("close delay, write pending"));
663 if (atomic_read(&pSMBFile->wrtPending))
664 cERROR(1, ("close with pending write"));
665 if (!pTcon->need_reconnect &&
666 !pSMBFile->invalidHandle)
667 rc = CIFSSMBClose(xid, pTcon,
670 write_unlock(&GlobalSMBSeslock);
672 write_unlock(&GlobalSMBSeslock);
674 /* Delete any outstanding lock records.
675 We'll lose them when the file is closed anyway. */
676 mutex_lock(&pSMBFile->lock_mutex);
677 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
678 list_del(&li->llist);
681 mutex_unlock(&pSMBFile->lock_mutex);
683 write_lock(&GlobalSMBSeslock);
684 list_del(&pSMBFile->flist);
685 list_del(&pSMBFile->tlist);
686 write_unlock(&GlobalSMBSeslock);
688 /* We waited above to give the SMBWrite a chance to issue
689 on the wire (so we do not get SMBWrite returning EBADF
690 if writepages is racing with close. Note that writepages
691 does not specify a file handle, so it is possible for a file
692 to be opened twice, and the application close the "wrong"
693 file handle - in these cases we delay long enough to allow
694 the SMBWrite to get on the wire before the SMB Close.
695 We allow total wait here over 45 seconds, more than
696 oplock break time, and more than enough to allow any write
697 to complete on the server, or to time out on the client */
698 while ((atomic_read(&pSMBFile->wrtPending) != 0)
699 && (timeout <= 50000)) {
700 cERROR(1, ("writes pending, delay free of handle"));
704 kfree(file->private_data);
705 file->private_data = NULL;
709 read_lock(&GlobalSMBSeslock);
710 if (list_empty(&(CIFS_I(inode)->openFileList))) {
711 cFYI(1, ("closing last open instance for inode %p", inode));
712 /* if the file is not open we do not know if we can cache info
713 on this inode, much less write behind and read ahead */
714 CIFS_I(inode)->clientCanCacheRead = false;
715 CIFS_I(inode)->clientCanCacheAll = false;
717 read_unlock(&GlobalSMBSeslock);
/* surface any error recorded by earlier asynchronous write-back */
718 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
719 rc = CIFS_I(inode)->write_behind_rc;
/*
 * VFS ->release handler for directories.  Sends CIFSFindClose for an
 * uncompleted readdir search, releases the buffered network search
 * response (small vs regular SMB buffer), and frees the private data.
 * NOTE(review): elided dump — declarations of rc/xid/ptmp and some braces
 * are not visible; comments only.
 */
724 int cifs_closedir(struct inode *inode, struct file *file)
728 struct cifsFileInfo *pCFileStruct =
729 (struct cifsFileInfo *)file->private_data;
732 cFYI(1, ("Closedir inode = 0x%p", inode));
737 struct cifsTconInfo *pTcon;
738 struct cifs_sb_info *cifs_sb =
739 CIFS_SB(file->f_path.dentry->d_sb);
741 pTcon = cifs_sb->tcon;
743 cFYI(1, ("Freeing private data in close dir"));
744 write_lock(&GlobalSMBSeslock);
745 if (!pCFileStruct->srch_inf.endOfSearch &&
746 !pCFileStruct->invalidHandle) {
/* mark invalid before dropping the lock so no one reuses the handle */
747 pCFileStruct->invalidHandle = true;
748 write_unlock(&GlobalSMBSeslock);
749 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
750 cFYI(1, ("Closing uncompleted readdir with rc %d",
752 /* not much we can do if it fails anyway, ignore rc */
755 write_unlock(&GlobalSMBSeslock);
756 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
758 cFYI(1, ("closedir free smb buf in srch struct"));
759 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
/* search responses can live in either the small or the big buffer pool */
760 if (pCFileStruct->srch_inf.smallBuf)
761 cifs_small_buf_release(ptmp);
763 cifs_buf_release(ptmp);
765 kfree(file->private_data);
766 file->private_data = NULL;
768 /* BB can we lock the filestruct while this is going on? */
/*
 * Record a granted byte-range lock on the file's llist so it can be
 * released on close / re-acquired on reconnect.
 * NOTE(review): elided dump — the kmalloc NULL check, the field
 * assignments (offset/length/type) and the return are not visible here;
 * comments only.
 */
773 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
774 __u64 offset, __u8 lockType)
776 struct cifsLockInfo *li =
777 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
783 mutex_lock(&fid->lock_mutex);
784 list_add(&li->llist, &fid->llist);
785 mutex_unlock(&fid->lock_mutex);
/*
 * VFS ->lock handler.  Decodes the fcntl/flock request into SMB lock
 * parameters, uses POSIX-style CIFSSMBPosixLock when the server supports
 * CIFS_UNIX_FCNTL_CAP (and the mount did not disable it), otherwise falls
 * back to Windows-style CIFSSMBLock.  Windows locks are remembered via
 * store_file_lock so unlocks can release every fully-covered stored lock.
 * Finally mirrors POSIX locks locally with posix_lock_file_wait.
 * NOTE(review): elided dump — declarations (rc, xid, netfid, numLock,
 * numUnlock, posix_lock_type), several assignments inside the F_*
 * branches, and various braces/returns are missing; comments only.
 */
789 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
795 bool wait_flag = false;
796 struct cifs_sb_info *cifs_sb;
797 struct cifsTconInfo *tcon;
799 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
800 bool posix_locking = 0;
/* fl_end is inclusive, hence the +1 */
802 length = 1 + pfLock->fl_end - pfLock->fl_start;
806 cFYI(1, ("Lock parm: 0x%x flockflags: "
807 "0x%x flocktype: 0x%x start: %lld end: %lld",
808 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
811 if (pfLock->fl_flags & FL_POSIX)
813 if (pfLock->fl_flags & FL_FLOCK)
815 if (pfLock->fl_flags & FL_SLEEP) {
816 cFYI(1, ("Blocking lock"));
819 if (pfLock->fl_flags & FL_ACCESS)
820 cFYI(1, ("Process suspended by mandatory locking - "
821 "not implemented yet"))
822 if (pfLock->fl_flags & FL_LEASE)
823 cFYI(1, ("Lease on file - not implemented yet"));
824 if (pfLock->fl_flags &
825 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
826 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
828 if (pfLock->fl_type == F_WRLCK) {
829 cFYI(1, ("F_WRLCK "));
831 } else if (pfLock->fl_type == F_UNLCK) {
832 cFYI(1, ("F_UNLCK"));
834 /* Check if unlock includes more than
836 } else if (pfLock->fl_type == F_RDLCK) {
837 cFYI(1, ("F_RDLCK"));
838 lockType |= LOCKING_ANDX_SHARED_LOCK;
840 } else if (pfLock->fl_type == F_EXLCK) {
841 cFYI(1, ("F_EXLCK"));
843 } else if (pfLock->fl_type == F_SHLCK) {
844 cFYI(1, ("F_SHLCK"));
845 lockType |= LOCKING_ANDX_SHARED_LOCK;
848 cFYI(1, ("Unknown type of lock"));
850 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
851 tcon = cifs_sb->tcon;
853 if (file->private_data == NULL) {
857 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
/* POSIX brlock support: requires CAP_UNIX + FCNTL cap, not disabled */
859 if ((tcon->ses->capabilities & CAP_UNIX) &&
860 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
861 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
863 /* BB add code here to normalize offset and length to
864 account for negative length which we can not accept over the
/* F_GETLK path: query existing lock (posix variant) */
869 if (lockType & LOCKING_ANDX_SHARED_LOCK)
870 posix_lock_type = CIFS_RDLCK;
872 posix_lock_type = CIFS_WRLCK;
873 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
875 posix_lock_type, wait_flag);
880 /* BB we could chain these into one lock request BB */
/* Windows-style F_GETLK probe: try to take then immediately release */
881 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
882 0, 1, lockType, 0 /* wait flag */ );
884 rc = CIFSSMBLock(xid, tcon, netfid, length,
885 pfLock->fl_start, 1 /* numUnlock */ ,
886 0 /* numLock */ , lockType,
888 pfLock->fl_type = F_UNLCK;
890 cERROR(1, ("Error unlocking previously locked "
891 "range %d during test of lock", rc));
895 /* if rc == ERR_SHARING_VIOLATION ? */
896 rc = 0; /* do not change lock type to unlock
897 since range in use */
904 if (!numLock && !numUnlock) {
905 /* if no lock or unlock then nothing
906 to do since we do not know what it is */
913 if (lockType & LOCKING_ANDX_SHARED_LOCK)
914 posix_lock_type = CIFS_RDLCK;
916 posix_lock_type = CIFS_WRLCK;
919 posix_lock_type = CIFS_UNLCK;
921 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
923 posix_lock_type, wait_flag);
925 struct cifsFileInfo *fid =
926 (struct cifsFileInfo *)file->private_data;
929 rc = CIFSSMBLock(xid, tcon, netfid, length,
931 0, numLock, lockType, wait_flag);
934 /* For Windows locks we must store them. */
935 rc = store_file_lock(fid, length,
936 pfLock->fl_start, lockType);
938 } else if (numUnlock) {
939 /* For each stored lock that this unlock overlaps
940 completely, unlock it. */
942 struct cifsLockInfo *li, *tmp;
945 mutex_lock(&fid->lock_mutex);
946 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
947 if (pfLock->fl_start <= li->offset &&
948 (pfLock->fl_start + length) >=
949 (li->offset + li->length)) {
950 stored_rc = CIFSSMBLock(xid, tcon,
952 li->length, li->offset,
953 1, 0, li->type, false);
957 list_del(&li->llist);
961 mutex_unlock(&fid->lock_mutex);
/* keep the local (VFS) posix lock state in sync with the server */
965 if (pfLock->fl_flags & FL_POSIX)
966 posix_lock_file_wait(file, pfLock);
972 * Set the timeout on write requests past EOF. For some servers (Windows)
973 * these calls can be very long.
975 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
976 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
977 * The 10M cutoff is totally arbitrary. A better scheme for this would be
978 * welcome if someone wants to suggest one.
980 * We may be able to do a better job with this if there were some way to
981 * declare that a file should be sparse.
/*
 * Pick the SMB write timeout class based on how far past the cached
 * server EOF this write starts (see the block comment above).
 * NOTE(review): the return for the <= server_eof case and the final
 * fallback return (presumably CIFS_STD_OP / CIFS_LONG_OP) are elided in
 * this dump; code unchanged, comments only.
 */
984 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
986 if (offset <= cifsi->server_eof)
988 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
989 return CIFS_VLONG_OP;
/*
 * Advance the cached server end-of-file after a successful write, so
 * cifs_write_timeout sees an up-to-date EOF for subsequent requests.
 * NOTE(review): return type line and braces are elided in this dump.
 */
994 /* update the file size (if needed) after a write */
996 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
997 unsigned int bytes_written)
999 loff_t end_of_write = offset + bytes_written;
1001 if (end_of_write > cifsi->server_eof)
1002 cifsi->server_eof = end_of_write;
/*
 * Write from a user-space buffer to the file via CIFSSMBWrite, in chunks
 * bounded by the negotiated wsize.  Retries on -EAGAIN, reopening an
 * invalidated handle (without flushing, to avoid deadlock) and bailing
 * out with the partial count if the handle disappears or close is
 * pending.  Updates cached EOF, write stats, and i_size under i_lock.
 * NOTE(review): elided dump — declarations of rc/xid/long_op, the netfid
 * argument line of CIFSSMBWrite, some error returns and the closing
 * cleanup are not visible; comments only.
 */
1005 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1006 size_t write_size, loff_t *poffset)
1009 unsigned int bytes_written = 0;
1010 unsigned int total_written;
1011 struct cifs_sb_info *cifs_sb;
1012 struct cifsTconInfo *pTcon;
1014 struct cifsFileInfo *open_file;
1015 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1017 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1019 pTcon = cifs_sb->tcon;
1022 (" write %d bytes to offset %lld of %s", write_size,
1023 *poffset, file->f_path.dentry->d_name.name)); */
1025 if (file->private_data == NULL)
1027 open_file = (struct cifsFileInfo *) file->private_data;
1029 rc = generic_write_checks(file, poffset, &write_size, 0);
/* first chunk may extend EOF, so it can need a longer SMB timeout */
1035 long_op = cifs_write_timeout(cifsi, *poffset);
1036 for (total_written = 0; write_size > total_written;
1037 total_written += bytes_written) {
1039 while (rc == -EAGAIN) {
1040 if (file->private_data == NULL) {
1041 /* file has been closed on us */
1043 /* if we have gotten here we have written some data
1044 and blocked, and the file has been freed on us while
1045 we blocked so return what we managed to write */
1046 return total_written;
1048 if (open_file->closePend) {
1051 return total_written;
1055 if (open_file->invalidHandle) {
1056 /* we could deadlock if we called
1057 filemap_fdatawait from here so tell
1058 reopen_file not to flush data to server
1060 rc = cifs_reopen_file(file, false);
1065 rc = CIFSSMBWrite(xid, pTcon,
1067 min_t(const int, cifs_sb->wsize,
1068 write_size - total_written),
1069 *poffset, &bytes_written,
1070 NULL, write_data + total_written, long_op);
1072 if (rc || (bytes_written == 0)) {
1080 cifs_update_eof(cifsi, *poffset, bytes_written);
1081 *poffset += bytes_written;
1083 long_op = CIFS_STD_OP; /* subsequent writes fast -
1084 15 seconds is plenty */
1087 cifs_stats_bytes_written(pTcon, total_written);
1089 /* since the write may have blocked check these pointers again */
1090 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1091 struct inode *inode = file->f_path.dentry->d_inode;
1092 /* Do not update local mtime - server will set its actual value on write
1093 * inode->i_ctime = inode->i_mtime =
1094 * current_fs_time(inode->i_sb);*/
1095 if (total_written > 0) {
1096 spin_lock(&inode->i_lock);
1097 if (*poffset > file->f_path.dentry->d_inode->i_size)
1098 i_size_write(file->f_path.dentry->d_inode,
1100 spin_unlock(&inode->i_lock);
1102 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1105 return total_written;
/*
 * Kernel-buffer variant of the write path (write_data is a kernel
 * pointer, e.g. from writepage).  Same retry/reopen structure as
 * cifs_user_write, but prefers the iovec-based CIFSSMBWrite2 when the
 * experimental flag or SMB signing is in effect, falling back to
 * CIFSSMBWrite otherwise.
 * NOTE(review): elided dump — declarations (rc, xid, long_op, iov, len),
 * pieces of the CIFSSMBWrite2/CIFSSMBWrite argument lists and several
 * braces/returns are missing; comments only.
 */
1108 static ssize_t cifs_write(struct file *file, const char *write_data,
1109 size_t write_size, loff_t *poffset)
1112 unsigned int bytes_written = 0;
1113 unsigned int total_written;
1114 struct cifs_sb_info *cifs_sb;
1115 struct cifsTconInfo *pTcon;
1117 struct cifsFileInfo *open_file;
1118 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1120 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1122 pTcon = cifs_sb->tcon;
1124 cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
1125 *poffset, file->f_path.dentry->d_name.name));
1127 if (file->private_data == NULL)
1129 open_file = (struct cifsFileInfo *)file->private_data;
1133 long_op = cifs_write_timeout(cifsi, *poffset);
1134 for (total_written = 0; write_size > total_written;
1135 total_written += bytes_written) {
1137 while (rc == -EAGAIN) {
1138 if (file->private_data == NULL) {
1139 /* file has been closed on us */
1141 /* if we have gotten here we have written some data
1142 and blocked, and the file has been freed on us
1143 while we blocked so return what we managed to
1145 return total_written;
1147 if (open_file->closePend) {
1150 return total_written;
1154 if (open_file->invalidHandle) {
1155 /* we could deadlock if we called
1156 filemap_fdatawait from here so tell
1157 reopen_file not to flush data to
1159 rc = cifs_reopen_file(file, false);
/* signed sessions must use the write2 (iovec) path */
1163 if (experimEnabled || (pTcon->ses->server &&
1164 ((pTcon->ses->server->secMode &
1165 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1170 len = min((size_t)cifs_sb->wsize,
1171 write_size - total_written);
1172 /* iov[0] is reserved for smb header */
1173 iov[1].iov_base = (char *)write_data +
1175 iov[1].iov_len = len;
1176 rc = CIFSSMBWrite2(xid, pTcon,
1177 open_file->netfid, len,
1178 *poffset, &bytes_written,
1181 rc = CIFSSMBWrite(xid, pTcon,
1183 min_t(const int, cifs_sb->wsize,
1184 write_size - total_written),
1185 *poffset, &bytes_written,
1186 write_data + total_written,
1189 if (rc || (bytes_written == 0)) {
1197 cifs_update_eof(cifsi, *poffset, bytes_written);
1198 *poffset += bytes_written;
1200 long_op = CIFS_STD_OP; /* subsequent writes fast -
1201 15 seconds is plenty */
1204 cifs_stats_bytes_written(pTcon, total_written);
1206 /* since the write may have blocked check these pointers again */
1207 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1208 /*BB We could make this contingent on superblock ATIME flag too */
1209 /* file->f_path.dentry->d_inode->i_ctime =
1210 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1211 if (total_written > 0) {
1212 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1213 if (*poffset > file->f_path.dentry->d_inode->i_size)
1214 i_size_write(file->f_path.dentry->d_inode,
1216 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1218 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1221 return total_written;
1224 #ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * find_readable_file - locate an open handle usable for reads.
 *
 * Walks cifs_inode->openFileList under GlobalSMBSeslock looking for a
 * handle opened O_RDWR or O_RDONLY that is neither pending close nor
 * invalid.  On success, wrtPending is incremented so the handle cannot
 * be closed underneath the caller; the caller must drop that reference
 * when done.  NOTE(review): failure path (return NULL after the final
 * read_unlock) is elided in this view — confirm against full source.
 */
1225 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1227 struct cifsFileInfo *open_file = NULL;
1229 read_lock(&GlobalSMBSeslock);
1230 /* we could simply get the first_list_entry since write-only entries
1231 are always at the end of the list but since the first entry might
1232 have a close pending, we go through the whole list */
1233 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1234 if (open_file->closePend)
1236 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1237 (open_file->pfile->f_flags & O_RDONLY))) {
1238 if (!open_file->invalidHandle) {
1239 /* found a good file */
1240 /* lock it so it will not be closed on us */
1241 atomic_inc(&open_file->wrtPending);
1242 read_unlock(&GlobalSMBSeslock);
1244 } /* else might as well continue, and look for
1245 another, or simply have the caller reopen it
1246 again rather than trying to fix this handle */
1247 } else /* write only file */
1248 break; /* write only files are last so must be done */
1250 read_unlock(&GlobalSMBSeslock);
/*
 * find_writable_file - locate an open handle usable for writes.
 *
 * First pass prefers a handle opened by the current tgid; if none is
 * found, any_available is set and the scan restarts accepting any pid.
 * A handle opened O_RDWR or O_WRONLY gets its wrtPending count bumped
 * (caller must drop it).  Invalid handles are reopened with the list
 * lock dropped, since cifs_reopen_file can block; on failure or a
 * racing close the scan restarts via the refind_writable label (label
 * itself elided in this view).
 */
1255 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1257 struct cifsFileInfo *open_file;
1258 bool any_available = false;
1261 /* Having a null inode here (because mapping->host was set to zero by
1262 the VFS or MM) should not happen but we had reports of an oops (due to
1263 it being zero) during stress testcases so we need to check for it */
1265 if (cifs_inode == NULL) {
1266 cERROR(1, ("Null inode passed to cifs_writeable_file"));
1271 read_lock(&GlobalSMBSeslock);
1273 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1274 if (open_file->closePend ||
1275 (!any_available && open_file->pid != current->tgid))
1278 if (open_file->pfile &&
1279 ((open_file->pfile->f_flags & O_RDWR) ||
1280 (open_file->pfile->f_flags & O_WRONLY))) {
1281 atomic_inc(&open_file->wrtPending);
1283 if (!open_file->invalidHandle) {
1284 /* found a good writable file */
1285 read_unlock(&GlobalSMBSeslock);
1289 read_unlock(&GlobalSMBSeslock);
1290 /* Had to unlock since following call can block */
1291 rc = cifs_reopen_file(open_file->pfile, false);
1293 if (!open_file->closePend)
1295 else { /* start over in case this was deleted */
1296 /* since the list could be modified */
1297 read_lock(&GlobalSMBSeslock);
1298 atomic_dec(&open_file->wrtPending);
1299 goto refind_writable;
1303 /* if it fails, try another handle if possible -
1304 (we can not do this if closePending since
1305 loop could be modified - in which case we
1306 have to start at the beginning of the list
1307 again. Note that it would be bad
1308 to hold up writepages here (rather than
1309 in caller) with continuous retries */
1310 cFYI(1, ("wp failed on reopen file"));
1311 read_lock(&GlobalSMBSeslock);
1312 /* can not use this handle, no write
1313 pending on this one after all */
1314 atomic_dec(&open_file->wrtPending);
1316 if (open_file->closePend) /* list could have changed */
1317 goto refind_writable;
1318 /* else we simply continue to the next entry. Thus
1319 we do not loop on reopen errors. If we
1320 can not reopen the file, for example if we
1321 reconnected to a server with another client
1322 racing to delete or lock the file we would not
1323 make progress if we restarted before the beginning
1324 of the loop here. */
1327 /* couldn't find useable FH with same pid, try any available */
1328 if (!any_available) {
1329 any_available = true;
1330 goto refind_writable;
1332 read_unlock(&GlobalSMBSeslock);
/*
 * cifs_partialpagewrite - write bytes [from, to) of a page to the server.
 *
 * Maps the page, clamps the range so the file is never extended, picks
 * a writable handle via find_writable_file() (dropping its wrtPending
 * reference afterwards) and pushes the data through cifs_write().
 * Returns 0 when racing with truncate (offset beyond i_size) since the
 * data no longer matters.  kunmap/return paths are elided in this view.
 */
1336 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1338 struct address_space *mapping = page->mapping;
1339 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1342 int bytes_written = 0;
1343 struct cifs_sb_info *cifs_sb;
1344 struct cifsTconInfo *pTcon;
1345 struct inode *inode;
1346 struct cifsFileInfo *open_file;
1348 if (!mapping || !mapping->host)
1351 inode = page->mapping->host;
1352 cifs_sb = CIFS_SB(inode->i_sb);
1353 pTcon = cifs_sb->tcon;
1355 offset += (loff_t)from;
1356 write_data = kmap(page);
1359 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1364 /* racing with truncate? */
1365 if (offset > mapping->host->i_size) {
1367 return 0; /* don't care */
1370 /* check to make sure that we are not extending the file */
1371 if (mapping->host->i_size - offset < (loff_t)to)
1372 to = (unsigned)(mapping->host->i_size - offset);
1374 open_file = find_writable_file(CIFS_I(mapping->host));
1376 bytes_written = cifs_write(open_file->pfile, write_data,
1378 atomic_dec(&open_file->wrtPending);
1379 /* Does mm or vfs already set times? */
1380 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1381 if ((bytes_written > 0) && (offset))
1383 else if (bytes_written < 0)
1386 cFYI(1, ("No writeable filehandles for inode"));
/*
 * cifs_writepages - ->writepages: batch contiguous dirty pages into a
 * kvec array (iov[0] reserved for the SMB header) and send them in a
 * single CIFSSMBWrite2 call per batch.
 *
 * Falls back to generic_writepages() (one page at a time through
 * cifs_writepage) when wsize < PAGE_CACHE_SIZE, or when signing is
 * required/enabled and the experimental path is off, or when the kvec
 * array cannot be allocated.  A fresh writable handle is looked up for
 * every batch because the previous one may have been invalidated.
 */
1394 static int cifs_writepages(struct address_space *mapping,
1395 struct writeback_control *wbc)
1397 struct backing_dev_info *bdi = mapping->backing_dev_info;
1398 unsigned int bytes_to_write;
1399 unsigned int bytes_written;
1400 struct cifs_sb_info *cifs_sb;
1404 int range_whole = 0;
1411 struct cifsFileInfo *open_file;
1412 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1414 struct pagevec pvec;
1419 cifs_sb = CIFS_SB(mapping->host->i_sb);
1422 * If wsize is smaller than the page cache size, default to writing
1423 * one page at a time via cifs_writepage
1425 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1426 return generic_writepages(mapping, wbc);
1428 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1429 if (cifs_sb->tcon->ses->server->secMode &
1430 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1431 if (!experimEnabled)
1432 return generic_writepages(mapping, wbc);
1434 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1436 return generic_writepages(mapping, wbc);
1440 * BB: Is this meaningful for a non-block-device file system?
1441 * If it is, we should test it again after we do I/O
1443 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1444 wbc->encountered_congestion = 1;
1451 pagevec_init(&pvec, 0);
1452 if (wbc->range_cyclic) {
1453 index = mapping->writeback_index; /* Start from prev offset */
1456 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1457 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1458 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1463 while (!done && (index <= end) &&
1464 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1465 PAGECACHE_TAG_DIRTY,
1466 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1475 for (i = 0; i < nr_pages; i++) {
1476 page = pvec.pages[i];
1478 * At this point we hold neither mapping->tree_lock nor
1479 * lock on the page itself: the page may be truncated or
1480 * invalidated (changing page->mapping to NULL), or even
1481 * swizzled back from swapper_space to tmpfs file
1487 else if (!trylock_page(page))
1490 if (unlikely(page->mapping != mapping)) {
1495 if (!wbc->range_cyclic && page->index > end) {
1501 if (next && (page->index != next)) {
1502 /* Not next consecutive page */
1507 if (wbc->sync_mode != WB_SYNC_NONE)
1508 wait_on_page_writeback(page);
1510 if (PageWriteback(page) ||
1511 !clear_page_dirty_for_io(page)) {
1517 * This actually clears the dirty bit in the radix tree.
1518 * See cifs_writepage() for more commentary.
1520 set_page_writeback(page);
1522 if (page_offset(page) >= mapping->host->i_size) {
1525 end_page_writeback(page);
1530 * BB can we get rid of this? pages are held by pvec
1532 page_cache_get(page);
1534 len = min(mapping->host->i_size - page_offset(page),
1535 (loff_t)PAGE_CACHE_SIZE);
1537 /* reserve iov[0] for the smb header */
1539 iov[n_iov].iov_base = kmap(page);
1540 iov[n_iov].iov_len = len;
1541 bytes_to_write += len;
1545 offset = page_offset(page);
1547 next = page->index + 1;
1548 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1552 /* Search for a writable handle every time we call
1553 * CIFSSMBWrite2. We can't rely on the last handle
1554 * we used to still be valid
1556 open_file = find_writable_file(CIFS_I(mapping->host));
1558 cERROR(1, ("No writable handles for inode"));
1561 long_op = cifs_write_timeout(cifsi, offset);
1562 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1564 bytes_to_write, offset,
1565 &bytes_written, iov, n_iov,
1567 atomic_dec(&open_file->wrtPending);
1568 cifs_update_eof(cifsi, offset, bytes_written);
1570 if (rc || bytes_written < bytes_to_write) {
1571 cERROR(1, ("Write2 ret %d, wrote %d",
1572 rc, bytes_written));
1573 /* BB what if continued retry is
1574 requested via mount flags? */
1576 set_bit(AS_ENOSPC, &mapping->flags);
1578 set_bit(AS_EIO, &mapping->flags);
1580 cifs_stats_bytes_written(cifs_sb->tcon,
1584 for (i = 0; i < n_iov; i++) {
1585 page = pvec.pages[first + i];
1586 /* Should we also set page error on
1587 success rc but too little data written? */
1588 /* BB investigate retry logic on temporary
1589 server crash cases and how recovery works
1590 when page marked as error */
1595 end_page_writeback(page);
1596 page_cache_release(page);
1598 if ((wbc->nr_to_write -= n_iov) <= 0)
1602 /* Need to re-find the pages we skipped */
1603 index = pvec.pages[0]->index + 1;
1605 pagevec_release(&pvec);
1607 if (!scanned && !done) {
1609 * We hit the last page and there is more work to be done: wrap
1610 * back to the start of the file
1616 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1617 mapping->writeback_index = index;
/*
 * cifs_writepage - ->writepage: write a single page synchronously via
 * cifs_partialpagewrite().  Marks the page under writeback before the
 * I/O and completes/releases it afterwards.
 */
1624 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1630 /* BB add check for wbc flags */
1631 page_cache_get(page);
1632 if (!PageUptodate(page))
1633 cFYI(1, ("ppw - page not up to date"));
1636 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1638 * A writepage() implementation always needs to do either this,
1639 * or re-dirty the page with "redirty_page_for_writepage()" in
1640 * the case of a failure.
1642 * Just unlocking the page will cause the radix tree tag-bits
1643 * to fail to update with the state of the page correctly.
1645 set_page_writeback(page);
1646 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1647 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1649 end_page_writeback(page);
1650 page_cache_release(page);
/*
 * cifs_write_end - ->write_end: finish a buffered write started by
 * cifs_write_begin.
 *
 * If the page is uptodate (or became fully written), it is simply
 * marked dirty for later writeback; otherwise the copied bytes are
 * pushed synchronously through cifs_write() using this file's own
 * handle.  Extends i_size under inode->i_lock when the write went
 * past EOF.
 */
1655 static int cifs_write_end(struct file *file, struct address_space *mapping,
1656 loff_t pos, unsigned len, unsigned copied,
1657 struct page *page, void *fsdata)
1660 struct inode *inode = mapping->host;
1662 cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1663 page, pos, copied));
1665 if (PageChecked(page)) {
1667 SetPageUptodate(page);
1668 ClearPageChecked(page);
1669 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1670 SetPageUptodate(page);
1672 if (!PageUptodate(page)) {
1674 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1678 /* this is probably better than directly calling
1679 partialpage_write since in this function the file handle is
1680 known which we might as well leverage */
1681 /* BB check if anything else missing out of ppw
1682 such as updating last write time */
1683 page_data = kmap(page);
1684 rc = cifs_write(file, page_data + offset, copied, &pos);
1685 /* if (rc < 0) should we set writebehind rc? */
1692 set_page_dirty(page);
1696 spin_lock(&inode->i_lock);
1697 if (pos > inode->i_size)
1698 i_size_write(inode, pos);
1699 spin_unlock(&inode->i_lock);
1703 page_cache_release(page);
/*
 * cifs_fsync - flush dirty pages for the inode, fold in any stored
 * write-behind error (and clear it), then ask the server to flush the
 * file handle via CIFSSMBFlush unless the nosssync mount flag is set.
 */
1708 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1712 struct cifsTconInfo *tcon;
1713 struct cifsFileInfo *smbfile =
1714 (struct cifsFileInfo *)file->private_data;
1715 struct inode *inode = file->f_path.dentry->d_inode;
1719 cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1720 dentry->d_name.name, datasync));
1722 rc = filemap_write_and_wait(inode->i_mapping);
1724 rc = CIFS_I(inode)->write_behind_rc;
1725 CIFS_I(inode)->write_behind_rc = 0;
1726 tcon = CIFS_SB(inode->i_sb)->tcon;
1727 if (!rc && tcon && smbfile &&
1728 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1729 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1736 /* static void cifs_sync_page(struct page *page)
1738 struct address_space *mapping;
1739 struct inode *inode;
1740 unsigned long index = page->index;
1741 unsigned int rpages = 0;
1744 cFYI(1, ("sync page %p",page));
1745 mapping = page->mapping;
1748 inode = mapping->host;
1752 /* fill in rpages then
1753 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1755 /* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1765 * As file closes, flush all cached write data for this inode checking
1766 * for write behind errors.
1767 *
1768 * Kicks off writeback with filemap_fdatawrite() (does not wait for
1769 * completion) and returns any previously recorded write-behind error,
1770 * clearing it on the inode.
1768 int cifs_flush(struct file *file, fl_owner_t id)
1770 struct inode *inode = file->f_path.dentry->d_inode;
1773 /* Rather than do the steps manually:
1774 lock the inode for writing
1775 loop through pages looking for write behind data (dirty pages)
1776 coalesce into contiguous 16K (or smaller) chunks to write to server
1777 send to server (prefer in parallel)
1778 deal with writebehind errors
1779 unlock inode for writing
1780 filemapfdatawrite appears easier for the time being */
1782 rc = filemap_fdatawrite(inode->i_mapping);
1783 /* reset wb rc if we were able to write out dirty pages */
1785 rc = CIFS_I(inode)->write_behind_rc;
1786 CIFS_I(inode)->write_behind_rc = 0;
1789 cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
/*
 * cifs_user_read - read into a userspace buffer without going through
 * the page cache.
 *
 * Loops issuing CIFSSMBRead calls, copying each response payload
 * (skipping the 4-byte RFC1001 length field plus the DataOffset from
 * the SMB read response header) to userspace with copy_to_user, and
 * releasing the small/large SMB buffer each iteration.  Invalid
 * handles are reopened inside the -EAGAIN retry loop.
 */
1794 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1795 size_t read_size, loff_t *poffset)
1798 unsigned int bytes_read = 0;
1799 unsigned int total_read = 0;
1800 unsigned int current_read_size;
1801 struct cifs_sb_info *cifs_sb;
1802 struct cifsTconInfo *pTcon;
1804 struct cifsFileInfo *open_file;
1805 char *smb_read_data;
1806 char __user *current_offset;
1807 struct smb_com_read_rsp *pSMBr;
1810 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1811 pTcon = cifs_sb->tcon;
1813 if (file->private_data == NULL) {
1817 open_file = (struct cifsFileInfo *)file->private_data;
1819 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1820 cFYI(1, ("attempting read on write only file instance"));
1822 for (total_read = 0, current_offset = read_data;
1823 read_size > total_read;
1824 total_read += bytes_read, current_offset += bytes_read) {
1825 current_read_size = min_t(const int, read_size - total_read,
1828 smb_read_data = NULL;
1829 while (rc == -EAGAIN) {
1830 int buf_type = CIFS_NO_BUFFER;
1831 if ((open_file->invalidHandle) &&
1832 (!open_file->closePend)) {
1833 rc = cifs_reopen_file(file, true);
1837 rc = CIFSSMBRead(xid, pTcon,
1839 current_read_size, *poffset,
1840 &bytes_read, &smb_read_data,
1842 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1843 if (smb_read_data) {
1844 if (copy_to_user(current_offset,
1846 4 /* RFC1001 length field */ +
1847 le16_to_cpu(pSMBr->DataOffset),
1851 if (buf_type == CIFS_SMALL_BUFFER)
1852 cifs_small_buf_release(smb_read_data);
1853 else if (buf_type == CIFS_LARGE_BUFFER)
1854 cifs_buf_release(smb_read_data);
1855 smb_read_data = NULL;
1858 if (rc || (bytes_read == 0)) {
1866 cifs_stats_bytes_read(pTcon, bytes_read);
1867 *poffset += bytes_read;
/*
 * cifs_read - read into a kernel buffer (used by cifs_readpage_worker).
 *
 * Like cifs_user_read but the server response is read directly into
 * the caller's buffer rather than copied from an SMB buffer.  Clamps
 * the request size against the server's negotiated maxBuf when the
 * session lacks CAP_LARGE_FILES (Windows 9x/ME refuse larger reads).
 */
1875 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1879 unsigned int bytes_read = 0;
1880 unsigned int total_read;
1881 unsigned int current_read_size;
1882 struct cifs_sb_info *cifs_sb;
1883 struct cifsTconInfo *pTcon;
1885 char *current_offset;
1886 struct cifsFileInfo *open_file;
1887 int buf_type = CIFS_NO_BUFFER;
1890 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1891 pTcon = cifs_sb->tcon;
1893 if (file->private_data == NULL) {
1897 open_file = (struct cifsFileInfo *)file->private_data;
1899 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1900 cFYI(1, ("attempting read on write only file instance"));
1902 for (total_read = 0, current_offset = read_data;
1903 read_size > total_read;
1904 total_read += bytes_read, current_offset += bytes_read) {
1905 current_read_size = min_t(const int, read_size - total_read,
1907 /* For windows me and 9x we do not want to request more
1908 than it negotiated since it will refuse the read then */
1910 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1911 current_read_size = min_t(const int, current_read_size,
1912 pTcon->ses->server->maxBuf - 128);
1915 while (rc == -EAGAIN) {
1916 if ((open_file->invalidHandle) &&
1917 (!open_file->closePend)) {
1918 rc = cifs_reopen_file(file, true);
1922 rc = CIFSSMBRead(xid, pTcon,
1924 current_read_size, *poffset,
1925 &bytes_read, ¤t_offset,
/* NOTE(review): "¤t_offset" above looks like mojibake of
   "&current_offset" (HTML entity damage) — verify against the
   upstream source before building */
1928 if (rc || (bytes_read == 0)) {
1936 cifs_stats_bytes_read(pTcon, total_read);
1937 *poffset += bytes_read;
/*
 * cifs_file_mmap - revalidate cached inode data against the server
 * before handing the mapping off to generic_file_mmap().
 */
1944 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1946 struct dentry *dentry = file->f_path.dentry;
1950 rc = cifs_revalidate(dentry);
1952 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1956 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - distribute a contiguous read buffer across
 * the readahead pages.
 *
 * Pops pages off the tail of the readahead list, inserts each into the
 * page cache, copies one PAGE_CACHE_SIZE slice of data in (zero-filling
 * the tail of a final partial page), marks it uptodate and queues it on
 * the LRU pagevec.  Pages that fail add_to_page_cache are dropped and
 * the corresponding data skipped.
 */
1962 static void cifs_copy_cache_pages(struct address_space *mapping,
1963 struct list_head *pages, int bytes_read, char *data,
1964 struct pagevec *plru_pvec)
1969 while (bytes_read > 0) {
1970 if (list_empty(pages))
1973 page = list_entry(pages->prev, struct page, lru);
1974 list_del(&page->lru);
1976 if (add_to_page_cache(page, mapping, page->index,
1978 page_cache_release(page);
1979 cFYI(1, ("Add page cache failed"));
1980 data += PAGE_CACHE_SIZE;
1981 bytes_read -= PAGE_CACHE_SIZE;
1985 target = kmap_atomic(page, KM_USER0);
1987 if (PAGE_CACHE_SIZE > bytes_read) {
1988 memcpy(target, data, bytes_read);
1989 /* zero the tail end of this partial page */
1990 memset(target + bytes_read, 0,
1991 PAGE_CACHE_SIZE - bytes_read);
1994 memcpy(target, data, PAGE_CACHE_SIZE);
1995 bytes_read -= PAGE_CACHE_SIZE;
1997 kunmap_atomic(target, KM_USER0);
1999 flush_dcache_page(page);
2000 SetPageUptodate(page);
2002 if (!pagevec_add(plru_pvec, page))
2003 __pagevec_lru_add_file(plru_pvec);
2004 data += PAGE_CACHE_SIZE;
/*
 * cifs_readpages - ->readpages: populate readahead pages with server data.
 *
 * Counts runs of adjacent page indices in the readahead list, issues a
 * single CIFSSMBRead per contiguous run (bounded by rsize rounded down
 * to whole pages), then scatters the response payload across the pages
 * via cifs_copy_cache_pages().  SMB response buffers are released on
 * every exit path, including the -EAGAIN retry loop.
 */
2009 static int cifs_readpages(struct file *file, struct address_space *mapping,
2010 struct list_head *page_list, unsigned num_pages)
2016 struct cifs_sb_info *cifs_sb;
2017 struct cifsTconInfo *pTcon;
2018 unsigned int bytes_read = 0;
2019 unsigned int read_size, i;
2020 char *smb_read_data = NULL;
2021 struct smb_com_read_rsp *pSMBr;
2022 struct pagevec lru_pvec;
2023 struct cifsFileInfo *open_file;
2024 int buf_type = CIFS_NO_BUFFER;
2027 if (file->private_data == NULL) {
2031 open_file = (struct cifsFileInfo *)file->private_data;
2032 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2033 pTcon = cifs_sb->tcon;
2035 pagevec_init(&lru_pvec, 0);
2036 cFYI(DBG2, ("rpages: num pages %d", num_pages));
2037 for (i = 0; i < num_pages; ) {
2038 unsigned contig_pages;
2039 struct page *tmp_page;
2040 unsigned long expected_index;
2042 if (list_empty(page_list))
2045 page = list_entry(page_list->prev, struct page, lru);
2046 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2048 /* count adjacent pages that we will read into */
2051 list_entry(page_list->prev, struct page, lru)->index;
2052 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2053 if (tmp_page->index == expected_index) {
2059 if (contig_pages + i > num_pages)
2060 contig_pages = num_pages - i;
2062 /* for reads over a certain size could initiate async
2065 read_size = contig_pages * PAGE_CACHE_SIZE;
2066 /* Read size needs to be in multiples of one page */
2067 read_size = min_t(const unsigned int, read_size,
2068 cifs_sb->rsize & PAGE_CACHE_MASK);
2069 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d",
2070 read_size, contig_pages));
2072 while (rc == -EAGAIN) {
2073 if ((open_file->invalidHandle) &&
2074 (!open_file->closePend)) {
2075 rc = cifs_reopen_file(file, true);
2080 rc = CIFSSMBRead(xid, pTcon,
2083 &bytes_read, &smb_read_data,
2085 /* BB more RC checks ? */
2086 if (rc == -EAGAIN) {
2087 if (smb_read_data) {
2088 if (buf_type == CIFS_SMALL_BUFFER)
2089 cifs_small_buf_release(smb_read_data);
2090 else if (buf_type == CIFS_LARGE_BUFFER)
2091 cifs_buf_release(smb_read_data);
2092 smb_read_data = NULL;
2096 if ((rc < 0) || (smb_read_data == NULL)) {
2097 cFYI(1, ("Read error in readpages: %d", rc));
2099 } else if (bytes_read > 0) {
2100 task_io_account_read(bytes_read);
2101 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2102 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2103 smb_read_data + 4 /* RFC1001 hdr */ +
2104 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
2106 i += bytes_read >> PAGE_CACHE_SHIFT;
2107 cifs_stats_bytes_read(pTcon, bytes_read);
2108 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2109 i++; /* account for partial page */
2111 /* server copy of file can have smaller size
2113 /* BB do we need to verify this common case ?
2114 this case is ok - if we are at server EOF
2115 we will hit it on next read */
2120 cFYI(1, ("No bytes read (%d) at offset %lld . "
2121 "Cleaning remaining pages from readahead list",
2122 bytes_read, offset));
2123 /* BB turn off caching and do new lookup on
2124 file size at server? */
2127 if (smb_read_data) {
2128 if (buf_type == CIFS_SMALL_BUFFER)
2129 cifs_small_buf_release(smb_read_data);
2130 else if (buf_type == CIFS_LARGE_BUFFER)
2131 cifs_buf_release(smb_read_data);
2132 smb_read_data = NULL;
2137 pagevec_lru_add_file(&lru_pvec);
2139 /* need to free smb_read_data buf before exit */
2140 if (smb_read_data) {
2141 if (buf_type == CIFS_SMALL_BUFFER)
2142 cifs_small_buf_release(smb_read_data);
2143 else if (buf_type == CIFS_LARGE_BUFFER)
2144 cifs_buf_release(smb_read_data);
2145 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page cache page via cifs_read(),
 * zeroing the tail of a short read and marking the page uptodate on
 * success.  Updates i_atime as a side effect.
 */
2152 static int cifs_readpage_worker(struct file *file, struct page *page,
2158 page_cache_get(page);
2159 read_data = kmap(page);
2160 /* for reads over a certain size could initiate async read ahead */
2162 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2167 cFYI(1, ("Bytes read %d", rc));
2169 file->f_path.dentry->d_inode->i_atime =
2170 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2172 if (PAGE_CACHE_SIZE > rc)
2173 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2175 flush_dcache_page(page);
2176 SetPageUptodate(page);
2181 page_cache_release(page);
/*
 * cifs_readpage - ->readpage: thin wrapper computing the page's file
 * offset and delegating to cifs_readpage_worker().
 */
2185 static int cifs_readpage(struct file *file, struct page *page)
2187 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2193 if (file->private_data == NULL) {
2198 cFYI(1, ("readpage %p at offset %d 0x%x\n",
2199 page, (int)offset, (int)offset));
2201 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - true if any non-close-pending handle on the
 * inode was opened O_RDWR or O_WRONLY.  Scans under GlobalSMBSeslock.
 */
2209 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2211 struct cifsFileInfo *open_file;
2213 read_lock(&GlobalSMBSeslock);
2214 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2215 if (open_file->closePend)
2217 if (open_file->pfile &&
2218 ((open_file->pfile->f_flags & O_RDWR) ||
2219 (open_file->pfile->f_flags & O_WRONLY))) {
2220 read_unlock(&GlobalSMBSeslock);
2224 read_unlock(&GlobalSMBSeslock);
2228 /* We do not want to update the file size from server for inodes
2229 open for write - to avoid races with writepage extending
2230 the file - in the future we could consider allowing
2231 refreshing the inode only on increases in the file size
2232 but this is tricky to do without racing with writebehind
2233 page caching in the current Linux kernel design */
/*
 * Returns true when the cached i_size may be replaced with the
 * server-reported end_of_file: always for direct I/O mounts (no page
 * cache to corrupt), otherwise only per the checks below.
 */
2234 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2239 if (is_inode_writable(cifsInode)) {
2240 /* This inode is open for write at least once */
2241 struct cifs_sb_info *cifs_sb;
2243 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2244 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2245 /* since no page cache to corrupt on directio
2246 we can change size safely */
2250 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - ->write_begin: grab and prepare the page for a
 * buffered write.
 *
 * Skips the read-from-server step when the page is already uptodate,
 * when the write covers a full page, or (with a read oplock) when the
 * page lies at/beyond EOF — in that last case the untouched parts are
 * zeroed and PageChecked marks them as effectively uptodate for
 * cifs_write_end.  Otherwise a readable open falls back to reading
 * the page in; errors there are deliberately ignored since write_end
 * will do a sync write when PG_uptodate is not set.
 */
2258 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2259 loff_t pos, unsigned len, unsigned flags,
2260 struct page **pagep, void **fsdata)
2262 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2263 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2264 loff_t page_start = pos & PAGE_MASK;
2269 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2271 page = grab_cache_page_write_begin(mapping, index, flags);
2277 if (PageUptodate(page))
2281 * If we write a full page it will be up to date, no need to read from
2282 * the server. If the write is short, we'll end up doing a sync write
2285 if (len == PAGE_CACHE_SIZE)
2289 * optimize away the read when we have an oplock, and we're not
2290 * expecting to use any of the data we'd be reading in. That
2291 * is, when the page lies beyond the EOF, or straddles the EOF
2292 * and the write will cover all of the existing data.
2294 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2295 i_size = i_size_read(mapping->host);
2296 if (page_start >= i_size ||
2297 (offset == 0 && (pos + len) >= i_size)) {
2298 zero_user_segments(page, 0, offset,
2302 * PageChecked means that the parts of the page
2303 * to which we're not writing are considered up
2304 * to date. Once the data is copied to the
2305 * page, it can be set uptodate.
2307 SetPageChecked(page);
2312 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2314 * might as well read a page, it is fast enough. If we get
2315 * an error, we don't need to return it. cifs_write_end will
2316 * do a sync write instead since PG_uptodate isn't set.
2318 cifs_readpage_worker(file, page, &page_start);
2320 /* we could try using another file handle if there is one -
2321 but how would we lock it to prevent close of that handle
2322 racing with this read? In any case
2323 this will be written out by write_end so is fine */
/*
 * Address space operations for mounts whose server buffer can hold a
 * full page of data (includes ->readpages); see cifs_addr_ops_smallbuf
 * below for the variant without readpages support.
 */
2330 const struct address_space_operations cifs_addr_ops = {
2331 .readpage = cifs_readpage,
2332 .readpages = cifs_readpages,
2333 .writepage = cifs_writepage,
2334 .writepages = cifs_writepages,
2335 .write_begin = cifs_write_begin,
2336 .write_end = cifs_write_end,
2337 .set_page_dirty = __set_page_dirty_nobuffers,
2338 /* .sync_page = cifs_sync_page, */
2343 * cifs_readpages requires the server to support a buffer large enough to
2344 * contain the header plus one complete page of data. Otherwise, we need
2345 * to leave cifs_readpages out of the address space operations.
2347 const struct address_space_operations cifs_addr_ops_smallbuf = {
2348 .readpage = cifs_readpage,
2349 .writepage = cifs_writepage,
2350 .writepages = cifs_writepages,
2351 .write_begin = cifs_write_begin,
2352 .write_end = cifs_write_end,
2353 .set_page_dirty = __set_page_dirty_nobuffers,
2354 /* .sync_page = cifs_sync_page, */