4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2007
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <asm/div64.h>
37 #include "cifsproto.h"
38 #include "cifs_unicode.h"
39 #include "cifs_debug.h"
40 #include "cifs_fs_sb.h"
/*
 * Zero and initialize a per-open-file cifsFileInfo: record the server
 * file id (netfid), the owning pid, locking state, and back-pointers to
 * the VFS file and inode.
 * NOTE(review): this extract is missing lines (original numbering is
 * non-contiguous) — the opening brace and the trailing return of
 * private_data are not visible here.
 */
42 static inline struct cifsFileInfo *cifs_init_private(
43 struct cifsFileInfo *private_data, struct inode *inode,
44 struct file *file, __u16 netfid)
46 memset(private_data, 0, sizeof(struct cifsFileInfo));
47 private_data->netfid = netfid;
48 private_data->pid = current->tgid;
/* fh_sem serializes handle invalidation against reopen (see cifs_reopen_file) */
49 init_MUTEX(&private_data->fh_sem);
/* lock_mutex protects llist, the list of byte-range locks on this handle */
50 mutex_init(&private_data->lock_mutex);
51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */
53 private_data->pInode = inode;
54 private_data->invalidHandle = false;
55 private_data->closePend = false;
56 /* we have to track num writers to the inode, since writepages
57 does not tell us which handle the write is for so there can
58 be a close (overlapping with write) of the filehandle that
59 cifs_writepages chose to use */
60 atomic_set(&private_data->wrtPending, 0);
/*
 * Map POSIX open flags (the O_ACCMODE bits) to a CIFS/NT desired-access
 * mask for SMB open requests.
 * NOTE(review): the O_RDONLY and O_WRONLY return statements and the
 * closing lines are missing from this extract (non-contiguous original
 * line numbers).
 */
65 static inline int cifs_convert_flags(unsigned int flags)
67 if ((flags & O_ACCMODE) == O_RDONLY)
69 else if ((flags & O_ACCMODE) == O_WRONLY)
71 else if ((flags & O_ACCMODE) == O_RDWR) {
72 /* GENERIC_ALL is too much permission to request
73 can cause unnecessary access denied on create */
74 /* return GENERIC_ALL; */
75 return (GENERIC_READ | GENERIC_WRITE);
/* fallback: a conservative attribute/data access mask (tail truncated here) */
78 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
79 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Map POSIX open flags to the flag word passed on a CIFS POSIX open
 * (used on servers supporting the Unix/POSIX extensions).  Creation
 * dispositions (O_CREAT/O_EXCL/O_TRUNC) are intentionally not mapped —
 * see the inline comment — since this is also used when reopening.
 * NOTE(review): extract is missing lines (e.g. the O_APPEND/O_SYNC
 * condition lines and the final return) — original numbering is
 * non-contiguous.
 */
83 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
85 fmode_t posix_flags = 0;
87 if ((flags & O_ACCMODE) == O_RDONLY)
88 posix_flags = FMODE_READ;
89 else if ((flags & O_ACCMODE) == O_WRONLY)
90 posix_flags = FMODE_WRITE;
91 else if ((flags & O_ACCMODE) == O_RDWR) {
92 /* GENERIC_ALL is too much permission to request
93 can cause unnecessary access denied on create */
94 /* return GENERIC_ALL; */
95 posix_flags = FMODE_READ | FMODE_WRITE;
97 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
98 reopening a file. They had their effect on the original open */
100 posix_flags |= (fmode_t)O_APPEND;
102 posix_flags |= (fmode_t)O_SYNC;
103 if (flags & O_DIRECTORY)
104 posix_flags |= (fmode_t)O_DIRECTORY;
105 if (flags & O_NOFOLLOW)
106 posix_flags |= (fmode_t)O_NOFOLLOW;
107 if (flags & O_DIRECT)
108 posix_flags |= (fmode_t)O_DIRECT;
/*
 * Translate POSIX creation flags to the CIFS create-disposition value
 * (see the mapping table documented in cifs_open below).
 * NOTE(review): the FILE_CREATE / FILE_OPEN_IF returns and the final
 * FILE_OPEN default are missing from this extract.
 */
113 static inline int cifs_get_disposition(unsigned int flags)
115 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
117 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
118 return FILE_OVERWRITE_IF;
119 else if ((flags & O_CREAT) == O_CREAT)
121 else if ((flags & O_TRUNC) == O_TRUNC)
122 return FILE_OVERWRITE;
127 /* all arguments to this function must be checked for validity in caller */
/*
 * Post-open bookkeeping for cifs_open: link the new handle onto the
 * inode's open-file list, decide whether cached pages can be trusted
 * (by comparing server mtime/size against the local inode), refresh
 * inode metadata from the server, and record oplock-derived caching
 * rights on the cifsInodeInfo.
 * NOTE(review): extract is missing lines (e.g. the write_lock taken
 * before the list_add, several else/closing braces, and the return) —
 * original numbering is non-contiguous.
 */
128 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
129 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
130 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
131 char *full_path, int xid)
133 struct timespec temp;
136 /* want handles we can use to read with first
137 in the list so we do not have to walk the
138 list to search for one in write_begin */
139 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
140 list_add_tail(&pCifsFile->flist,
141 &pCifsInode->openFileList);
143 list_add(&pCifsFile->flist,
144 &pCifsInode->openFileList);
146 write_unlock(&GlobalSMBSeslock);
147 if (pCifsInode->clientCanCacheRead) {
148 /* we have the inode open somewhere else
149 no need to discard cache data */
150 goto client_can_cache;
153 /* BB need same check in cifs_create too? */
154 /* if not oplocked, invalidate inode pages if mtime or file
/* compare the server's last-write time and EOF against the cached inode */
156 temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
157 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
158 (file->f_path.dentry->d_inode->i_size ==
159 (loff_t)le64_to_cpu(buf->EndOfFile))) {
160 cFYI(1, ("inode unchanged on server"));
162 if (file->f_path.dentry->d_inode->i_mapping) {
163 /* BB no need to lock inode until after invalidate
164 since namei code should already have it locked? */
165 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
/* stash any writeback error so a later fsync/close can report it */
167 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
169 cFYI(1, ("invalidating remote inode since open detected it "
171 invalidate_remote_inode(file->f_path.dentry->d_inode);
/* refresh inode metadata — POSIX-extensions path vs. generic path */
176 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
177 full_path, inode->i_sb, xid);
179 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
180 full_path, buf, inode->i_sb, xid, NULL);
/* exclusive oplock => safe to cache both reads and writes locally */
182 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
183 pCifsInode->clientCanCacheAll = true;
184 pCifsInode->clientCanCacheRead = true;
185 cFYI(1, ("Exclusive Oplock granted on inode %p",
186 file->f_path.dentry->d_inode));
187 } else if ((*oplock & 0xF) == OPLOCK_READ)
188 pCifsInode->clientCanCacheRead = true;
/*
 * VFS ->open for CIFS regular files.  If cifs_create already produced a
 * half-initialized cifsFileInfo for this process (O_CREAT path), adopt
 * it; otherwise translate the POSIX flags to CIFS access/disposition,
 * issue an SMB open (NT-style, falling back to legacy OpenX), allocate
 * and initialize file->private_data, link the handle onto the tcon and
 * inode lists, and finish via cifs_open_inode_helper.
 * NOTE(review): extract is missing many lines (xid/rc declarations,
 * error-exit labels, several braces, the posix-open fast path present
 * in this kernel era) — original numbering is non-contiguous.
 */
193 int cifs_open(struct inode *inode, struct file *file)
197 struct cifs_sb_info *cifs_sb;
198 struct cifsTconInfo *pTcon;
199 struct cifsFileInfo *pCifsFile;
200 struct cifsInodeInfo *pCifsInode;
201 struct list_head *tmp;
202 char *full_path = NULL;
206 FILE_ALL_INFO *buf = NULL;
210 cifs_sb = CIFS_SB(inode->i_sb);
211 pTcon = cifs_sb->tcon;
213 if (file->f_flags & O_CREAT) {
214 /* search inode for this file and fill in file->private_data */
215 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
216 read_lock(&GlobalSMBSeslock);
217 list_for_each(tmp, &pCifsInode->openFileList) {
218 pCifsFile = list_entry(tmp, struct cifsFileInfo,
/* match: handle created by cifs_create, not yet bound to a struct file */
220 if ((pCifsFile->pfile == NULL) &&
221 (pCifsFile->pid == current->tgid)) {
222 /* mode set in cifs_create */
224 /* needed for writepage */
225 pCifsFile->pfile = file;
227 file->private_data = pCifsFile;
231 read_unlock(&GlobalSMBSeslock);
232 if (file->private_data != NULL) {
/* adopted an existing handle — no new SMB open needed */
237 if (file->f_flags & O_EXCL)
238 cERROR(1, ("could not find file instance for "
239 "new file %p", file));
243 full_path = build_path_from_dentry(file->f_path.dentry);
244 if (full_path == NULL) {
249 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
250 inode, file->f_flags, full_path));
251 desiredAccess = cifs_convert_flags(file->f_flags);
253 /*********************************************************************
254 * open flag mapping table:
256 * POSIX Flag CIFS Disposition
257 * ---------- ----------------
258 * O_CREAT FILE_OPEN_IF
259 * O_CREAT | O_EXCL FILE_CREATE
260 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
261 * O_TRUNC FILE_OVERWRITE
262 * none of the above FILE_OPEN
264 * Note that there is not a direct match between disposition
265 * FILE_SUPERSEDE (ie create whether or not file exists although
266 * O_CREAT | O_TRUNC is similar but truncates the existing
267 * file rather than creating a new file as FILE_SUPERSEDE does
268 * (which uses the attributes / metadata passed in on open call)
270 *? O_SYNC is a reasonable match to CIFS writethrough flag
271 *? and the read write flags match reasonably. O_LARGEFILE
272 *? is irrelevant because largefile support is always used
273 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
274 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
275 *********************************************************************/
277 disposition = cifs_get_disposition(file->f_flags);
284 /* BB pass O_SYNC flag through on file attributes .. BB */
286 /* Also refresh inode by passing in file_info buf returned by SMBOpen
287 and calling get_inode_info with returned buf (at least helps
288 non-Unix server case) */
290 /* BB we can not do this if this is the second open of a file
291 and the first handle has writebehind data, we might be
292 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
293 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
/* NT-capable server: use NTCreateAndX; otherwise fall to legacy OpenX */
299 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
300 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
301 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
302 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
303 & CIFS_MOUNT_MAP_SPECIAL_CHR);
305 rc = -EIO; /* no NT SMB support fall into legacy open below */
308 /* Old server, try legacy style OpenX */
309 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
310 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
311 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
312 & CIFS_MOUNT_MAP_SPECIAL_CHR);
315 cFYI(1, ("cifs_open returned 0x%x", rc));
/* allocate the per-handle private data for this open */
319 kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
320 if (file->private_data == NULL) {
324 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
325 write_lock(&GlobalSMBSeslock);
326 list_add(&pCifsFile->tlist, &pTcon->openFileList);
328 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
330 rc = cifs_open_inode_helper(inode, file, pCifsInode,
332 &oplock, buf, full_path, xid);
334 write_unlock(&GlobalSMBSeslock);
337 if (oplock & CIFS_CREATE_ACTION) {
338 /* time to set mode which we can not set earlier due to
339 problems creating new read-only files */
340 if (pTcon->unix_ext) {
341 struct cifs_unix_set_info_args args = {
342 .mode = inode->i_mode,
/* NO_CHANGE_64 sentinels: leave these timestamps untouched on server */
345 .ctime = NO_CHANGE_64,
346 .atime = NO_CHANGE_64,
347 .mtime = NO_CHANGE_64,
350 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
352 cifs_sb->mnt_cifs_flags &
353 CIFS_MOUNT_MAP_SPECIAL_CHR);
364 /* Try to reacquire byte range locks that were released when session */
365 /* to server was lost */
/*
 * Stub: re-establishing byte-range locks after reconnect is not yet
 * implemented (see BB comment).  NOTE(review): the rc declaration and
 * return are missing from this extract.
 */
366 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
370 /* BB list all locks open on this file and relock */
/*
 * Re-open a file whose server handle became invalid (typically after a
 * session reconnect).  Serialized per-handle via fh_sem.  Tries a POSIX
 * open first on servers with the Unix extensions, falling back to a
 * regular SMB open.  When can_flush is set, write-back data is flushed
 * and inode info refreshed before trusting the new handle; caching
 * rights are then re-derived from the returned oplock and byte-range
 * locks re-acquired via cifs_relock_file.
 * NOTE(review): extract is missing many lines (rc/xid/netfid/oplock
 * declarations, several error branches and closing braces) — original
 * numbering is non-contiguous.
 */
375 static int cifs_reopen_file(struct file *file, bool can_flush)
379 struct cifs_sb_info *cifs_sb;
380 struct cifsTconInfo *tcon;
381 struct cifsFileInfo *pCifsFile;
382 struct cifsInodeInfo *pCifsInode;
384 char *full_path = NULL;
386 int disposition = FILE_OPEN;
389 if (file->private_data)
390 pCifsFile = (struct cifsFileInfo *)file->private_data;
/* fh_sem keeps concurrent reopens of the same handle from racing */
395 down(&pCifsFile->fh_sem);
396 if (!pCifsFile->invalidHandle) {
/* someone else already reopened it for us */
397 up(&pCifsFile->fh_sem);
402 if (file->f_path.dentry == NULL) {
403 cERROR(1, ("no valid name if dentry freed"));
406 goto reopen_error_exit;
409 inode = file->f_path.dentry->d_inode;
411 cERROR(1, ("inode not valid"));
414 goto reopen_error_exit;
417 cifs_sb = CIFS_SB(inode->i_sb);
418 tcon = cifs_sb->tcon;
420 /* can not grab rename sem here because various ops, including
421 those that already have the rename sem can end up causing writepage
422 to get called and if the server was down that means we end up here,
423 and we can never tell if the caller already has the rename_sem */
424 full_path = build_path_from_dentry(file->f_path.dentry);
425 if (full_path == NULL) {
428 up(&pCifsFile->fh_sem);
433 cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
434 inode, file->f_flags, full_path));
/* POSIX-extensions fast path: reopen via CIFS POSIX open if supported */
441 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
442 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
443 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
444 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
445 /* can not refresh inode info since size could be stale */
446 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
447 cifs_sb->mnt_file_mode /* ignored */,
448 oflags, &oplock, &netfid, xid);
450 cFYI(1, ("posix reopen succeeded"));
453 /* fallthrough to retry open the old way on errors, especially
454 in the reconnect path it is important to retry hard */
457 desiredAccess = cifs_convert_flags(file->f_flags);
459 /* Can not refresh inode by passing in file_info buf to be returned
460 by SMBOpen and then calling get_inode_info with returned buf
461 since file might have write behind data that needs to be flushed
462 and server version of file size can be stale. If we knew for sure
463 that inode was not dirty locally we could do this */
465 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
466 CREATE_NOT_DIR, &netfid, &oplock, NULL,
467 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
468 CIFS_MOUNT_MAP_SPECIAL_CHR);
470 up(&pCifsFile->fh_sem);
471 cFYI(1, ("cifs_open returned 0x%x", rc));
472 cFYI(1, ("oplock: %d", oplock));
/* success: publish the fresh netfid and mark the handle valid again */
475 pCifsFile->netfid = netfid;
476 pCifsFile->invalidHandle = false;
477 up(&pCifsFile->fh_sem);
478 pCifsInode = CIFS_I(inode);
481 rc = filemap_write_and_wait(inode->i_mapping);
483 CIFS_I(inode)->write_behind_rc = rc;
484 /* temporarily disable caching while we
485 go to server to get inode info */
486 pCifsInode->clientCanCacheAll = false;
487 pCifsInode->clientCanCacheRead = false;
489 rc = cifs_get_inode_info_unix(&inode,
490 full_path, inode->i_sb, xid);
492 rc = cifs_get_inode_info(&inode,
493 full_path, NULL, inode->i_sb,
495 } /* else we are writing out data to server already
496 and could deadlock if we tried to flush data, and
497 since we do not know if we have data that would
498 invalidate the current end of file on the server
499 we can not go to the server to get the new inod
/* re-derive local caching rights from the oplock level granted */
501 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
502 pCifsInode->clientCanCacheAll = true;
503 pCifsInode->clientCanCacheRead = true;
504 cFYI(1, ("Exclusive Oplock granted on inode %p",
505 file->f_path.dentry->d_inode));
506 } else if ((oplock & 0xF) == OPLOCK_READ) {
507 pCifsInode->clientCanCacheRead = true;
508 pCifsInode->clientCanCacheAll = false;
510 pCifsInode->clientCanCacheRead = false;
511 pCifsInode->clientCanCacheAll = false;
513 cifs_relock_file(pCifsFile);
/*
 * VFS ->release for CIFS files.  Marks the handle close-pending, waits
 * (bounded busy-wait, see comments) for in-flight writepages writes that
 * may still be using this handle, sends SMB Close to the server when a
 * reconnect is not needed, frees stored byte-range lock records, unlinks
 * the handle from the inode/tcon lists, and drops caching rights when
 * this was the last open handle on the inode.  Propagates any stashed
 * write-behind error as the close return code.
 * NOTE(review): extract is missing lines (rc/timeout declarations, the
 * msleep calls that back off the timeout, several braces) — original
 * numbering is non-contiguous.
 */
521 int cifs_close(struct inode *inode, struct file *file)
525 struct cifs_sb_info *cifs_sb;
526 struct cifsTconInfo *pTcon;
527 struct cifsFileInfo *pSMBFile =
528 (struct cifsFileInfo *)file->private_data;
532 cifs_sb = CIFS_SB(inode->i_sb);
533 pTcon = cifs_sb->tcon;
535 struct cifsLockInfo *li, *tmp;
536 write_lock(&GlobalSMBSeslock);
537 pSMBFile->closePend = true;
539 /* no sense reconnecting to close a file that is
541 if (!pTcon->need_reconnect) {
542 write_unlock(&GlobalSMBSeslock);
/* bounded wait for writepages writes still using this handle */
544 while ((atomic_read(&pSMBFile->wrtPending) != 0)
545 && (timeout <= 2048)) {
546 /* Give write a better chance to get to
547 server ahead of the close. We do not
548 want to add a wait_q here as it would
549 increase the memory utilization as
550 the struct would be in each open file,
551 but this should give enough time to
554 ("close delay, write pending"));
558 if (atomic_read(&pSMBFile->wrtPending))
559 cERROR(1, ("close with pending write"));
560 if (!pTcon->need_reconnect &&
561 !pSMBFile->invalidHandle)
562 rc = CIFSSMBClose(xid, pTcon,
565 write_unlock(&GlobalSMBSeslock);
567 write_unlock(&GlobalSMBSeslock);
569 /* Delete any outstanding lock records.
570 We'll lose them when the file is closed anyway. */
571 mutex_lock(&pSMBFile->lock_mutex);
572 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
573 list_del(&li->llist);
576 mutex_unlock(&pSMBFile->lock_mutex);
578 write_lock(&GlobalSMBSeslock);
579 list_del(&pSMBFile->flist);
580 list_del(&pSMBFile->tlist);
581 write_unlock(&GlobalSMBSeslock);
583 /* We waited above to give the SMBWrite a chance to issue
584 on the wire (so we do not get SMBWrite returning EBADF
585 if writepages is racing with close. Note that writepages
586 does not specify a file handle, so it is possible for a file
587 to be opened twice, and the application close the "wrong"
588 file handle - in these cases we delay long enough to allow
589 the SMBWrite to get on the wire before the SMB Close.
590 We allow total wait here over 45 seconds, more than
591 oplock break time, and more than enough to allow any write
592 to complete on the server, or to time out on the client */
593 while ((atomic_read(&pSMBFile->wrtPending) != 0)
594 && (timeout <= 50000)) {
595 cERROR(1, ("writes pending, delay free of handle"));
599 kfree(file->private_data);
600 file->private_data = NULL;
604 read_lock(&GlobalSMBSeslock);
605 if (list_empty(&(CIFS_I(inode)->openFileList))) {
606 cFYI(1, ("closing last open instance for inode %p", inode));
607 /* if the file is not open we do not know if we can cache info
608 on this inode, much less write behind and read ahead */
609 CIFS_I(inode)->clientCanCacheRead = false;
610 CIFS_I(inode)->clientCanCacheAll = false;
612 read_unlock(&GlobalSMBSeslock);
/* surface any deferred write-back failure to the caller of close */
613 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
614 rc = CIFS_I(inode)->write_behind_rc;
/*
 * VFS ->release for CIFS directories.  If a readdir search is still in
 * progress on the server, issues FindClose for the search handle, then
 * releases any retained network buffer from the search state and frees
 * the per-handle private data.
 * NOTE(review): extract is missing lines (rc/xid declarations, the ptmp
 * declaration, some braces and the return) — original numbering is
 * non-contiguous.
 */
619 int cifs_closedir(struct inode *inode, struct file *file)
623 struct cifsFileInfo *pCFileStruct =
624 (struct cifsFileInfo *)file->private_data;
627 cFYI(1, ("Closedir inode = 0x%p", inode));
632 struct cifsTconInfo *pTcon;
633 struct cifs_sb_info *cifs_sb =
634 CIFS_SB(file->f_path.dentry->d_sb);
636 pTcon = cifs_sb->tcon;
638 cFYI(1, ("Freeing private data in close dir"));
639 write_lock(&GlobalSMBSeslock);
640 if (!pCFileStruct->srch_inf.endOfSearch &&
641 !pCFileStruct->invalidHandle) {
642 pCFileStruct->invalidHandle = true;
643 write_unlock(&GlobalSMBSeslock);
644 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
645 cFYI(1, ("Closing uncompleted readdir with rc %d",
647 /* not much we can do if it fails anyway, ignore rc */
650 write_unlock(&GlobalSMBSeslock);
651 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
653 cFYI(1, ("closedir free smb buf in srch struct"));
654 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
/* small vs. big SMB buffers come from different pools — release to the right one */
655 if (pCFileStruct->srch_inf.smallBuf)
656 cifs_small_buf_release(ptmp);
658 cifs_buf_release(ptmp);
660 kfree(file->private_data);
661 file->private_data = NULL;
663 /* BB can we lock the filestruct while this is going on? */
/*
 * Record a byte-range lock taken on the server so it can be found (and
 * removed) by a later unlock; the record lives on fid->llist, protected
 * by fid->lock_mutex.
 * NOTE(review): the allocation-failure check, the field assignments
 * (offset/length/type) and the return are missing from this extract.
 */
668 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
669 __u64 offset, __u8 lockType)
671 struct cifsLockInfo *li =
672 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
678 mutex_lock(&fid->lock_mutex);
679 list_add(&li->llist, &fid->llist);
680 mutex_unlock(&fid->lock_mutex);
/*
 * VFS ->lock for CIFS files.  Decodes the fcntl/flock request into CIFS
 * lock type and counts, then either uses the CIFS POSIX lock protocol
 * (when the server supports the Unix extensions and the mount does not
 * disable POSIX byte-range locks) or plain Windows-style SMB locking.
 * For a getlk (F_GETLK) the lock is probed by trying to acquire and
 * immediately release it; for setlk, Windows-style locks taken are also
 * recorded locally via store_file_lock so unlock can find them, and
 * matching local records are removed on unlock.  Finally mirrors the
 * result into the local POSIX lock table via posix_lock_file_wait.
 * NOTE(review): extract is missing many lines (rc/xid/numLock/numUnlock
 * declarations, getlk branch structure, several braces) — original
 * numbering is non-contiguous.
 */
684 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
690 bool wait_flag = false;
691 struct cifs_sb_info *cifs_sb;
692 struct cifsTconInfo *tcon;
694 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
695 bool posix_locking = 0;
/* fl_end is inclusive, hence the +1 */
697 length = 1 + pfLock->fl_end - pfLock->fl_start;
701 cFYI(1, ("Lock parm: 0x%x flockflags: "
702 "0x%x flocktype: 0x%x start: %lld end: %lld",
703 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
706 if (pfLock->fl_flags & FL_POSIX)
708 if (pfLock->fl_flags & FL_FLOCK)
710 if (pfLock->fl_flags & FL_SLEEP) {
711 cFYI(1, ("Blocking lock"));
714 if (pfLock->fl_flags & FL_ACCESS)
715 cFYI(1, ("Process suspended by mandatory locking - "
716 "not implemented yet"));
717 if (pfLock->fl_flags & FL_LEASE)
718 cFYI(1, ("Lease on file - not implemented yet"));
719 if (pfLock->fl_flags &
720 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
721 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
723 if (pfLock->fl_type == F_WRLCK) {
724 cFYI(1, ("F_WRLCK "));
726 } else if (pfLock->fl_type == F_UNLCK) {
727 cFYI(1, ("F_UNLCK"));
729 /* Check if unlock includes more than
731 } else if (pfLock->fl_type == F_RDLCK) {
732 cFYI(1, ("F_RDLCK"));
733 lockType |= LOCKING_ANDX_SHARED_LOCK;
735 } else if (pfLock->fl_type == F_EXLCK) {
736 cFYI(1, ("F_EXLCK"));
738 } else if (pfLock->fl_type == F_SHLCK) {
739 cFYI(1, ("F_SHLCK"));
740 lockType |= LOCKING_ANDX_SHARED_LOCK;
743 cFYI(1, ("Unknown type of lock"));
745 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
746 tcon = cifs_sb->tcon;
748 if (file->private_data == NULL) {
752 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
/* decide whether the CIFS POSIX (Unix extensions) lock protocol applies */
754 if ((tcon->ses->capabilities & CAP_UNIX) &&
755 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
756 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
758 /* BB add code here to normalize offset and length to
759 account for negative length which we can not accept over the
/* getlk path: probe lock state on the server */
764 if (lockType & LOCKING_ANDX_SHARED_LOCK)
765 posix_lock_type = CIFS_RDLCK;
767 posix_lock_type = CIFS_WRLCK;
768 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
770 posix_lock_type, wait_flag);
775 /* BB we could chain these into one lock request BB */
776 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
777 0, 1, lockType, 0 /* wait flag */ );
/* probe succeeded — undo the temporary lock and report unlocked */
779 rc = CIFSSMBLock(xid, tcon, netfid, length,
780 pfLock->fl_start, 1 /* numUnlock */ ,
781 0 /* numLock */ , lockType,
783 pfLock->fl_type = F_UNLCK;
785 cERROR(1, ("Error unlocking previously locked "
786 "range %d during test of lock", rc));
790 /* if rc == ERR_SHARING_VIOLATION ? */
791 rc = 0; /* do not change lock type to unlock
792 since range in use */
799 if (!numLock && !numUnlock) {
800 /* if no lock or unlock then nothing
801 to do since we do not know what it is */
808 if (lockType & LOCKING_ANDX_SHARED_LOCK)
809 posix_lock_type = CIFS_RDLCK;
811 posix_lock_type = CIFS_WRLCK;
814 posix_lock_type = CIFS_UNLCK;
816 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
818 posix_lock_type, wait_flag);
820 struct cifsFileInfo *fid =
821 (struct cifsFileInfo *)file->private_data;
824 rc = CIFSSMBLock(xid, tcon, netfid, length,
826 0, numLock, lockType, wait_flag);
829 /* For Windows locks we must store them. */
830 rc = store_file_lock(fid, length,
831 pfLock->fl_start, lockType);
833 } else if (numUnlock) {
834 /* For each stored lock that this unlock overlaps
835 completely, unlock it. */
837 struct cifsLockInfo *li, *tmp;
840 mutex_lock(&fid->lock_mutex);
841 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
842 if (pfLock->fl_start <= li->offset &&
843 (pfLock->fl_start + length) >=
844 (li->offset + li->length)) {
845 stored_rc = CIFSSMBLock(xid, tcon,
847 li->length, li->offset,
848 1, 0, li->type, false);
852 list_del(&li->llist);
856 mutex_unlock(&fid->lock_mutex);
/* keep the local (VFS) posix lock table in sync with the server state */
860 if (pfLock->fl_flags & FL_POSIX)
861 posix_lock_file_wait(file, pfLock);
/*
 * Write from a userspace buffer straight to the server (no page cache),
 * in chunks of at most cifs_sb->wsize.  On -EAGAIN the invalid handle
 * is reopened (without flushing, to avoid deadlock) and the chunk
 * retried.  Updates the cached inode size when the write extends past
 * the old EOF.  Returns bytes written.
 * NOTE(review): extract is missing lines (rc/xid/long_op declarations,
 * retry-loop braces, the error-exit on rc) — original numbering is
 * non-contiguous.
 */
866 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
867 size_t write_size, loff_t *poffset)
870 unsigned int bytes_written = 0;
871 unsigned int total_written;
872 struct cifs_sb_info *cifs_sb;
873 struct cifsTconInfo *pTcon;
875 struct cifsFileInfo *open_file;
877 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
879 pTcon = cifs_sb->tcon;
882 (" write %d bytes to offset %lld of %s", write_size,
883 *poffset, file->f_path.dentry->d_name.name)); */
885 if (file->private_data == NULL)
887 open_file = (struct cifsFileInfo *) file->private_data;
889 rc = generic_write_checks(file, poffset, &write_size, 0);
/* writes past EOF can take longer on the server; pick the SMB timeout class */
895 if (*poffset > file->f_path.dentry->d_inode->i_size)
896 long_op = CIFS_VLONG_OP; /* writes past EOF take long time */
898 long_op = CIFS_LONG_OP;
900 for (total_written = 0; write_size > total_written;
901 total_written += bytes_written) {
903 while (rc == -EAGAIN) {
904 if (file->private_data == NULL) {
905 /* file has been closed on us */
907 /* if we have gotten here we have written some data
908 and blocked, and the file has been freed on us while
909 we blocked so return what we managed to write */
910 return total_written;
912 if (open_file->closePend) {
915 return total_written;
919 if (open_file->invalidHandle) {
920 /* we could deadlock if we called
921 filemap_fdatawait from here so tell
922 reopen_file not to flush data to server
924 rc = cifs_reopen_file(file, false);
929 rc = CIFSSMBWrite(xid, pTcon,
931 min_t(const int, cifs_sb->wsize,
932 write_size - total_written),
933 *poffset, &bytes_written,
934 NULL, write_data + total_written, long_op);
936 if (rc || (bytes_written == 0)) {
944 *poffset += bytes_written;
945 long_op = CIFS_STD_OP; /* subsequent writes fast -
946 15 seconds is plenty */
949 cifs_stats_bytes_written(pTcon, total_written);
951 /* since the write may have blocked check these pointers again */
952 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
953 struct inode *inode = file->f_path.dentry->d_inode;
954 /* Do not update local mtime - server will set its actual value on write
955 * inode->i_ctime = inode->i_mtime =
956 * current_fs_time(inode->i_sb);*/
957 if (total_written > 0) {
958 spin_lock(&inode->i_lock);
959 if (*poffset > file->f_path.dentry->d_inode->i_size)
960 i_size_write(file->f_path.dentry->d_inode,
962 spin_unlock(&inode->i_lock);
964 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
967 return total_written;
/*
 * Kernel-buffer variant of cifs_user_write, used by the address-space
 * write paths (e.g. cifs_partialpagewrite).  Same chunking and -EAGAIN
 * reopen/retry structure; additionally prefers the vectored
 * CIFSSMBWrite2 path when the experimental flag or SMB signing makes it
 * appropriate, falling back to plain CIFSSMBWrite otherwise.
 * NOTE(review): extract is missing lines (rc/xid/long_op/len/iov
 * declarations, branch braces, the error-exit) — original numbering is
 * non-contiguous.
 */
970 static ssize_t cifs_write(struct file *file, const char *write_data,
971 size_t write_size, loff_t *poffset)
974 unsigned int bytes_written = 0;
975 unsigned int total_written;
976 struct cifs_sb_info *cifs_sb;
977 struct cifsTconInfo *pTcon;
979 struct cifsFileInfo *open_file;
981 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
983 pTcon = cifs_sb->tcon;
985 cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
986 *poffset, file->f_path.dentry->d_name.name));
988 if (file->private_data == NULL)
990 open_file = (struct cifsFileInfo *)file->private_data;
/* select SMB timeout class: extending writes may be slow on the server */
994 if (*poffset > file->f_path.dentry->d_inode->i_size)
995 long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
997 long_op = CIFS_LONG_OP;
999 for (total_written = 0; write_size > total_written;
1000 total_written += bytes_written) {
1002 while (rc == -EAGAIN) {
1003 if (file->private_data == NULL) {
1004 /* file has been closed on us */
1006 /* if we have gotten here we have written some data
1007 and blocked, and the file has been freed on us
1008 while we blocked so return what we managed to
1010 return total_written;
1012 if (open_file->closePend) {
1015 return total_written;
1019 if (open_file->invalidHandle) {
1020 /* we could deadlock if we called
1021 filemap_fdatawait from here so tell
1022 reopen_file not to flush data to
1024 rc = cifs_reopen_file(file, false);
/* vectored write path (Write2) when experimental mode or signing is on */
1028 if (experimEnabled || (pTcon->ses->server &&
1029 ((pTcon->ses->server->secMode &
1030 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1035 len = min((size_t)cifs_sb->wsize,
1036 write_size - total_written);
1037 /* iov[0] is reserved for smb header */
1038 iov[1].iov_base = (char *)write_data +
1040 iov[1].iov_len = len;
1041 rc = CIFSSMBWrite2(xid, pTcon,
1042 open_file->netfid, len,
1043 *poffset, &bytes_written,
1046 rc = CIFSSMBWrite(xid, pTcon,
1048 min_t(const int, cifs_sb->wsize,
1049 write_size - total_written),
1050 *poffset, &bytes_written,
1051 write_data + total_written,
1054 if (rc || (bytes_written == 0)) {
1062 *poffset += bytes_written;
1063 long_op = CIFS_STD_OP; /* subsequent writes fast -
1064 15 seconds is plenty */
1067 cifs_stats_bytes_written(pTcon, total_written);
1069 /* since the write may have blocked check these pointers again */
1070 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1071 /*BB We could make this contingent on superblock ATIME flag too */
1072 /* file->f_path.dentry->d_inode->i_ctime =
1073 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1074 if (total_written > 0) {
1075 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1076 if (*poffset > file->f_path.dentry->d_inode->i_size)
1077 i_size_write(file->f_path.dentry->d_inode,
1079 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1081 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1084 return total_written;
1087 #ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * Find an open handle on this inode usable for reading.  Walks the
 * inode's openFileList under GlobalSMBSeslock; read-capable handles are
 * kept at the front of the list (write-only ones are added at the tail,
 * see cifs_open_inode_helper), so the walk can stop at the first
 * write-only entry.  A chosen handle is pinned against close by bumping
 * wrtPending before the lock is dropped.
 * NOTE(review): extract is missing lines (the return of open_file on
 * success, the continue on closePend, the final NULL return) —
 * original numbering is non-contiguous.
 */
1088 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1090 struct cifsFileInfo *open_file = NULL;
1092 read_lock(&GlobalSMBSeslock);
1093 /* we could simply get the first_list_entry since write-only entries
1094 are always at the end of the list but since the first entry might
1095 have a close pending, we go through the whole list */
1096 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1097 if (open_file->closePend)
1099 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1100 (open_file->pfile->f_flags & O_RDONLY))) {
1101 if (!open_file->invalidHandle) {
1102 /* found a good file */
1103 /* lock it so it will not be closed on us */
1104 atomic_inc(&open_file->wrtPending);
1105 read_unlock(&GlobalSMBSeslock);
1107 } /* else might as well continue, and look for
1108 another, or simply have the caller reopen it
1109 again rather than trying to fix this handle */
1110 } else /* write only file */
1111 break; /* write only files are last so must be done */
1113 read_unlock(&GlobalSMBSeslock);
/*
 * Find (or make) an open handle on this inode usable for writing, for
 * use by writepage(s) which has no file handle of its own.  First pass
 * prefers handles owned by the current tgid; if none is found,
 * any_available is set and the list is rescanned.  A usable handle is
 * pinned via wrtPending; an invalid handle is reopened (outside the
 * lock, since reopen can block), restarting the scan if the list may
 * have changed underneath us.  Returns NULL if nothing usable exists.
 * NOTE(review): extract is missing lines (the refind_writable label,
 * some returns and braces) — original numbering is non-contiguous.
 */
1118 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1120 struct cifsFileInfo *open_file;
1121 bool any_available = false;
1124 /* Having a null inode here (because mapping->host was set to zero by
1125 the VFS or MM) should not happen but we had reports of on oops (due to
1126 it being zero) during stress testcases so we need to check for it */
1128 if (cifs_inode == NULL) {
1129 cERROR(1, ("Null inode passed to cifs_writeable_file"));
1134 read_lock(&GlobalSMBSeslock);
1136 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1137 if (open_file->closePend ||
1138 (!any_available && open_file->pid != current->tgid))
1141 if (open_file->pfile &&
1142 ((open_file->pfile->f_flags & O_RDWR) ||
1143 (open_file->pfile->f_flags & O_WRONLY))) {
/* pin before dropping the lock so close cannot free it under us */
1144 atomic_inc(&open_file->wrtPending);
1146 if (!open_file->invalidHandle) {
1147 /* found a good writable file */
1148 read_unlock(&GlobalSMBSeslock);
1152 read_unlock(&GlobalSMBSeslock);
1153 /* Had to unlock since following call can block */
1154 rc = cifs_reopen_file(open_file->pfile, false);
1156 if (!open_file->closePend)
1158 else { /* start over in case this was deleted */
1159 /* since the list could be modified */
1160 read_lock(&GlobalSMBSeslock);
1161 atomic_dec(&open_file->wrtPending);
1162 goto refind_writable;
1166 /* if it fails, try another handle if possible -
1167 (we can not do this if closePending since
1168 loop could be modified - in which case we
1169 have to start at the beginning of the list
1170 again. Note that it would be bad
1171 to hold up writepages here (rather than
1172 in caller) with continuous retries */
1173 cFYI(1, ("wp failed on reopen file"));
1174 read_lock(&GlobalSMBSeslock);
1175 /* can not use this handle, no write
1176 pending on this one after all */
1177 atomic_dec(&open_file->wrtPending);
1179 if (open_file->closePend) /* list could have changed */
1180 goto refind_writable;
1181 /* else we simply continue to the next entry. Thus
1182 we do not loop on reopen errors. If we
1183 can not reopen the file, for example if we
1184 reconnected to a server with another client
1185 racing to delete or lock the file we would not
1186 make progress if we restarted before the beginning
1187 of the loop here. */
1190 /* couldn't find useable FH with same pid, try any available */
1191 if (!any_available) {
1192 any_available = true;
1193 goto refind_writable;
1195 read_unlock(&GlobalSMBSeslock);
/*
 * Write the byte range [from, to) of a page-cache page to the server:
 * kmaps the page, clamps the range so the file is not extended past the
 * current inode size, finds a writable handle via find_writable_file,
 * and pushes the data with cifs_write (un-pinning the handle after).
 * NOTE(review): extract is missing lines (rc declaration, kunmap and
 * return paths, some braces) — original numbering is non-contiguous.
 */
1199 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1201 struct address_space *mapping = page->mapping;
1202 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1205 int bytes_written = 0;
1206 struct cifs_sb_info *cifs_sb;
1207 struct cifsTconInfo *pTcon;
1208 struct inode *inode;
1209 struct cifsFileInfo *open_file;
1211 if (!mapping || !mapping->host)
1214 inode = page->mapping->host;
1215 cifs_sb = CIFS_SB(inode->i_sb);
1216 pTcon = cifs_sb->tcon;
1218 offset += (loff_t)from;
1219 write_data = kmap(page);
1222 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1227 /* racing with truncate? */
1228 if (offset > mapping->host->i_size) {
1230 return 0; /* don't care */
1233 /* check to make sure that we are not extending the file */
1234 if (mapping->host->i_size - offset < (loff_t)to)
1235 to = (unsigned)(mapping->host->i_size - offset);
1237 open_file = find_writable_file(CIFS_I(mapping->host));
1239 bytes_written = cifs_write(open_file->pfile, write_data,
/* drop the wrtPending pin taken by find_writable_file */
1241 atomic_dec(&open_file->wrtPending);
1242 /* Does mm or vfs already set times? */
1243 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1244 if ((bytes_written > 0) && (offset))
1246 else if (bytes_written < 0)
1249 cFYI(1, ("No writeable filehandles for inode"));
/*
 * cifs_writepages - address_space_operations.writepages for CIFS.
 *
 * Gathers runs of contiguous dirty pages (up to wsize bytes) into a kvec
 * array and writes each run with a single CIFSSMBWrite2 call.  Falls back
 * to generic_writepages() (one page at a time via cifs_writepage) when
 * wsize is smaller than a page, when signing is active and the
 * experimental path is disabled, or when the iov allocation fails.
 *
 * NOTE(review): heavily elided here — iov/xid declarations, several
 * loop-control assignments (done/scanned/next/first/n_iov) and the
 * unlock/cleanup statements are not visible in this view.
 */
1257 static int cifs_writepages(struct address_space *mapping,
1258 struct writeback_control *wbc)
1260 struct backing_dev_info *bdi = mapping->backing_dev_info;
1261 unsigned int bytes_to_write;
1262 unsigned int bytes_written;
1263 struct cifs_sb_info *cifs_sb;
1267 int range_whole = 0;
1274 struct cifsFileInfo *open_file;
1276 struct pagevec pvec;
1281 cifs_sb = CIFS_SB(mapping->host->i_sb);
1284 * If wsize is smaller that the page cache size, default to writing
1285 * one page at a time via cifs_writepage
1287 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1288 return generic_writepages(mapping, wbc);
/* signed connections only take the multi-page path when experimEnabled */
1290 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1291 if (cifs_sb->tcon->ses->server->secMode &
1292 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1293 if (!experimEnabled)
1294 return generic_writepages(mapping, wbc);
/* 32 kvecs: iov[0] is reserved for the SMB header, rest hold page data */
1296 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1298 return generic_writepages(mapping, wbc);
1302 * BB: Is this meaningful for a non-block-device file system?
1303 * If it is, we should test it again after we do I/O
1305 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1306 wbc->encountered_congestion = 1;
1313 pagevec_init(&pvec, 0);
1314 if (wbc->range_cyclic) {
1315 index = mapping->writeback_index; /* Start from prev offset */
1318 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1319 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1320 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
/* outer loop: fetch the next batch of dirty pages in index order */
1325 while (!done && (index <= end) &&
1326 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1327 PAGECACHE_TAG_DIRTY,
1328 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1337 for (i = 0; i < nr_pages; i++) {
1338 page = pvec.pages[i];
1340 * At this point we hold neither mapping->tree_lock nor
1341 * lock on the page itself: the page may be truncated or
1342 * invalidated (changing page->mapping to NULL), or even
1343 * swizzled back from swapper_space to tmpfs file
1349 else if (!trylock_page(page))
1352 if (unlikely(page->mapping != mapping)) {
1357 if (!wbc->range_cyclic && page->index > end) {
1363 if (next && (page->index != next)) {
1364 /* Not next consecutive page */
1369 if (wbc->sync_mode != WB_SYNC_NONE)
1370 wait_on_page_writeback(page);
1372 if (PageWriteback(page) ||
1373 !clear_page_dirty_for_io(page)) {
1379 * This actually clears the dirty bit in the radix tree.
1380 * See cifs_writepage() for more commentary.
1382 set_page_writeback(page);
/* page starts at or past EOF: nothing to send for it */
1384 if (page_offset(page) >= mapping->host->i_size) {
1387 end_page_writeback(page);
1392 * BB can we get rid of this? pages are held by pvec
1394 page_cache_get(page);
/* clamp the last page's length to EOF */
1396 len = min(mapping->host->i_size - page_offset(page),
1397 (loff_t)PAGE_CACHE_SIZE);
1399 /* reserve iov[0] for the smb header */
1401 iov[n_iov].iov_base = kmap(page);
1402 iov[n_iov].iov_len = len;
1403 bytes_to_write += len;
1407 offset = page_offset(page);
1409 next = page->index + 1;
/* stop accumulating once another page could exceed wsize */
1410 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1414 /* Search for a writable handle every time we call
1415 * CIFSSMBWrite2. We can't rely on the last handle
1416 * we used to still be valid
1418 open_file = find_writable_file(CIFS_I(mapping->host));
1420 cERROR(1, ("No writable handles for inode"));
1423 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1425 bytes_to_write, offset,
1426 &bytes_written, iov, n_iov,
1428 atomic_dec(&open_file->wrtPending);
/* treat a short write the same as an error for the whole batch */
1429 if (rc || bytes_written < bytes_to_write) {
1430 cERROR(1, ("Write2 ret %d, wrote %d",
1431 rc, bytes_written));
1432 /* BB what if continued retry is
1433 requested via mount flags? */
1435 set_bit(AS_ENOSPC, &mapping->flags);
1437 set_bit(AS_EIO, &mapping->flags);
1439 cifs_stats_bytes_written(cifs_sb->tcon,
/* unlock/release every page that was part of this batch */
1443 for (i = 0; i < n_iov; i++) {
1444 page = pvec.pages[first + i];
1445 /* Should we also set page error on
1446 success rc but too little data written? */
1447 /* BB investigate retry logic on temporary
1448 server crash cases and how recovery works
1449 when page marked as error */
1454 end_page_writeback(page);
1455 page_cache_release(page);
1457 if ((wbc->nr_to_write -= n_iov) <= 0)
1461 /* Need to re-find the pages we skipped */
1462 index = pvec.pages[0]->index + 1;
1464 pagevec_release(&pvec);
1466 if (!scanned && !done) {
1468 * We hit the last page and there is more work to be done: wrap
1469 * back to the start of the file
/* remember where to resume on the next cyclic writeback pass */
1475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1476 mapping->writeback_index = index;
/*
 * cifs_writepage - address_space_operations.writepage for CIFS.
 * Writes a single page synchronously via cifs_partialpagewrite().
 *
 * NOTE(review): elided here — the unlock_page/return statements and any
 * error propagation between these lines are not visible in this view.
 */
1483 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1489 /* BB add check for wbc flags */
1490 page_cache_get(page);
1491 if (!PageUptodate(page))
1492 cFYI(1, ("ppw - page not up to date"));
1495 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1497 * A writepage() implementation always needs to do either this,
1498 * or re-dirty the page with "redirty_page_for_writepage()" in
1499 * the case of a failure.
1501 * Just unlocking the page will cause the radix tree tag-bits
1502 * to fail to update with the state of the page correctly.
1504 set_page_writeback(page);
1505 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1506 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1508 end_page_writeback(page);
1509 page_cache_release(page);
/*
 * cifs_write_end - address_space_operations.write_end for CIFS.
 *
 * Completes a buffered write started by cifs_write_begin: marks the page
 * uptodate when the whole page (or the PageChecked-prepared parts) is now
 * valid; otherwise falls back to a synchronous cifs_write() of just the
 * copied bytes.  Extends i_size under i_lock if the write grew the file.
 *
 * NOTE(review): elided here — rc/page_data declarations, the unlock_page
 * and return are not visible in this view.
 */
1514 static int cifs_write_end(struct file *file, struct address_space *mapping,
1515 loff_t pos, unsigned len, unsigned copied,
1516 struct page *page, void *fsdata)
1519 struct inode *inode = mapping->host;
1521 cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1522 page, pos, copied));
/* PageChecked was set by write_begin: the unwritten parts are known good */
1524 if (PageChecked(page)) {
1526 SetPageUptodate(page);
1527 ClearPageChecked(page);
1528 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1529 SetPageUptodate(page);
/* partial write into a non-uptodate page: push the bytes synchronously */
1531 if (!PageUptodate(page)) {
1533 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1537 /* this is probably better than directly calling
1538 partialpage_write since in this function the file handle is
1539 known which we might as well leverage */
1540 /* BB check if anything else missing out of ppw
1541 such as updating last write time */
1542 page_data = kmap(page);
1543 rc = cifs_write(file, page_data + offset, copied, &pos);
1544 /* if (rc < 0) should we set writebehind rc? */
1551 set_page_dirty(page);
/* grow the cached file size if this write extended the file */
1555 spin_lock(&inode->i_lock);
1556 if (pos > inode->i_size)
1557 i_size_write(inode, pos);
1558 spin_unlock(&inode->i_lock);
1562 page_cache_release(page);
/*
 * cifs_fsync - flush dirty pages for the file, pick up any stored
 * writebehind error, then ask the server to flush its copy (SMB Flush)
 * unless the nosssync mount flag suppresses it.
 *
 * NOTE(review): elided here — rc/xid declarations and the declaration of
 * cifs_sb used at line 1587 are not visible in this view; confirm cifs_sb
 * is initialized in the elided lines.
 */
1567 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1571 struct cifsTconInfo *tcon;
1572 struct cifsFileInfo *smbfile =
1573 (struct cifsFileInfo *)file->private_data;
1574 struct inode *inode = file->f_path.dentry->d_inode;
1578 cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1579 dentry->d_name.name, datasync));
1581 rc = filemap_write_and_wait(inode->i_mapping);
/* report (and clear) any error stashed by earlier writebehind */
1583 rc = CIFS_I(inode)->write_behind_rc;
1584 CIFS_I(inode)->write_behind_rc = 0;
1585 tcon = CIFS_SB(inode->i_sb)->tcon;
1586 if (!rc && tcon && smbfile &&
1587 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_SSYNC))
1588 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1595 /* static void cifs_sync_page(struct page *page)
1597 struct address_space *mapping;
1598 struct inode *inode;
1599 unsigned long index = page->index;
1600 unsigned int rpages = 0;
1603 cFYI(1, ("sync page %p",page));
1604 mapping = page->mapping;
1607 inode = mapping->host;
1611 /* fill in rpages then
1612 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1614 /* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1624 * As file closes, flush all cached write data for this inode checking
1625 * for write behind errors.
/*
 * Returns 0 on success; otherwise the first error from writing out dirty
 * pages or the previously-stored writebehind error.
 * NOTE(review): elided here — rc declaration and return are not visible.
 */
1627 int cifs_flush(struct file *file, fl_owner_t id)
1629 struct inode *inode = file->f_path.dentry->d_inode;
1632 /* Rather than do the steps manually:
1633 lock the inode for writing
1634 loop through pages looking for write behind data (dirty pages)
1635 coalesce into contiguous 16K (or smaller) chunks to write to server
1636 send to server (prefer in parallel)
1637 deal with writebehind errors
1638 unlock inode for writing
1639 filemapfdatawrite appears easier for the time being */
1641 rc = filemap_fdatawrite(inode->i_mapping);
1642 /* reset wb rc if we were able to write out dirty pages */
1644 rc = CIFS_I(inode)->write_behind_rc;
1645 CIFS_I(inode)->write_behind_rc = 0;
1648 cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
/*
 * cifs_user_read - read up to read_size bytes at *poffset directly into a
 * userspace buffer, bypassing the page cache (used for direct I/O mounts).
 *
 * Loops issuing CIFSSMBRead calls, copying each response's data payload to
 * userspace with copy_to_user, retrying on -EAGAIN (reopening an
 * invalidated handle first).  Advances *poffset by the bytes read.
 *
 * NOTE(review): elided here — rc/xid declarations, the netfid argument to
 * CIFSSMBRead, error-path returns and the final return are not visible.
 */
1653 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1654 size_t read_size, loff_t *poffset)
1657 unsigned int bytes_read = 0;
1658 unsigned int total_read = 0;
1659 unsigned int current_read_size;
1660 struct cifs_sb_info *cifs_sb;
1661 struct cifsTconInfo *pTcon;
1663 struct cifsFileInfo *open_file;
1664 char *smb_read_data;
1665 char __user *current_offset;
1666 struct smb_com_read_rsp *pSMBr;
1669 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1670 pTcon = cifs_sb->tcon;
/* no open-file state means the caller passed a stale/unopened file */
1672 if (file->private_data == NULL) {
1676 open_file = (struct cifsFileInfo *)file->private_data;
1678 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1679 cFYI(1, ("attempting read on write only file instance"));
/* advance through the user buffer one server read at a time */
1681 for (total_read = 0, current_offset = read_data;
1682 read_size > total_read;
1683 total_read += bytes_read, current_offset += bytes_read) {
1684 current_read_size = min_t(const int, read_size - total_read,
1687 smb_read_data = NULL;
1688 while (rc == -EAGAIN) {
1689 int buf_type = CIFS_NO_BUFFER;
/* reconnect case: reopen the handle before retrying the read */
1690 if ((open_file->invalidHandle) &&
1691 (!open_file->closePend)) {
1692 rc = cifs_reopen_file(file, true);
1696 rc = CIFSSMBRead(xid, pTcon,
1698 current_read_size, *poffset,
1699 &bytes_read, &smb_read_data,
1701 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
/* copy the data payload (past RFC1001 header + DataOffset) to userspace */
1702 if (smb_read_data) {
1703 if (copy_to_user(current_offset,
1705 4 /* RFC1001 length field */ +
1706 le16_to_cpu(pSMBr->DataOffset),
1710 if (buf_type == CIFS_SMALL_BUFFER)
1711 cifs_small_buf_release(smb_read_data);
1712 else if (buf_type == CIFS_LARGE_BUFFER)
1713 cifs_buf_release(smb_read_data);
1714 smb_read_data = NULL;
/* stop at error or EOF */
1717 if (rc || (bytes_read == 0)) {
1725 cifs_stats_bytes_read(pTcon, bytes_read);
1726 *poffset += bytes_read;
/*
 * cifs_read - read up to read_size bytes at *poffset into a kernel buffer
 * (used by the page-cache read paths such as cifs_readpage_worker).
 *
 * Unlike cifs_user_read, the caller's buffer is a kernel pointer, so each
 * CIFSSMBRead is told to place data straight into it (current_offset) with
 * buf_type CIFS_NO_BUFFER.  Retries on -EAGAIN, reopening an invalidated
 * handle first.  Advances *poffset by the bytes read.
 *
 * FIX(review): line 1784 read "&¤t_offset" — an HTML-entity-mangled
 * form of "&current_offset" (the local declared at line 1744, mirroring
 * the &smb_read_data argument in cifs_user_read).  Restored the
 * identifier; this was a hard compile error as written.
 *
 * NOTE(review): elided here — rc/xid declarations, the netfid argument to
 * CIFSSMBRead, error-path returns and the final return are not visible.
 */
1734 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1738 unsigned int bytes_read = 0;
1739 unsigned int total_read;
1740 unsigned int current_read_size;
1741 struct cifs_sb_info *cifs_sb;
1742 struct cifsTconInfo *pTcon;
1744 char *current_offset;
1745 struct cifsFileInfo *open_file;
1746 int buf_type = CIFS_NO_BUFFER;
1749 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1750 pTcon = cifs_sb->tcon;
/* no open-file state means the caller passed a stale/unopened file */
1752 if (file->private_data == NULL) {
1756 open_file = (struct cifsFileInfo *)file->private_data;
1758 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1759 cFYI(1, ("attempting read on write only file instance"));
/* advance through the destination buffer one server read at a time */
1761 for (total_read = 0, current_offset = read_data;
1762 read_size > total_read;
1763 total_read += bytes_read, current_offset += bytes_read) {
1764 current_read_size = min_t(const int, read_size - total_read,
1766 /* For windows me and 9x we do not want to request more
1767 than it negotiated since it will refuse the read then */
1769 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1770 current_read_size = min_t(const int, current_read_size,
1771 pTcon->ses->server->maxBuf - 128);
1774 while (rc == -EAGAIN) {
/* reconnect case: reopen the handle before retrying the read */
1775 if ((open_file->invalidHandle) &&
1776 (!open_file->closePend)) {
1777 rc = cifs_reopen_file(file, true);
1781 rc = CIFSSMBRead(xid, pTcon,
1783 current_read_size, *poffset,
1784 &bytes_read, &current_offset,
/* stop at error or EOF */
1787 if (rc || (bytes_read == 0)) {
1795 cifs_stats_bytes_read(pTcon, total_read);
1796 *poffset += bytes_read;
/*
 * cifs_file_mmap - revalidate cached inode data against the server, then
 * delegate to generic_file_mmap().  A failed revalidate is only logged;
 * the visible code proceeds to the generic mmap regardless.
 * NOTE(review): elided here — rc/xid handling and the return statement
 * are not visible in this view.
 */
1803 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1805 struct dentry *dentry = file->f_path.dentry;
1809 rc = cifs_revalidate(dentry);
1811 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1815 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - distribute a contiguous read buffer across the
 * readahead page list: pop pages off the tail of 'pages', insert each into
 * the page cache, copy one PAGE_CACHE_SIZE chunk of 'data' into it
 * (zero-filling the tail of a final partial page), mark it uptodate and
 * batch it onto the LRU via plru_pvec.
 *
 * NOTE(review): elided here — loop-exit breaks, the add_to_page_cache
 * failure continue, and unlock_page are not visible in this view.
 */
1821 static void cifs_copy_cache_pages(struct address_space *mapping,
1822 struct list_head *pages, int bytes_read, char *data,
1823 struct pagevec *plru_pvec)
1828 while (bytes_read > 0) {
1829 if (list_empty(pages))
1832 page = list_entry(pages->prev, struct page, lru);
1833 list_del(&page->lru);
/* if insertion fails, skip this page but still consume its data chunk */
1835 if (add_to_page_cache(page, mapping, page->index,
1837 page_cache_release(page);
1838 cFYI(1, ("Add page cache failed"));
1839 data += PAGE_CACHE_SIZE;
1840 bytes_read -= PAGE_CACHE_SIZE;
1844 target = kmap_atomic(page, KM_USER0);
1846 if (PAGE_CACHE_SIZE > bytes_read) {
1847 memcpy(target, data, bytes_read);
1848 /* zero the tail end of this partial page */
1849 memset(target + bytes_read, 0,
1850 PAGE_CACHE_SIZE - bytes_read);
1853 memcpy(target, data, PAGE_CACHE_SIZE);
1854 bytes_read -= PAGE_CACHE_SIZE;
1856 kunmap_atomic(target, KM_USER0);
1858 flush_dcache_page(page);
1859 SetPageUptodate(page);
/* pagevec full: drain the batch onto the file LRU */
1861 if (!pagevec_add(plru_pvec, page))
1862 __pagevec_lru_add_file(plru_pvec);
1863 data += PAGE_CACHE_SIZE;
/*
 * cifs_readpages - address_space_operations.readpages for CIFS.
 *
 * Walks the readahead list finding runs of pages with consecutive
 * indices, issues one CIFSSMBRead per run (capped at rsize rounded down
 * to a whole number of pages), and hands the response buffer to
 * cifs_copy_cache_pages() to populate the page cache.  Retries reads on
 * -EAGAIN after reopening an invalidated handle, and releases the SMB
 * response buffer on every exit path.
 *
 * NOTE(review): elided here — rc/xid declarations, the contig_pages
 * initialization and inner-loop break, the "break on error" statements
 * and the final return are not visible in this view.
 */
1868 static int cifs_readpages(struct file *file, struct address_space *mapping,
1869 struct list_head *page_list, unsigned num_pages)
1875 struct cifs_sb_info *cifs_sb;
1876 struct cifsTconInfo *pTcon;
1877 unsigned int bytes_read = 0;
1878 unsigned int read_size, i;
1879 char *smb_read_data = NULL;
1880 struct smb_com_read_rsp *pSMBr;
1881 struct pagevec lru_pvec;
1882 struct cifsFileInfo *open_file;
1883 int buf_type = CIFS_NO_BUFFER;
1886 if (file->private_data == NULL) {
1890 open_file = (struct cifsFileInfo *)file->private_data;
1891 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1892 pTcon = cifs_sb->tcon;
1894 pagevec_init(&lru_pvec, 0);
1895 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1896 for (i = 0; i < num_pages; ) {
1897 unsigned contig_pages;
1898 struct page *tmp_page;
1899 unsigned long expected_index;
1901 if (list_empty(page_list))
/* the list tail holds the lowest-index page of the remaining set */
1904 page = list_entry(page_list->prev, struct page, lru);
1905 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1907 /* count adjacent pages that we will read into */
1910 list_entry(page_list->prev, struct page, lru)->index;
1911 list_for_each_entry_reverse(tmp_page, page_list, lru) {
1912 if (tmp_page->index == expected_index) {
1918 if (contig_pages + i > num_pages)
1919 contig_pages = num_pages - i;
1921 /* for reads over a certain size could initiate async
1924 read_size = contig_pages * PAGE_CACHE_SIZE;
1925 /* Read size needs to be in multiples of one page */
1926 read_size = min_t(const unsigned int, read_size,
1927 cifs_sb->rsize & PAGE_CACHE_MASK);
1928 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d",
1929 read_size, contig_pages));
1931 while (rc == -EAGAIN) {
/* reconnect case: reopen the handle before retrying the read */
1932 if ((open_file->invalidHandle) &&
1933 (!open_file->closePend)) {
1934 rc = cifs_reopen_file(file, true);
1939 rc = CIFSSMBRead(xid, pTcon,
1942 &bytes_read, &smb_read_data,
1944 /* BB more RC checks ? */
/* on retry, drop the stale response buffer before re-issuing */
1945 if (rc == -EAGAIN) {
1946 if (smb_read_data) {
1947 if (buf_type == CIFS_SMALL_BUFFER)
1948 cifs_small_buf_release(smb_read_data);
1949 else if (buf_type == CIFS_LARGE_BUFFER)
1950 cifs_buf_release(smb_read_data);
1951 smb_read_data = NULL;
1955 if ((rc < 0) || (smb_read_data == NULL)) {
1956 cFYI(1, ("Read error in readpages: %d", rc));
1958 } else if (bytes_read > 0) {
1959 task_io_account_read(bytes_read);
1960 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1961 cifs_copy_cache_pages(mapping, page_list, bytes_read,
1962 smb_read_data + 4 /* RFC1001 hdr */ +
1963 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
/* advance past the pages just filled (plus any trailing partial page) */
1965 i += bytes_read >> PAGE_CACHE_SHIFT;
1966 cifs_stats_bytes_read(pTcon, bytes_read);
1967 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
1968 i++; /* account for partial page */
1970 /* server copy of file can have smaller size
1972 /* BB do we need to verify this common case ?
1973 this case is ok - if we are at server EOF
1974 we will hit it on next read */
1979 cFYI(1, ("No bytes read (%d) at offset %lld . "
1980 "Cleaning remaining pages from readahead list",
1981 bytes_read, offset));
1982 /* BB turn off caching and do new lookup on
1983 file size at server? */
/* release the response buffer before the next run */
1986 if (smb_read_data) {
1987 if (buf_type == CIFS_SMALL_BUFFER)
1988 cifs_small_buf_release(smb_read_data);
1989 else if (buf_type == CIFS_LARGE_BUFFER)
1990 cifs_buf_release(smb_read_data);
1991 smb_read_data = NULL;
/* push any pages still batched in the pagevec onto the LRU */
1996 pagevec_lru_add_file(&lru_pvec);
1998 /* need to free smb_read_data buf before exit */
1999 if (smb_read_data) {
2000 if (buf_type == CIFS_SMALL_BUFFER)
2001 cifs_small_buf_release(smb_read_data);
2002 else if (buf_type == CIFS_LARGE_BUFFER)
2003 cifs_buf_release(smb_read_data);
2004 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page by calling cifs_read() into its
 * mapped buffer, zero the tail beyond the bytes actually read, and mark
 * the page uptodate.  Also refreshes the inode's atime.
 * NOTE(review): elided here — rc/read_data declarations, the error check
 * between lines 2021 and 2026, kunmap and the return are not visible.
 */
2011 static int cifs_readpage_worker(struct file *file, struct page *page,
2017 page_cache_get(page);
2018 read_data = kmap(page);
2019 /* for reads over a certain size could initiate async read ahead */
2021 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2026 cFYI(1, ("Bytes read %d", rc));
2028 file->f_path.dentry->d_inode->i_atime =
2029 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: zero-fill the remainder of the page */
2031 if (PAGE_CACHE_SIZE > rc)
2032 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2034 flush_dcache_page(page);
2035 SetPageUptodate(page);
2040 page_cache_release(page);
/*
 * cifs_readpage - address_space_operations.readpage for CIFS; computes the
 * page's file offset and delegates to cifs_readpage_worker().
 * NOTE(review): elided here — rc/xid handling, the error path for a NULL
 * private_data, unlock_page and the return are not visible in this view.
 */
2044 static int cifs_readpage(struct file *file, struct page *page)
2046 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2052 if (file->private_data == NULL) {
2057 cFYI(1, ("readpage %p at offset %d 0x%x\n",
2058 page, (int)offset, (int)offset));
2060 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - scan the inode's open-file list (under
 * GlobalSMBSeslock) for any live handle opened O_RDWR or O_WRONLY.
 * NOTE(review): elided here — the continue for closePend entries and the
 * return statements inside/after the loop are not visible in this view.
 */
2068 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2070 struct cifsFileInfo *open_file;
2072 read_lock(&GlobalSMBSeslock);
2073 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* handles pending close do not count as writable */
2074 if (open_file->closePend)
2076 if (open_file->pfile &&
2077 ((open_file->pfile->f_flags & O_RDWR) ||
2078 (open_file->pfile->f_flags & O_WRONLY))) {
2079 read_unlock(&GlobalSMBSeslock);
2083 read_unlock(&GlobalSMBSeslock);
2087 /* We do not want to update the file size from server for inodes
2088 open for write - to avoid races with writepage extending
2089 the file - in the future we could consider allowing
2090 refreshing the inode only on increases in the file size
2091 but this is tricky to do without racing with writebehind
2092 page caching in the current Linux kernel design */
/*
 * Returns whether the cached inode size may be overwritten with the
 * server-reported size.  NOTE(review): elided here — the return
 * statements (directio case, size-comparison outcomes, and the
 * not-open-for-write fall-through) are not visible in this view.
 */
2093 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2098 if (is_inode_writable(cifsInode)) {
2099 /* This inode is open for write at least once */
2100 struct cifs_sb_info *cifs_sb;
2102 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2103 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2104 /* since no page cache to corrupt on directio
2105 we can change size safely */
2109 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space_operations.write_begin for CIFS.
 *
 * Grabs (or creates) the locked page for the write range and decides how
 * to prepare it: no read needed when the page is already uptodate or the
 * write covers a full page; with a read oplock, a write at/past EOF only
 * needs the untouched parts zeroed (marked via PageChecked for
 * cifs_write_end); otherwise read the page in so a partial copy leaves it
 * consistent.
 *
 * NOTE(review): elided here — the *pagep assignment, the ENOMEM path for
 * a failed grab, the out label/return and the second zero_user_segments
 * argument pair are not visible in this view.
 */
2117 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2118 loff_t pos, unsigned len, unsigned flags,
2119 struct page **pagep, void **fsdata)
2121 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2122 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2123 loff_t page_start = pos & PAGE_MASK;
2128 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2130 page = grab_cache_page_write_begin(mapping, index, flags);
2136 if (PageUptodate(page))
2140 * If we write a full page it will be up to date, no need to read from
2141 * the server. If the write is short, we'll end up doing a sync write
2144 if (len == PAGE_CACHE_SIZE)
2148 * optimize away the read when we have an oplock, and we're not
2149 * expecting to use any of the data we'd be reading in. That
2150 * is, when the page lies beyond the EOF, or straddles the EOF
2151 * and the write will cover all of the existing data.
2153 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2154 i_size = i_size_read(mapping->host);
2155 if (page_start >= i_size ||
2156 (offset == 0 && (pos + len) >= i_size)) {
2157 zero_user_segments(page, 0, offset,
2161 * PageChecked means that the parts of the page
2162 * to which we're not writing are considered up
2163 * to date. Once the data is copied to the
2164 * page, it can be set uptodate.
2166 SetPageChecked(page);
/* readable open: pull the existing page contents from the server */
2171 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2173 * might as well read a page, it is fast enough. If we get
2174 * an error, we don't need to return it. cifs_write_end will
2175 * do a sync write instead since PG_uptodate isn't set.
2177 cifs_readpage_worker(file, page, &page_start);
2179 /* we could try using another file handle if there is one -
2180 but how would we lock it to prevent close of that handle
2181 racing with this read? In any case
2182 this will be written out by write_end so is fine */
/* Address-space operations used when the server buffer is large enough
   for readpages (header plus a full page of data); see the smallbuf
   variant below for the restricted set. */
2189 const struct address_space_operations cifs_addr_ops = {
2190 .readpage = cifs_readpage,
2191 .readpages = cifs_readpages,
2192 .writepage = cifs_writepage,
2193 .writepages = cifs_writepages,
2194 .write_begin = cifs_write_begin,
2195 .write_end = cifs_write_end,
2196 .set_page_dirty = __set_page_dirty_nobuffers,
2197 /* .sync_page = cifs_sync_page, */
2202 * cifs_readpages requires the server to support a buffer large enough to
2203 * contain the header plus one complete page of data. Otherwise, we need
2204 * to leave cifs_readpages out of the address space operations.
2206 const struct address_space_operations cifs_addr_ops_smallbuf = {
2207 .readpage = cifs_readpage,
2208 .writepage = cifs_writepage,
2209 .writepages = cifs_writepages,
2210 .write_begin = cifs_write_begin,
2211 .write_end = cifs_write_end,
2212 .set_page_dirty = __set_page_dirty_nobuffers,
2213 /* .sync_page = cifs_sync_page, */