Merge commit '900cfa46191a7d87cf1891924cb90499287fd235'; branches 'timers/nohz',...
[linux-2.6] / fs / xfs / linux-2.6 / xfs_file.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_bit.h"
20 #include "xfs_log.h"
21 #include "xfs_inum.h"
22 #include "xfs_sb.h"
23 #include "xfs_ag.h"
24 #include "xfs_dir2.h"
25 #include "xfs_trans.h"
26 #include "xfs_dmapi.h"
27 #include "xfs_mount.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_ialloc_btree.h"
31 #include "xfs_alloc.h"
32 #include "xfs_btree.h"
33 #include "xfs_attr_sf.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_error.h"
38 #include "xfs_rw.h"
39 #include "xfs_ioctl32.h"
40 #include "xfs_vnodeops.h"
41
42 #include <linux/dcache.h>
43 #include <linux/smp_lock.h>
44
/*
 * Forward declaration: xfs_file_mmap() takes the address of this table
 * before its definition at the end of the file.
 */
static struct vm_operations_struct xfs_file_vm_ops;
46
47 STATIC_INLINE ssize_t
48 __xfs_file_read(
49         struct kiocb            *iocb,
50         const struct iovec      *iov,
51         unsigned long           nr_segs,
52         int                     ioflags,
53         loff_t                  pos)
54 {
55         struct file             *file = iocb->ki_filp;
56
57         BUG_ON(iocb->ki_pos != pos);
58         if (unlikely(file->f_flags & O_DIRECT))
59                 ioflags |= IO_ISDIRECT;
60         return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
61                                 nr_segs, &iocb->ki_pos, ioflags);
62 }
63
64 STATIC ssize_t
65 xfs_file_aio_read(
66         struct kiocb            *iocb,
67         const struct iovec      *iov,
68         unsigned long           nr_segs,
69         loff_t                  pos)
70 {
71         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
72 }
73
74 STATIC ssize_t
75 xfs_file_aio_read_invis(
76         struct kiocb            *iocb,
77         const struct iovec      *iov,
78         unsigned long           nr_segs,
79         loff_t                  pos)
80 {
81         return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
82 }
83
84 STATIC_INLINE ssize_t
85 __xfs_file_write(
86         struct kiocb            *iocb,
87         const struct iovec      *iov,
88         unsigned long           nr_segs,
89         int                     ioflags,
90         loff_t                  pos)
91 {
92         struct file     *file = iocb->ki_filp;
93
94         BUG_ON(iocb->ki_pos != pos);
95         if (unlikely(file->f_flags & O_DIRECT))
96                 ioflags |= IO_ISDIRECT;
97         return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
98                                 &iocb->ki_pos, ioflags);
99 }
100
101 STATIC ssize_t
102 xfs_file_aio_write(
103         struct kiocb            *iocb,
104         const struct iovec      *iov,
105         unsigned long           nr_segs,
106         loff_t                  pos)
107 {
108         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
109 }
110
111 STATIC ssize_t
112 xfs_file_aio_write_invis(
113         struct kiocb            *iocb,
114         const struct iovec      *iov,
115         unsigned long           nr_segs,
116         loff_t                  pos)
117 {
118         return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
119 }
120
121 STATIC ssize_t
122 xfs_file_splice_read(
123         struct file             *infilp,
124         loff_t                  *ppos,
125         struct pipe_inode_info  *pipe,
126         size_t                  len,
127         unsigned int            flags)
128 {
129         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
130                                    infilp, ppos, pipe, len, flags, 0);
131 }
132
133 STATIC ssize_t
134 xfs_file_splice_read_invis(
135         struct file             *infilp,
136         loff_t                  *ppos,
137         struct pipe_inode_info  *pipe,
138         size_t                  len,
139         unsigned int            flags)
140 {
141         return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
142                                    infilp, ppos, pipe, len, flags, IO_INVIS);
143 }
144
145 STATIC ssize_t
146 xfs_file_splice_write(
147         struct pipe_inode_info  *pipe,
148         struct file             *outfilp,
149         loff_t                  *ppos,
150         size_t                  len,
151         unsigned int            flags)
152 {
153         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
154                                     pipe, outfilp, ppos, len, flags, 0);
155 }
156
157 STATIC ssize_t
158 xfs_file_splice_write_invis(
159         struct pipe_inode_info  *pipe,
160         struct file             *outfilp,
161         loff_t                  *ppos,
162         size_t                  len,
163         unsigned int            flags)
164 {
165         return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
166                                     pipe, outfilp, ppos, len, flags, IO_INVIS);
167 }
168
169 STATIC int
170 xfs_file_open(
171         struct inode    *inode,
172         struct file     *filp)
173 {
174         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
175                 return -EFBIG;
176         return -xfs_open(XFS_I(inode));
177 }
178
179 STATIC int
180 xfs_file_release(
181         struct inode    *inode,
182         struct file     *filp)
183 {
184         return -xfs_release(XFS_I(inode));
185 }
186
187 /*
188  * We ignore the datasync flag here because a datasync is effectively
189  * identical to an fsync. That is, datasync implies that we need to write
190  * only the metadata needed to be able to access the data that is written
191  * if we crash after the call completes. Hence if we are writing beyond
192  * EOF we have to log the inode size change as well, which makes it a
193  * full fsync. If we don't write beyond EOF, the inode core will be
194  * clean in memory and so we don't need to log the inode, just like
195  * fsync.
196  */
197 STATIC int
198 xfs_file_fsync(
199         struct file     *filp,
200         struct dentry   *dentry,
201         int             datasync)
202 {
203         xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
204         return -xfs_fsync(XFS_I(dentry->d_inode));
205 }
206
/*
 * Unfortunately we can't just use the clean and simple readdir implementation
 * below, because nfs might call back into ->lookup from the filldir callback
 * and that will deadlock the low-level btree code.
 *
 * Hopefully we'll find a better workaround that allows us to use the optimal
 * version, at least for local readdirs, for 2.6.25.
 */
215 #if 0
216 STATIC int
217 xfs_file_readdir(
218         struct file     *filp,
219         void            *dirent,
220         filldir_t       filldir)
221 {
222         struct inode    *inode = filp->f_path.dentry->d_inode;
223         xfs_inode_t     *ip = XFS_I(inode);
224         int             error;
225         size_t          bufsize;
226
227         /*
228          * The Linux API doesn't pass down the total size of the buffer
229          * we read into down to the filesystem.  With the filldir concept
230          * it's not needed for correct information, but the XFS dir2 leaf
231          * code wants an estimate of the buffer size to calculate it's
232          * readahead window and size the buffers used for mapping to
233          * physical blocks.
234          *
235          * Try to give it an estimate that's good enough, maybe at some
236          * point we can change the ->readdir prototype to include the
237          * buffer size.
238          */
239         bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
240
241         error = xfs_readdir(ip, dirent, bufsize,
242                                 (xfs_off_t *)&filp->f_pos, filldir);
243         if (error)
244                 return -error;
245         return 0;
246 }
247 #else
248
/*
 * One directory entry as staged in the temporary buffer by
 * xfs_hack_filldir() and later replayed by xfs_file_readdir().
 * Records are packed back to back; each one occupies
 * ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64)) bytes.
 */
struct hack_dirent {
	u64		ino;		/* inode number given to the callback */
	loff_t		offset;		/* directory offset given to the callback */
	int		namlen;		/* number of bytes stored in name[] */
	unsigned int	d_type;		/* d_type value given to the callback */
	char		name[];		/* namlen bytes of name; no NUL is added */
};
256
/*
 * State shared between xfs_file_readdir() and xfs_hack_filldir(): the
 * staging buffer and how much of it has been filled.
 */
struct hack_callback {
	char		*dirent;	/* start of the staging buffer */
	size_t		len;		/* total size of the buffer in bytes */
	size_t		used;		/* bytes already occupied by records */
};
262
263 STATIC int
264 xfs_hack_filldir(
265         void            *__buf,
266         const char      *name,
267         int             namlen,
268         loff_t          offset,
269         u64             ino,
270         unsigned int    d_type)
271 {
272         struct hack_callback *buf = __buf;
273         struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
274         unsigned int reclen;
275
276         reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
277         if (buf->used + reclen > buf->len)
278                 return -EINVAL;
279
280         de->namlen = namlen;
281         de->offset = offset;
282         de->ino = ino;
283         de->d_type = d_type;
284         memcpy(de->name, name, namlen);
285         buf->used += reclen;
286         return 0;
287 }
288
289 STATIC int
290 xfs_file_readdir(
291         struct file     *filp,
292         void            *dirent,
293         filldir_t       filldir)
294 {
295         struct inode    *inode = filp->f_path.dentry->d_inode;
296         xfs_inode_t     *ip = XFS_I(inode);
297         struct hack_callback buf;
298         struct hack_dirent *de;
299         int             error;
300         loff_t          size;
301         int             eof = 0;
302         xfs_off_t       start_offset, curr_offset, offset;
303
304         /*
305          * Try fairly hard to get memory
306          */
307         buf.len = PAGE_CACHE_SIZE;
308         do {
309                 buf.dirent = kmalloc(buf.len, GFP_KERNEL);
310                 if (buf.dirent)
311                         break;
312                 buf.len >>= 1;
313         } while (buf.len >= 1024);
314
315         if (!buf.dirent)
316                 return -ENOMEM;
317
318         curr_offset = filp->f_pos;
319         if (curr_offset == 0x7fffffff)
320                 offset = 0xffffffff;
321         else
322                 offset = filp->f_pos;
323
324         while (!eof) {
325                 unsigned int reclen;
326
327                 start_offset = offset;
328
329                 buf.used = 0;
330                 error = -xfs_readdir(ip, &buf, buf.len, &offset,
331                                      xfs_hack_filldir);
332                 if (error || offset == start_offset) {
333                         size = 0;
334                         break;
335                 }
336
337                 size = buf.used;
338                 de = (struct hack_dirent *)buf.dirent;
339                 while (size > 0) {
340                         curr_offset = de->offset /* & 0x7fffffff */;
341                         if (filldir(dirent, de->name, de->namlen,
342                                         curr_offset & 0x7fffffff,
343                                         de->ino, de->d_type)) {
344                                 goto done;
345                         }
346
347                         reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
348                                        sizeof(u64));
349                         size -= reclen;
350                         de = (struct hack_dirent *)((char *)de + reclen);
351                 }
352         }
353
354  done:
355         if (!error) {
356                 if (size == 0)
357                         filp->f_pos = offset & 0x7fffffff;
358                 else if (de)
359                         filp->f_pos = curr_offset;
360         }
361
362         kfree(buf.dirent);
363         return error;
364 }
365 #endif
366
367 STATIC int
368 xfs_file_mmap(
369         struct file     *filp,
370         struct vm_area_struct *vma)
371 {
372         vma->vm_ops = &xfs_file_vm_ops;
373         vma->vm_flags |= VM_CAN_NONLINEAR;
374
375         file_accessed(filp);
376         return 0;
377 }
378
379 STATIC long
380 xfs_file_ioctl(
381         struct file     *filp,
382         unsigned int    cmd,
383         unsigned long   p)
384 {
385         int             error;
386         struct inode    *inode = filp->f_path.dentry->d_inode;
387
388         error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
389         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
390
391         /* NOTE:  some of the ioctl's return positive #'s as a
392          *        byte count indicating success, such as
393          *        readlink_by_handle.  So we don't "sign flip"
394          *        like most other routines.  This means true
395          *        errors need to be returned as a negative value.
396          */
397         return error;
398 }
399
400 STATIC long
401 xfs_file_ioctl_invis(
402         struct file     *filp,
403         unsigned int    cmd,
404         unsigned long   p)
405 {
406         int             error;
407         struct inode    *inode = filp->f_path.dentry->d_inode;
408
409         error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
410         xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
411
412         /* NOTE:  some of the ioctl's return positive #'s as a
413          *        byte count indicating success, such as
414          *        readlink_by_handle.  So we don't "sign flip"
415          *        like most other routines.  This means true
416          *        errors need to be returned as a negative value.
417          */
418         return error;
419 }
420
421 /*
422  * mmap()d file has taken write protection fault and is being made
423  * writable. We can set the page state up correctly for a writable
424  * page, which means we can do correct delalloc accounting (ENOSPC
425  * checking!) and unwritten extent mapping.
426  */
427 STATIC int
428 xfs_vm_page_mkwrite(
429         struct vm_area_struct   *vma,
430         struct page             *page)
431 {
432         return block_page_mkwrite(vma, page, xfs_get_blocks);
433 }
434
/*
 * File operations for regular XFS files.  Synchronous read/write go
 * through do_sync_read/do_sync_write; the aio, splice and ioctl hooks
 * are the non-"invis" wrappers defined in this file.
 */
const struct file_operations xfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= xfs_file_aio_read,
	.aio_write	= xfs_file_aio_write,
	.splice_read	= xfs_file_splice_read,
	.splice_write	= xfs_file_splice_write,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	/* Defined outside this file. */
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
	/* NOTE(review): xfs_file_open_exec is not defined in this file. */
	.open_exec	= xfs_file_open_exec,
#endif
};
455
/*
 * "Invisible" variant of xfs_file_operations: identical except that the
 * aio, splice and ioctl hooks are the *_invis wrappers, which pass
 * IO_INVIS down to the XFS I/O routines, and that no .open_exec hook is
 * set.
 */
const struct file_operations xfs_invis_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= xfs_file_aio_read_invis,
	.aio_write	= xfs_file_aio_write_invis,
	.splice_read	= xfs_file_splice_read_invis,
	.splice_write	= xfs_file_splice_write_invis,
	.unlocked_ioctl	= xfs_file_ioctl_invis,
#ifdef CONFIG_COMPAT
	/* Defined outside this file. */
	.compat_ioctl	= xfs_file_compat_invis_ioctl,
#endif
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
};
473
474
/*
 * File operations for XFS directories: read() is routed to
 * generic_read_dir, directory listing goes through xfs_file_readdir(),
 * and ioctl/fsync share the regular-file handlers.
 */
const struct file_operations xfs_dir_file_operations = {
	.read		= generic_read_dir,
	.readdir	= xfs_file_readdir,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	/* Defined outside this file. */
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.fsync		= xfs_file_fsync,
};
484
/*
 * VM operations installed on mappings by xfs_file_mmap(): page faults
 * use the generic filemap_fault, while write-protection faults go
 * through xfs_vm_page_mkwrite().
 */
static struct vm_operations_struct xfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= xfs_vm_page_mkwrite,
};