Allow elevators to sort/merge discard requests
[linux-2.6] / block / ioctl.c
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include <linux/blktrace_api.h>
#include <linux/string.h>
#include <asm/uaccess.h>
10
11 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
12 {
13         struct block_device *bdevp;
14         struct gendisk *disk;
15         struct blkpg_ioctl_arg a;
16         struct blkpg_partition p;
17         long long start, length;
18         int part;
19         int i;
20         int err;
21
22         if (!capable(CAP_SYS_ADMIN))
23                 return -EACCES;
24         if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
25                 return -EFAULT;
26         if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
27                 return -EFAULT;
28         disk = bdev->bd_disk;
29         if (bdev != bdev->bd_contains)
30                 return -EINVAL;
31         part = p.pno;
32         if (part <= 0 || part >= disk->minors)
33                 return -EINVAL;
34         switch (a.op) {
35                 case BLKPG_ADD_PARTITION:
36                         start = p.start >> 9;
37                         length = p.length >> 9;
38                         /* check for fit in a hd_struct */ 
39                         if (sizeof(sector_t) == sizeof(long) && 
40                             sizeof(long long) > sizeof(long)) {
41                                 long pstart = start, plength = length;
42                                 if (pstart != start || plength != length
43                                     || pstart < 0 || plength < 0)
44                                         return -EINVAL;
45                         }
46                         /* partition number in use? */
47                         mutex_lock(&bdev->bd_mutex);
48                         if (disk->part[part - 1]) {
49                                 mutex_unlock(&bdev->bd_mutex);
50                                 return -EBUSY;
51                         }
52                         /* overlap? */
53                         for (i = 0; i < disk->minors - 1; i++) {
54                                 struct hd_struct *s = disk->part[i];
55
56                                 if (!s)
57                                         continue;
58                                 if (!(start+length <= s->start_sect ||
59                                       start >= s->start_sect + s->nr_sects)) {
60                                         mutex_unlock(&bdev->bd_mutex);
61                                         return -EBUSY;
62                                 }
63                         }
64                         /* all seems OK */
65                         err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
66                         mutex_unlock(&bdev->bd_mutex);
67                         return err;
68                 case BLKPG_DEL_PARTITION:
69                         if (!disk->part[part-1])
70                                 return -ENXIO;
71                         if (disk->part[part - 1]->nr_sects == 0)
72                                 return -ENXIO;
73                         bdevp = bdget_disk(disk, part);
74                         if (!bdevp)
75                                 return -ENOMEM;
76                         mutex_lock(&bdevp->bd_mutex);
77                         if (bdevp->bd_openers) {
78                                 mutex_unlock(&bdevp->bd_mutex);
79                                 bdput(bdevp);
80                                 return -EBUSY;
81                         }
82                         /* all seems OK */
83                         fsync_bdev(bdevp);
84                         invalidate_bdev(bdevp);
85
86                         mutex_lock_nested(&bdev->bd_mutex, 1);
87                         delete_partition(disk, part);
88                         mutex_unlock(&bdev->bd_mutex);
89                         mutex_unlock(&bdevp->bd_mutex);
90                         bdput(bdevp);
91
92                         return 0;
93                 default:
94                         return -EINVAL;
95         }
96 }
97
98 static int blkdev_reread_part(struct block_device *bdev)
99 {
100         struct gendisk *disk = bdev->bd_disk;
101         int res;
102
103         if (disk->minors == 1 || bdev != bdev->bd_contains)
104                 return -EINVAL;
105         if (!capable(CAP_SYS_ADMIN))
106                 return -EACCES;
107         if (!mutex_trylock(&bdev->bd_mutex))
108                 return -EBUSY;
109         res = rescan_partitions(disk, bdev);
110         mutex_unlock(&bdev->bd_mutex);
111         return res;
112 }
113
114 static void blk_ioc_discard_endio(struct bio *bio, int err)
115 {
116         if (err) {
117                 if (err == -EOPNOTSUPP)
118                         set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
119                 clear_bit(BIO_UPTODATE, &bio->bi_flags);
120         }
121         complete(bio->bi_private);
122 }
123
124 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
125                              uint64_t len)
126 {
127         struct request_queue *q = bdev_get_queue(bdev);
128         int ret = 0;
129
130         if (start & 511)
131                 return -EINVAL;
132         if (len & 511)
133                 return -EINVAL;
134         start >>= 9;
135         len >>= 9;
136
137         if (start + len > (bdev->bd_inode->i_size >> 9))
138                 return -EINVAL;
139
140         if (!q->prepare_discard_fn)
141                 return -EOPNOTSUPP;
142
143         while (len && !ret) {
144                 DECLARE_COMPLETION_ONSTACK(wait);
145                 struct bio *bio;
146
147                 bio = bio_alloc(GFP_KERNEL, 0);
148                 if (!bio)
149                         return -ENOMEM;
150
151                 bio->bi_end_io = blk_ioc_discard_endio;
152                 bio->bi_bdev = bdev;
153                 bio->bi_private = &wait;
154                 bio->bi_sector = start;
155
156                 if (len > q->max_hw_sectors) {
157                         bio->bi_size = q->max_hw_sectors << 9;
158                         len -= q->max_hw_sectors;
159                         start += q->max_hw_sectors;
160                 } else {
161                         bio->bi_size = len << 9;
162                         len = 0;
163                 }
164                 submit_bio(DISCARD_NOBARRIER, bio);
165
166                 wait_for_completion(&wait);
167
168                 if (bio_flagged(bio, BIO_EOPNOTSUPP))
169                         ret = -EOPNOTSUPP;
170                 else if (!bio_flagged(bio, BIO_UPTODATE))
171                         ret = -EIO;
172                 bio_put(bio);
173         }
174         return ret;
175 }
176
177 static int put_ushort(unsigned long arg, unsigned short val)
178 {
179         return put_user(val, (unsigned short __user *)arg);
180 }
181
182 static int put_int(unsigned long arg, int val)
183 {
184         return put_user(val, (int __user *)arg);
185 }
186
187 static int put_long(unsigned long arg, long val)
188 {
189         return put_user(val, (long __user *)arg);
190 }
191
192 static int put_ulong(unsigned long arg, unsigned long val)
193 {
194         return put_user(val, (unsigned long __user *)arg);
195 }
196
197 static int put_u64(unsigned long arg, u64 val)
198 {
199         return put_user(val, (u64 __user *)arg);
200 }
201
202 static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev,
203                                 unsigned cmd, unsigned long arg)
204 {
205         struct backing_dev_info *bdi;
206         int ret, n;
207
208         switch (cmd) {
209         case BLKRAGET:
210         case BLKFRAGET:
211                 if (!arg)
212                         return -EINVAL;
213                 bdi = blk_get_backing_dev_info(bdev);
214                 if (bdi == NULL)
215                         return -ENOTTY;
216                 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
217         case BLKROGET:
218                 return put_int(arg, bdev_read_only(bdev) != 0);
219         case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
220                 return put_int(arg, block_size(bdev));
221         case BLKSSZGET: /* get block device hardware sector size */
222                 return put_int(arg, bdev_hardsect_size(bdev));
223         case BLKSECTGET:
224                 return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
225         case BLKRASET:
226         case BLKFRASET:
227                 if(!capable(CAP_SYS_ADMIN))
228                         return -EACCES;
229                 bdi = blk_get_backing_dev_info(bdev);
230                 if (bdi == NULL)
231                         return -ENOTTY;
232                 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
233                 return 0;
234         case BLKBSZSET:
235                 /* set the logical block size */
236                 if (!capable(CAP_SYS_ADMIN))
237                         return -EACCES;
238                 if (!arg)
239                         return -EINVAL;
240                 if (get_user(n, (int __user *) arg))
241                         return -EFAULT;
242                 if (bd_claim(bdev, file) < 0)
243                         return -EBUSY;
244                 ret = set_blocksize(bdev, n);
245                 bd_release(bdev);
246                 return ret;
247         case BLKPG:
248                 return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
249         case BLKRRPART:
250                 return blkdev_reread_part(bdev);
251         case BLKGETSIZE:
252                 if ((bdev->bd_inode->i_size >> 9) > ~0UL)
253                         return -EFBIG;
254                 return put_ulong(arg, bdev->bd_inode->i_size >> 9);
255         case BLKGETSIZE64:
256                 return put_u64(arg, bdev->bd_inode->i_size);
257         case BLKTRACESTART:
258         case BLKTRACESTOP:
259         case BLKTRACESETUP:
260         case BLKTRACETEARDOWN:
261                 return blk_trace_ioctl(bdev, cmd, (char __user *) arg);
262         }
263         return -ENOIOCTLCMD;
264 }
265
266 int blkdev_driver_ioctl(struct inode *inode, struct file *file,
267                         struct gendisk *disk, unsigned cmd, unsigned long arg)
268 {
269         int ret;
270         if (disk->fops->unlocked_ioctl)
271                 return disk->fops->unlocked_ioctl(file, cmd, arg);
272
273         if (disk->fops->ioctl) {
274                 lock_kernel();
275                 ret = disk->fops->ioctl(inode, file, cmd, arg);
276                 unlock_kernel();
277                 return ret;
278         }
279
280         return -ENOTTY;
281 }
282 EXPORT_SYMBOL_GPL(blkdev_driver_ioctl);
283
284 /*
285  * always keep this in sync with compat_blkdev_ioctl() and
286  * compat_blkdev_locked_ioctl()
287  */
288 int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
289                         unsigned long arg)
290 {
291         struct block_device *bdev = inode->i_bdev;
292         struct gendisk *disk = bdev->bd_disk;
293         int ret, n;
294
295         switch(cmd) {
296         case BLKFLSBUF:
297                 if (!capable(CAP_SYS_ADMIN))
298                         return -EACCES;
299
300                 ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
301                 /* -EINVAL to handle old uncorrected drivers */
302                 if (ret != -EINVAL && ret != -ENOTTY)
303                         return ret;
304
305                 lock_kernel();
306                 fsync_bdev(bdev);
307                 invalidate_bdev(bdev);
308                 unlock_kernel();
309                 return 0;
310
311         case BLKROSET:
312                 ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
313                 /* -EINVAL to handle old uncorrected drivers */
314                 if (ret != -EINVAL && ret != -ENOTTY)
315                         return ret;
316                 if (!capable(CAP_SYS_ADMIN))
317                         return -EACCES;
318                 if (get_user(n, (int __user *)(arg)))
319                         return -EFAULT;
320                 lock_kernel();
321                 set_device_ro(bdev, n);
322                 unlock_kernel();
323                 return 0;
324
325         case BLKDISCARD: {
326                 uint64_t range[2];
327
328                 if (!(file->f_mode & FMODE_WRITE))
329                         return -EBADF;
330
331                 if (copy_from_user(range, (void __user *)arg, sizeof(range)))
332                         return -EFAULT;
333
334                 return blk_ioctl_discard(bdev, range[0], range[1]);
335         }
336
337         case HDIO_GETGEO: {
338                 struct hd_geometry geo;
339
340                 if (!arg)
341                         return -EINVAL;
342                 if (!disk->fops->getgeo)
343                         return -ENOTTY;
344
345                 /*
346                  * We need to set the startsect first, the driver may
347                  * want to override it.
348                  */
349                 geo.start = get_start_sect(bdev);
350                 ret = disk->fops->getgeo(bdev, &geo);
351                 if (ret)
352                         return ret;
353                 if (copy_to_user((struct hd_geometry __user *)arg, &geo,
354                                         sizeof(geo)))
355                         return -EFAULT;
356                 return 0;
357         }
358         }
359
360         lock_kernel();
361         ret = blkdev_locked_ioctl(file, bdev, cmd, arg);
362         unlock_kernel();
363         if (ret != -ENOIOCTLCMD)
364                 return ret;
365
366         return blkdev_driver_ioctl(inode, file, disk, cmd, arg);
367 }
368 EXPORT_SYMBOL_GPL(blkdev_ioctl);