block: fix disk->part[] dereferencing race
[linux-2.6] / block / ioctl.c
#include <linux/capability.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include <linux/blktrace_api.h>
#include <linux/string.h>
#include <asm/uaccess.h>
10
11 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
12 {
13         struct block_device *bdevp;
14         struct gendisk *disk;
15         struct hd_struct *part;
16         struct blkpg_ioctl_arg a;
17         struct blkpg_partition p;
18         struct disk_part_iter piter;
19         long long start, length;
20         int partno;
21         int err;
22
23         if (!capable(CAP_SYS_ADMIN))
24                 return -EACCES;
25         if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
26                 return -EFAULT;
27         if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
28                 return -EFAULT;
29         disk = bdev->bd_disk;
30         if (bdev != bdev->bd_contains)
31                 return -EINVAL;
32         partno = p.pno;
33         if (partno <= 0 || partno > disk_max_parts(disk))
34                 return -EINVAL;
35         switch (a.op) {
36                 case BLKPG_ADD_PARTITION:
37                         start = p.start >> 9;
38                         length = p.length >> 9;
39                         /* check for fit in a hd_struct */ 
40                         if (sizeof(sector_t) == sizeof(long) && 
41                             sizeof(long long) > sizeof(long)) {
42                                 long pstart = start, plength = length;
43                                 if (pstart != start || plength != length
44                                     || pstart < 0 || plength < 0)
45                                         return -EINVAL;
46                         }
47
48                         mutex_lock(&bdev->bd_mutex);
49
50                         /* overlap? */
51                         disk_part_iter_init(&piter, disk,
52                                             DISK_PITER_INCL_EMPTY);
53                         while ((part = disk_part_iter_next(&piter))) {
54                                 if (!(start + length <= part->start_sect ||
55                                       start >= part->start_sect + part->nr_sects)) {
56                                         disk_part_iter_exit(&piter);
57                                         mutex_unlock(&bdev->bd_mutex);
58                                         return -EBUSY;
59                                 }
60                         }
61                         disk_part_iter_exit(&piter);
62
63                         /* all seems OK */
64                         err = add_partition(disk, partno, start, length,
65                                             ADDPART_FLAG_NONE);
66                         mutex_unlock(&bdev->bd_mutex);
67                         return err;
68                 case BLKPG_DEL_PARTITION:
69                         part = disk_get_part(disk, partno);
70                         if (!part)
71                                 return -ENXIO;
72
73                         bdevp = bdget(part_devt(part));
74                         disk_put_part(part);
75                         if (!bdevp)
76                                 return -ENOMEM;
77
78                         mutex_lock(&bdevp->bd_mutex);
79                         if (bdevp->bd_openers) {
80                                 mutex_unlock(&bdevp->bd_mutex);
81                                 bdput(bdevp);
82                                 return -EBUSY;
83                         }
84                         /* all seems OK */
85                         fsync_bdev(bdevp);
86                         invalidate_bdev(bdevp);
87
88                         mutex_lock_nested(&bdev->bd_mutex, 1);
89                         delete_partition(disk, partno);
90                         mutex_unlock(&bdev->bd_mutex);
91                         mutex_unlock(&bdevp->bd_mutex);
92                         bdput(bdevp);
93
94                         return 0;
95                 default:
96                         return -EINVAL;
97         }
98 }
99
100 static int blkdev_reread_part(struct block_device *bdev)
101 {
102         struct gendisk *disk = bdev->bd_disk;
103         int res;
104
105         if (!disk_max_parts(disk) || bdev != bdev->bd_contains)
106                 return -EINVAL;
107         if (!capable(CAP_SYS_ADMIN))
108                 return -EACCES;
109         if (!mutex_trylock(&bdev->bd_mutex))
110                 return -EBUSY;
111         res = rescan_partitions(disk, bdev);
112         mutex_unlock(&bdev->bd_mutex);
113         return res;
114 }
115
116 static void blk_ioc_discard_endio(struct bio *bio, int err)
117 {
118         if (err) {
119                 if (err == -EOPNOTSUPP)
120                         set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
121                 clear_bit(BIO_UPTODATE, &bio->bi_flags);
122         }
123         complete(bio->bi_private);
124 }
125
126 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
127                              uint64_t len)
128 {
129         struct request_queue *q = bdev_get_queue(bdev);
130         int ret = 0;
131
132         if (start & 511)
133                 return -EINVAL;
134         if (len & 511)
135                 return -EINVAL;
136         start >>= 9;
137         len >>= 9;
138
139         if (start + len > (bdev->bd_inode->i_size >> 9))
140                 return -EINVAL;
141
142         if (!q->prepare_discard_fn)
143                 return -EOPNOTSUPP;
144
145         while (len && !ret) {
146                 DECLARE_COMPLETION_ONSTACK(wait);
147                 struct bio *bio;
148
149                 bio = bio_alloc(GFP_KERNEL, 0);
150                 if (!bio)
151                         return -ENOMEM;
152
153                 bio->bi_end_io = blk_ioc_discard_endio;
154                 bio->bi_bdev = bdev;
155                 bio->bi_private = &wait;
156                 bio->bi_sector = start;
157
158                 if (len > q->max_hw_sectors) {
159                         bio->bi_size = q->max_hw_sectors << 9;
160                         len -= q->max_hw_sectors;
161                         start += q->max_hw_sectors;
162                 } else {
163                         bio->bi_size = len << 9;
164                         len = 0;
165                 }
166                 submit_bio(DISCARD_NOBARRIER, bio);
167
168                 wait_for_completion(&wait);
169
170                 if (bio_flagged(bio, BIO_EOPNOTSUPP))
171                         ret = -EOPNOTSUPP;
172                 else if (!bio_flagged(bio, BIO_UPTODATE))
173                         ret = -EIO;
174                 bio_put(bio);
175         }
176         return ret;
177 }
178
179 static int put_ushort(unsigned long arg, unsigned short val)
180 {
181         return put_user(val, (unsigned short __user *)arg);
182 }
183
184 static int put_int(unsigned long arg, int val)
185 {
186         return put_user(val, (int __user *)arg);
187 }
188
189 static int put_long(unsigned long arg, long val)
190 {
191         return put_user(val, (long __user *)arg);
192 }
193
194 static int put_ulong(unsigned long arg, unsigned long val)
195 {
196         return put_user(val, (unsigned long __user *)arg);
197 }
198
199 static int put_u64(unsigned long arg, u64 val)
200 {
201         return put_user(val, (u64 __user *)arg);
202 }
203
204 static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev,
205                                 unsigned cmd, unsigned long arg)
206 {
207         struct backing_dev_info *bdi;
208         int ret, n;
209
210         switch (cmd) {
211         case BLKRAGET:
212         case BLKFRAGET:
213                 if (!arg)
214                         return -EINVAL;
215                 bdi = blk_get_backing_dev_info(bdev);
216                 if (bdi == NULL)
217                         return -ENOTTY;
218                 return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
219         case BLKROGET:
220                 return put_int(arg, bdev_read_only(bdev) != 0);
221         case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
222                 return put_int(arg, block_size(bdev));
223         case BLKSSZGET: /* get block device hardware sector size */
224                 return put_int(arg, bdev_hardsect_size(bdev));
225         case BLKSECTGET:
226                 return put_ushort(arg, bdev_get_queue(bdev)->max_sectors);
227         case BLKRASET:
228         case BLKFRASET:
229                 if(!capable(CAP_SYS_ADMIN))
230                         return -EACCES;
231                 bdi = blk_get_backing_dev_info(bdev);
232                 if (bdi == NULL)
233                         return -ENOTTY;
234                 bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
235                 return 0;
236         case BLKBSZSET:
237                 /* set the logical block size */
238                 if (!capable(CAP_SYS_ADMIN))
239                         return -EACCES;
240                 if (!arg)
241                         return -EINVAL;
242                 if (get_user(n, (int __user *) arg))
243                         return -EFAULT;
244                 if (bd_claim(bdev, file) < 0)
245                         return -EBUSY;
246                 ret = set_blocksize(bdev, n);
247                 bd_release(bdev);
248                 return ret;
249         case BLKPG:
250                 return blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
251         case BLKRRPART:
252                 return blkdev_reread_part(bdev);
253         case BLKGETSIZE:
254                 if ((bdev->bd_inode->i_size >> 9) > ~0UL)
255                         return -EFBIG;
256                 return put_ulong(arg, bdev->bd_inode->i_size >> 9);
257         case BLKGETSIZE64:
258                 return put_u64(arg, bdev->bd_inode->i_size);
259         case BLKTRACESTART:
260         case BLKTRACESTOP:
261         case BLKTRACESETUP:
262         case BLKTRACETEARDOWN:
263                 return blk_trace_ioctl(bdev, cmd, (char __user *) arg);
264         }
265         return -ENOIOCTLCMD;
266 }
267
268 int blkdev_driver_ioctl(struct inode *inode, struct file *file,
269                         struct gendisk *disk, unsigned cmd, unsigned long arg)
270 {
271         int ret;
272         if (disk->fops->unlocked_ioctl)
273                 return disk->fops->unlocked_ioctl(file, cmd, arg);
274
275         if (disk->fops->ioctl) {
276                 lock_kernel();
277                 ret = disk->fops->ioctl(inode, file, cmd, arg);
278                 unlock_kernel();
279                 return ret;
280         }
281
282         return -ENOTTY;
283 }
284 EXPORT_SYMBOL_GPL(blkdev_driver_ioctl);
285
286 /*
287  * always keep this in sync with compat_blkdev_ioctl() and
288  * compat_blkdev_locked_ioctl()
289  */
290 int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
291                         unsigned long arg)
292 {
293         struct block_device *bdev = inode->i_bdev;
294         struct gendisk *disk = bdev->bd_disk;
295         int ret, n;
296
297         switch(cmd) {
298         case BLKFLSBUF:
299                 if (!capable(CAP_SYS_ADMIN))
300                         return -EACCES;
301
302                 ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
303                 /* -EINVAL to handle old uncorrected drivers */
304                 if (ret != -EINVAL && ret != -ENOTTY)
305                         return ret;
306
307                 lock_kernel();
308                 fsync_bdev(bdev);
309                 invalidate_bdev(bdev);
310                 unlock_kernel();
311                 return 0;
312
313         case BLKROSET:
314                 ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg);
315                 /* -EINVAL to handle old uncorrected drivers */
316                 if (ret != -EINVAL && ret != -ENOTTY)
317                         return ret;
318                 if (!capable(CAP_SYS_ADMIN))
319                         return -EACCES;
320                 if (get_user(n, (int __user *)(arg)))
321                         return -EFAULT;
322                 lock_kernel();
323                 set_device_ro(bdev, n);
324                 unlock_kernel();
325                 return 0;
326
327         case BLKDISCARD: {
328                 uint64_t range[2];
329
330                 if (!(file->f_mode & FMODE_WRITE))
331                         return -EBADF;
332
333                 if (copy_from_user(range, (void __user *)arg, sizeof(range)))
334                         return -EFAULT;
335
336                 return blk_ioctl_discard(bdev, range[0], range[1]);
337         }
338
339         case HDIO_GETGEO: {
340                 struct hd_geometry geo;
341
342                 if (!arg)
343                         return -EINVAL;
344                 if (!disk->fops->getgeo)
345                         return -ENOTTY;
346
347                 /*
348                  * We need to set the startsect first, the driver may
349                  * want to override it.
350                  */
351                 geo.start = get_start_sect(bdev);
352                 ret = disk->fops->getgeo(bdev, &geo);
353                 if (ret)
354                         return ret;
355                 if (copy_to_user((struct hd_geometry __user *)arg, &geo,
356                                         sizeof(geo)))
357                         return -EFAULT;
358                 return 0;
359         }
360         }
361
362         lock_kernel();
363         ret = blkdev_locked_ioctl(file, bdev, cmd, arg);
364         unlock_kernel();
365         if (ret != -ENOIOCTLCMD)
366                 return ret;
367
368         return blkdev_driver_ioctl(inode, file, disk, cmd, arg);
369 }
370 EXPORT_SYMBOL_GPL(blkdev_ioctl);