3 * Authors: Dave Boutcher <boutcher@us.ibm.com>
4 * Ryan Arnold <ryanarn@us.ibm.com>
5 * Colin Devilbiss <devilbis@us.ibm.com>
6 * Stephen Rothwell <sfr@au1.ibm.com>
8 * (C) Copyright 2000-2004 IBM Corporation
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 * This routine provides access to disk space (termed "DASD" in historical
25 * IBM terms) owned and managed by an OS/400 partition running on the
26 * same box as this Linux partition.
28 * All disk operations are performed by sending messages back and forth to
29 * the OS/400 partition.
31 #include <linux/major.h>
33 #include <linux/module.h>
34 #include <linux/kernel.h>
35 #include <linux/blkdev.h>
36 #include <linux/genhd.h>
37 #include <linux/hdreg.h>
38 #include <linux/errno.h>
39 #include <linux/init.h>
40 #include <linux/string.h>
41 #include <linux/dma-mapping.h>
42 #include <linux/completion.h>
43 #include <linux/device.h>
44 #include <linux/kernel.h>
46 #include <asm/uaccess.h>
48 #include <asm/iseries/hv_types.h>
49 #include <asm/iseries/hv_lp_event.h>
50 #include <asm/iseries/hv_lp_config.h>
51 #include <asm/iseries/vio.h>
53 MODULE_DESCRIPTION("iSeries Virtual DASD");
54 MODULE_AUTHOR("Dave Boutcher");
55 MODULE_LICENSE("GPL");
58 * We only support 7 partitions per physical disk, so with minor
59 * numbers 0-255 we get a maximum of 32 disks.
61 #define VIOD_GENHD_NAME "iseries/vd"
62 #define VIOD_GENHD_DEVFS_NAME "iseries/disc"
64 #define VIOD_VERS "1.64"
66 #define VIOD_KERN_WARNING KERN_WARNING "viod: "
67 #define VIOD_KERN_INFO KERN_INFO "viod: "
71 MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
72 MAX_DISK_NAME = sizeof(((struct gendisk *)0)->disk_name)
75 static DEFINE_SPINLOCK(viodasd_spinlock);
78 #define VIOMAXBLOCKDMA 12
80 #define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0])
97 } dma_info[VIOMAXBLOCKDMA];
100 struct vioblocklpevent {
101 struct HvLpEvent event;
108 struct open_data open_data;
109 struct rw_data rw_data;
114 #define vioblockflags_ro 0x0001
116 enum vioblocksubtype {
117 vioblockopen = 0x0001,
118 vioblockclose = 0x0002,
119 vioblockread = 0x0003,
120 vioblockwrite = 0x0004,
121 vioblockflush = 0x0005,
122 vioblockcheck = 0x0007
125 struct viodasd_waitevent {
126 struct completion com;
129 int max_disk; /* open */
/*
 * Sub-result codes paired with an errno constant and a short message.
 * The table is handed to vio_lookup_rc() together with a sub_result value,
 * and the msg field of the entry that comes back is printed in warnings.
 * NOTE(review): the last entry and the closing brace of the initializer
 * are not visible in this extract.
 */
132 static const struct vio_error_entry viodasd_err_table[] = {
133 { 0x0201, EINVAL, "Invalid Range" },
134 { 0x0202, EINVAL, "Invalid Token" },
135 { 0x0203, EIO, "DMA Error" },
136 { 0x0204, EIO, "Use Error" },
137 { 0x0205, EIO, "Release Error" },
138 { 0x0206, EINVAL, "Invalid Disk" },
139 { 0x0207, EBUSY, "Cant Lock" },
140 { 0x0208, EIO, "Already Locked" },
141 { 0x0209, EIO, "Already Unlocked" },
142 { 0x020A, EIO, "Invalid Arg" },
143 { 0x020B, EIO, "Bad IFS File" },
144 { 0x020C, EROFS, "Read Only Device" },
145 { 0x02FF, EIO, "Internal Error" },
150 * Figure out the biggest I/O request (in sectors) we can accept
152 #define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
155 * Number of disk I/O requests we've sent to OS/400
157 static int num_req_outstanding;
160 * This is our internal structure for keeping track of disk devices
162 struct viodasd_device {
166 u16 bytes_per_sector;
170 struct gendisk *disk;
172 } viodasd_devices[MAX_DISKNO];
175 * External open entry point.
/*
 * Block device open entry point (installed as .open in viodasd_fops).
 * Sends a vioblockopen event for the disk behind this inode to the hosting
 * partition and sleeps on the completion inside an on-stack
 * viodasd_waitevent until it is posted.
 * NOTE(review): this extract omits several lines of the body (local
 * declarations, braces, some conditions and the return statements); the
 * comments below describe only the statements that are visible.
 */
177 static int viodasd_open(struct inode *ino, struct file *fil)
/* the per-disk state is stored in the gendisk private_data pointer */
179 struct viodasd_device *d = ino->i_bdev->bd_disk->private_data;
181 struct viodasd_waitevent we;
/*
 * NOTE(review): a statement between the next two lines is not visible
 * here, so the flags assignment is presumably not the body of the
 * write-mode test -- confirm against the full file.
 */
185 if ((fil != NULL) && (fil->f_mode & FMODE_WRITE))
187 flags = vioblockflags_ro;
190 init_completion(&we.com);
192 /* Send the open event to OS/400 */
193 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
194 HvLpEvent_Type_VirtualIo,
195 viomajorsubtype_blockio | vioblockopen,
196 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
197 viopath_sourceinst(viopath_hostLp),
198 viopath_targetinst(viopath_hostLp),
/* correlation token: address of the waitevent, read back from xCorrelationToken in handle_block_event() */
199 (u64)(unsigned long)&we, VIOVERSION << 16,
/* device index shifted to bit 48 and up, open flags shifted to bit 32 and up */
200 ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
203 printk(VIOD_KERN_WARNING "HV open failed %d\n", (int)hvrc);
/* block until the waitevent completion is posted */
207 wait_for_completion(&we.com);
209 /* Check the return code */
/* translate we.sub_result into a printable message for the warning below */
211 const struct vio_error_entry *err =
212 vio_lookup_rc(viodasd_err_table, we.sub_result);
214 printk(VIOD_KERN_WARNING
215 "bad rc opening disk: %d:0x%04x (%s)\n",
216 (int)we.rc, we.sub_result, err->msg);
224 * External release entry point.
/*
 * Block device release entry point (installed as .release in viodasd_fops).
 * Sends a vioblockclose event for the disk to the hosting partition with
 * HvLpEvent_AckInd_NoAck, so no reply is waited for.
 * NOTE(review): declarations, the hvrc test and the return statement are
 * not visible in this extract.
 */
226 static int viodasd_release(struct inode *ino, struct file *fil)
228 struct viodasd_device *d = ino->i_bdev->bd_disk->private_data;
231 /* Send the event to OS/400. We DON'T expect a response */
232 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
233 HvLpEvent_Type_VirtualIo,
234 viomajorsubtype_blockio | vioblockclose,
235 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
236 viopath_sourceinst(viopath_hostLp),
237 viopath_targetinst(viopath_hostLp),
/* only the device index is sent; the flags term is commented out in the original */
239 ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
242 printk(VIOD_KERN_WARNING "HV close call failed %d\n",
248 /* External ioctl entry point.
250 static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
252 struct gendisk *disk = bdev->bd_disk;
253 struct viodasd_device *d = disk->private_data;
255 geo->sectors = d->sectors ? d->sectors : 0;
256 geo->heads = d->tracks ? d->tracks : 64;
257 geo->cylinders = d->cylinders ? d->cylinders :
258 get_capacity(disk) / (geo->cylinders * geo->heads);
264 * Our file operations table
/*
 * Block device operations for viodasd disks: open, release and getgeo are
 * routed to the functions defined in this file.  probe_disk() stores a
 * pointer to this table in the fops field of each gendisk it sets up.
 */
266 static struct block_device_operations viodasd_fops = {
267 .owner = THIS_MODULE,
268 .open = viodasd_open,
269 .release = viodasd_release,
270 .getgeo = viodasd_getgeo,
/*
 * Finish num_sectors of a request with the given uptodate status by
 * calling end_that_request_first(), add_disk_randomness() and
 * end_that_request_last().
 * NOTE(review): the second line of the signature and the statement
 * controlled by the if are not visible in this extract; presumably the
 * function stops early while end_that_request_first() reports that part
 * of the request is still pending -- confirm.
 */
276 static void viodasd_end_request(struct request *req, int uptodate,
279 if (end_that_request_first(req, uptodate, num_sectors))
281 add_disk_randomness(req->rq_disk);
282 end_that_request_last(req, uptodate);
286 * Send an actual I/O request to OS/400
/*
 * Build and send one read or write event for req to the hosting partition.
 * The request is mapped into a scatterlist, counted in num_req_outstanding
 * under viodasd_spinlock, and then sent either with the fast hypervisor
 * call or through an event buffer carrying one dma_info entry per mapped
 * segment.  do_viodasd_request() treats a non-zero return as a failure.
 * NOTE(review): several declarations, the else branches, the test that
 * selects the fast path and the return statements are not visible in this
 * extract; the comments below describe only what is visible.
 */
288 static int send_request(struct request *req)
295 struct vioblocklpevent *bevent;
296 struct HvLpEvent *hev;
297 struct scatterlist sg[VIOMAXBLOCKDMA];
300 struct viodasd_device *d;
/* starting sector shifted left by 9, i.e. multiplied by 512 */
303 start = (u64)req->sector << 9;
/* pick the DMA direction and the event subtype from the request direction */
305 if (rq_data_dir(req) == READ) {
306 direction = DMA_FROM_DEVICE;
307 viocmd = viomajorsubtype_blockio | vioblockread;
310 direction = DMA_TO_DEVICE;
311 viocmd = viomajorsubtype_blockio | vioblockwrite;
315 d = req->rq_disk->private_data;
317 /* Now build the scatter-gather list */
/* nsg is first the count from blk_rq_map_sg, then replaced by the count dma_map_sg returns */
318 nsg = blk_rq_map_sg(req->q, req, sg);
319 nsg = dma_map_sg(d->dev, sg, nsg, direction);
/* the outstanding-request counter is only changed with viodasd_spinlock held */
321 spin_lock_irqsave(&viodasd_spinlock, flags);
322 num_req_outstanding++;
324 /* This optimization handles a single DMA block */
326 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
327 HvLpEvent_Type_VirtualIo, viocmd,
328 HvLpEvent_AckInd_DoAck,
329 HvLpEvent_AckType_ImmediateAck,
330 viopath_sourceinst(viopath_hostLp),
331 viopath_targetinst(viopath_hostLp),
/* correlation token: the request pointer, recovered in viodasd_handle_read_write() */
332 (u64)(unsigned long)req, VIOVERSION << 16,
333 ((u64)DEVICE_NO(d) << 48), start,
/* only the DMA address of the first scatterlist entry is passed on this path */
334 ((u64)sg_dma_address(&sg[0])) << 32,
337 bevent = (struct vioblocklpevent *)
338 vio_get_event_buffer(viomajorsubtype_blockio);
339 if (bevent == NULL) {
340 printk(VIOD_KERN_WARNING
341 "error allocating disk event buffer\n");
346 * Now build up the actual request. Note that we store
347 * the pointer to the request in the correlation
348 * token so we can match the response up later
350 memset(bevent, 0, sizeof(struct vioblocklpevent));
351 hev = &bevent->event;
352 hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
354 hev->xType = HvLpEvent_Type_VirtualIo;
355 hev->xSubtype = viocmd;
356 hev->xSourceLp = HvLpConfig_getLpIndex();
357 hev->xTargetLp = viopath_hostLp;
/* event size minus one: header up to dma_info plus the nsg entries in use (the assignment target is on a line not visible here) */
359 offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
360 (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
361 hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
362 hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
363 hev->xCorrelationToken = (u64)req;
364 bevent->version = VIOVERSION;
365 bevent->disk = DEVICE_NO(d);
366 bevent->u.rw_data.offset = start;
369 * Copy just the dma information from the sg list
372 for (sgindex = 0; sgindex < nsg; sgindex++) {
373 bevent->u.rw_data.dma_info[sgindex].token =
374 sg_dma_address(&sg[sgindex]);
375 bevent->u.rw_data.dma_info[sgindex].len =
376 sg_dma_len(&sg[sgindex]);
379 /* Send the request */
380 hvrc = HvCallEvent_signalLpEvent(&bevent->event);
/* the event buffer is handed back immediately after the signal call */
381 vio_free_event_buffer(viomajorsubtype_blockio, bevent);
384 if (hvrc != HvLpEvent_Rc_Good) {
385 printk(VIOD_KERN_WARNING
386 "error sending disk event to OS/400 (rc %d)\n",
390 spin_unlock_irqrestore(&viodasd_spinlock, flags);
/*
 * NOTE(review): the three statements below look like the shared failure
 * path (undo the counter, drop the lock, unmap the scatterlist); the label
 * or branch that reaches them is not visible -- confirm.
 */
394 num_req_outstanding--;
395 spin_unlock_irqrestore(&viodasd_spinlock, flags);
396 dma_unmap_sg(d->dev, sg, nsg, direction);
401 * This is the external request processing routine
/*
 * Request function for the queues created in probe_disk() (passed to
 * blk_init_queue there).  While fewer than VIOMAXREQ requests are
 * outstanding it takes the next request off the queue and hands it to
 * send_request(); requests that are not filesystem requests, or that
 * send_request() rejects, are ended with uptodate 0 for all of
 * hard_nr_sectors.
 * NOTE(review): the declaration of req and the handling of an empty queue
 * are not visible in this extract.
 */
403 static void do_viodasd_request(request_queue_t *q)
408 * If we already have the maximum number of requests
409 * outstanding to OS/400 just bail out. We'll come
412 while (num_req_outstanding < VIOMAXREQ) {
413 req = elv_next_request(q);
416 /* dequeue the current request from the queue */
417 blkdev_dequeue_request(req);
418 /* check that request contains a valid command */
419 if (!blk_fs_request(req)) {
420 viodasd_end_request(req, 0, req->hard_nr_sectors);
423 /* Try sending the request */
424 if (send_request(req) != 0)
425 viodasd_end_request(req, 0, req->hard_nr_sectors);
430 * Probe a single disk and fill in the viodasd_device structure
/*
 * Probe one disk slot.  Visible steps: send an open event for the slot and
 * wait for the reply, send a close event, then create a request queue and
 * a gendisk, configure queue limits, names, fops and capacity, and log the
 * geometry that the open reply stored in the viodasd_device.
 * NOTE(review): many lines of this function are not visible in this
 * extract (declarations, the tests on hvrc and we.rc, the retry jump, the
 * early returns, and the final registration of the disk); the comments
 * below describe only the visible statements.
 */
433 static void probe_disk(struct viodasd_device *d)
436 struct viodasd_waitevent we;
/* index of d within viodasd_devices, also sent to the host as the disk number */
437 int dev_no = DEVICE_NO(d);
439 struct request_queue *q;
443 init_completion(&we.com);
445 /* Send the open event to OS/400 */
446 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
447 HvLpEvent_Type_VirtualIo,
448 viomajorsubtype_blockio | vioblockopen,
449 HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
450 viopath_sourceinst(viopath_hostLp),
451 viopath_targetinst(viopath_hostLp),
/* correlation token: address of the on-stack waitevent */
452 (u64)(unsigned long)&we, VIOVERSION << 16,
453 ((u64)dev_no << 48) | ((u64)flags<< 32),
456 printk(VIOD_KERN_WARNING "bad rc on HV open %d\n", (int)hvrc);
460 wait_for_completion(&we.com);
465 /* try again with read only flag set */
466 flags = vioblockflags_ro;
/* max_disk comes from the open reply; report when the host has more disks than MAX_DISKNO */
469 if (we.max_disk > (MAX_DISKNO - 1)) {
474 printk(VIOD_KERN_INFO
475 "Only examining the first %d "
476 "of %d disks connected\n",
477 MAX_DISKNO, we.max_disk + 1);
481 /* Send the close event to OS/400. We DON'T expect a response */
482 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
483 HvLpEvent_Type_VirtualIo,
484 viomajorsubtype_blockio | vioblockclose,
485 HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
486 viopath_sourceinst(viopath_hostLp),
487 viopath_targetinst(viopath_hostLp),
489 ((u64)dev_no << 48) | ((u64)flags << 32),
492 printk(VIOD_KERN_WARNING
493 "bad rc sending event to OS/400 %d\n", (int)hvrc);
496 /* create the request queue for the disk */
/* each disk gets its own queue lock, stored in the viodasd_device */
497 spin_lock_init(&d->q_lock);
498 q = blk_init_queue(do_viodasd_request, &d->q_lock);
500 printk(VIOD_KERN_WARNING "cannot allocate queue for disk %d\n",
504 g = alloc_disk(1 << PARTITION_SHIFT);
506 printk(VIOD_KERN_WARNING
507 "cannot allocate disk structure for disk %d\n",
/* the queue created above is released again on this path */
509 blk_cleanup_queue(q);
/* segment and sector limits follow VIOMAXBLOCKDMA, the size of the dma_info array in an event */
514 blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
515 blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
516 blk_queue_max_sectors(q, VIODASD_MAXSECTORS);
517 g->major = VIODASD_MAJOR;
518 g->first_minor = dev_no << PARTITION_SHIFT;
/*
 * Two alternative disk names: a two-letter suffix and a one-letter suffix.
 * NOTE(review): the condition choosing between them is not visible here;
 * presumably it depends on whether dev_no is at least 26 -- confirm.
 */
520 snprintf(g->disk_name, sizeof(g->disk_name),
521 VIOD_GENHD_NAME "%c%c",
522 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
524 snprintf(g->disk_name, sizeof(g->disk_name),
525 VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
526 snprintf(g->devfs_name, sizeof(g->devfs_name),
527 "%s%d", VIOD_GENHD_DEVFS_NAME, dev_no);
528 g->fops = &viodasd_fops;
531 g->driverfs_dev = d->dev;
/* d->size shifted right by 9 gives the capacity in 512-byte units */
532 set_capacity(g, d->size >> 9);
534 printk(VIOD_KERN_INFO "disk %d: %lu sectors (%lu MB) "
535 "CHS=%d/%d/%d sector size %d%s\n",
536 dev_no, (unsigned long)(d->size >> 9),
537 (unsigned long)(d->size >> 20),
538 (int)d->cylinders, (int)d->tracks,
539 (int)d->sectors, (int)d->bytes_per_sector,
540 d->read_only ? " (RO)" : "");
542 /* register us in the global list */
546 /* returns the total number of scatterlist elements converted */
/*
 * Rebuild a scatterlist from the dma_info entries carried in a read/write
 * event so that the caller can unmap it.
 * bevent:    event whose u.rw_data.dma_info entries are read
 * sg:        output array; all VIOMAXBLOCKDMA entries are cleared first,
 *            so it must have room for that many
 * total_len: receives the sum of the entry lengths that were copied
 * NOTE(review): the local declarations, the initial value given to
 * *total_len and the return statement are not visible in this extract.
 */
547 static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
548 struct scatterlist *sg, int *total_len)
551 const struct rw_data *rw_data = &bevent->u.rw_data;
552 static const int offset =
553 offsetof(struct vioblocklpevent, u.rw_data.dma_info);
554 static const int element_size = sizeof(rw_data->dma_info[0]);
/* number of dma_info entries covered by the event size, capped at VIOMAXBLOCKDMA */
556 numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
557 if (numsg > VIOMAXBLOCKDMA)
558 numsg = VIOMAXBLOCKDMA;
561 memset(sg, 0, sizeof(sg[0]) * VIOMAXBLOCKDMA);
/* copying stops at numsg entries or at the first entry whose len is not positive */
563 for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
564 sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
565 sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
566 *total_len += rw_data->dma_info[i].len;
572 * Restart all queues, starting with the one _after_ the disk given,
573 * thus reducing the chance of starvation of higher numbered disks.
575 static void viodasd_restart_all_queues_starting_from(int first_index)
579 for (i = first_index + 1; i < MAX_DISKNO; ++i)
580 if (viodasd_devices[i].disk)
581 blk_run_queue(viodasd_devices[i].disk->queue);
582 for (i = 0; i <= first_index; ++i)
583 if (viodasd_devices[i].disk)
584 blk_run_queue(viodasd_devices[i].disk->queue);
588 * For read and write requests, decrement the number of outstanding requests,
589 * Free the DMA buffers we allocated.
/*
 * Complete a read or write whose reply event arrived from the hosting
 * partition.  Rebuilds the scatterlist from the event and unmaps it,
 * decrements num_req_outstanding under viodasd_spinlock, ends the request
 * under its queue lock, and then restarts the queues of all disks.
 * NOTE(review): some declarations (req, error, qlock), the else keyword,
 * the test guarding the error report and the return statement are not
 * visible in this extract.
 */
591 static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
593 int num_sg, num_sect, pci_direction, total_len;
595 struct scatterlist sg[VIOMAXBLOCKDMA];
596 struct HvLpEvent *event = &bevent->event;
597 unsigned long irq_flags;
598 struct viodasd_device *d;
602 num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
/* byte total from the event converted to 512-byte units */
603 num_sect = total_len >> 9;
604 if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
605 pci_direction = DMA_FROM_DEVICE;
607 pci_direction = DMA_TO_DEVICE;
/* send_request() stored the request pointer in the correlation token */
608 req = (struct request *)bevent->event.xCorrelationToken;
609 d = req->rq_disk->private_data;
611 dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
614 * Since this is running in interrupt mode, we need to make sure
615 * we're not stepping on any global I/O operations
617 spin_lock_irqsave(&viodasd_spinlock, irq_flags);
618 num_req_outstanding--;
619 spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
621 error = event->xRc != HvLpEvent_Rc_Good;
623 const struct vio_error_entry *err;
624 err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
625 printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n",
626 event->xRc, bevent->sub_result, err->msg);
/* on this path the whole request is ended, not just the sectors counted from the event */
627 num_sect = req->hard_nr_sectors;
/* the request is ended with its own queue lock held */
629 qlock = req->q->queue_lock;
630 spin_lock_irqsave(qlock, irq_flags);
631 viodasd_end_request(req, !error, num_sect);
632 spin_unlock_irqrestore(qlock, irq_flags);
634 /* Finally, try to get more requests off of this device's queue */
635 viodasd_restart_all_queues_starting_from(DEVICE_NO(d));
640 /* This routine handles incoming block LP events */
/*
 * Handler installed for viomajorsubtype_blockio events through
 * vio_setHandler() in viodasd_init().  Interrupt-type events are rejected
 * with HvLpEvent_Rc_InvalidSubtype; otherwise the minor subtype selects
 * between recording the result of an open, completing a read or write via
 * viodasd_handle_read_write(), and rejecting an unknown subtype.
 * NOTE(review): the case labels, the NULL-event test, the call that posts
 * the waitevent completion and the break/return statements are not
 * visible in this extract.
 */
641 static void handle_block_event(struct HvLpEvent *event)
643 struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
644 struct viodasd_waitevent *pwe;
647 /* Notification that a partition went away! */
649 /* First, we should NEVER get an int here...only acks */
650 if (hvlpevent_is_int(event)) {
651 printk(VIOD_KERN_WARNING
652 "Yikes! got an int in viodasd event handler!\n");
653 if (hvlpevent_need_ack(event)) {
654 event->xRc = HvLpEvent_Rc_InvalidSubtype;
655 HvCallEvent_ackLpEvent(event);
659 switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
662 * Handle a response to an open request. We get all the
663 * disk information in the response, so update it. The
664 * correlation token contains a pointer to a waitevent
665 * structure that has a completion in it. update the
666 * return code in the waitevent structure and post the
667 * completion to wake up the guy who sent the request
669 pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
670 pwe->rc = event->xRc;
671 pwe->sub_result = bevent->sub_result;
672 if (event->xRc == HvLpEvent_Rc_Good) {
673 const struct open_data *data = &bevent->u.open_data;
/* NOTE(review): bevent->disk indexes viodasd_devices with no range check visible here */
674 struct viodasd_device *device =
675 &viodasd_devices[bevent->disk];
677 bevent->flags & vioblockflags_ro;
678 device->size = data->disk_size;
679 device->cylinders = data->cylinders;
680 device->tracks = data->tracks;
681 device->sectors = data->sectors;
682 device->bytes_per_sector = data->bytes_per_sector;
683 pwe->max_disk = data->max_disk;
691 viodasd_handle_read_write(bevent);
/* NOTE(review): unlike the other messages in this file, this one has no trailing newline */
695 printk(VIOD_KERN_WARNING "invalid subtype!");
696 if (hvlpevent_need_ack(event)) {
697 event->xRc = HvLpEvent_Rc_InvalidSubtype;
698 HvCallEvent_ackLpEvent(event);
704 * Get the driver to reprobe for more disks.
/*
 * Store handler of the write-only driver attribute named probe (see the
 * DRIVER_ATTR line below).  It walks every slot of viodasd_devices.
 * NOTE(review): the rest of the signature, the loop body and the return
 * value are not visible in this extract; going by the comment above this
 * function, the loop presumably probes slots that have no disk yet --
 * confirm.
 */
706 static ssize_t probe_disks(struct device_driver *drv, const char *buf,
709 struct viodasd_device *d;
711 for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
/* S_IWUSR with a NULL show handler: the attribute can be written but not read */
717 static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
/*
 * Probe callback of viodasd_driver.  The unit address of the vio device
 * selects the slot in viodasd_devices.
 * NOTE(review): the remainder of the body is not visible in this extract,
 * and no range check on unit_address is visible either.
 */
719 static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
721 struct viodasd_device *d = &viodasd_devices[vdev->unit_address];
/*
 * Remove callback of viodasd_driver.  Looks up the slot for the vio device
 * by unit address, unregisters its gendisk and cleans up its queue.
 * NOTE(review): any guard around these calls, the remaining cleanup and
 * the return statement are not visible in this extract.
 */
730 static int viodasd_remove(struct vio_dev *vdev)
732 struct viodasd_device *d;
734 d = &viodasd_devices[vdev->unit_address];
/* the gendisk is unregistered before its queue is cleaned up */
736 del_gendisk(d->disk);
737 blk_cleanup_queue(d->disk->queue);
746 * viodasd_device_table: Used by vio.c to match devices that we
/*
 * Device ids this driver binds to; referenced by viodasd_driver.id_table
 * and exported with MODULE_DEVICE_TABLE.
 * NOTE(review): the terminating entry and closing brace are not visible
 * in this extract.
 */
749 static struct vio_device_id viodasd_device_table[] __devinitdata = {
750 { "block", "IBM,iSeries-viodasd" },
753 MODULE_DEVICE_TABLE(vio, viodasd_device_table);
/*
 * The vio driver object: ties the id table to the probe and remove
 * callbacks.  It is registered in viodasd_init() and unregistered in
 * viodasd_exit().
 * NOTE(review): the lines between .remove and .owner are not visible in
 * this extract.
 */
755 static struct vio_driver viodasd_driver = {
756 .id_table = viodasd_device_table,
757 .probe = viodasd_probe,
758 .remove = viodasd_remove,
761 .owner = THIS_MODULE,
766 * Initialize the whole device driver. Handle module and non-module
/*
 * Module initialisation.  Visible order of operations: check that a
 * hosting partition is known, register the block major, open the path to
 * the hosting partition, install handle_block_event() as the block I/O
 * event handler, register the vio driver and create the probe attribute
 * file.
 * NOTE(review): the declaration of rc, the statement under the first
 * hostLp test, the remaining arguments of several calls, the test on rc
 * and all return statements are not visible in this extract.
 */
769 static int __init viodasd_init(void)
773 /* Try to open to our host lp */
774 if (viopath_hostLp == HvLpIndexInvalid)
777 if (viopath_hostLp == HvLpIndexInvalid) {
778 printk(VIOD_KERN_WARNING "invalid hosting partition\n");
782 printk(VIOD_KERN_INFO "vers " VIOD_VERS ", hosting partition %d\n",
785 /* register the block device */
786 if (register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME)) {
787 printk(VIOD_KERN_WARNING
788 "Unable to get major number %d for %s\n",
789 VIODASD_MAJOR, VIOD_GENHD_NAME);
792 /* Actually open the path to the hosting partition */
793 if (viopath_open(viopath_hostLp, viomajorsubtype_blockio,
795 printk(VIOD_KERN_WARNING
796 "error opening path to host partition %d\n",
/* the block major registered above is given back when the path cannot be opened */
798 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
802 /* Initialize our request handler */
803 vio_setHandler(viomajorsubtype_blockio, handle_block_event);
805 rc = vio_register_driver(&viodasd_driver);
807 driver_create_file(&viodasd_driver.driver, &driver_attr_probe);
810 module_init(viodasd_init);
/*
 * Module teardown: removes the probe attribute file, unregisters the vio
 * driver, clears the block I/O event handler, releases the block major
 * and closes the path to the hosting partition.
 * NOTE(review): viodasd_init() registers the major before opening the
 * path, so a strict reverse order would close the path before releasing
 * the major -- confirm whether the order here is intentional.
 */
812 void viodasd_exit(void)
814 driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
815 vio_unregister_driver(&viodasd_driver);
816 vio_clearHandler(viomajorsubtype_blockio);
817 unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
818 viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
821 module_exit(viodasd_exit);