Merge branch 'upstream-davem' of master.kernel.org:/pub/scm/linux/kernel/git/linville...
[linux-2.6] / drivers / block / xen-blkfront.c
1 /*
2  * blkfront.c
3  *
4  * XenLinux virtual block device driver.
5  *
6  * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7  * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8  * Copyright (c) 2004, Christian Limpach
9  * Copyright (c) 2004, Andrew Warfield
10  * Copyright (c) 2005, Christopher Clark
11  * Copyright (c) 2005, XenSource Ltd
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version 2
15  * as published by the Free Software Foundation; or, when distributed
16  * separately from the Linux kernel or incorporated into other
17  * software packages, subject to the following license:
18  *
19  * Permission is hereby granted, free of charge, to any person obtaining a copy
20  * of this source file (the "Software"), to deal in the Software without
21  * restriction, including without limitation the rights to use, copy, modify,
22  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
23  * and to permit persons to whom the Software is furnished to do so, subject to
24  * the following conditions:
25  *
26  * The above copyright notice and this permission notice shall be included in
27  * all copies or substantial portions of the Software.
28  *
29  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
34  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35  * IN THE SOFTWARE.
36  */
37
38 #include <linux/interrupt.h>
39 #include <linux/blkdev.h>
40 #include <linux/module.h>
41
42 #include <xen/xenbus.h>
43 #include <xen/grant_table.h>
44 #include <xen/events.h>
45 #include <xen/page.h>
46
47 #include <xen/interface/grant_table.h>
48 #include <xen/interface/io/blkif.h>
49
50 #include <asm/xen/hypervisor.h>
51
/*
 * Connection state of one frontend instance.
 *
 * DISCONNECTED is set by blkfront_probe() and by blkif_free(info, 0),
 * CONNECTED by blkfront_connect() and blkif_recover(), and SUSPENDED by
 * blkif_free(info, 1).
 */
enum blkif_state {
	BLKIF_STATE_DISCONNECTED = 0,
	BLKIF_STATE_CONNECTED,
	BLKIF_STATE_SUSPENDED,
};
57
58 struct blk_shadow {
59         struct blkif_request req;
60         unsigned long request;
61         unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
62 };
63
64 static struct block_device_operations xlvbd_block_fops;
65
66 #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
67
68 /*
69  * We have one of these per vbd, whether ide, scsi or 'other'.  They
70  * hang in private_data off the gendisk structure. We may end up
71  * putting all kinds of interesting stuff here :-)
72  */
73 struct blkfront_info
74 {
75         struct xenbus_device *xbdev;
76         dev_t dev;
77         struct gendisk *gd;
78         int vdevice;
79         blkif_vdev_t handle;
80         enum blkif_state connected;
81         int ring_ref;
82         struct blkif_front_ring ring;
83         unsigned int evtchn, irq;
84         struct request_queue *rq;
85         struct work_struct work;
86         struct gnttab_free_callback callback;
87         struct blk_shadow shadow[BLK_RING_SIZE];
88         unsigned long shadow_free;
89         int feature_barrier;
90
91         /**
92          * The number of people holding this device open.  We won't allow a
93          * hot-unplug unless this is 0.
94          */
95         int users;
96 };
97
/* Single lock shared by every request queue and the interrupt handler. */
static DEFINE_SPINLOCK(blkif_io_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)

/* Value stored in ring_ref while no ring page is granted. */
#define GRANT_INVALID_REF	0

/* Minor numbers reserved per whole disk. */
#define PARTS_PER_DISK		16

/* Split a virtual-device number into its upper bits and low byte. */
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

/* Device name prefix as it appears under /dev. */
#define DEV_NAME	"xvd"

/* Information about our VBDs. */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);
114
115 static int get_id_from_freelist(struct blkfront_info *info)
116 {
117         unsigned long free = info->shadow_free;
118         BUG_ON(free > BLK_RING_SIZE);
119         info->shadow_free = info->shadow[free].req.id;
120         info->shadow[free].req.id = 0x0fffffee; /* debug */
121         return free;
122 }
123
124 static void add_id_to_freelist(struct blkfront_info *info,
125                                unsigned long id)
126 {
127         info->shadow[id].req.id  = info->shadow_free;
128         info->shadow[id].request = 0;
129         info->shadow_free = id;
130 }
131
132 static void blkif_restart_queue_callback(void *arg)
133 {
134         struct blkfront_info *info = (struct blkfront_info *)arg;
135         schedule_work(&info->work);
136 }
137
138 /*
139  * blkif_queue_request
140  *
141  * request block io
142  *
143  * id: for guest use only.
144  * operation: BLKIF_OP_{READ,WRITE,PROBE}
145  * buffer: buffer to read/write into. this should be a
146  *   virtual address in the guest os.
147  */
148 static int blkif_queue_request(struct request *req)
149 {
150         struct blkfront_info *info = req->rq_disk->private_data;
151         unsigned long buffer_mfn;
152         struct blkif_request *ring_req;
153         struct bio *bio;
154         struct bio_vec *bvec;
155         int idx;
156         unsigned long id;
157         unsigned int fsect, lsect;
158         int ref;
159         grant_ref_t gref_head;
160
161         if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
162                 return 1;
163
164         if (gnttab_alloc_grant_references(
165                 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
166                 gnttab_request_free_callback(
167                         &info->callback,
168                         blkif_restart_queue_callback,
169                         info,
170                         BLKIF_MAX_SEGMENTS_PER_REQUEST);
171                 return 1;
172         }
173
174         /* Fill out a communications ring structure. */
175         ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
176         id = get_id_from_freelist(info);
177         info->shadow[id].request = (unsigned long)req;
178
179         ring_req->id = id;
180         ring_req->sector_number = (blkif_sector_t)req->sector;
181         ring_req->handle = info->handle;
182
183         ring_req->operation = rq_data_dir(req) ?
184                 BLKIF_OP_WRITE : BLKIF_OP_READ;
185         if (blk_barrier_rq(req))
186                 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
187
188         ring_req->nr_segments = 0;
189         rq_for_each_bio (bio, req) {
190                 bio_for_each_segment (bvec, bio, idx) {
191                         BUG_ON(ring_req->nr_segments
192                                == BLKIF_MAX_SEGMENTS_PER_REQUEST);
193                         buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
194                         fsect = bvec->bv_offset >> 9;
195                         lsect = fsect + (bvec->bv_len >> 9) - 1;
196                         /* install a grant reference. */
197                         ref = gnttab_claim_grant_reference(&gref_head);
198                         BUG_ON(ref == -ENOSPC);
199
200                         gnttab_grant_foreign_access_ref(
201                                 ref,
202                                 info->xbdev->otherend_id,
203                                 buffer_mfn,
204                                 rq_data_dir(req) );
205
206                         info->shadow[id].frame[ring_req->nr_segments] =
207                                 mfn_to_pfn(buffer_mfn);
208
209                         ring_req->seg[ring_req->nr_segments] =
210                                 (struct blkif_request_segment) {
211                                         .gref       = ref,
212                                         .first_sect = fsect,
213                                         .last_sect  = lsect };
214
215                         ring_req->nr_segments++;
216                 }
217         }
218
219         info->ring.req_prod_pvt++;
220
221         /* Keep a private copy so we can reissue requests when recovering. */
222         info->shadow[id].req = *ring_req;
223
224         gnttab_free_grant_references(gref_head);
225
226         return 0;
227 }
228
229
230 static inline void flush_requests(struct blkfront_info *info)
231 {
232         int notify;
233
234         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
235
236         if (notify)
237                 notify_remote_via_irq(info->irq);
238 }
239
240 /*
241  * do_blkif_request
242  *  read a block; request is in a request queue
243  */
244 static void do_blkif_request(request_queue_t *rq)
245 {
246         struct blkfront_info *info = NULL;
247         struct request *req;
248         int queued;
249
250         pr_debug("Entered do_blkif_request\n");
251
252         queued = 0;
253
254         while ((req = elv_next_request(rq)) != NULL) {
255                 info = req->rq_disk->private_data;
256                 if (!blk_fs_request(req)) {
257                         end_request(req, 0);
258                         continue;
259                 }
260
261                 if (RING_FULL(&info->ring))
262                         goto wait;
263
264                 pr_debug("do_blk_req %p: cmd %p, sec %lx, "
265                          "(%u/%li) buffer:%p [%s]\n",
266                          req, req->cmd, (unsigned long)req->sector,
267                          req->current_nr_sectors,
268                          req->nr_sectors, req->buffer,
269                          rq_data_dir(req) ? "write" : "read");
270
271
272                 blkdev_dequeue_request(req);
273                 if (blkif_queue_request(req)) {
274                         blk_requeue_request(rq, req);
275 wait:
276                         /* Avoid pointless unplugs. */
277                         blk_stop_queue(rq);
278                         break;
279                 }
280
281                 queued++;
282         }
283
284         if (queued != 0)
285                 flush_requests(info);
286 }
287
288 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
289 {
290         request_queue_t *rq;
291
292         rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
293         if (rq == NULL)
294                 return -1;
295
296         elevator_init(rq, "noop");
297
298         /* Hard sector size and max sectors impersonate the equiv. hardware. */
299         blk_queue_hardsect_size(rq, sector_size);
300         blk_queue_max_sectors(rq, 512);
301
302         /* Each segment in a request is up to an aligned page in size. */
303         blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
304         blk_queue_max_segment_size(rq, PAGE_SIZE);
305
306         /* Ensure a merged request will fit in a single I/O ring slot. */
307         blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
308         blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
309
310         /* Make sure buffer addresses are sector-aligned. */
311         blk_queue_dma_alignment(rq, 511);
312
313         gd->queue = rq;
314
315         return 0;
316 }
317
318
319 static int xlvbd_barrier(struct blkfront_info *info)
320 {
321         int err;
322
323         err = blk_queue_ordered(info->rq,
324                                 info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
325                                 NULL);
326
327         if (err)
328                 return err;
329
330         printk(KERN_INFO "blkfront: %s: barriers %s\n",
331                info->gd->disk_name,
332                info->feature_barrier ? "enabled" : "disabled");
333         return 0;
334 }
335
336
337 static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
338                                int vdevice, u16 vdisk_info, u16 sector_size,
339                                struct blkfront_info *info)
340 {
341         struct gendisk *gd;
342         int nr_minors = 1;
343         int err = -ENODEV;
344
345         BUG_ON(info->gd != NULL);
346         BUG_ON(info->rq != NULL);
347
348         if ((minor % PARTS_PER_DISK) == 0)
349                 nr_minors = PARTS_PER_DISK;
350
351         gd = alloc_disk(nr_minors);
352         if (gd == NULL)
353                 goto out;
354
355         if (nr_minors > 1)
356                 sprintf(gd->disk_name, "%s%c", DEV_NAME,
357                         'a' + minor / PARTS_PER_DISK);
358         else
359                 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
360                         'a' + minor / PARTS_PER_DISK,
361                         minor % PARTS_PER_DISK);
362
363         gd->major = XENVBD_MAJOR;
364         gd->first_minor = minor;
365         gd->fops = &xlvbd_block_fops;
366         gd->private_data = info;
367         gd->driverfs_dev = &(info->xbdev->dev);
368         set_capacity(gd, capacity);
369
370         if (xlvbd_init_blk_queue(gd, sector_size)) {
371                 del_gendisk(gd);
372                 goto out;
373         }
374
375         info->rq = gd->queue;
376         info->gd = gd;
377
378         if (info->feature_barrier)
379                 xlvbd_barrier(info);
380
381         if (vdisk_info & VDISK_READONLY)
382                 set_disk_ro(gd, 1);
383
384         if (vdisk_info & VDISK_REMOVABLE)
385                 gd->flags |= GENHD_FL_REMOVABLE;
386
387         if (vdisk_info & VDISK_CDROM)
388                 gd->flags |= GENHD_FL_CD;
389
390         return 0;
391
392  out:
393         return err;
394 }
395
396 static void kick_pending_request_queues(struct blkfront_info *info)
397 {
398         if (!RING_FULL(&info->ring)) {
399                 /* Re-enable calldowns. */
400                 blk_start_queue(info->rq);
401                 /* Kick things off immediately. */
402                 do_blkif_request(info->rq);
403         }
404 }
405
406 static void blkif_restart_queue(struct work_struct *work)
407 {
408         struct blkfront_info *info = container_of(work, struct blkfront_info, work);
409
410         spin_lock_irq(&blkif_io_lock);
411         if (info->connected == BLKIF_STATE_CONNECTED)
412                 kick_pending_request_queues(info);
413         spin_unlock_irq(&blkif_io_lock);
414 }
415
416 static void blkif_free(struct blkfront_info *info, int suspend)
417 {
418         /* Prevent new requests being issued until we fix things up. */
419         spin_lock_irq(&blkif_io_lock);
420         info->connected = suspend ?
421                 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
422         /* No more blkif_request(). */
423         if (info->rq)
424                 blk_stop_queue(info->rq);
425         /* No more gnttab callback work. */
426         gnttab_cancel_free_callback(&info->callback);
427         spin_unlock_irq(&blkif_io_lock);
428
429         /* Flush gnttab callback work. Must be done with no locks held. */
430         flush_scheduled_work();
431
432         /* Free resources associated with old device channel. */
433         if (info->ring_ref != GRANT_INVALID_REF) {
434                 gnttab_end_foreign_access(info->ring_ref, 0,
435                                           (unsigned long)info->ring.sring);
436                 info->ring_ref = GRANT_INVALID_REF;
437                 info->ring.sring = NULL;
438         }
439         if (info->irq)
440                 unbind_from_irqhandler(info->irq, info);
441         info->evtchn = info->irq = 0;
442
443 }
444
445 static void blkif_completion(struct blk_shadow *s)
446 {
447         int i;
448         for (i = 0; i < s->req.nr_segments; i++)
449                 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
450 }
451
452 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
453 {
454         struct request *req;
455         struct blkif_response *bret;
456         RING_IDX i, rp;
457         unsigned long flags;
458         struct blkfront_info *info = (struct blkfront_info *)dev_id;
459         int uptodate;
460
461         spin_lock_irqsave(&blkif_io_lock, flags);
462
463         if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
464                 spin_unlock_irqrestore(&blkif_io_lock, flags);
465                 return IRQ_HANDLED;
466         }
467
468  again:
469         rp = info->ring.sring->rsp_prod;
470         rmb(); /* Ensure we see queued responses up to 'rp'. */
471
472         for (i = info->ring.rsp_cons; i != rp; i++) {
473                 unsigned long id;
474                 int ret;
475
476                 bret = RING_GET_RESPONSE(&info->ring, i);
477                 id   = bret->id;
478                 req  = (struct request *)info->shadow[id].request;
479
480                 blkif_completion(&info->shadow[id]);
481
482                 add_id_to_freelist(info, id);
483
484                 uptodate = (bret->status == BLKIF_RSP_OKAY);
485                 switch (bret->operation) {
486                 case BLKIF_OP_WRITE_BARRIER:
487                         if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
488                                 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
489                                        info->gd->disk_name);
490                                 uptodate = -EOPNOTSUPP;
491                                 info->feature_barrier = 0;
492                                 xlvbd_barrier(info);
493                         }
494                         /* fall through */
495                 case BLKIF_OP_READ:
496                 case BLKIF_OP_WRITE:
497                         if (unlikely(bret->status != BLKIF_RSP_OKAY))
498                                 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
499                                         "request: %x\n", bret->status);
500
501                         ret = end_that_request_first(req, uptodate,
502                                 req->hard_nr_sectors);
503                         BUG_ON(ret);
504                         end_that_request_last(req, uptodate);
505                         break;
506                 default:
507                         BUG();
508                 }
509         }
510
511         info->ring.rsp_cons = i;
512
513         if (i != info->ring.req_prod_pvt) {
514                 int more_to_do;
515                 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
516                 if (more_to_do)
517                         goto again;
518         } else
519                 info->ring.sring->rsp_event = i + 1;
520
521         kick_pending_request_queues(info);
522
523         spin_unlock_irqrestore(&blkif_io_lock, flags);
524
525         return IRQ_HANDLED;
526 }
527
528
529 static int setup_blkring(struct xenbus_device *dev,
530                          struct blkfront_info *info)
531 {
532         struct blkif_sring *sring;
533         int err;
534
535         info->ring_ref = GRANT_INVALID_REF;
536
537         sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
538         if (!sring) {
539                 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
540                 return -ENOMEM;
541         }
542         SHARED_RING_INIT(sring);
543         FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
544
545         err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
546         if (err < 0) {
547                 free_page((unsigned long)sring);
548                 info->ring.sring = NULL;
549                 goto fail;
550         }
551         info->ring_ref = err;
552
553         err = xenbus_alloc_evtchn(dev, &info->evtchn);
554         if (err)
555                 goto fail;
556
557         err = bind_evtchn_to_irqhandler(info->evtchn,
558                                         blkif_interrupt,
559                                         IRQF_SAMPLE_RANDOM, "blkif", info);
560         if (err <= 0) {
561                 xenbus_dev_fatal(dev, err,
562                                  "bind_evtchn_to_irqhandler failed");
563                 goto fail;
564         }
565         info->irq = err;
566
567         return 0;
568 fail:
569         blkif_free(info, 0);
570         return err;
571 }
572
573
574 /* Common code used when first setting up, and when resuming. */
575 static int talk_to_backend(struct xenbus_device *dev,
576                            struct blkfront_info *info)
577 {
578         const char *message = NULL;
579         struct xenbus_transaction xbt;
580         int err;
581
582         /* Create shared ring, alloc event channel. */
583         err = setup_blkring(dev, info);
584         if (err)
585                 goto out;
586
587 again:
588         err = xenbus_transaction_start(&xbt);
589         if (err) {
590                 xenbus_dev_fatal(dev, err, "starting transaction");
591                 goto destroy_blkring;
592         }
593
594         err = xenbus_printf(xbt, dev->nodename,
595                             "ring-ref", "%u", info->ring_ref);
596         if (err) {
597                 message = "writing ring-ref";
598                 goto abort_transaction;
599         }
600         err = xenbus_printf(xbt, dev->nodename,
601                             "event-channel", "%u", info->evtchn);
602         if (err) {
603                 message = "writing event-channel";
604                 goto abort_transaction;
605         }
606
607         err = xenbus_transaction_end(xbt, 0);
608         if (err) {
609                 if (err == -EAGAIN)
610                         goto again;
611                 xenbus_dev_fatal(dev, err, "completing transaction");
612                 goto destroy_blkring;
613         }
614
615         xenbus_switch_state(dev, XenbusStateInitialised);
616
617         return 0;
618
619  abort_transaction:
620         xenbus_transaction_end(xbt, 1);
621         if (message)
622                 xenbus_dev_fatal(dev, err, "%s", message);
623  destroy_blkring:
624         blkif_free(info, 0);
625  out:
626         return err;
627 }
628
629
630 /**
631  * Entry point to this code when a new device is created.  Allocate the basic
632  * structures and the ring buffer for communication with the backend, and
633  * inform the backend of the appropriate details for those.  Switch to
634  * Initialised state.
635  */
636 static int blkfront_probe(struct xenbus_device *dev,
637                           const struct xenbus_device_id *id)
638 {
639         int err, vdevice, i;
640         struct blkfront_info *info;
641
642         /* FIXME: Use dynamic device id if this is not set. */
643         err = xenbus_scanf(XBT_NIL, dev->nodename,
644                            "virtual-device", "%i", &vdevice);
645         if (err != 1) {
646                 xenbus_dev_fatal(dev, err, "reading virtual-device");
647                 return err;
648         }
649
650         info = kzalloc(sizeof(*info), GFP_KERNEL);
651         if (!info) {
652                 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
653                 return -ENOMEM;
654         }
655
656         info->xbdev = dev;
657         info->vdevice = vdevice;
658         info->connected = BLKIF_STATE_DISCONNECTED;
659         INIT_WORK(&info->work, blkif_restart_queue);
660
661         for (i = 0; i < BLK_RING_SIZE; i++)
662                 info->shadow[i].req.id = i+1;
663         info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
664
665         /* Front end dir is a number, which is used as the id. */
666         info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
667         dev->dev.driver_data = info;
668
669         err = talk_to_backend(dev, info);
670         if (err) {
671                 kfree(info);
672                 dev->dev.driver_data = NULL;
673                 return err;
674         }
675
676         return 0;
677 }
678
679
680 static int blkif_recover(struct blkfront_info *info)
681 {
682         int i;
683         struct blkif_request *req;
684         struct blk_shadow *copy;
685         int j;
686
687         /* Stage 1: Make a safe copy of the shadow state. */
688         copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
689         if (!copy)
690                 return -ENOMEM;
691         memcpy(copy, info->shadow, sizeof(info->shadow));
692
693         /* Stage 2: Set up free list. */
694         memset(&info->shadow, 0, sizeof(info->shadow));
695         for (i = 0; i < BLK_RING_SIZE; i++)
696                 info->shadow[i].req.id = i+1;
697         info->shadow_free = info->ring.req_prod_pvt;
698         info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
699
700         /* Stage 3: Find pending requests and requeue them. */
701         for (i = 0; i < BLK_RING_SIZE; i++) {
702                 /* Not in use? */
703                 if (copy[i].request == 0)
704                         continue;
705
706                 /* Grab a request slot and copy shadow state into it. */
707                 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
708                 *req = copy[i].req;
709
710                 /* We get a new request id, and must reset the shadow state. */
711                 req->id = get_id_from_freelist(info);
712                 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
713
714                 /* Rewrite any grant references invalidated by susp/resume. */
715                 for (j = 0; j < req->nr_segments; j++)
716                         gnttab_grant_foreign_access_ref(
717                                 req->seg[j].gref,
718                                 info->xbdev->otherend_id,
719                                 pfn_to_mfn(info->shadow[req->id].frame[j]),
720                                 rq_data_dir(
721                                         (struct request *)
722                                         info->shadow[req->id].request));
723                 info->shadow[req->id].req = *req;
724
725                 info->ring.req_prod_pvt++;
726         }
727
728         kfree(copy);
729
730         xenbus_switch_state(info->xbdev, XenbusStateConnected);
731
732         spin_lock_irq(&blkif_io_lock);
733
734         /* Now safe for us to use the shared ring */
735         info->connected = BLKIF_STATE_CONNECTED;
736
737         /* Send off requeued requests */
738         flush_requests(info);
739
740         /* Kick any other new requests queued since we resumed */
741         kick_pending_request_queues(info);
742
743         spin_unlock_irq(&blkif_io_lock);
744
745         return 0;
746 }
747
748 /**
749  * We are reconnecting to the backend, due to a suspend/resume, or a backend
750  * driver restart.  We tear down our blkif structure and recreate it, but
751  * leave the device-layer structures intact so that this is transparent to the
752  * rest of the kernel.
753  */
754 static int blkfront_resume(struct xenbus_device *dev)
755 {
756         struct blkfront_info *info = dev->dev.driver_data;
757         int err;
758
759         dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
760
761         blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
762
763         err = talk_to_backend(dev, info);
764         if (info->connected == BLKIF_STATE_SUSPENDED && !err)
765                 err = blkif_recover(info);
766
767         return err;
768 }
769
770
771 /*
772  * Invoked when the backend is finally 'ready' (and has told produced
773  * the details about the physical device - #sectors, size, etc).
774  */
775 static void blkfront_connect(struct blkfront_info *info)
776 {
777         unsigned long long sectors;
778         unsigned long sector_size;
779         unsigned int binfo;
780         int err;
781
782         if ((info->connected == BLKIF_STATE_CONNECTED) ||
783             (info->connected == BLKIF_STATE_SUSPENDED) )
784                 return;
785
786         dev_dbg(&info->xbdev->dev, "%s:%s.\n",
787                 __func__, info->xbdev->otherend);
788
789         err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
790                             "sectors", "%llu", &sectors,
791                             "info", "%u", &binfo,
792                             "sector-size", "%lu", &sector_size,
793                             NULL);
794         if (err) {
795                 xenbus_dev_fatal(info->xbdev, err,
796                                  "reading backend fields at %s",
797                                  info->xbdev->otherend);
798                 return;
799         }
800
801         err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
802                             "feature-barrier", "%lu", &info->feature_barrier,
803                             NULL);
804         if (err)
805                 info->feature_barrier = 0;
806
807         err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
808                                   sectors, info->vdevice,
809                                   binfo, sector_size, info);
810         if (err) {
811                 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
812                                  info->xbdev->otherend);
813                 return;
814         }
815
816         xenbus_switch_state(info->xbdev, XenbusStateConnected);
817
818         /* Kick pending requests. */
819         spin_lock_irq(&blkif_io_lock);
820         info->connected = BLKIF_STATE_CONNECTED;
821         kick_pending_request_queues(info);
822         spin_unlock_irq(&blkif_io_lock);
823
824         add_disk(info->gd);
825 }
826
827 /**
828  * Handle the change of state of the backend to Closing.  We must delete our
829  * device-layer structures now, to ensure that writes are flushed through to
830  * the backend.  Once is this done, we can switch to Closed in
831  * acknowledgement.
832  */
833 static void blkfront_closing(struct xenbus_device *dev)
834 {
835         struct blkfront_info *info = dev->dev.driver_data;
836         unsigned long flags;
837
838         dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
839
840         if (info->rq == NULL)
841                 goto out;
842
843         spin_lock_irqsave(&blkif_io_lock, flags);
844
845         del_gendisk(info->gd);
846
847         /* No more blkif_request(). */
848         blk_stop_queue(info->rq);
849
850         /* No more gnttab callback work. */
851         gnttab_cancel_free_callback(&info->callback);
852         spin_unlock_irqrestore(&blkif_io_lock, flags);
853
854         /* Flush gnttab callback work. Must be done with no locks held. */
855         flush_scheduled_work();
856
857         blk_cleanup_queue(info->rq);
858         info->rq = NULL;
859
860  out:
861         xenbus_frontend_closed(dev);
862 }
863
864 /**
865  * Callback received when the backend's state changes.
866  */
867 static void backend_changed(struct xenbus_device *dev,
868                             enum xenbus_state backend_state)
869 {
870         struct blkfront_info *info = dev->dev.driver_data;
871         struct block_device *bd;
872
873         dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
874
875         switch (backend_state) {
876         case XenbusStateInitialising:
877         case XenbusStateInitWait:
878         case XenbusStateInitialised:
879         case XenbusStateUnknown:
880         case XenbusStateClosed:
881                 break;
882
883         case XenbusStateConnected:
884                 blkfront_connect(info);
885                 break;
886
887         case XenbusStateClosing:
888                 bd = bdget(info->dev);
889                 if (bd == NULL)
890                         xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
891
892                 mutex_lock(&bd->bd_mutex);
893                 if (info->users > 0)
894                         xenbus_dev_error(dev, -EBUSY,
895                                          "Device in use; refusing to close");
896                 else
897                         blkfront_closing(dev);
898                 mutex_unlock(&bd->bd_mutex);
899                 bdput(bd);
900                 break;
901         }
902 }
903
904 static int blkfront_remove(struct xenbus_device *dev)
905 {
906         struct blkfront_info *info = dev->dev.driver_data;
907
908         dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
909
910         blkif_free(info, 0);
911
912         kfree(info);
913
914         return 0;
915 }
916
917 static int blkif_open(struct inode *inode, struct file *filep)
918 {
919         struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
920         info->users++;
921         return 0;
922 }
923
924 static int blkif_release(struct inode *inode, struct file *filep)
925 {
926         struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
927         info->users--;
928         if (info->users == 0) {
929                 /* Check whether we have been instructed to close.  We will
930                    have ignored this request initially, as the device was
931                    still mounted. */
932                 struct xenbus_device *dev = info->xbdev;
933                 enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
934
935                 if (state == XenbusStateClosing)
936                         blkfront_closing(dev);
937         }
938         return 0;
939 }
940
941 static struct block_device_operations xlvbd_block_fops =
942 {
943         .owner = THIS_MODULE,
944         .open = blkif_open,
945         .release = blkif_release,
946 };
947
948
949 static struct xenbus_device_id blkfront_ids[] = {
950         { "vbd" },
951         { "" }
952 };
953
954 static struct xenbus_driver blkfront = {
955         .name = "vbd",
956         .owner = THIS_MODULE,
957         .ids = blkfront_ids,
958         .probe = blkfront_probe,
959         .remove = blkfront_remove,
960         .resume = blkfront_resume,
961         .otherend_changed = backend_changed,
962 };
963
964 static int __init xlblk_init(void)
965 {
966         if (!is_running_on_xen())
967                 return -ENODEV;
968
969         if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
970                 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
971                        XENVBD_MAJOR, DEV_NAME);
972                 return -ENODEV;
973         }
974
975         return xenbus_register_frontend(&blkfront);
976 }
977 module_init(xlblk_init);
978
979
980 static void xlblk_exit(void)
981 {
982         return xenbus_unregister_driver(&blkfront);
983 }
984 module_exit(xlblk_exit);
985
986 MODULE_DESCRIPTION("Xen virtual block device frontend");
987 MODULE_LICENSE("GPL");
988 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);