[BLOCK] limit request_fn recursion
[linux-2.6] / block / elevator.c
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 *  when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq))
                return 0;

        /*
         * different data direction or already started, don't merge
         */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return 0;

        /*
         * same device and no special stuff set, merge is ok
         */
        if (rq->rq_disk == bio->bi_bdev->bd_disk &&
            !rq->waiting && !rq->special)
                return 1;

        return 0;
}
EXPORT_SYMBOL(elv_rq_merge_ok);

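/*
 * Classify a possible merge of bio into __rq: ELEVATOR_BACK_MERGE if the
 * bio starts exactly where __rq ends, ELEVATOR_FRONT_MERGE if it ends
 * exactly where __rq starts, ELEVATOR_NO_MERGE otherwise.
 */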
static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
        int ret = ELEVATOR_NO_MERGE;

        /*
         * we can merge and sequence is ok, check if it's possible
         */
        if (elv_rq_merge_ok(__rq, bio)) {
                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
                        ret = ELEVATOR_BACK_MERGE;
                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
                        ret = ELEVATOR_FRONT_MERGE;
        }

        return ret;
}

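/*
 * Find a registered elevator type by name. Callers are expected to hold
 * elv_list_lock while walking elv_list.
 */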
static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e = NULL;
        struct list_head *entry;

        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);

                if (!strcmp(__e->elevator_name, name)) {
                        e = __e;
                        break;
                }
        }

        return e;
}

static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

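/*
 * Look up an elevator type and take a reference on the module that
 * provides it, so it cannot be unloaded while in use. Returns NULL if
 * the type is unknown or the module reference cannot be taken.
 */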
static struct elevator_type *elevator_get(const char *name)
{
        struct elevator_type *e;

        spin_lock_irq(&elv_list_lock);

        e = elevator_find(name);
        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock_irq(&elv_list_lock);

        return e;
}

static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
{
        int ret = 0;

        q->elevator = eq;

        if (eq->ops->elevator_init_fn)
                ret = eq->ops->elevator_init_fn(q, eq);

        return ret;
}

static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        if (!strcmp(str, "as"))
                strcpy(chosen_elevator, "anticipatory");
        else
                strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

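/*
 * Allocate and initialise an elevator_queue for elevator type e. On
 * allocation failure the reference on e taken by the caller (via
 * elevator_get()) is dropped here, so callers need not clean it up.
 */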
static elevator_t *elevator_alloc(struct elevator_type *e)
{
        elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
        if (eq) {
                memset(eq, 0, sizeof(*eq));
                eq->ops = &e->ops;
                eq->elevator_type = e;
                kobject_init(&eq->kobj);
                snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
                eq->kobj.ktype = &elv_ktype;
                mutex_init(&eq->sysfs_lock);
        } else {
                elevator_put(e);
        }
        return eq;
}

static void elevator_release(struct kobject *kobj)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        elevator_put(e->elevator_type);
        kfree(e);
}

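/*
 * Pick and attach an io scheduler for queue q. The scheduler is chosen
 * in this order: the name passed in by the driver, the "elevator=" boot
 * parameter, CONFIG_DEFAULT_IOSCHED, and finally "noop" as a last resort.
 */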
int elevator_init(request_queue_t *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name && !(e = elevator_get(name)))
                return -EINVAL;

        if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
                printk("I/O scheduler %s not found\n", chosen_elevator);

        if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
                printk("Default I/O scheduler not found, using no-op\n");
                e = elevator_get("noop");
        }

        eq = elevator_alloc(e);
        if (!eq)
                return -ENOMEM;

        ret = elevator_attach(q, eq);
        if (ret)
                kobject_put(&eq->kobj);

        return ret;
}

void elevator_exit(elevator_t *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}

/*
 * Insert rq into the dispatch queue of q, sorted by sector and split at
 * the scheduling boundary (end_sector).  Queue lock must be held on
 * entry.  To be used by specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;
        q->nr_sorted--;

        boundary = q->end_sector;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
                                continue;
                } else {
                        if (pos->sector >= boundary)
                                break;
                }
                if (rq->sector >= pos->sector)
                        break;
        }

        list_add(&rq->queuelist, entry);
}

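/*
 * Ask whether bio can be merged with a request already queued in q. The
 * cached last_merge hint is tried first; otherwise the decision is left
 * to the io scheduler's merge_fn. On success *req is set to the request
 * to merge with and the merge type is returned.
 */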
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        elevator_t *e = q->elevator;
        int ret;

        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merged_fn)
                e->ops->elevator_merged_fn(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
                             struct request *next)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);
        q->nr_sorted--;

        q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
                        e->ops->elevator_deactivate_req_fn(q, rq);
        }

        rq->flags &= ~REQ_STARTED;

        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
        static int printed;
        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted == 0)
                return;
        if (printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->elevator_type->elevator_name, q->nr_sorted);
        }
}

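/*
 * Insert rq at the spot described by where: at the front or back of the
 * dispatch queue, sorted into the io scheduler, or requeued in ordered-
 * sequence position. May kick the queue or unplug the device as a side
 * effect, depending on the insertion point.
 */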
void elv_insert(request_queue_t *q, struct request *rq, int where)
{
        struct list_head *pos;
        unsigned ordseq;
        int unplug_it = 1;

        blk_add_trace_rq(q, rq, BLK_TA_INSERT);

        rq->q = q;

        switch (where) {
        case ELEVATOR_INSERT_FRONT:
                rq->flags |= REQ_SOFTBARRIER;

                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->flags |= REQ_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in a hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                blk_remove_plug(q);
                q->request_fn(q);
                break;

        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
                rq->flags |= REQ_SORTED;
                q->nr_sorted++;
                if (q->last_merge == NULL && rq_mergeable(rq))
                        q->last_merge = rq;
                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_REQUEUE:
                /*
                 * If ordered flush isn't in progress, we do front
                 * insertion; otherwise, requests should be requeued
                 * in ordseq order.
                 */
                rq->flags |= REQ_SOFTBARRIER;

                if (q->ordseq == 0) {
                        list_add(&rq->queuelist, &q->queue_head);
                        break;
                }

                ordseq = blk_ordered_req_seq(rq);

                list_for_each(pos, &q->queue_head) {
                        struct request *pos_rq = list_entry_rq(pos);
                        if (ordseq <= blk_ordered_req_seq(pos_rq))
                                break;
                }

                list_add_tail(&rq->queuelist, pos);
                /*
                 * most requeues happen because of a busy condition, so
                 * don't force an unplug of the queue in that case.
                 */
                unplug_it = 0;
                break;

        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __FUNCTION__, where);
                BUG();
        }

        if (unplug_it && blk_queue_plugged(q)) {
                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
                        - q->in_flight;

                if (nrq >= q->unplug_thresh)
                        __generic_unplug_device(q);
        }
}

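/*
 * Queue-lock-held variant of elv_add_request(). Handles barrier requests
 * (ordered color toggling, forced back insertion, scheduling boundary
 * update) before handing rq to elv_insert().
 */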
void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
{
        if (q->ordcolor)
                rq->flags |= REQ_ORDERED_COLOR;

        if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * toggle ordered color
                 */
                if (blk_barrier_rq(rq))
                        q->ordcolor ^= 1;

                /*
                 * barriers implicitly indicate back insertion
                 */
                if (where == ELEVATOR_INSERT_SORT)
                        where = ELEVATOR_INSERT_BACK;

                /*
                 * this request is a scheduling boundary, update
                 * end_sector
                 */
                if (blk_fs_request(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;

        if (plug)
                blk_plug_device(q);

        elv_insert(q, rq, where);
}

void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where, plug);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static inline struct request *__elv_next_request(request_queue_t *q)
{
        struct request *rq;

        while (1) {
                while (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
                        if (blk_do_ordered(q, &rq))
                                return rq;
                }

                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
        }
}

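/*
 * Return the next request the driver should look at, marking it started
 * and running the queue's prep_rq_fn on it first. Requests whose prep
 * must be deferred cause NULL to be returned; killed requests are ended
 * here and the next candidate is tried.
 */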
struct request *elv_next_request(request_queue_t *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;

                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (blk_sorted_rq(rq) &&
                            e->ops->elevator_activate_req_fn)
                                e->ops->elevator_activate_req_fn(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->flags |= REQ_STARTED;
                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        int nr_bytes = rq->hard_nr_sectors << 9;

                        if (!nr_bytes)
                                nr_bytes = rq->data_len;

                        blkdev_dequeue_request(rq);
                        rq->flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq, 0);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
                                                                ret);
                        break;
                }
        }

        return rq;
}

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
        BUG_ON(list_empty(&rq->queuelist));

        list_del_init(&rq->queuelist);

        /*
         * the time frame between a request being removed from the lists
         * and when it is freed is accounted as io that is in progress at
         * the driver side.
         */
        if (blk_account_rq(rq))
                q->in_flight++;
}

int elv_queue_empty(request_queue_t *q)
{
        elevator_t *e = q->elevator;

        if (!list_empty(&q->queue_head))
                return 0;

        if (e->ops->elevator_queue_empty_fn)
                return e->ops->elevator_queue_empty_fn(q);

        return 1;
}

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_latter_req_fn)
                return e->ops->elevator_latter_req_fn(q, rq);
        return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_former_req_fn)
                return e->ops->elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
                    gfp_t gfp_mask)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);

        rq->elevator_private = NULL;
        return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_put_req_fn)
                e->ops->elevator_put_req_fn(q, rq);
}

int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_may_queue_fn)
                return e->ops->elevator_may_queue_fn(q, rw, bio);

        return ELV_MQUEUE_MAY;
}

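/*
 * Called when the driver has finished with rq: drop the in_flight count,
 * let the io scheduler account the completion, and, if an ordered flush
 * is waiting for the queue to drain, advance the flush sequence.
 */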
void elv_completed_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }

        /*
         * Check if the queue is waiting for fs requests to be
         * drained for flush sequence.
         */
        if (unlikely(q->ordseq)) {
                struct request *first_rq = list_entry_rq(q->queue_head.next);
                if (q->in_flight == 0 &&
                    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
                    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
                        blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
                        q->request_fn(q);
                }
        }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->show)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->store)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};

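/*
 * Add the elevator's "iosched" kobject under the queue's kobject and
 * create a sysfs file for each attribute the elevator type provides.
 */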
int elv_register_queue(struct request_queue *q)
{
        elevator_t *e = q->elevator;
        int error;

        e->kobj.parent = &q->kobj;

        error = kobject_add(&e->kobj);
        if (!error) {
                struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
        }
        return error;
}

void elv_unregister_queue(struct request_queue *q)
{
        if (q) {
                elevator_t *e = q->elevator;
                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
        }
}

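/*
 * Register a new io scheduler type with the block layer. Registering a
 * name that already exists is a bug; once registered, the type becomes
 * selectable via the elevator= parameter and the per-queue sysfs file.
 */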
int elv_register(struct elevator_type *e)
{
        spin_lock_irq(&elv_list_lock);
        BUG_ON(elevator_find(e->elevator_name));
        list_add_tail(&e->list, &elv_list);
        spin_unlock_irq(&elv_list_lock);

        printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
        if (!strcmp(e->elevator_name, chosen_elevator) ||
                        (!*chosen_elevator &&
                         !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                                printk(" (default)");
        printk("\n");
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
        struct task_struct *g, *p;

        /*
         * Iterate over every thread in the system so the io scheduler
         * can trim its per-task io context state.
         */
        if (e->ops.trim) {
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
                        task_lock(p);
                        e->ops.trim(p->io_context);
                        task_unlock(p);
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }

        spin_lock_irq(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler. be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one. this way we have a chance of going back to the
 * old one if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
        elevator_t *old_elevator, *e;

        /*
         * Allocate new elevator
         */
        e = elevator_alloc(new_e);
        if (!e)
                return 0;

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data
         */
        spin_lock_irq(q->queue_lock);

        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

        elv_drain_elevator(q);

        while (q->rq.elvpriv) {
                blk_remove_plug(q);
                q->request_fn(q);
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
                elv_drain_elevator(q);
        }

        spin_unlock_irq(q->queue_lock);

        /*
         * unregister old elevator data
         */
        elv_unregister_queue(q);
        old_elevator = q->elevator;

        /*
         * attach and start new elevator
         */
        if (elevator_attach(q, e))
                goto fail;

        if (elv_register_queue(q))
                goto fail_register;

        /*
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 1;

fail_register:
        /*
         * switch failed, exit the new io scheduler and reattach the old
         * one again (along with re-adding the sysfs dir)
         */
        elevator_exit(e);
        e = NULL;
fail:
        q->elevator = old_elevator;
        elv_register_queue(q);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        if (e)
                kobject_put(&e->kobj);
        return 0;
}

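/*
 * sysfs store handler for the queue's "scheduler" attribute: look up the
 * requested elevator by name (stripping a trailing newline) and switch
 * the queue over to it unless it is already active.
 */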
ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
        char elevator_name[ELV_NAME_MAX];
        size_t len;
        struct elevator_type *e;

        elevator_name[sizeof(elevator_name) - 1] = '\0';
        strncpy(elevator_name, name, sizeof(elevator_name) - 1);
        len = strlen(elevator_name);

        if (len && elevator_name[len - 1] == '\n')
                elevator_name[len - 1] = '\0';

        e = elevator_get(elevator_name);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
                elevator_put(e);
                return count;
        }

        if (!elevator_switch(q, e))
                printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
        return count;
}

ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
        elevator_t *e = q->elevator;
        struct elevator_type *elv = e->elevator_type;
        struct list_head *entry;
        int len = 0;

        spin_lock_irq(q->queue_lock);
        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);
                if (!strcmp(elv->elevator_name, __e->elevator_name))
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                else
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock_irq(q->queue_lock);

        len += sprintf(len+name, "\n");
        return len;
}

EXPORT_SYMBOL(elv_dispatch_sort);
EXPORT_SYMBOL(elv_add_request);
EXPORT_SYMBOL(__elv_add_request);
EXPORT_SYMBOL(elv_next_request);
EXPORT_SYMBOL(elv_dequeue_request);
EXPORT_SYMBOL(elv_queue_empty);
EXPORT_SYMBOL(elevator_exit);
EXPORT_SYMBOL(elevator_init);