[PATCH] sys_pselect7 vs compat_sys_pselect7 uaccess error handling
[linux-2.6] / block / elevator.c
1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@kernel.dk> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - loose bi_dev comparisons, partition handling is right now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/delay.h>
35 #include <linux/blktrace_api.h>
36 #include <linux/hash.h>
37
38 #include <asm/uaccess.h>
39
40 static DEFINE_SPINLOCK(elv_list_lock);
41 static LIST_HEAD(elv_list);
42
43 /*
44  * Merge hash stuff.
45  */
46 static const int elv_hash_shift = 6;
47 #define ELV_HASH_BLOCK(sec)     ((sec) >> 3)
48 #define ELV_HASH_FN(sec)        (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
49 #define ELV_HASH_ENTRIES        (1 << elv_hash_shift)
50 #define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
51 #define ELV_ON_HASH(rq)         (!hlist_unhashed(&(rq)->hash))
52
53 /*
54  * can we safely merge with this request?
55  */
56 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
57 {
58         if (!rq_mergeable(rq))
59                 return 0;
60
61         /*
62          * different data direction or already started, don't merge
63          */
64         if (bio_data_dir(bio) != rq_data_dir(rq))
65                 return 0;
66
67         /*
68          * same device and no special stuff set, merge is ok
69          */
70         if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
71                 return 1;
72
73         return 0;
74 }
75 EXPORT_SYMBOL(elv_rq_merge_ok);
76
77 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
78 {
79         int ret = ELEVATOR_NO_MERGE;
80
81         /*
82          * we can merge and sequence is ok, check if it's possible
83          */
84         if (elv_rq_merge_ok(__rq, bio)) {
85                 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
86                         ret = ELEVATOR_BACK_MERGE;
87                 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
88                         ret = ELEVATOR_FRONT_MERGE;
89         }
90
91         return ret;
92 }
93
94 static struct elevator_type *elevator_find(const char *name)
95 {
96         struct elevator_type *e;
97         struct list_head *entry;
98
99         list_for_each(entry, &elv_list) {
100
101                 e = list_entry(entry, struct elevator_type, list);
102
103                 if (!strcmp(e->elevator_name, name))
104                         return e;
105         }
106
107         return NULL;
108 }
109
110 static void elevator_put(struct elevator_type *e)
111 {
112         module_put(e->elevator_owner);
113 }
114
115 static struct elevator_type *elevator_get(const char *name)
116 {
117         struct elevator_type *e;
118
119         spin_lock_irq(&elv_list_lock);
120
121         e = elevator_find(name);
122         if (e && !try_module_get(e->elevator_owner))
123                 e = NULL;
124
125         spin_unlock_irq(&elv_list_lock);
126
127         return e;
128 }
129
130 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
131 {
132         return eq->ops->elevator_init_fn(q, eq);
133 }
134
135 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
136                            void *data)
137 {
138         q->elevator = eq;
139         eq->elevator_data = data;
140 }
141
142 static char chosen_elevator[16];
143
144 static int __init elevator_setup(char *str)
145 {
146         /*
147          * Be backwards-compatible with previous kernels, so users
148          * won't get the wrong elevator.
149          */
150         if (!strcmp(str, "as"))
151                 strcpy(chosen_elevator, "anticipatory");
152         else
153                 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
154         return 1;
155 }
156
157 __setup("elevator=", elevator_setup);
158
159 static struct kobj_type elv_ktype;
160
161 static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
162 {
163         elevator_t *eq;
164         int i;
165
166         eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
167         if (unlikely(!eq))
168                 goto err;
169
170         memset(eq, 0, sizeof(*eq));
171         eq->ops = &e->ops;
172         eq->elevator_type = e;
173         kobject_init(&eq->kobj);
174         snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
175         eq->kobj.ktype = &elv_ktype;
176         mutex_init(&eq->sysfs_lock);
177
178         eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
179                                         GFP_KERNEL, q->node);
180         if (!eq->hash)
181                 goto err;
182
183         for (i = 0; i < ELV_HASH_ENTRIES; i++)
184                 INIT_HLIST_HEAD(&eq->hash[i]);
185
186         return eq;
187 err:
188         kfree(eq);
189         elevator_put(e);
190         return NULL;
191 }
192
193 static void elevator_release(struct kobject *kobj)
194 {
195         elevator_t *e = container_of(kobj, elevator_t, kobj);
196
197         elevator_put(e->elevator_type);
198         kfree(e->hash);
199         kfree(e);
200 }
201
202 int elevator_init(request_queue_t *q, char *name)
203 {
204         struct elevator_type *e = NULL;
205         struct elevator_queue *eq;
206         int ret = 0;
207         void *data;
208
209         INIT_LIST_HEAD(&q->queue_head);
210         q->last_merge = NULL;
211         q->end_sector = 0;
212         q->boundary_rq = NULL;
213
214         if (name && !(e = elevator_get(name)))
215                 return -EINVAL;
216
217         if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
218                 printk("I/O scheduler %s not found\n", chosen_elevator);
219
220         if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
221                 printk("Default I/O scheduler not found, using no-op\n");
222                 e = elevator_get("noop");
223         }
224
225         eq = elevator_alloc(q, e);
226         if (!eq)
227                 return -ENOMEM;
228
229         data = elevator_init_queue(q, eq);
230         if (!data) {
231                 kobject_put(&eq->kobj);
232                 return -ENOMEM;
233         }
234
235         elevator_attach(q, eq, data);
236         return ret;
237 }
238
239 EXPORT_SYMBOL(elevator_init);
240
241 void elevator_exit(elevator_t *e)
242 {
243         mutex_lock(&e->sysfs_lock);
244         if (e->ops->elevator_exit_fn)
245                 e->ops->elevator_exit_fn(e);
246         e->ops = NULL;
247         mutex_unlock(&e->sysfs_lock);
248
249         kobject_put(&e->kobj);
250 }
251
252 EXPORT_SYMBOL(elevator_exit);
253
254 static inline void __elv_rqhash_del(struct request *rq)
255 {
256         hlist_del_init(&rq->hash);
257 }
258
259 static void elv_rqhash_del(request_queue_t *q, struct request *rq)
260 {
261         if (ELV_ON_HASH(rq))
262                 __elv_rqhash_del(rq);
263 }
264
265 static void elv_rqhash_add(request_queue_t *q, struct request *rq)
266 {
267         elevator_t *e = q->elevator;
268
269         BUG_ON(ELV_ON_HASH(rq));
270         hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
271 }
272
273 static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
274 {
275         __elv_rqhash_del(rq);
276         elv_rqhash_add(q, rq);
277 }
278
279 static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
280 {
281         elevator_t *e = q->elevator;
282         struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
283         struct hlist_node *entry, *next;
284         struct request *rq;
285
286         hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
287                 BUG_ON(!ELV_ON_HASH(rq));
288
289                 if (unlikely(!rq_mergeable(rq))) {
290                         __elv_rqhash_del(rq);
291                         continue;
292                 }
293
294                 if (rq_hash_key(rq) == offset)
295                         return rq;
296         }
297
298         return NULL;
299 }
300
301 /*
302  * RB-tree support functions for inserting/lookup/removal of requests
303  * in a sorted RB tree.
304  */
305 struct request *elv_rb_add(struct rb_root *root, struct request *rq)
306 {
307         struct rb_node **p = &root->rb_node;
308         struct rb_node *parent = NULL;
309         struct request *__rq;
310
311         while (*p) {
312                 parent = *p;
313                 __rq = rb_entry(parent, struct request, rb_node);
314
315                 if (rq->sector < __rq->sector)
316                         p = &(*p)->rb_left;
317                 else if (rq->sector > __rq->sector)
318                         p = &(*p)->rb_right;
319                 else
320                         return __rq;
321         }
322
323         rb_link_node(&rq->rb_node, parent, p);
324         rb_insert_color(&rq->rb_node, root);
325         return NULL;
326 }
327
328 EXPORT_SYMBOL(elv_rb_add);
329
330 void elv_rb_del(struct rb_root *root, struct request *rq)
331 {
332         BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
333         rb_erase(&rq->rb_node, root);
334         RB_CLEAR_NODE(&rq->rb_node);
335 }
336
337 EXPORT_SYMBOL(elv_rb_del);
338
339 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
340 {
341         struct rb_node *n = root->rb_node;
342         struct request *rq;
343
344         while (n) {
345                 rq = rb_entry(n, struct request, rb_node);
346
347                 if (sector < rq->sector)
348                         n = n->rb_left;
349                 else if (sector > rq->sector)
350                         n = n->rb_right;
351                 else
352                         return rq;
353         }
354
355         return NULL;
356 }
357
358 EXPORT_SYMBOL(elv_rb_find);
359
360 /*
361  * Insert rq into dispatch queue of q.  Queue lock must be held on
362  * entry.  rq is sort insted into the dispatch queue. To be used by
363  * specific elevators.
364  */
365 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
366 {
367         sector_t boundary;
368         struct list_head *entry;
369
370         if (q->last_merge == rq)
371                 q->last_merge = NULL;
372
373         elv_rqhash_del(q, rq);
374
375         q->nr_sorted--;
376
377         boundary = q->end_sector;
378
379         list_for_each_prev(entry, &q->queue_head) {
380                 struct request *pos = list_entry_rq(entry);
381
382                 if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
383                         break;
384                 if (rq->sector >= boundary) {
385                         if (pos->sector < boundary)
386                                 continue;
387                 } else {
388                         if (pos->sector >= boundary)
389                                 break;
390                 }
391                 if (rq->sector >= pos->sector)
392                         break;
393         }
394
395         list_add(&rq->queuelist, entry);
396 }
397
398 EXPORT_SYMBOL(elv_dispatch_sort);
399
400 /*
401  * Insert rq into dispatch queue of q.  Queue lock must be held on
402  * entry.  rq is added to the back of the dispatch queue. To be used by
403  * specific elevators.
404  */
405 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
406 {
407         if (q->last_merge == rq)
408                 q->last_merge = NULL;
409
410         elv_rqhash_del(q, rq);
411
412         q->nr_sorted--;
413
414         q->end_sector = rq_end_sector(rq);
415         q->boundary_rq = rq;
416         list_add_tail(&rq->queuelist, &q->queue_head);
417 }
418
419 EXPORT_SYMBOL(elv_dispatch_add_tail);
420
421 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
422 {
423         elevator_t *e = q->elevator;
424         struct request *__rq;
425         int ret;
426
427         /*
428          * First try one-hit cache.
429          */
430         if (q->last_merge) {
431                 ret = elv_try_merge(q->last_merge, bio);
432                 if (ret != ELEVATOR_NO_MERGE) {
433                         *req = q->last_merge;
434                         return ret;
435                 }
436         }
437
438         /*
439          * See if our hash lookup can find a potential backmerge.
440          */
441         __rq = elv_rqhash_find(q, bio->bi_sector);
442         if (__rq && elv_rq_merge_ok(__rq, bio)) {
443                 *req = __rq;
444                 return ELEVATOR_BACK_MERGE;
445         }
446
447         if (e->ops->elevator_merge_fn)
448                 return e->ops->elevator_merge_fn(q, req, bio);
449
450         return ELEVATOR_NO_MERGE;
451 }
452
453 void elv_merged_request(request_queue_t *q, struct request *rq, int type)
454 {
455         elevator_t *e = q->elevator;
456
457         if (e->ops->elevator_merged_fn)
458                 e->ops->elevator_merged_fn(q, rq, type);
459
460         if (type == ELEVATOR_BACK_MERGE)
461                 elv_rqhash_reposition(q, rq);
462
463         q->last_merge = rq;
464 }
465
466 void elv_merge_requests(request_queue_t *q, struct request *rq,
467                              struct request *next)
468 {
469         elevator_t *e = q->elevator;
470
471         if (e->ops->elevator_merge_req_fn)
472                 e->ops->elevator_merge_req_fn(q, rq, next);
473
474         elv_rqhash_reposition(q, rq);
475         elv_rqhash_del(q, next);
476
477         q->nr_sorted--;
478         q->last_merge = rq;
479 }
480
481 void elv_requeue_request(request_queue_t *q, struct request *rq)
482 {
483         elevator_t *e = q->elevator;
484
485         /*
486          * it already went through dequeue, we need to decrement the
487          * in_flight count again
488          */
489         if (blk_account_rq(rq)) {
490                 q->in_flight--;
491                 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
492                         e->ops->elevator_deactivate_req_fn(q, rq);
493         }
494
495         rq->cmd_flags &= ~REQ_STARTED;
496
497         elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
498 }
499
500 static void elv_drain_elevator(request_queue_t *q)
501 {
502         static int printed;
503         while (q->elevator->ops->elevator_dispatch_fn(q, 1))
504                 ;
505         if (q->nr_sorted == 0)
506                 return;
507         if (printed++ < 10) {
508                 printk(KERN_ERR "%s: forced dispatching is broken "
509                        "(nr_sorted=%u), please report this\n",
510                        q->elevator->elevator_type->elevator_name, q->nr_sorted);
511         }
512 }
513
514 void elv_insert(request_queue_t *q, struct request *rq, int where)
515 {
516         struct list_head *pos;
517         unsigned ordseq;
518         int unplug_it = 1;
519
520         blk_add_trace_rq(q, rq, BLK_TA_INSERT);
521
522         rq->q = q;
523
524         switch (where) {
525         case ELEVATOR_INSERT_FRONT:
526                 rq->cmd_flags |= REQ_SOFTBARRIER;
527
528                 list_add(&rq->queuelist, &q->queue_head);
529                 break;
530
531         case ELEVATOR_INSERT_BACK:
532                 rq->cmd_flags |= REQ_SOFTBARRIER;
533                 elv_drain_elevator(q);
534                 list_add_tail(&rq->queuelist, &q->queue_head);
535                 /*
536                  * We kick the queue here for the following reasons.
537                  * - The elevator might have returned NULL previously
538                  *   to delay requests and returned them now.  As the
539                  *   queue wasn't empty before this request, ll_rw_blk
540                  *   won't run the queue on return, resulting in hang.
541                  * - Usually, back inserted requests won't be merged
542                  *   with anything.  There's no point in delaying queue
543                  *   processing.
544                  */
545                 blk_remove_plug(q);
546                 q->request_fn(q);
547                 break;
548
549         case ELEVATOR_INSERT_SORT:
550                 BUG_ON(!blk_fs_request(rq));
551                 rq->cmd_flags |= REQ_SORTED;
552                 q->nr_sorted++;
553                 if (rq_mergeable(rq)) {
554                         elv_rqhash_add(q, rq);
555                         if (!q->last_merge)
556                                 q->last_merge = rq;
557                 }
558
559                 /*
560                  * Some ioscheds (cfq) run q->request_fn directly, so
561                  * rq cannot be accessed after calling
562                  * elevator_add_req_fn.
563                  */
564                 q->elevator->ops->elevator_add_req_fn(q, rq);
565                 break;
566
567         case ELEVATOR_INSERT_REQUEUE:
568                 /*
569                  * If ordered flush isn't in progress, we do front
570                  * insertion; otherwise, requests should be requeued
571                  * in ordseq order.
572                  */
573                 rq->cmd_flags |= REQ_SOFTBARRIER;
574
575                 if (q->ordseq == 0) {
576                         list_add(&rq->queuelist, &q->queue_head);
577                         break;
578                 }
579
580                 ordseq = blk_ordered_req_seq(rq);
581
582                 list_for_each(pos, &q->queue_head) {
583                         struct request *pos_rq = list_entry_rq(pos);
584                         if (ordseq <= blk_ordered_req_seq(pos_rq))
585                                 break;
586                 }
587
588                 list_add_tail(&rq->queuelist, pos);
589                 /*
590                  * most requeues happen because of a busy condition, don't
591                  * force unplug of the queue for that case.
592                  */
593                 unplug_it = 0;
594                 break;
595
596         default:
597                 printk(KERN_ERR "%s: bad insertion point %d\n",
598                        __FUNCTION__, where);
599                 BUG();
600         }
601
602         if (unplug_it && blk_queue_plugged(q)) {
603                 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
604                         - q->in_flight;
605
606                 if (nrq >= q->unplug_thresh)
607                         __generic_unplug_device(q);
608         }
609 }
610
611 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
612                        int plug)
613 {
614         if (q->ordcolor)
615                 rq->cmd_flags |= REQ_ORDERED_COLOR;
616
617         if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
618                 /*
619                  * toggle ordered color
620                  */
621                 if (blk_barrier_rq(rq))
622                         q->ordcolor ^= 1;
623
624                 /*
625                  * barriers implicitly indicate back insertion
626                  */
627                 if (where == ELEVATOR_INSERT_SORT)
628                         where = ELEVATOR_INSERT_BACK;
629
630                 /*
631                  * this request is scheduling boundary, update
632                  * end_sector
633                  */
634                 if (blk_fs_request(rq)) {
635                         q->end_sector = rq_end_sector(rq);
636                         q->boundary_rq = rq;
637                 }
638         } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
639                 where = ELEVATOR_INSERT_BACK;
640
641         if (plug)
642                 blk_plug_device(q);
643
644         elv_insert(q, rq, where);
645 }
646
647 EXPORT_SYMBOL(__elv_add_request);
648
649 void elv_add_request(request_queue_t *q, struct request *rq, int where,
650                      int plug)
651 {
652         unsigned long flags;
653
654         spin_lock_irqsave(q->queue_lock, flags);
655         __elv_add_request(q, rq, where, plug);
656         spin_unlock_irqrestore(q->queue_lock, flags);
657 }
658
659 EXPORT_SYMBOL(elv_add_request);
660
661 static inline struct request *__elv_next_request(request_queue_t *q)
662 {
663         struct request *rq;
664
665         while (1) {
666                 while (!list_empty(&q->queue_head)) {
667                         rq = list_entry_rq(q->queue_head.next);
668                         if (blk_do_ordered(q, &rq))
669                                 return rq;
670                 }
671
672                 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
673                         return NULL;
674         }
675 }
676
677 struct request *elv_next_request(request_queue_t *q)
678 {
679         struct request *rq;
680         int ret;
681
682         while ((rq = __elv_next_request(q)) != NULL) {
683                 if (!(rq->cmd_flags & REQ_STARTED)) {
684                         elevator_t *e = q->elevator;
685
686                         /*
687                          * This is the first time the device driver
688                          * sees this request (possibly after
689                          * requeueing).  Notify IO scheduler.
690                          */
691                         if (blk_sorted_rq(rq) &&
692                             e->ops->elevator_activate_req_fn)
693                                 e->ops->elevator_activate_req_fn(q, rq);
694
695                         /*
696                          * just mark as started even if we don't start
697                          * it, a request that has been delayed should
698                          * not be passed by new incoming requests
699                          */
700                         rq->cmd_flags |= REQ_STARTED;
701                         blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
702                 }
703
704                 if (!q->boundary_rq || q->boundary_rq == rq) {
705                         q->end_sector = rq_end_sector(rq);
706                         q->boundary_rq = NULL;
707                 }
708
709                 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
710                         break;
711
712                 ret = q->prep_rq_fn(q, rq);
713                 if (ret == BLKPREP_OK) {
714                         break;
715                 } else if (ret == BLKPREP_DEFER) {
716                         /*
717                          * the request may have been (partially) prepped.
718                          * we need to keep this request in the front to
719                          * avoid resource deadlock.  REQ_STARTED will
720                          * prevent other fs requests from passing this one.
721                          */
722                         rq = NULL;
723                         break;
724                 } else if (ret == BLKPREP_KILL) {
725                         int nr_bytes = rq->hard_nr_sectors << 9;
726
727                         if (!nr_bytes)
728                                 nr_bytes = rq->data_len;
729
730                         blkdev_dequeue_request(rq);
731                         rq->cmd_flags |= REQ_QUIET;
732                         end_that_request_chunk(rq, 0, nr_bytes);
733                         end_that_request_last(rq, 0);
734                 } else {
735                         printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
736                                                                 ret);
737                         break;
738                 }
739         }
740
741         return rq;
742 }
743
744 EXPORT_SYMBOL(elv_next_request);
745
746 void elv_dequeue_request(request_queue_t *q, struct request *rq)
747 {
748         BUG_ON(list_empty(&rq->queuelist));
749         BUG_ON(ELV_ON_HASH(rq));
750
751         list_del_init(&rq->queuelist);
752
753         /*
754          * the time frame between a request being removed from the lists
755          * and to it is freed is accounted as io that is in progress at
756          * the driver side.
757          */
758         if (blk_account_rq(rq))
759                 q->in_flight++;
760 }
761
762 EXPORT_SYMBOL(elv_dequeue_request);
763
764 int elv_queue_empty(request_queue_t *q)
765 {
766         elevator_t *e = q->elevator;
767
768         if (!list_empty(&q->queue_head))
769                 return 0;
770
771         if (e->ops->elevator_queue_empty_fn)
772                 return e->ops->elevator_queue_empty_fn(q);
773
774         return 1;
775 }
776
777 EXPORT_SYMBOL(elv_queue_empty);
778
779 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
780 {
781         elevator_t *e = q->elevator;
782
783         if (e->ops->elevator_latter_req_fn)
784                 return e->ops->elevator_latter_req_fn(q, rq);
785         return NULL;
786 }
787
788 struct request *elv_former_request(request_queue_t *q, struct request *rq)
789 {
790         elevator_t *e = q->elevator;
791
792         if (e->ops->elevator_former_req_fn)
793                 return e->ops->elevator_former_req_fn(q, rq);
794         return NULL;
795 }
796
797 int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
798 {
799         elevator_t *e = q->elevator;
800
801         if (e->ops->elevator_set_req_fn)
802                 return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
803
804         rq->elevator_private = NULL;
805         return 0;
806 }
807
808 void elv_put_request(request_queue_t *q, struct request *rq)
809 {
810         elevator_t *e = q->elevator;
811
812         if (e->ops->elevator_put_req_fn)
813                 e->ops->elevator_put_req_fn(q, rq);
814 }
815
816 int elv_may_queue(request_queue_t *q, int rw)
817 {
818         elevator_t *e = q->elevator;
819
820         if (e->ops->elevator_may_queue_fn)
821                 return e->ops->elevator_may_queue_fn(q, rw);
822
823         return ELV_MQUEUE_MAY;
824 }
825
826 void elv_completed_request(request_queue_t *q, struct request *rq)
827 {
828         elevator_t *e = q->elevator;
829
830         /*
831          * request is released from the driver, io must be done
832          */
833         if (blk_account_rq(rq)) {
834                 q->in_flight--;
835                 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
836                         e->ops->elevator_completed_req_fn(q, rq);
837         }
838
839         /*
840          * Check if the queue is waiting for fs requests to be
841          * drained for flush sequence.
842          */
843         if (unlikely(q->ordseq)) {
844                 struct request *first_rq = list_entry_rq(q->queue_head.next);
845                 if (q->in_flight == 0 &&
846                     blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
847                     blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
848                         blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
849                         q->request_fn(q);
850                 }
851         }
852 }
853
854 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
855
856 static ssize_t
857 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
858 {
859         elevator_t *e = container_of(kobj, elevator_t, kobj);
860         struct elv_fs_entry *entry = to_elv(attr);
861         ssize_t error;
862
863         if (!entry->show)
864                 return -EIO;
865
866         mutex_lock(&e->sysfs_lock);
867         error = e->ops ? entry->show(e, page) : -ENOENT;
868         mutex_unlock(&e->sysfs_lock);
869         return error;
870 }
871
872 static ssize_t
873 elv_attr_store(struct kobject *kobj, struct attribute *attr,
874                const char *page, size_t length)
875 {
876         elevator_t *e = container_of(kobj, elevator_t, kobj);
877         struct elv_fs_entry *entry = to_elv(attr);
878         ssize_t error;
879
880         if (!entry->store)
881                 return -EIO;
882
883         mutex_lock(&e->sysfs_lock);
884         error = e->ops ? entry->store(e, page, length) : -ENOENT;
885         mutex_unlock(&e->sysfs_lock);
886         return error;
887 }
888
889 static struct sysfs_ops elv_sysfs_ops = {
890         .show   = elv_attr_show,
891         .store  = elv_attr_store,
892 };
893
894 static struct kobj_type elv_ktype = {
895         .sysfs_ops      = &elv_sysfs_ops,
896         .release        = elevator_release,
897 };
898
899 int elv_register_queue(struct request_queue *q)
900 {
901         elevator_t *e = q->elevator;
902         int error;
903
904         e->kobj.parent = &q->kobj;
905
906         error = kobject_add(&e->kobj);
907         if (!error) {
908                 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
909                 if (attr) {
910                         while (attr->attr.name) {
911                                 if (sysfs_create_file(&e->kobj, &attr->attr))
912                                         break;
913                                 attr++;
914                         }
915                 }
916                 kobject_uevent(&e->kobj, KOBJ_ADD);
917         }
918         return error;
919 }
920
921 static void __elv_unregister_queue(elevator_t *e)
922 {
923         kobject_uevent(&e->kobj, KOBJ_REMOVE);
924         kobject_del(&e->kobj);
925 }
926
927 void elv_unregister_queue(struct request_queue *q)
928 {
929         if (q)
930                 __elv_unregister_queue(q->elevator);
931 }
932
933 int elv_register(struct elevator_type *e)
934 {
935         spin_lock_irq(&elv_list_lock);
936         BUG_ON(elevator_find(e->elevator_name));
937         list_add_tail(&e->list, &elv_list);
938         spin_unlock_irq(&elv_list_lock);
939
940         printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
941         if (!strcmp(e->elevator_name, chosen_elevator) ||
942                         (!*chosen_elevator &&
943                          !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
944                                 printk(" (default)");
945         printk("\n");
946         return 0;
947 }
948 EXPORT_SYMBOL_GPL(elv_register);
949
950 void elv_unregister(struct elevator_type *e)
951 {
952         struct task_struct *g, *p;
953
954         /*
955          * Iterate every thread in the process to remove the io contexts.
956          */
957         if (e->ops.trim) {
958                 read_lock(&tasklist_lock);
959                 do_each_thread(g, p) {
960                         task_lock(p);
961                         if (p->io_context)
962                                 e->ops.trim(p->io_context);
963                         task_unlock(p);
964                 } while_each_thread(g, p);
965                 read_unlock(&tasklist_lock);
966         }
967
968         spin_lock_irq(&elv_list_lock);
969         list_del_init(&e->list);
970         spin_unlock_irq(&elv_list_lock);
971 }
972 EXPORT_SYMBOL_GPL(elv_unregister);
973
974 /*
975  * switch to new_e io scheduler. be careful not to introduce deadlocks -
976  * we don't free the old io scheduler, before we have allocated what we
977  * need for the new one. this way we have a chance of going back to the old
978  * one, if the new one fails init for some reason.
979  */
980 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
981 {
982         elevator_t *old_elevator, *e;
983         void *data;
984
985         /*
986          * Allocate new elevator
987          */
988         e = elevator_alloc(q, new_e);
989         if (!e)
990                 return 0;
991
992         data = elevator_init_queue(q, e);
993         if (!data) {
994                 kobject_put(&e->kobj);
995                 return 0;
996         }
997
998         /*
999          * Turn on BYPASS and drain all requests w/ elevator private data
1000          */
1001         spin_lock_irq(q->queue_lock);
1002
1003         set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1004
1005         elv_drain_elevator(q);
1006
1007         while (q->rq.elvpriv) {
1008                 blk_remove_plug(q);
1009                 q->request_fn(q);
1010                 spin_unlock_irq(q->queue_lock);
1011                 msleep(10);
1012                 spin_lock_irq(q->queue_lock);
1013                 elv_drain_elevator(q);
1014         }
1015
1016         /*
1017          * Remember old elevator.
1018          */
1019         old_elevator = q->elevator;
1020
1021         /*
1022          * attach and start new elevator
1023          */
1024         elevator_attach(q, e, data);
1025
1026         spin_unlock_irq(q->queue_lock);
1027
1028         __elv_unregister_queue(old_elevator);
1029
1030         if (elv_register_queue(q))
1031                 goto fail_register;
1032
1033         /*
1034          * finally exit old elevator and turn off BYPASS.
1035          */
1036         elevator_exit(old_elevator);
1037         clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1038         return 1;
1039
1040 fail_register:
1041         /*
1042          * switch failed, exit the new io scheduler and reattach the old
1043          * one again (along with re-adding the sysfs dir)
1044          */
1045         elevator_exit(e);
1046         q->elevator = old_elevator;
1047         elv_register_queue(q);
1048         clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
1049         return 0;
1050 }
1051
1052 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
1053 {
1054         char elevator_name[ELV_NAME_MAX];
1055         size_t len;
1056         struct elevator_type *e;
1057
1058         elevator_name[sizeof(elevator_name) - 1] = '\0';
1059         strncpy(elevator_name, name, sizeof(elevator_name) - 1);
1060         len = strlen(elevator_name);
1061
1062         if (len && elevator_name[len - 1] == '\n')
1063                 elevator_name[len - 1] = '\0';
1064
1065         e = elevator_get(elevator_name);
1066         if (!e) {
1067                 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
1068                 return -EINVAL;
1069         }
1070
1071         if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
1072                 elevator_put(e);
1073                 return count;
1074         }
1075
1076         if (!elevator_switch(q, e))
1077                 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
1078         return count;
1079 }
1080
1081 ssize_t elv_iosched_show(request_queue_t *q, char *name)
1082 {
1083         elevator_t *e = q->elevator;
1084         struct elevator_type *elv = e->elevator_type;
1085         struct list_head *entry;
1086         int len = 0;
1087
1088         spin_lock_irq(&elv_list_lock);
1089         list_for_each(entry, &elv_list) {
1090                 struct elevator_type *__e;
1091
1092                 __e = list_entry(entry, struct elevator_type, list);
1093                 if (!strcmp(elv->elevator_name, __e->elevator_name))
1094                         len += sprintf(name+len, "[%s] ", elv->elevator_name);
1095                 else
1096                         len += sprintf(name+len, "%s ", __e->elevator_name);
1097         }
1098         spin_unlock_irq(&elv_list_lock);
1099
1100         len += sprintf(len+name, "\n");
1101         return len;
1102 }
1103
1104 struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
1105 {
1106         struct rb_node *rbprev = rb_prev(&rq->rb_node);
1107
1108         if (rbprev)
1109                 return rb_entry_rq(rbprev);
1110
1111         return NULL;
1112 }
1113
1114 EXPORT_SYMBOL(elv_rb_former_request);
1115
1116 struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
1117 {
1118         struct rb_node *rbnext = rb_next(&rq->rb_node);
1119
1120         if (rbnext)
1121                 return rb_entry_rq(rbnext);
1122
1123         return NULL;
1124 }
1125
1126 EXPORT_SYMBOL(elv_rb_latter_request);