Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / drivers / md / dm-delay.c
1 /*
2  * Copyright (C) 2005-2007 Red Hat GmbH
3  *
4  * A target that delays reads and/or writes and can send
5  * them to different devices.
6  *
7  * This file is released under the GPL.
8  */
9
#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/jiffies.h>
#include <linux/slab.h>

#include <linux/device-mapper.h>

#include "dm-bio-list.h"
19
20 #define DM_MSG_PREFIX "delay"
21
22 struct delay_c {
23         struct timer_list delay_timer;
24         struct mutex timer_lock;
25         struct work_struct flush_expired_bios;
26         struct list_head delayed_bios;
27         atomic_t may_delay;
28         mempool_t *delayed_pool;
29
30         struct dm_dev *dev_read;
31         sector_t start_read;
32         unsigned read_delay;
33         unsigned reads;
34
35         struct dm_dev *dev_write;
36         sector_t start_write;
37         unsigned write_delay;
38         unsigned writes;
39 };
40
41 struct dm_delay_info {
42         struct delay_c *context;
43         struct list_head list;
44         struct bio *bio;
45         unsigned long expires;
46 };
47
/*
 * One lock shared by all delay targets; it is held while a target's
 * delayed_bios list and its reads/writes counters are modified.
 */
static DEFINE_MUTEX(delayed_bios_lock);

/* Workqueue on which the flush_expired_bios work items run. */
static struct workqueue_struct *kdelayd_wq;
/* Slab cache from which each target's dm_delay_info mempool is created. */
static struct kmem_cache *delayed_cache;
52
53 static void handle_delayed_timer(unsigned long data)
54 {
55         struct delay_c *dc = (struct delay_c *)data;
56
57         queue_work(kdelayd_wq, &dc->flush_expired_bios);
58 }
59
60 static void queue_timeout(struct delay_c *dc, unsigned long expires)
61 {
62         mutex_lock(&dc->timer_lock);
63
64         if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
65                 mod_timer(&dc->delay_timer, expires);
66
67         mutex_unlock(&dc->timer_lock);
68 }
69
70 static void flush_bios(struct bio *bio)
71 {
72         struct bio *n;
73
74         while (bio) {
75                 n = bio->bi_next;
76                 bio->bi_next = NULL;
77                 generic_make_request(bio);
78                 bio = n;
79         }
80 }
81
82 static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
83 {
84         struct dm_delay_info *delayed, *next;
85         unsigned long next_expires = 0;
86         int start_timer = 0;
87         struct bio_list flush_bios = { };
88
89         mutex_lock(&delayed_bios_lock);
90         list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
91                 if (flush_all || time_after_eq(jiffies, delayed->expires)) {
92                         list_del(&delayed->list);
93                         bio_list_add(&flush_bios, delayed->bio);
94                         if ((bio_data_dir(delayed->bio) == WRITE))
95                                 delayed->context->writes--;
96                         else
97                                 delayed->context->reads--;
98                         mempool_free(delayed, dc->delayed_pool);
99                         continue;
100                 }
101
102                 if (!start_timer) {
103                         start_timer = 1;
104                         next_expires = delayed->expires;
105                 } else
106                         next_expires = min(next_expires, delayed->expires);
107         }
108
109         mutex_unlock(&delayed_bios_lock);
110
111         if (start_timer)
112                 queue_timeout(dc, next_expires);
113
114         return bio_list_get(&flush_bios);
115 }
116
117 static void flush_expired_bios(struct work_struct *work)
118 {
119         struct delay_c *dc;
120
121         dc = container_of(work, struct delay_c, flush_expired_bios);
122         flush_bios(flush_delayed_bios(dc, 0));
123 }
124
125 /*
126  * Mapping parameters:
127  *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
128  *
129  * With separate write parameters, the first set is only used for reads.
130  * Delays are specified in milliseconds.
131  */
132 static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
133 {
134         struct delay_c *dc;
135         unsigned long long tmpll;
136
137         if (argc != 3 && argc != 6) {
138                 ti->error = "requires exactly 3 or 6 arguments";
139                 return -EINVAL;
140         }
141
142         dc = kmalloc(sizeof(*dc), GFP_KERNEL);
143         if (!dc) {
144                 ti->error = "Cannot allocate context";
145                 return -ENOMEM;
146         }
147
148         dc->reads = dc->writes = 0;
149
150         if (sscanf(argv[1], "%llu", &tmpll) != 1) {
151                 ti->error = "Invalid device sector";
152                 goto bad;
153         }
154         dc->start_read = tmpll;
155
156         if (sscanf(argv[2], "%u", &dc->read_delay) != 1) {
157                 ti->error = "Invalid delay";
158                 goto bad;
159         }
160
161         if (dm_get_device(ti, argv[0], dc->start_read, ti->len,
162                           dm_table_get_mode(ti->table), &dc->dev_read)) {
163                 ti->error = "Device lookup failed";
164                 goto bad;
165         }
166
167         dc->dev_write = NULL;
168         if (argc == 3)
169                 goto out;
170
171         if (sscanf(argv[4], "%llu", &tmpll) != 1) {
172                 ti->error = "Invalid write device sector";
173                 goto bad_dev_read;
174         }
175         dc->start_write = tmpll;
176
177         if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
178                 ti->error = "Invalid write delay";
179                 goto bad_dev_read;
180         }
181
182         if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
183                           dm_table_get_mode(ti->table), &dc->dev_write)) {
184                 ti->error = "Write device lookup failed";
185                 goto bad_dev_read;
186         }
187
188 out:
189         dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
190         if (!dc->delayed_pool) {
191                 DMERR("Couldn't create delayed bio pool.");
192                 goto bad_dev_write;
193         }
194
195         setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
196
197         INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
198         INIT_LIST_HEAD(&dc->delayed_bios);
199         mutex_init(&dc->timer_lock);
200         atomic_set(&dc->may_delay, 1);
201
202         ti->private = dc;
203         return 0;
204
205 bad_dev_write:
206         if (dc->dev_write)
207                 dm_put_device(ti, dc->dev_write);
208 bad_dev_read:
209         dm_put_device(ti, dc->dev_read);
210 bad:
211         kfree(dc);
212         return -EINVAL;
213 }
214
215 static void delay_dtr(struct dm_target *ti)
216 {
217         struct delay_c *dc = ti->private;
218
219         flush_workqueue(kdelayd_wq);
220
221         dm_put_device(ti, dc->dev_read);
222
223         if (dc->dev_write)
224                 dm_put_device(ti, dc->dev_write);
225
226         mempool_destroy(dc->delayed_pool);
227         kfree(dc);
228 }
229
230 static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
231 {
232         struct dm_delay_info *delayed;
233         unsigned long expires = 0;
234
235         if (!delay || !atomic_read(&dc->may_delay))
236                 return 1;
237
238         delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO);
239
240         delayed->context = dc;
241         delayed->bio = bio;
242         delayed->expires = expires = jiffies + (delay * HZ / 1000);
243
244         mutex_lock(&delayed_bios_lock);
245
246         if (bio_data_dir(bio) == WRITE)
247                 dc->writes++;
248         else
249                 dc->reads++;
250
251         list_add_tail(&delayed->list, &dc->delayed_bios);
252
253         mutex_unlock(&delayed_bios_lock);
254
255         queue_timeout(dc, expires);
256
257         return 0;
258 }
259
260 static void delay_presuspend(struct dm_target *ti)
261 {
262         struct delay_c *dc = ti->private;
263
264         atomic_set(&dc->may_delay, 0);
265         del_timer_sync(&dc->delay_timer);
266         flush_bios(flush_delayed_bios(dc, 1));
267 }
268
269 static void delay_resume(struct dm_target *ti)
270 {
271         struct delay_c *dc = ti->private;
272
273         atomic_set(&dc->may_delay, 1);
274 }
275
276 static int delay_map(struct dm_target *ti, struct bio *bio,
277                      union map_info *map_context)
278 {
279         struct delay_c *dc = ti->private;
280
281         if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
282                 bio->bi_bdev = dc->dev_write->bdev;
283                 bio->bi_sector = dc->start_write +
284                                  (bio->bi_sector - ti->begin);
285
286                 return delay_bio(dc, dc->write_delay, bio);
287         }
288
289         bio->bi_bdev = dc->dev_read->bdev;
290         bio->bi_sector = dc->start_read +
291                          (bio->bi_sector - ti->begin);
292
293         return delay_bio(dc, dc->read_delay, bio);
294 }
295
296 static int delay_status(struct dm_target *ti, status_type_t type,
297                         char *result, unsigned maxlen)
298 {
299         struct delay_c *dc = ti->private;
300         int sz = 0;
301
302         switch (type) {
303         case STATUSTYPE_INFO:
304                 DMEMIT("%u %u", dc->reads, dc->writes);
305                 break;
306
307         case STATUSTYPE_TABLE:
308                 DMEMIT("%s %llu %u", dc->dev_read->name,
309                        (unsigned long long) dc->start_read,
310                        dc->read_delay);
311                 if (dc->dev_write)
312                         DMEMIT(" %s %llu %u", dc->dev_write->name,
313                                (unsigned long long) dc->start_write,
314                                dc->write_delay);
315                 break;
316         }
317
318         return 0;
319 }
320
321 static struct target_type delay_target = {
322         .name        = "delay",
323         .version     = {1, 0, 2},
324         .module      = THIS_MODULE,
325         .ctr         = delay_ctr,
326         .dtr         = delay_dtr,
327         .map         = delay_map,
328         .presuspend  = delay_presuspend,
329         .resume      = delay_resume,
330         .status      = delay_status,
331 };
332
333 static int __init dm_delay_init(void)
334 {
335         int r = -ENOMEM;
336
337         kdelayd_wq = create_workqueue("kdelayd");
338         if (!kdelayd_wq) {
339                 DMERR("Couldn't start kdelayd");
340                 goto bad_queue;
341         }
342
343         delayed_cache = KMEM_CACHE(dm_delay_info, 0);
344         if (!delayed_cache) {
345                 DMERR("Couldn't create delayed bio cache.");
346                 goto bad_memcache;
347         }
348
349         r = dm_register_target(&delay_target);
350         if (r < 0) {
351                 DMERR("register failed %d", r);
352                 goto bad_register;
353         }
354
355         return 0;
356
357 bad_register:
358         kmem_cache_destroy(delayed_cache);
359 bad_memcache:
360         destroy_workqueue(kdelayd_wq);
361 bad_queue:
362         return r;
363 }
364
365 static void __exit dm_delay_exit(void)
366 {
367         dm_unregister_target(&delay_target);
368         kmem_cache_destroy(delayed_cache);
369         destroy_workqueue(kdelayd_wq);
370 }
371
372 /* Module hooks */
373 module_init(dm_delay_init);
374 module_exit(dm_delay_exit);
375
376 MODULE_DESCRIPTION(DM_NAME " delay target");
377 MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
378 MODULE_LICENSE("GPL");