Merge branch 'for-linus' of git://git.o-hand.com/linux-rpurdie-leds
[linux-2.6] / drivers / md / dm-service-time.c
1 /*
2  * Copyright (C) 2007-2009 NEC Corporation.  All Rights Reserved.
3  *
4  * Module Author: Kiyoshi Ueda
5  *
6  * This file is released under the GPL.
7  *
8  * Throughput oriented path selector.
9  */
10
11 #include "dm.h"
12 #include "dm-path-selector.h"
13
14 #define DM_MSG_PREFIX   "multipath service-time"
15 #define ST_MIN_IO       1
16 #define ST_MAX_RELATIVE_THROUGHPUT      100
17 #define ST_MAX_RELATIVE_THROUGHPUT_SHIFT        7
18 #define ST_MAX_INFLIGHT_SIZE    ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)
19 #define ST_VERSION      "0.2.0"
20
21 struct selector {
22         struct list_head valid_paths;
23         struct list_head failed_paths;
24 };
25
26 struct path_info {
27         struct list_head list;
28         struct dm_path *path;
29         unsigned repeat_count;
30         unsigned relative_throughput;
31         atomic_t in_flight_size;        /* Total size of in-flight I/Os */
32 };
33
34 static struct selector *alloc_selector(void)
35 {
36         struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
37
38         if (s) {
39                 INIT_LIST_HEAD(&s->valid_paths);
40                 INIT_LIST_HEAD(&s->failed_paths);
41         }
42
43         return s;
44 }
45
46 static int st_create(struct path_selector *ps, unsigned argc, char **argv)
47 {
48         struct selector *s = alloc_selector();
49
50         if (!s)
51                 return -ENOMEM;
52
53         ps->context = s;
54         return 0;
55 }
56
57 static void free_paths(struct list_head *paths)
58 {
59         struct path_info *pi, *next;
60
61         list_for_each_entry_safe(pi, next, paths, list) {
62                 list_del(&pi->list);
63                 kfree(pi);
64         }
65 }
66
67 static void st_destroy(struct path_selector *ps)
68 {
69         struct selector *s = ps->context;
70
71         free_paths(&s->valid_paths);
72         free_paths(&s->failed_paths);
73         kfree(s);
74         ps->context = NULL;
75 }
76
77 static int st_status(struct path_selector *ps, struct dm_path *path,
78                      status_type_t type, char *result, unsigned maxlen)
79 {
80         unsigned sz = 0;
81         struct path_info *pi;
82
83         if (!path)
84                 DMEMIT("0 ");
85         else {
86                 pi = path->pscontext;
87
88                 switch (type) {
89                 case STATUSTYPE_INFO:
90                         DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
91                                pi->relative_throughput);
92                         break;
93                 case STATUSTYPE_TABLE:
94                         DMEMIT("%u %u ", pi->repeat_count,
95                                pi->relative_throughput);
96                         break;
97                 }
98         }
99
100         return sz;
101 }
102
103 static int st_add_path(struct path_selector *ps, struct dm_path *path,
104                        int argc, char **argv, char **error)
105 {
106         struct selector *s = ps->context;
107         struct path_info *pi;
108         unsigned repeat_count = ST_MIN_IO;
109         unsigned relative_throughput = 1;
110
111         /*
112          * Arguments: [<repeat_count> [<relative_throughput>]]
113          *      <repeat_count>: The number of I/Os before switching path.
114          *                      If not given, default (ST_MIN_IO) is used.
115          *      <relative_throughput>: The relative throughput value of
116          *                      the path among all paths in the path-group.
117          *                      The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
118          *                      If not given, minimum value '1' is used.
119          *                      If '0' is given, the path isn't selected while
120          *                      other paths having a positive value are
121          *                      available.
122          */
123         if (argc > 2) {
124                 *error = "service-time ps: incorrect number of arguments";
125                 return -EINVAL;
126         }
127
128         if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) {
129                 *error = "service-time ps: invalid repeat count";
130                 return -EINVAL;
131         }
132
133         if ((argc == 2) &&
134             (sscanf(argv[1], "%u", &relative_throughput) != 1 ||
135              relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
136                 *error = "service-time ps: invalid relative_throughput value";
137                 return -EINVAL;
138         }
139
140         /* allocate the path */
141         pi = kmalloc(sizeof(*pi), GFP_KERNEL);
142         if (!pi) {
143                 *error = "service-time ps: Error allocating path context";
144                 return -ENOMEM;
145         }
146
147         pi->path = path;
148         pi->repeat_count = repeat_count;
149         pi->relative_throughput = relative_throughput;
150         atomic_set(&pi->in_flight_size, 0);
151
152         path->pscontext = pi;
153
154         list_add_tail(&pi->list, &s->valid_paths);
155
156         return 0;
157 }
158
159 static void st_fail_path(struct path_selector *ps, struct dm_path *path)
160 {
161         struct selector *s = ps->context;
162         struct path_info *pi = path->pscontext;
163
164         list_move(&pi->list, &s->failed_paths);
165 }
166
167 static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
168 {
169         struct selector *s = ps->context;
170         struct path_info *pi = path->pscontext;
171
172         list_move_tail(&pi->list, &s->valid_paths);
173
174         return 0;
175 }
176
177 /*
178  * Compare the estimated service time of 2 paths, pi1 and pi2,
179  * for the incoming I/O.
180  *
181  * Returns:
182  * < 0 : pi1 is better
183  * 0   : no difference between pi1 and pi2
184  * > 0 : pi2 is better
185  *
186  * Description:
187  * Basically, the service time is estimated by:
188  *     ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
189  * To reduce the calculation, some optimizations are made.
190  * (See comments inline)
191  */
192 static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
193                            size_t incoming)
194 {
195         size_t sz1, sz2, st1, st2;
196
197         sz1 = atomic_read(&pi1->in_flight_size);
198         sz2 = atomic_read(&pi2->in_flight_size);
199
200         /*
201          * Case 1: Both have same throughput value. Choose less loaded path.
202          */
203         if (pi1->relative_throughput == pi2->relative_throughput)
204                 return sz1 - sz2;
205
206         /*
207          * Case 2a: Both have same load. Choose higher throughput path.
208          * Case 2b: One path has no throughput value. Choose the other one.
209          */
210         if (sz1 == sz2 ||
211             !pi1->relative_throughput || !pi2->relative_throughput)
212                 return pi2->relative_throughput - pi1->relative_throughput;
213
214         /*
215          * Case 3: Calculate service time. Choose faster path.
216          *         Service time using pi1:
217          *             st1 = (sz1 + incoming) / pi1->relative_throughput
218          *         Service time using pi2:
219          *             st2 = (sz2 + incoming) / pi2->relative_throughput
220          *
221          *         To avoid the division, transform the expression to use
222          *         multiplication.
223          *         Because ->relative_throughput > 0 here, if st1 < st2,
224          *         the expressions below are the same meaning:
225          *             (sz1 + incoming) / pi1->relative_throughput <
226          *                 (sz2 + incoming) / pi2->relative_throughput
227          *             (sz1 + incoming) * pi2->relative_throughput <
228          *                 (sz2 + incoming) * pi1->relative_throughput
229          *         So use the later one.
230          */
231         sz1 += incoming;
232         sz2 += incoming;
233         if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
234                      sz2 >= ST_MAX_INFLIGHT_SIZE)) {
235                 /*
236                  * Size may be too big for multiplying pi->relative_throughput
237                  * and overflow.
238                  * To avoid the overflow and mis-selection, shift down both.
239                  */
240                 sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
241                 sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
242         }
243         st1 = sz1 * pi2->relative_throughput;
244         st2 = sz2 * pi1->relative_throughput;
245         if (st1 != st2)
246                 return st1 - st2;
247
248         /*
249          * Case 4: Service time is equal. Choose higher throughput path.
250          */
251         return pi2->relative_throughput - pi1->relative_throughput;
252 }
253
254 static struct dm_path *st_select_path(struct path_selector *ps,
255                                       unsigned *repeat_count, size_t nr_bytes)
256 {
257         struct selector *s = ps->context;
258         struct path_info *pi = NULL, *best = NULL;
259
260         if (list_empty(&s->valid_paths))
261                 return NULL;
262
263         /* Change preferred (first in list) path to evenly balance. */
264         list_move_tail(s->valid_paths.next, &s->valid_paths);
265
266         list_for_each_entry(pi, &s->valid_paths, list)
267                 if (!best || (st_compare_load(pi, best, nr_bytes) < 0))
268                         best = pi;
269
270         if (!best)
271                 return NULL;
272
273         *repeat_count = best->repeat_count;
274
275         return best->path;
276 }
277
278 static int st_start_io(struct path_selector *ps, struct dm_path *path,
279                        size_t nr_bytes)
280 {
281         struct path_info *pi = path->pscontext;
282
283         atomic_add(nr_bytes, &pi->in_flight_size);
284
285         return 0;
286 }
287
288 static int st_end_io(struct path_selector *ps, struct dm_path *path,
289                      size_t nr_bytes)
290 {
291         struct path_info *pi = path->pscontext;
292
293         atomic_sub(nr_bytes, &pi->in_flight_size);
294
295         return 0;
296 }
297
298 static struct path_selector_type st_ps = {
299         .name           = "service-time",
300         .module         = THIS_MODULE,
301         .table_args     = 2,
302         .info_args      = 2,
303         .create         = st_create,
304         .destroy        = st_destroy,
305         .status         = st_status,
306         .add_path       = st_add_path,
307         .fail_path      = st_fail_path,
308         .reinstate_path = st_reinstate_path,
309         .select_path    = st_select_path,
310         .start_io       = st_start_io,
311         .end_io         = st_end_io,
312 };
313
314 static int __init dm_st_init(void)
315 {
316         int r = dm_register_path_selector(&st_ps);
317
318         if (r < 0)
319                 DMERR("register failed %d", r);
320
321         DMINFO("version " ST_VERSION " loaded");
322
323         return r;
324 }
325
326 static void __exit dm_st_exit(void)
327 {
328         int r = dm_unregister_path_selector(&st_ps);
329
330         if (r < 0)
331                 DMERR("unregister failed %d", r);
332 }
333
334 module_init(dm_st_init);
335 module_exit(dm_st_exit);
336
337 MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
338 MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
339 MODULE_LICENSE("GPL");