Merge git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6
[linux-2.6] / net / ipv4 / multipath_wrandom.c
1 /*
2  *              Weighted random policy for multipath.
3  *
4  *
5  * Version:     $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
6  *
7  * Authors:     Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  */
14
15 #include <asm/system.h>
16 #include <asm/uaccess.h>
17 #include <linux/types.h>
18 #include <linux/errno.h>
19 #include <linux/timer.h>
20 #include <linux/mm.h>
21 #include <linux/kernel.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/socket.h>
25 #include <linux/in.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/proc_fs.h>
31 #include <linux/seq_file.h>
32 #include <linux/module.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/random.h>
36 #include <net/ip.h>
37 #include <net/protocol.h>
38 #include <linux/skbuff.h>
39 #include <net/sock.h>
40 #include <net/icmp.h>
41 #include <net/udp.h>
42 #include <net/raw.h>
43 #include <linux/notifier.h>
44 #include <linux/if_arp.h>
45 #include <linux/netfilter_ipv4.h>
46 #include <net/ipip.h>
47 #include <net/checksum.h>
48 #include <net/ip_fib.h>
49 #include <net/ip_mp_alg.h>
50
51 #define MULTIPATH_STATE_SIZE 15
52
53 struct multipath_candidate {
54         struct multipath_candidate      *next;
55         int                             power;
56         struct rtable                   *rt;
57 };
58
59 struct multipath_dest {
60         struct list_head        list;
61
62         const struct fib_nh     *nh_info;
63         __be32                  netmask;
64         __be32                  network;
65         unsigned char           prefixlen;
66
67         struct rcu_head         rcu;
68 };
69
70 struct multipath_bucket {
71         struct list_head        head;
72         spinlock_t              lock;
73 };
74
75 struct multipath_route {
76         struct list_head        list;
77
78         int                     oif;
79         __be32                  gw;
80         struct list_head        dests;
81
82         struct rcu_head         rcu;
83 };
84
85 /* state: primarily weight per route information */
86 static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
87
88 static unsigned char __multipath_lookup_weight(const struct flowi *fl,
89                                                const struct rtable *rt)
90 {
91         const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
92         struct multipath_route *r;
93         struct multipath_route *target_route = NULL;
94         struct multipath_dest *d;
95         int weight = 1;
96
97         /* lookup the weight information for a certain route */
98         rcu_read_lock();
99
100         /* find state entry for gateway or add one if necessary */
101         list_for_each_entry_rcu(r, &state[state_idx].head, list) {
102                 if (r->gw == rt->rt_gateway &&
103                     r->oif == rt->idev->dev->ifindex) {
104                         target_route = r;
105                         break;
106                 }
107         }
108
109         if (!target_route) {
110                 /* this should not happen... but we are prepared */
111                 printk( KERN_CRIT"%s: missing state for gateway: %u and " \
112                         "device %d\n", __FUNCTION__, rt->rt_gateway,
113                         rt->idev->dev->ifindex);
114                 goto out;
115         }
116
117         /* find state entry for destination */
118         list_for_each_entry_rcu(d, &target_route->dests, list) {
119                 __be32 targetnetwork = fl->fl4_dst &
120                         inet_make_mask(d->prefixlen);
121
122                 if ((targetnetwork & d->netmask) == d->network) {
123                         weight = d->nh_info->nh_weight;
124                         goto out;
125                 }
126         }
127
128 out:
129         rcu_read_unlock();
130         return weight;
131 }
132
133 static void wrandom_init_state(void)
134 {
135         int i;
136
137         for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
138                 INIT_LIST_HEAD(&state[i].head);
139                 spin_lock_init(&state[i].lock);
140         }
141 }
142
143 static void wrandom_select_route(const struct flowi *flp,
144                                  struct rtable *first,
145                                  struct rtable **rp)
146 {
147         struct rtable *rt;
148         struct rtable *decision;
149         struct multipath_candidate *first_mpc = NULL;
150         struct multipath_candidate *mpc, *last_mpc = NULL;
151         int power = 0;
152         int last_power;
153         int selector;
154         const size_t size_mpc = sizeof(struct multipath_candidate);
155
156         /* collect all candidates and identify their weights */
157         for (rt = rcu_dereference(first); rt;
158              rt = rcu_dereference(rt->u.dst.rt_next)) {
159                 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
160                     multipath_comparekeys(&rt->fl, flp)) {
161                         struct multipath_candidate* mpc =
162                                 (struct multipath_candidate*)
163                                 kmalloc(size_mpc, GFP_ATOMIC);
164
165                         if (!mpc)
166                                 return;
167
168                         power += __multipath_lookup_weight(flp, rt) * 10000;
169
170                         mpc->power = power;
171                         mpc->rt = rt;
172                         mpc->next = NULL;
173
174                         if (!first_mpc)
175                                 first_mpc = mpc;
176                         else
177                                 last_mpc->next = mpc;
178
179                         last_mpc = mpc;
180                 }
181         }
182
183         /* choose a weighted random candidate */
184         decision = first;
185         selector = random32() % power;
186         last_power = 0;
187
188         /* select candidate, adjust GC data and cleanup local state */
189         decision = first;
190         last_mpc = NULL;
191         for (mpc = first_mpc; mpc; mpc = mpc->next) {
192                 mpc->rt->u.dst.lastuse = jiffies;
193                 if (last_power <= selector && selector < mpc->power)
194                         decision = mpc->rt;
195
196                 last_power = mpc->power;
197                 kfree(last_mpc);
198                 last_mpc = mpc;
199         }
200
201         /* concurrent __multipath_flush may lead to !last_mpc */
202         kfree(last_mpc);
203
204         decision->u.dst.__use++;
205         *rp = decision;
206 }
207
208 static void wrandom_set_nhinfo(__be32 network,
209                                __be32 netmask,
210                                unsigned char prefixlen,
211                                const struct fib_nh *nh)
212 {
213         const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
214         struct multipath_route *r, *target_route = NULL;
215         struct multipath_dest *d, *target_dest = NULL;
216
217         /* store the weight information for a certain route */
218         spin_lock_bh(&state[state_idx].lock);
219
220         /* find state entry for gateway or add one if necessary */
221         list_for_each_entry_rcu(r, &state[state_idx].head, list) {
222                 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
223                         target_route = r;
224                         break;
225                 }
226         }
227
228         if (!target_route) {
229                 const size_t size_rt = sizeof(struct multipath_route);
230                 target_route = (struct multipath_route *)
231                         kmalloc(size_rt, GFP_ATOMIC);
232
233                 target_route->gw = nh->nh_gw;
234                 target_route->oif = nh->nh_oif;
235                 memset(&target_route->rcu, 0, sizeof(struct rcu_head));
236                 INIT_LIST_HEAD(&target_route->dests);
237
238                 list_add_rcu(&target_route->list, &state[state_idx].head);
239         }
240
241         /* find state entry for destination or add one if necessary */
242         list_for_each_entry_rcu(d, &target_route->dests, list) {
243                 if (d->nh_info == nh) {
244                         target_dest = d;
245                         break;
246                 }
247         }
248
249         if (!target_dest) {
250                 const size_t size_dst = sizeof(struct multipath_dest);
251                 target_dest = (struct multipath_dest*)
252                         kmalloc(size_dst, GFP_ATOMIC);
253
254                 target_dest->nh_info = nh;
255                 target_dest->network = network;
256                 target_dest->netmask = netmask;
257                 target_dest->prefixlen = prefixlen;
258                 memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
259
260                 list_add_rcu(&target_dest->list, &target_route->dests);
261         }
262         /* else: we already stored this info for another destination =>
263          * we are finished
264          */
265
266         spin_unlock_bh(&state[state_idx].lock);
267 }
268
269 static void __multipath_free(struct rcu_head *head)
270 {
271         struct multipath_route *rt = container_of(head, struct multipath_route,
272                                                   rcu);
273         kfree(rt);
274 }
275
276 static void __multipath_free_dst(struct rcu_head *head)
277 {
278         struct multipath_dest *dst = container_of(head,
279                                                   struct multipath_dest,
280                                                   rcu);
281         kfree(dst);
282 }
283
284 static void wrandom_flush(void)
285 {
286         int i;
287
288         /* defere delete to all entries */
289         for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
290                 struct multipath_route *r;
291
292                 spin_lock_bh(&state[i].lock);
293                 list_for_each_entry_rcu(r, &state[i].head, list) {
294                         struct multipath_dest *d;
295                         list_for_each_entry_rcu(d, &r->dests, list) {
296                                 list_del_rcu(&d->list);
297                                 call_rcu(&d->rcu,
298                                          __multipath_free_dst);
299                         }
300                         list_del_rcu(&r->list);
301                         call_rcu(&r->rcu,
302                                  __multipath_free);
303                 }
304
305                 spin_unlock_bh(&state[i].lock);
306         }
307 }
308
309 static struct ip_mp_alg_ops wrandom_ops = {
310         .mp_alg_select_route    =       wrandom_select_route,
311         .mp_alg_flush           =       wrandom_flush,
312         .mp_alg_set_nhinfo      =       wrandom_set_nhinfo,
313 };
314
315 static int __init wrandom_init(void)
316 {
317         wrandom_init_state();
318
319         return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
320 }
321
322 static void __exit wrandom_exit(void)
323 {
324         multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
325 }
326
327 module_init(wrandom_init);
328 module_exit(wrandom_exit);
329 MODULE_LICENSE("GPL");