Pull sony into release branch
[linux-2.6] / net / ipv4 / multipath_wrandom.c
1 /*
2  *              Weighted random policy for multipath.
3  *
4  *
5  * Version:     $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
6  *
7  * Authors:     Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8  *
9  *              This program is free software; you can redistribute it and/or
10  *              modify it under the terms of the GNU General Public License
11  *              as published by the Free Software Foundation; either version
12  *              2 of the License, or (at your option) any later version.
13  */
14
15 #include <asm/system.h>
16 #include <asm/uaccess.h>
17 #include <linux/types.h>
18 #include <linux/errno.h>
19 #include <linux/timer.h>
20 #include <linux/mm.h>
21 #include <linux/kernel.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/socket.h>
25 #include <linux/in.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/proc_fs.h>
31 #include <linux/seq_file.h>
32 #include <linux/module.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <net/ip.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
38 #include <net/sock.h>
39 #include <net/icmp.h>
40 #include <net/udp.h>
41 #include <net/raw.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <linux/netfilter_ipv4.h>
45 #include <net/ipip.h>
46 #include <net/checksum.h>
47 #include <net/ip_fib.h>
48 #include <net/ip_mp_alg.h>
49
50 #define MULTIPATH_STATE_SIZE 15
51
52 struct multipath_candidate {
53         struct multipath_candidate      *next;
54         int                             power;
55         struct rtable                   *rt;
56 };
57
58 struct multipath_dest {
59         struct list_head        list;
60
61         const struct fib_nh     *nh_info;
62         __be32                  netmask;
63         __be32                  network;
64         unsigned char           prefixlen;
65
66         struct rcu_head         rcu;
67 };
68
69 struct multipath_bucket {
70         struct list_head        head;
71         spinlock_t              lock;
72 };
73
74 struct multipath_route {
75         struct list_head        list;
76
77         int                     oif;
78         __be32                  gw;
79         struct list_head        dests;
80
81         struct rcu_head         rcu;
82 };
83
84 /* state: primarily weight per route information */
85 static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
86
87 /* interface to random number generation */
88 static unsigned int RANDOM_SEED = 93186752;
89
90 static inline unsigned int random(unsigned int ubound)
91 {
92         static unsigned int a = 1588635695,
93                 q = 2,
94                 r = 1117695901;
95         RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
96         return RANDOM_SEED % ubound;
97 }
98
99 static unsigned char __multipath_lookup_weight(const struct flowi *fl,
100                                                const struct rtable *rt)
101 {
102         const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
103         struct multipath_route *r;
104         struct multipath_route *target_route = NULL;
105         struct multipath_dest *d;
106         int weight = 1;
107
108         /* lookup the weight information for a certain route */
109         rcu_read_lock();
110
111         /* find state entry for gateway or add one if necessary */
112         list_for_each_entry_rcu(r, &state[state_idx].head, list) {
113                 if (r->gw == rt->rt_gateway &&
114                     r->oif == rt->idev->dev->ifindex) {
115                         target_route = r;
116                         break;
117                 }
118         }
119
120         if (!target_route) {
121                 /* this should not happen... but we are prepared */
122                 printk( KERN_CRIT"%s: missing state for gateway: %u and " \
123                         "device %d\n", __FUNCTION__, rt->rt_gateway,
124                         rt->idev->dev->ifindex);
125                 goto out;
126         }
127
128         /* find state entry for destination */
129         list_for_each_entry_rcu(d, &target_route->dests, list) {
130                 __be32 targetnetwork = fl->fl4_dst &
131                         inet_make_mask(d->prefixlen);
132
133                 if ((targetnetwork & d->netmask) == d->network) {
134                         weight = d->nh_info->nh_weight;
135                         goto out;
136                 }
137         }
138
139 out:
140         rcu_read_unlock();
141         return weight;
142 }
143
144 static void wrandom_init_state(void)
145 {
146         int i;
147
148         for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
149                 INIT_LIST_HEAD(&state[i].head);
150                 spin_lock_init(&state[i].lock);
151         }
152 }
153
154 static void wrandom_select_route(const struct flowi *flp,
155                                  struct rtable *first,
156                                  struct rtable **rp)
157 {
158         struct rtable *rt;
159         struct rtable *decision;
160         struct multipath_candidate *first_mpc = NULL;
161         struct multipath_candidate *mpc, *last_mpc = NULL;
162         int power = 0;
163         int last_power;
164         int selector;
165         const size_t size_mpc = sizeof(struct multipath_candidate);
166
167         /* collect all candidates and identify their weights */
168         for (rt = rcu_dereference(first); rt;
169              rt = rcu_dereference(rt->u.dst.rt_next)) {
170                 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
171                     multipath_comparekeys(&rt->fl, flp)) {
172                         struct multipath_candidate* mpc =
173                                 (struct multipath_candidate*)
174                                 kmalloc(size_mpc, GFP_ATOMIC);
175
176                         if (!mpc)
177                                 return;
178
179                         power += __multipath_lookup_weight(flp, rt) * 10000;
180
181                         mpc->power = power;
182                         mpc->rt = rt;
183                         mpc->next = NULL;
184
185                         if (!first_mpc)
186                                 first_mpc = mpc;
187                         else
188                                 last_mpc->next = mpc;
189
190                         last_mpc = mpc;
191                 }
192         }
193
194         /* choose a weighted random candidate */
195         decision = first;
196         selector = random(power);
197         last_power = 0;
198
199         /* select candidate, adjust GC data and cleanup local state */
200         decision = first;
201         last_mpc = NULL;
202         for (mpc = first_mpc; mpc; mpc = mpc->next) {
203                 mpc->rt->u.dst.lastuse = jiffies;
204                 if (last_power <= selector && selector < mpc->power)
205                         decision = mpc->rt;
206
207                 last_power = mpc->power;
208                 kfree(last_mpc);
209                 last_mpc = mpc;
210         }
211
212         /* concurrent __multipath_flush may lead to !last_mpc */
213         kfree(last_mpc);
214
215         decision->u.dst.__use++;
216         *rp = decision;
217 }
218
219 static void wrandom_set_nhinfo(__be32 network,
220                                __be32 netmask,
221                                unsigned char prefixlen,
222                                const struct fib_nh *nh)
223 {
224         const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
225         struct multipath_route *r, *target_route = NULL;
226         struct multipath_dest *d, *target_dest = NULL;
227
228         /* store the weight information for a certain route */
229         spin_lock_bh(&state[state_idx].lock);
230
231         /* find state entry for gateway or add one if necessary */
232         list_for_each_entry_rcu(r, &state[state_idx].head, list) {
233                 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
234                         target_route = r;
235                         break;
236                 }
237         }
238
239         if (!target_route) {
240                 const size_t size_rt = sizeof(struct multipath_route);
241                 target_route = (struct multipath_route *)
242                         kmalloc(size_rt, GFP_ATOMIC);
243
244                 target_route->gw = nh->nh_gw;
245                 target_route->oif = nh->nh_oif;
246                 memset(&target_route->rcu, 0, sizeof(struct rcu_head));
247                 INIT_LIST_HEAD(&target_route->dests);
248
249                 list_add_rcu(&target_route->list, &state[state_idx].head);
250         }
251
252         /* find state entry for destination or add one if necessary */
253         list_for_each_entry_rcu(d, &target_route->dests, list) {
254                 if (d->nh_info == nh) {
255                         target_dest = d;
256                         break;
257                 }
258         }
259
260         if (!target_dest) {
261                 const size_t size_dst = sizeof(struct multipath_dest);
262                 target_dest = (struct multipath_dest*)
263                         kmalloc(size_dst, GFP_ATOMIC);
264
265                 target_dest->nh_info = nh;
266                 target_dest->network = network;
267                 target_dest->netmask = netmask;
268                 target_dest->prefixlen = prefixlen;
269                 memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
270
271                 list_add_rcu(&target_dest->list, &target_route->dests);
272         }
273         /* else: we already stored this info for another destination =>
274          * we are finished
275          */
276
277         spin_unlock_bh(&state[state_idx].lock);
278 }
279
280 static void __multipath_free(struct rcu_head *head)
281 {
282         struct multipath_route *rt = container_of(head, struct multipath_route,
283                                                   rcu);
284         kfree(rt);
285 }
286
287 static void __multipath_free_dst(struct rcu_head *head)
288 {
289         struct multipath_dest *dst = container_of(head,
290                                                   struct multipath_dest,
291                                                   rcu);
292         kfree(dst);
293 }
294
295 static void wrandom_flush(void)
296 {
297         int i;
298
299         /* defere delete to all entries */
300         for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
301                 struct multipath_route *r;
302
303                 spin_lock_bh(&state[i].lock);
304                 list_for_each_entry_rcu(r, &state[i].head, list) {
305                         struct multipath_dest *d;
306                         list_for_each_entry_rcu(d, &r->dests, list) {
307                                 list_del_rcu(&d->list);
308                                 call_rcu(&d->rcu,
309                                          __multipath_free_dst);
310                         }
311                         list_del_rcu(&r->list);
312                         call_rcu(&r->rcu,
313                                  __multipath_free);
314                 }
315
316                 spin_unlock_bh(&state[i].lock);
317         }
318 }
319
320 static struct ip_mp_alg_ops wrandom_ops = {
321         .mp_alg_select_route    =       wrandom_select_route,
322         .mp_alg_flush           =       wrandom_flush,
323         .mp_alg_set_nhinfo      =       wrandom_set_nhinfo,
324 };
325
326 static int __init wrandom_init(void)
327 {
328         wrandom_init_state();
329
330         return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
331 }
332
333 static void __exit wrandom_exit(void)
334 {
335         multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
336 }
337
338 module_init(wrandom_init);
339 module_exit(wrandom_exit);
340 MODULE_LICENSE("GPL");