2 * Weighted random policy for multipath.
5 * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
15 #include <asm/system.h>
16 #include <asm/uaccess.h>
17 #include <linux/types.h>
18 #include <linux/errno.h>
19 #include <linux/timer.h>
21 #include <linux/kernel.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/socket.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/proc_fs.h>
31 #include <linux/seq_file.h>
32 #include <linux/module.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <linux/skbuff.h>
42 #include <linux/notifier.h>
43 #include <linux/if_arp.h>
44 #include <linux/netfilter_ipv4.h>
46 #include <net/checksum.h>
47 #include <net/ip_fib.h>
48 #include <net/ip_mp_alg.h>
50 #define MULTIPATH_STATE_SIZE 15
52 struct multipath_candidate {
53 struct multipath_candidate *next;
58 struct multipath_dest {
59 struct list_head list;
61 const struct fib_nh *nh_info;
64 unsigned char prefixlen;
69 struct multipath_bucket {
70 struct list_head head;
74 struct multipath_route {
75 struct list_head list;
79 struct list_head dests;
84 /* state: primarily weight per route information */
85 static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
87 /* interface to random number generation */
88 static unsigned int RANDOM_SEED = 93186752;
90 static inline unsigned int random(unsigned int ubound)
92 static unsigned int a = 1588635695,
95 RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q);
96 return RANDOM_SEED % ubound;
99 static unsigned char __multipath_lookup_weight(const struct flowi *fl,
100 const struct rtable *rt)
102 const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
103 struct multipath_route *r;
104 struct multipath_route *target_route = NULL;
105 struct multipath_dest *d;
108 /* lookup the weight information for a certain route */
111 /* find state entry for gateway or add one if necessary */
112 list_for_each_entry_rcu(r, &state[state_idx].head, list) {
113 if (r->gw == rt->rt_gateway &&
114 r->oif == rt->idev->dev->ifindex) {
121 /* this should not happen... but we are prepared */
122 printk( KERN_CRIT"%s: missing state for gateway: %u and " \
123 "device %d\n", __FUNCTION__, rt->rt_gateway,
124 rt->idev->dev->ifindex);
128 /* find state entry for destination */
129 list_for_each_entry_rcu(d, &target_route->dests, list) {
130 __be32 targetnetwork = fl->fl4_dst &
131 inet_make_mask(d->prefixlen);
133 if ((targetnetwork & d->netmask) == d->network) {
134 weight = d->nh_info->nh_weight;
144 static void wrandom_init_state(void)
148 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
149 INIT_LIST_HEAD(&state[i].head);
150 spin_lock_init(&state[i].lock);
154 static void wrandom_select_route(const struct flowi *flp,
155 struct rtable *first,
159 struct rtable *decision;
160 struct multipath_candidate *first_mpc = NULL;
161 struct multipath_candidate *mpc, *last_mpc = NULL;
165 const size_t size_mpc = sizeof(struct multipath_candidate);
167 /* collect all candidates and identify their weights */
168 for (rt = rcu_dereference(first); rt;
169 rt = rcu_dereference(rt->u.dst.rt_next)) {
170 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
171 multipath_comparekeys(&rt->fl, flp)) {
172 struct multipath_candidate* mpc =
173 (struct multipath_candidate*)
174 kmalloc(size_mpc, GFP_ATOMIC);
179 power += __multipath_lookup_weight(flp, rt) * 10000;
188 last_mpc->next = mpc;
194 /* choose a weighted random candidate */
196 selector = random(power);
199 /* select candidate, adjust GC data and cleanup local state */
202 for (mpc = first_mpc; mpc; mpc = mpc->next) {
203 mpc->rt->u.dst.lastuse = jiffies;
204 if (last_power <= selector && selector < mpc->power)
207 last_power = mpc->power;
212 /* concurrent __multipath_flush may lead to !last_mpc */
215 decision->u.dst.__use++;
219 static void wrandom_set_nhinfo(__be32 network,
221 unsigned char prefixlen,
222 const struct fib_nh *nh)
224 const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
225 struct multipath_route *r, *target_route = NULL;
226 struct multipath_dest *d, *target_dest = NULL;
228 /* store the weight information for a certain route */
229 spin_lock_bh(&state[state_idx].lock);
231 /* find state entry for gateway or add one if necessary */
232 list_for_each_entry_rcu(r, &state[state_idx].head, list) {
233 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
240 const size_t size_rt = sizeof(struct multipath_route);
241 target_route = (struct multipath_route *)
242 kmalloc(size_rt, GFP_ATOMIC);
244 target_route->gw = nh->nh_gw;
245 target_route->oif = nh->nh_oif;
246 memset(&target_route->rcu, 0, sizeof(struct rcu_head));
247 INIT_LIST_HEAD(&target_route->dests);
249 list_add_rcu(&target_route->list, &state[state_idx].head);
252 /* find state entry for destination or add one if necessary */
253 list_for_each_entry_rcu(d, &target_route->dests, list) {
254 if (d->nh_info == nh) {
261 const size_t size_dst = sizeof(struct multipath_dest);
262 target_dest = (struct multipath_dest*)
263 kmalloc(size_dst, GFP_ATOMIC);
265 target_dest->nh_info = nh;
266 target_dest->network = network;
267 target_dest->netmask = netmask;
268 target_dest->prefixlen = prefixlen;
269 memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
271 list_add_rcu(&target_dest->list, &target_route->dests);
273 /* else: we already stored this info for another destination =>
277 spin_unlock_bh(&state[state_idx].lock);
280 static void __multipath_free(struct rcu_head *head)
282 struct multipath_route *rt = container_of(head, struct multipath_route,
287 static void __multipath_free_dst(struct rcu_head *head)
289 struct multipath_dest *dst = container_of(head,
290 struct multipath_dest,
295 static void wrandom_flush(void)
299 /* defere delete to all entries */
300 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
301 struct multipath_route *r;
303 spin_lock_bh(&state[i].lock);
304 list_for_each_entry_rcu(r, &state[i].head, list) {
305 struct multipath_dest *d;
306 list_for_each_entry_rcu(d, &r->dests, list) {
307 list_del_rcu(&d->list);
309 __multipath_free_dst);
311 list_del_rcu(&r->list);
316 spin_unlock_bh(&state[i].lock);
320 static struct ip_mp_alg_ops wrandom_ops = {
321 .mp_alg_select_route = wrandom_select_route,
322 .mp_alg_flush = wrandom_flush,
323 .mp_alg_set_nhinfo = wrandom_set_nhinfo,
326 static int __init wrandom_init(void)
328 wrandom_init_state();
330 return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
333 static void __exit wrandom_exit(void)
335 multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
338 module_init(wrandom_init);
339 module_exit(wrandom_exit);
340 MODULE_LICENSE("GPL");