2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 #include <net/netlink.h>
47 #include <net/nexthop.h>
49 #include "fib_lookup.h"
/* Debug-print hook: the macro expands to nothing, so FSprintk(...) uses compile away. */
51 #define FSprintk(a...)
/* Spinlock taken by the hash-table insert, remove and move paths in this file. */
53 static DEFINE_SPINLOCK(fib_info_lock);
/* Bucket array of fib_info entries, indexed by fib_info_hashfn() and chained via fib_hash. */
54 static struct hlist_head *fib_info_hash;
/* Bucket array indexed by fib_laddr_hashfn(fib_prefsrc) and chained via fib_lhash. */
55 static struct hlist_head *fib_info_laddrhash;
/* Bucket count of both tables above; the hash functions mask with (fib_hash_size - 1). */
56 static unsigned int fib_hash_size;
/* Compared against fib_hash_size in fib_create_info() to decide when to grow the tables. */
57 static unsigned int fib_info_cnt;
/* Fixed-size table of nexthops indexed by fib_devindex_hashfn(ifindex), chained via nh_hash. */
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
63 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Lock taken by the multipath code in this file around fib_power / nh_power updates. */
65 static DEFINE_SPINLOCK(fib_multipath_lock);
/*
 * for_nexthops(fi): opens a brace scope that declares nhsel and a const nh,
 * then loops over the fi->fib_nhs entries of fi->fib_nh.  The scope is left
 * open by the macro and has to be closed with endfor_nexthops(fi).
 */
67 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
68 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
/* change_nexthops(fi): same iteration, but nh is a writable pointer (const is cast away). */
70 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
71 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
73 #else /* CONFIG_IP_ROUTE_MULTIPATH */
75 /* Hope, that gcc will optimize it to get rid of dummy loop */
/* Non-multipath variants: the loop body runs exactly once, with nh at fi->fib_nh. */
77 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
78 for (nhsel=0; nhsel < 1; nhsel++)
80 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
81 for (nhsel=0; nhsel < 1; nhsel++)
83 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
/* Supplies the closing brace for the scope opened by for_nexthops / change_nexthops. */
85 #define endfor_nexthops(fi) }
/*
 * fib_props: table looked up by route type at its use sites in this file
 * (fib_props[cfg->fc_type], fib_props[fa->fa_type]).  Each entry carries an
 * .error code and a .scope.
 * NOTE(review): the array is sized RTA_MAX + 1 although its entries are
 * labelled with RTN_* route types -- confirm whether RTN_MAX + 1 was meant.
 * NOTE(review): the struct head and most .error initialisers are missing
 * from this listing.
 */
92 } fib_props[RTA_MAX + 1] = {
95 .scope = RT_SCOPE_NOWHERE,
99 .scope = RT_SCOPE_UNIVERSE,
103 .scope = RT_SCOPE_HOST,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
111 .scope = RT_SCOPE_LINK,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
131 .scope = RT_SCOPE_UNIVERSE,
135 .scope = RT_SCOPE_NOWHERE,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
144 /* Release a nexthop info record */
/*
 * Logs "Freeing alive fib_info" when fi->fib_dead is still 0, then visits
 * every nexthop of fi through change_nexthops().
 * NOTE(review): the per-nexthop statements and the final release of fi are
 * missing from this listing.
 */
146 void free_fib_info(struct fib_info *fi)
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
152 change_nexthops(fi) {
156 } endfor_nexthops(fi);
/*
 * Drop one tree reference on fi.  Runs under fib_info_lock (BH-disabling
 * variant).  When fi is non-NULL and the decremented fib_treeref reaches
 * zero, the entry is unlinked from fib_hash and fib_lhash and each nexthop
 * is unlinked from its nh_hash chain.
 * NOTE(review): several lines are missing from this listing, so any guard
 * on the fib_lhash unlink and the final teardown of fi are not shown.
 */
161 void fib_release_info(struct fib_info *fi)
163 spin_lock_bh(&fib_info_lock);
164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
176 spin_unlock_bh(&fib_info_lock);
/*
 * Compare the nexthops of fi against those of ofi (onh starts at
 * ofi->fib_nh).  A pair differs when oif, gateway, scope, weight (multipath
 * builds), tclassid (CONFIG_NET_CLS_ROUTE builds) or any flag other than
 * RTNH_F_DEAD differs.
 * The return statements are missing from this listing; fib_find_info()
 * treats a result of 0 as "nexthops match".
 */
179 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
181 const struct fib_nh *onh = ofi->fib_nh;
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187 #ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
190 #ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
196 } endfor_nexthops(fi);
/*
 * Bucket index for fi in fib_info_hash: XOR of fib_nhs, fib_protocol,
 * fib_prefsrc and fib_priority, folded with right shifts by 7 and 12 and
 * masked with (fib_hash_size - 1).
 */
200 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
205 val ^= fi->fib_protocol;
206 val ^= (__force u32)fi->fib_prefsrc;
207 val ^= fi->fib_priority;
/* Fold higher bits down before masking to the table size. */
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
/*
 * Search the fib_info_hash bucket of nfi for an entry with the same nexthop
 * count, protocol, prefsrc, priority, metrics, flags (RTNH_F_DEAD ignored)
 * and nexthops (nh_comp() == 0, skipped when fib_nhs is 0).
 * The return statements are missing from this listing; fib_create_info()
 * tests the result against NULL.
 */
212 static struct fib_info *fib_find_info(const struct fib_info *nfi)
214 struct hlist_head *head;
215 struct hlist_node *node;
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
222 hlist_for_each_entry(fi, node, head, fib_hash) {
/* Cheap reject first: entries with a different nexthop count. */
223 if (fi->fib_nhs != nfi->fib_nhs)
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
/*
 * Bucket index into fib_info_devhash for a device index: shifted copies of
 * val are XORed together and masked with (DEVINDEX_HASHSIZE - 1).
 * NOTE(review): the line that opens the return expression is missing from
 * this listing.
 */
238 static inline unsigned int fib_devindex_hashfn(unsigned int val)
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
247 /* Check that the gateway is already configured.
248 Used only by the redirect accept routine.
/*
 * Walk the fib_info_devhash bucket for dev->ifindex under fib_info_lock and
 * look for a nexthop that uses dev and is not flagged RTNH_F_DEAD.  The lock
 * is dropped both on a hit and after an unsuccessful walk.
 * NOTE(review): the middle term of the match condition (presumably the
 * comparison against gw) and both return statements are missing from this
 * listing.
 */
251 int ip_fib_check_default(__be32 gw, struct net_device *dev)
253 struct hlist_head *head;
254 struct hlist_node *node;
258 spin_lock(&fib_info_lock);
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
266 spin_unlock(&fib_info_lock);
271 spin_unlock(&fib_info_lock);
/*
 * Upper bound on the netlink message size needed to dump fi: the rtmsg
 * header, four 4-byte attributes, a nested metrics attribute with room for
 * RTAX_MAX 4-byte entries, and a nested attribute holding fib_nhs nexthops,
 * each sized as an rtnexthop plus two 4-byte attributes.
 * NOTE(review): the condition guarding the nexthop part and the return are
 * missing from this listing.
 */
276 static inline size_t fib_nlmsg_size(struct fib_info *fi)
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
288 /* Also handles the special case fib_nhs == 1 */
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
/*
 * Send a route notification for alias fa to the RTNLGRP_IPV4_ROUTE group.
 * Steps visible here: take the sequence number from info->nlh (0 when
 * absent), allocate an skb sized by fib_nlmsg_size(), fill it with
 * fib_dump_info(), hand it to rtnl_notify(), and report err through
 * rtnl_set_sk_err().
 * NOTE(review): the error checks and labels between these steps are missing
 * from this listing.
 */
303 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
304 int dst_len, u32 tb_id, struct nl_info *info)
307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
315 fa->fa_type, fa->fa_scope, key, dst_len,
316 fa->fa_tos, fa->fa_info, 0);
317 /* failure implies BUG in fib_nlmsg_size() */
320 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
321 info->nlh, GFP_KERNEL);
324 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
327 /* Return the first fib alias matching TOS with
328 * priority less than or equal to PRIO.
/*
 * Walk the alias list fah.  The first test looks at aliases whose fa_tos is
 * greater than tos; the second tests fib_priority >= prio, OR-ed with a
 * term that is missing from this listing.
 * NOTE(review): the header preceding this function says "less than or
 * equal to PRIO" while the visible test is ">= prio" -- confirm which one
 * is intended.
 */
330 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
333 struct fib_alias *fa;
334 list_for_each_entry(fa, fah, fa_list) {
335 if (fa->fa_tos > tos)
337 if (fa->fa_info->fib_priority >= prio ||
/*
 * Classify a route by the neighbour state of its first nexthop gateway.
 * The gateway is looked up in arp_tbl on fi->fib_dev; state stays NUD_NONE
 * unless n->nud_state is read.  The visible tests are, in order:
 * state == NUD_REACHABLE; NUD_VALID with order != *dflt; and NUD_VALID or
 * (*last_idx < 0 and order > *dflt).
 * NOTE(review): the outcome of each test (return values, updates through
 * last_resort and last_idx) is missing from this listing.
 */
345 int fib_detect_death(struct fib_info *fi, int order,
346 struct fib_info **last_resort, int *last_idx, int *dflt)
349 int state = NUD_NONE;
351 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
353 state = n->nud_state;
356 if (state==NUD_REACHABLE)
358 if ((state&NUD_VALID) && order != *dflt)
360 if ((state&NUD_VALID) ||
361 (*last_idx<0 && order > *dflt)) {
368 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Count the rtnexthop records in a buffer of remaining bytes by stepping
 * with rtnh_next() while rtnh_ok() holds.  Returns 0 when bytes are left
 * over after the walk, otherwise nhs.
 * NOTE(review): the declaration and increment of nhs are missing from this
 * listing.
 */
370 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
374 while (rtnh_ok(rtnh, remaining)) {
376 rtnh = rtnh_next(rtnh, &remaining);
379 /* leftover implies invalid nexthop configuration, discard it */
380 return remaining > 0 ? 0 : nhs;
/*
 * Fill the nexthops of fi from a sequence of rtnexthop records.  For each
 * nexthop, after an rtnh_ok() check:
 *   nh_flags  = bits of cfg->fc_flags above the low byte, OR rtnh_flags
 *   nh_oif    = rtnh_ifindex
 *   nh_weight = rtnh_hops + 1
 *   nh_gw     = RTA_GATEWAY attribute value, or 0 when absent
 *   nh_tclassid (CONFIG_NET_CLS_ROUTE) = RTA_FLOW value, or 0 when absent
 * NOTE(review): the failure path of the rtnh_ok() check and the return
 * value are missing from this listing.
 */
383 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
384 int remaining, struct fib_config *cfg)
386 change_nexthops(fi) {
389 if (!rtnh_ok(rtnh, remaining))
392 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
393 nh->nh_oif = rtnh->rtnh_ifindex;
394 nh->nh_weight = rtnh->rtnh_hops + 1;
396 attrlen = rtnh_attrlen(rtnh);
398 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
400 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
401 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
402 #ifdef CONFIG_NET_CLS_ROUTE
403 nla = nla_find(attrs, attrlen, RTA_FLOW);
404 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
408 rtnh = rtnh_next(rtnh, &remaining);
409 } endfor_nexthops(fi);
/*
 * Check whether the nexthop description in cfg is compatible with fi.
 * Visible checks:
 *   - a non-zero cfg->fc_priority is compared with fi->fib_priority;
 *   - when fc_oif or fc_gw is given, each non-zero one is compared with the
 *     first nexthop of fi;
 *   - in multipath builds, when cfg->fc_mp is set, the rtnexthop records
 *     are walked and a non-zero ifindex, a present RTA_GATEWAY and (with
 *     CONFIG_NET_CLS_ROUTE) a present RTA_FLOW are compared with the
 *     nexthop at the same position.
 * NOTE(review): the return statements are missing from this listing, so the
 * result convention cannot be stated from here.
 */
416 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
418 #ifdef CONFIG_IP_ROUTE_MULTIPATH
419 struct rtnexthop *rtnh;
423 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
426 if (cfg->fc_oif || cfg->fc_gw) {
427 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
428 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
433 #ifdef CONFIG_IP_ROUTE_MULTIPATH
434 if (cfg->fc_mp == NULL)
438 remaining = cfg->fc_mp_len;
443 if (!rtnh_ok(rtnh, remaining))
446 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
449 attrlen = rtnh_attrlen(rtnh);
451 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
453 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
454 if (nla && nla_get_be32(nla) != nh->nh_gw)
456 #ifdef CONFIG_NET_CLS_ROUTE
457 nla = nla_find(attrs, attrlen, RTA_FLOW);
458 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
463 rtnh = rtnh_next(rtnh, &remaining);
464 } endfor_nexthops(fi);
474 Semantics of nexthop are very messy for historical reasons.
475 We have to take into account, that:
476 a) gateway can be actually local interface address,
477 so that gatewayed route is direct.
478 b) gateway must be on-link address, possibly
479 described not by an ifaddr, but also by a direct route.
480 c) If both gateway and interface are specified, they should not
482 d) If we use tunnel routes, gateway could be not on-link.
484 Attempt to reconcile all of these (alas, self-contradictory) conditions
485 results in pretty ugly and hairy code with obscure logic.
487 I chose to generalize it instead, so that the size
488 of code does not increase practically, but it becomes
490 Every prefix is assigned a "scope" value: "host" is local address,
491 "link" is direct route,
492 [ ... "site" ... "interior" ... ]
493 and "universe" is true gateway route with global meaning.
495 Every prefix refers to a set of "nexthop"s (gw, oif),
496 where gw must have narrower scope. This recursion stops
497 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
498 which means that gw is forced to be on link.
500 Code is still hairy, but now it is apparently logically
501 consistent and very flexible. As a by-product it allows, for example,
502 the peaceful co-existence of independent exterior and interior
505 Normally it looks as follows.
507 {universe prefix} -> (gw, oif) [scope link]
509 |-> {link prefix} -> (gw, oif) [scope local]
511 |-> {local prefix} (terminal node)
/*
 * Validate one nexthop of a route being created and fill in its device and
 * scope.  Three paths are visible:
 *   - RTNH_F_ONLINK: the route scope is tested against RT_SCOPE_LINK, the
 *     gateway must be of type RTN_UNICAST, the device named by nh_oif must
 *     exist and be IFF_UP; nh_scope becomes RT_SCOPE_LINK.
 *   - gateway lookup: fib_lookup() is run with scope cfg->fc_scope + 1,
 *     raised to at least RT_SCOPE_LINK; the result type must be RTN_UNICAST
 *     or RTN_LOCAL; nh_scope, nh_oif and nh_dev come from the result, a
 *     device reference is taken, and the device must be IFF_UP.
 *   - last block (presumably the no-gateway case -- confirm): the in_device
 *     for nh_oif must exist and be IFF_UP; nh_dev is set and held, and
 *     nh_scope becomes RT_SCOPE_HOST.
 * NOTE(review): the second signature line, the branch structure and all
 * return/goto statements are missing from this listing.
 */
514 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
520 struct fib_result res;
522 #ifdef CONFIG_IP_ROUTE_PERVASIVE
523 if (nh->nh_flags&RTNH_F_PERVASIVE)
526 if (nh->nh_flags&RTNH_F_ONLINK) {
527 struct net_device *dev;
529 if (cfg->fc_scope >= RT_SCOPE_LINK)
531 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
533 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
535 if (!(dev->flags&IFF_UP))
539 nh->nh_scope = RT_SCOPE_LINK;
547 .scope = cfg->fc_scope + 1,
553 /* It is not necessary, but requires a bit of thinking */
554 if (fl.fl4_scope < RT_SCOPE_LINK)
555 fl.fl4_scope = RT_SCOPE_LINK;
556 if ((err = fib_lookup(&fl, &res)) != 0)
560 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
562 nh->nh_scope = res.scope;
563 nh->nh_oif = FIB_RES_OIF(res);
564 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
566 dev_hold(nh->nh_dev);
568 if (!(nh->nh_dev->flags & IFF_UP))
575 struct in_device *in_dev;
577 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
580 in_dev = inetdev_by_index(nh->nh_oif);
583 if (!(in_dev->dev->flags&IFF_UP)) {
587 nh->nh_dev = in_dev->dev;
588 dev_hold(nh->nh_dev);
589 nh->nh_scope = RT_SCOPE_HOST;
/*
 * Bucket index into fib_info_laddrhash for address val: the value XORed
 * with itself shifted right by 7 and by 14, masked with (fib_hash_size - 1).
 */
595 static inline unsigned int fib_laddr_hashfn(__be32 val)
597 unsigned int mask = (fib_hash_size - 1);
599 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
/*
 * Allocate a bucket array of the given size in bytes with GFP_KERNEL:
 * kmalloc() for requests up to PAGE_SIZE, whole pages from
 * __get_free_pages() otherwise.  Neither path zeroes the memory here;
 * fib_create_info() clears it with memset().
 */
602 static struct hlist_head *fib_hash_alloc(int bytes)
604 if (bytes <= PAGE_SIZE)
605 return kmalloc(bytes, GFP_KERNEL);
607 return (struct hlist_head *)
608 __get_free_pages(GFP_KERNEL, get_order(bytes));
/*
 * Release a bucket array obtained from fib_hash_alloc(); bytes selects the
 * release path with the same PAGE_SIZE threshold used at allocation time.
 * NOTE(review): the opening lines and the small-size release call are
 * missing from this listing.  fib_create_info() may pass a NULL hash on its
 * allocation-failure path, so confirm that NULL is handled.
 */
611 static void fib_hash_free(struct hlist_head *hash, int bytes)
616 if (bytes <= PAGE_SIZE)
619 free_pages((unsigned long) hash, get_order(bytes));
/*
 * Rehash every fib_info from the current tables into new_info_hash and
 * new_laddrhash, which hold new_size buckets each, then free the old
 * arrays.  The move runs under fib_info_lock; the old arrays are freed
 * after the lock is dropped.
 */
622 static void fib_hash_move(struct hlist_head *new_info_hash,
623 struct hlist_head *new_laddrhash,
624 unsigned int new_size)
626 struct hlist_head *old_info_hash, *old_laddrhash;
627 unsigned int old_size = fib_hash_size;
628 unsigned int i, bytes;
630 spin_lock_bh(&fib_info_lock);
631 old_info_hash = fib_info_hash;
632 old_laddrhash = fib_info_laddrhash;
/* Set the new size first: both hash functions below mask with fib_hash_size - 1. */
633 fib_hash_size = new_size;
635 for (i = 0; i < old_size; i++) {
636 struct hlist_head *head = &fib_info_hash[i];
637 struct hlist_node *node, *n;
640 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
641 struct hlist_head *dest;
642 unsigned int new_hash;
644 hlist_del(&fi->fib_hash);
646 new_hash = fib_info_hashfn(fi);
647 dest = &new_info_hash[new_hash];
648 hlist_add_head(&fi->fib_hash, dest);
651 fib_info_hash = new_info_hash;
/* Second pass: same procedure for the prefsrc-keyed table. */
653 for (i = 0; i < old_size; i++) {
654 struct hlist_head *lhead = &fib_info_laddrhash[i];
655 struct hlist_node *node, *n;
658 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
659 struct hlist_head *ldest;
660 unsigned int new_hash;
662 hlist_del(&fi->fib_lhash);
664 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
665 ldest = &new_laddrhash[new_hash];
666 hlist_add_head(&fi->fib_lhash, ldest);
669 fib_info_laddrhash = new_laddrhash;
671 spin_unlock_bh(&fib_info_lock);
/*
 * NOTE(review): the size uses sizeof(struct hlist_head *) although the
 * arrays hold struct hlist_head elements -- correct only while both sizes
 * are equal; confirm.
 */
673 bytes = old_size * sizeof(struct hlist_head *);
674 fib_hash_free(old_info_hash, bytes);
675 fib_hash_free(old_laddrhash, bytes);
/*
 * Build a fib_info from the route configuration cfg.  Stages visible here:
 *   1. the scope of the route type (fib_props[]) is compared with
 *      cfg->fc_scope;
 *   2. in multipath builds the nexthops in cfg->fc_mp are counted and the
 *      multipath algorithm id is range-checked;
 *   3. both hash tables are doubled once fib_info_cnt reaches
 *      fib_hash_size;
 *   4. fi is allocated zeroed with room for nhs nexthops, and protocol,
 *      flags, priority, prefsrc and metrics are copied from cfg;
 *   5. nexthops are filled from fc_mp (multipath) or from fc_oif, fc_gw,
 *      fc_flags and fc_flow;
 *   6. route types with an error code are tested for gateway, oif or
 *      multipath data; host-scope routes get a single nexthop with no
 *      gateway, other routes are checked per nexthop by fib_check_nh();
 *   7. a prefsrc that is not the local destination of the route itself must
 *      be of type RTN_LOCAL;
 *   8. fib_find_info() is consulted for an identical existing entry;
 *      otherwise fi is linked into fib_info_hash, fib_info_laddrhash (when
 *      prefsrc is set) and fib_info_devhash under fib_info_lock.
 * NOTE(review): the goto/return statements, error labels and several
 * declarations are missing from this listing, so the result of each check
 * is not shown.
 */
678 struct fib_info *fib_create_info(struct fib_config *cfg)
681 struct fib_info *fi = NULL;
682 struct fib_info *ofi;
685 /* Fast check to catch the most weird cases */
/*
 * NOTE(review): cfg->fc_type indexes fib_props[] with no range check
 * visible here -- confirm that it is validated before this point.
 */
686 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
689 #ifdef CONFIG_IP_ROUTE_MULTIPATH
691 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
696 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
697 if (cfg->fc_mp_alg) {
698 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
699 cfg->fc_mp_alg > IP_MP_ALG_MAX)
/* Grow both tables to twice the bucket count once they are as full as their size. */
705 if (fib_info_cnt >= fib_hash_size) {
706 unsigned int new_size = fib_hash_size << 1;
707 struct hlist_head *new_info_hash;
708 struct hlist_head *new_laddrhash;
/*
 * NOTE(review): sizeof(struct hlist_head *) is used for arrays of struct
 * hlist_head -- correct only while both sizes are equal; confirm.
 */
713 bytes = new_size * sizeof(struct hlist_head *);
714 new_info_hash = fib_hash_alloc(bytes);
715 new_laddrhash = fib_hash_alloc(bytes);
716 if (!new_info_hash || !new_laddrhash) {
717 fib_hash_free(new_info_hash, bytes);
718 fib_hash_free(new_laddrhash, bytes);
720 memset(new_info_hash, 0, bytes);
721 memset(new_laddrhash, 0, bytes);
723 fib_hash_move(new_info_hash, new_laddrhash, new_size);
730 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
735 fi->fib_protocol = cfg->fc_protocol;
736 fi->fib_flags = cfg->fc_flags;
737 fi->fib_priority = cfg->fc_priority;
738 fi->fib_prefsrc = cfg->fc_prefsrc;
741 change_nexthops(fi) {
743 } endfor_nexthops(fi)
/* Metric attribute types are 1-based; slot type - 1 of fib_metrics is written. */
749 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
750 int type = nla->nla_type;
755 fi->fib_metrics[type - 1] = nla_get_u32(nla);
761 #ifdef CONFIG_IP_ROUTE_MULTIPATH
762 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
/* Non-zero top-level oif, gw and flow are compared with the first parsed nexthop. */
765 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
767 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
769 #ifdef CONFIG_NET_CLS_ROUTE
770 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
777 struct fib_nh *nh = fi->fib_nh;
779 nh->nh_oif = cfg->fc_oif;
780 nh->nh_gw = cfg->fc_gw;
781 nh->nh_flags = cfg->fc_flags;
782 #ifdef CONFIG_NET_CLS_ROUTE
783 nh->nh_tclassid = cfg->fc_flow;
785 #ifdef CONFIG_IP_ROUTE_MULTIPATH
790 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
791 fi->fib_mp_alg = cfg->fc_mp_alg;
794 if (fib_props[cfg->fc_type].error) {
795 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
800 if (cfg->fc_scope > RT_SCOPE_HOST)
803 if (cfg->fc_scope == RT_SCOPE_HOST) {
804 struct fib_nh *nh = fi->fib_nh;
806 /* Local address is added. */
807 if (nhs != 1 || nh->nh_gw)
809 nh->nh_scope = RT_SCOPE_NOWHERE;
810 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
812 if (nh->nh_dev == NULL)
815 change_nexthops(fi) {
816 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
818 } endfor_nexthops(fi)
821 if (fi->fib_prefsrc) {
822 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
823 fi->fib_prefsrc != cfg->fc_dst)
824 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
829 if ((ofi = fib_find_info(fi)) != NULL) {
/* No identical entry found: publish fi in the three hash tables under the lock. */
837 atomic_inc(&fi->fib_clntref);
838 spin_lock_bh(&fib_info_lock);
839 hlist_add_head(&fi->fib_hash,
840 &fib_info_hash[fib_info_hashfn(fi)]);
841 if (fi->fib_prefsrc) {
842 struct hlist_head *head;
844 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
845 hlist_add_head(&fi->fib_lhash, head);
847 change_nexthops(fi) {
848 struct hlist_head *head;
853 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
854 head = &fib_info_devhash[hash];
855 hlist_add_head(&nh->nh_hash, head);
856 } endfor_nexthops(fi)
857 spin_unlock_bh(&fib_info_lock);
872 /* Note! fib_semantic_match intentionally uses RCU list functions. */
/*
 * Scan the alias list head for an entry usable for flow flp and fill res.
 * Visible steps per alias: a TOS comparison with flp->fl4_tos, a scope
 * comparison (fa_scope < flp->fl4_scope), marking the alias
 * FA_S_ACCESSED, and reading the error code of the route type from
 * fib_props[].  For the chosen entry the fib_info is tested for
 * RTNH_F_DEAD and its nexthops are scanned for one that is not dead and
 * whose oif matches flp->oif (any oif when flp->oif is 0).
 * On a match res receives prefixlen, nh_sel, type, scope and fi, and a
 * client reference is taken on res->fi.
 * NOTE(review): the last signature line, the continue/break/return
 * statements and the switch cases are missing from this listing.
 */
873 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
874 struct fib_result *res, __be32 zone, __be32 mask,
877 struct fib_alias *fa;
880 list_for_each_entry_rcu(fa, head, fa_list) {
884 fa->fa_tos != flp->fl4_tos)
887 if (fa->fa_scope < flp->fl4_scope)
890 fa->fa_state |= FA_S_ACCESSED;
892 err = fib_props[fa->fa_type].error;
894 struct fib_info *fi = fa->fa_info;
896 if (fi->fib_flags & RTNH_F_DEAD)
899 switch (fa->fa_type) {
906 if (nh->nh_flags&RTNH_F_DEAD)
908 if (!flp->oif || flp->oif == nh->nh_oif)
911 #ifdef CONFIG_IP_ROUTE_MULTIPATH
912 if (nhsel < fi->fib_nhs) {
925 printk(KERN_DEBUG "impossible 102\n");
934 res->prefixlen = prefixlen;
935 res->nh_sel = nh_sel;
936 res->type = fa->fa_type;
937 res->scope = fa->fa_scope;
938 res->fi = fa->fa_info;
939 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
941 res->network = zone & inet_make_mask(prefixlen);
943 atomic_inc(&res->fi->fib_clntref);
947 /* Find appropriate source address to this destination */
/*
 * Returns the result of inet_select_addr() for the device and gateway of
 * the lookup result res, at scope res->scope.
 */
949 __be32 __fib_res_prefsrc(struct fib_result *res)
951 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
/*
 * Serialise one route into skb as a netlink message of type event.
 * The rtmsg header gets AF_INET, dst_len, a zero src_len, table, type,
 * flags, scope and protocol.  Attributes emitted: RTA_TABLE always;
 * RTA_DST when dst_len is non-zero; RTA_PRIORITY when non-zero; the
 * metrics block; RTA_PREFSRC; for a single nexthop RTA_GATEWAY, RTA_OIF
 * and (CONFIG_NET_CLS_ROUTE) RTA_FLOW when non-zero; for several nexthops
 * (multipath builds) a nested RTA_MULTIPATH holding one rtnexthop record
 * per nexthop.
 * Returns nlmsg_end() on success; the failure path returns nlmsg_cancel().
 * NOTE(review): the nla_put_failure label, the guard on RTA_PREFSRC and the
 * NULL checks before the goto lines are missing from this listing.
 */
954 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
955 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
956 struct fib_info *fi, unsigned int flags)
958 struct nlmsghdr *nlh;
961 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
965 rtm = nlmsg_data(nlh);
966 rtm->rtm_family = AF_INET;
967 rtm->rtm_dst_len = dst_len;
968 rtm->rtm_src_len = 0;
/* The table id is written to the header field and also sent as a 32-bit attribute. */
970 rtm->rtm_table = tb_id;
971 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
972 rtm->rtm_type = type;
973 rtm->rtm_flags = fi->fib_flags;
974 rtm->rtm_scope = scope;
975 rtm->rtm_protocol = fi->fib_protocol;
977 if (rtm->rtm_dst_len)
978 NLA_PUT_BE32(skb, RTA_DST, dst);
980 if (fi->fib_priority)
981 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
983 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
984 goto nla_put_failure;
987 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
989 if (fi->fib_nhs == 1) {
990 if (fi->fib_nh->nh_gw)
991 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
993 if (fi->fib_nh->nh_oif)
994 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
995 #ifdef CONFIG_NET_CLS_ROUTE
996 if (fi->fib_nh[0].nh_tclassid)
997 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
1000 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1001 if (fi->fib_nhs > 1) {
1002 struct rtnexthop *rtnh;
1005 mp = nla_nest_start(skb, RTA_MULTIPATH);
1007 goto nla_put_failure;
1010 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1012 goto nla_put_failure;
/* Inverse of the parse in fib_get_nhs(): low flag byte and weight - 1 go on the wire. */
1014 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1015 rtnh->rtnh_hops = nh->nh_weight - 1;
1016 rtnh->rtnh_ifindex = nh->nh_oif;
1019 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1020 #ifdef CONFIG_NET_CLS_ROUTE
1021 if (nh->nh_tclassid)
1022 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1024 /* length of rtnetlink header + attributes */
1025 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1026 } endfor_nexthops(fi);
1028 nla_nest_end(skb, mp);
1031 return nlmsg_end(skb, nlh);
1034 return nlmsg_cancel(skb, nlh);
1039 - local address disappeared -> we must delete all the entries
1041 - device went down -> we must shutdown all nexthops going via it.
/*
 * Mark routes dead after a local address or a device goes away.
 *   - local != 0 (and the laddr table exists): every fib_info in the
 *     matching fib_info_laddrhash bucket whose fib_prefsrc equals local is
 *     flagged RTNH_F_DEAD.
 *   - device part: the fib_info_devhash bucket of dev->ifindex is walked;
 *     for each fib_info not just handled (prev_fi) all nexthops are
 *     visited, live nexthops on dev whose scope differs from the local
 *     scope variable are flagged RTNH_F_DEAD (in multipath builds their
 *     nh_power is subtracted from fib_power under fib_multipath_lock), and
 *     the fib_info itself is flagged dead once dead equals fib_nhs.
 * NOTE(review): the use of force outside the visible force > 1 test, the
 * dead counter updates and the return value are missing from this listing.
 */
1044 int fib_sync_down(__be32 local, struct net_device *dev, int force)
1047 int scope = RT_SCOPE_NOWHERE;
1052 if (local && fib_info_laddrhash) {
1053 unsigned int hash = fib_laddr_hashfn(local);
1054 struct hlist_head *head = &fib_info_laddrhash[hash];
1055 struct hlist_node *node;
1056 struct fib_info *fi;
1058 hlist_for_each_entry(fi, node, head, fib_lhash) {
1059 if (fi->fib_prefsrc == local) {
1060 fi->fib_flags |= RTNH_F_DEAD;
1067 struct fib_info *prev_fi = NULL;
1068 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1069 struct hlist_head *head = &fib_info_devhash[hash];
1070 struct hlist_node *node;
1073 hlist_for_each_entry(nh, node, head, nh_hash) {
1074 struct fib_info *fi = nh->nh_parent;
1077 BUG_ON(!fi->fib_nhs);
/* Skip nexthops of other devices sharing the bucket, and a fib_info handled last time. */
1078 if (nh->nh_dev != dev || fi == prev_fi)
1082 change_nexthops(fi) {
1083 if (nh->nh_flags&RTNH_F_DEAD)
1085 else if (nh->nh_dev == dev &&
1086 nh->nh_scope != scope) {
1087 nh->nh_flags |= RTNH_F_DEAD;
1088 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1089 spin_lock_bh(&fib_multipath_lock);
1090 fi->fib_power -= nh->nh_power;
1092 spin_unlock_bh(&fib_multipath_lock);
1096 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1097 if (force > 1 && nh->nh_dev == dev) {
1102 } endfor_nexthops(fi)
1103 if (dead == fi->fib_nhs) {
1104 fi->fib_flags |= RTNH_F_DEAD;
1113 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1116 Dead device goes up. We wake up dead nexthops.
1117 It makes sense only on multipath routes.
/*
 * Revive nexthops on dev after it comes back up.  After an IFF_UP test on
 * dev, the fib_info_devhash bucket of dev->ifindex is walked.  For each
 * fib_info not just handled (prev_fi), every nexthop is visited: nexthops
 * that are not dead, that have no device or a device that is down, or that
 * are not on dev (or whose dev has no in_device) are filtered by the
 * visible tests; the rest have RTNH_F_DEAD cleared under
 * fib_multipath_lock.  The RTNH_F_DEAD flag of the fib_info itself is
 * cleared afterwards.
 * NOTE(review): the continue statements, the alive counter, the condition
 * on the final flag clear and the return value are missing from this
 * listing.
 */
1120 int fib_sync_up(struct net_device *dev)
1122 struct fib_info *prev_fi;
1124 struct hlist_head *head;
1125 struct hlist_node *node;
1129 if (!(dev->flags&IFF_UP))
1133 hash = fib_devindex_hashfn(dev->ifindex);
1134 head = &fib_info_devhash[hash];
1137 hlist_for_each_entry(nh, node, head, nh_hash) {
1138 struct fib_info *fi = nh->nh_parent;
1141 BUG_ON(!fi->fib_nhs);
1142 if (nh->nh_dev != dev || fi == prev_fi)
1147 change_nexthops(fi) {
1148 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1152 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1154 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1157 spin_lock_bh(&fib_multipath_lock);
1159 nh->nh_flags &= ~RTNH_F_DEAD;
1160 spin_unlock_bh(&fib_multipath_lock);
1161 } endfor_nexthops(fi)
1164 fi->fib_flags &= ~RTNH_F_DEAD;
1173 The algorithm is suboptimal, but it provides really
1174 fair weighted route distribution.
/*
 * Pick one nexthop of the multipath route res->fi and store its index in
 * res->nh_sel.  Everything runs under fib_multipath_lock.
 *   - When fi->fib_power is not positive, the credits are refilled: each
 *     nexthop that is not dead gets nh_power = nh_weight, and fib_power
 *     becomes the sum of those weights.
 *   - w is taken as jiffies modulo fib_power.  The live nexthops with
 *     remaining nh_power are walked, subtracting nh_power from w; the first
 *     one that brings w to zero or below is selected.
 *   - If no nexthop is selected the lock is dropped at the end of the
 *     function.
 * NOTE(review): the handling of a zero power after the refill, the credit
 * decrement on selection and the fallback nh_sel value are missing from
 * this listing.
 */
1177 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1179 struct fib_info *fi = res->fi;
1182 spin_lock_bh(&fib_multipath_lock);
1183 if (fi->fib_power <= 0) {
1185 change_nexthops(fi) {
1186 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1187 power += nh->nh_weight;
1188 nh->nh_power = nh->nh_weight;
1190 } endfor_nexthops(fi);
1191 fi->fib_power = power;
1193 spin_unlock_bh(&fib_multipath_lock);
1194 /* Race condition: route has just become dead. */
1201 /* w should be random number [0..fi->fib_power-1],
1202 it is pretty bad approximation.
1205 w = jiffies % fi->fib_power;
1207 change_nexthops(fi) {
1208 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1209 if ((w -= nh->nh_power) <= 0) {
1212 res->nh_sel = nhsel;
1213 spin_unlock_bh(&fib_multipath_lock);
1217 } endfor_nexthops(fi);
1219 /* Race condition: route has just become dead. */
1221 spin_unlock_bh(&fib_multipath_lock);