2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
45 #include <net/netlink.h>
46 #include <net/nexthop.h>
48 #include "fib_lookup.h"
/* Global FIB-info state: a spinlock protecting the tables below, two
 * dynamically sized hash tables (fib_info keyed by contents, and by
 * preferred local source address), the current table size, and a count
 * of live fib_info objects.
 * NOTE(review): this file is a sampled excerpt -- interior lines are
 * missing throughout; confirm details against the full source. */
50 static DEFINE_SPINLOCK(fib_info_lock);
51 static struct hlist_head *fib_info_hash;
52 static struct hlist_head *fib_info_laddrhash;
53 static unsigned int fib_hash_size;
54 static unsigned int fib_info_cnt;

/* Fixed-size (256-bucket) hash of nexthops keyed by device ifindex. */
56 #define DEVINDEX_HASHBITS 8
57 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
58 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60 #ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Serializes updates to multipath bookkeeping (nh_power/fib_power). */
62 static DEFINE_SPINLOCK(fib_multipath_lock);

/* Iterate over every nexthop of a fib_info.  for_nexthops() provides a
 * const cursor 'nh' (and index 'nhsel'); change_nexthops() a writable
 * one.  Each opens a scope that endfor_nexthops() must close. */
64 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
65 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

67 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
68 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

70 #else /* CONFIG_IP_ROUTE_MULTIPATH */

72 /* Hope, that gcc will optimize it to get rid of dummy loop */

/* Single-nexthop variants: the "loop" body executes exactly once. */
74 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
75 for (nhsel=0; nhsel < 1; nhsel++)

77 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
78 for (nhsel=0; nhsel < 1; nhsel++)

80 #endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops()/change_nexthops(). */
82 #define endfor_nexthops(fi) }
/* Per-route-type property table indexed by RTN_* type: an error code to
 * return for that type and the minimum scope it permits.
 * NOTE(review): the struct declaration and most initializer lines
 * (.error fields, several entry labels) are missing from this excerpt. */
89 } fib_props[RTN_MAX + 1] = {
92 .scope = RT_SCOPE_NOWHERE,
96 .scope = RT_SCOPE_UNIVERSE,
100 .scope = RT_SCOPE_HOST,
104 .scope = RT_SCOPE_LINK,
105 }, /* RTN_BROADCAST */
108 .scope = RT_SCOPE_LINK,
112 .scope = RT_SCOPE_UNIVERSE,
113 }, /* RTN_MULTICAST */
116 .scope = RT_SCOPE_UNIVERSE,
117 }, /* RTN_BLACKHOLE */
119 .error = -EHOSTUNREACH,
120 .scope = RT_SCOPE_UNIVERSE,
121 }, /* RTN_UNREACHABLE */
124 .scope = RT_SCOPE_UNIVERSE,
125 }, /* RTN_PROHIBIT */
128 .scope = RT_SCOPE_UNIVERSE,
132 .scope = RT_SCOPE_NOWHERE,
136 .scope = RT_SCOPE_NOWHERE,
137 }, /* RTN_XRESOLVE */
141 /* Release a nexthop info record */
/* Frees a fib_info, warning if its fib_dead flag was never set (i.e. it
 * still looks alive).  NOTE(review): the per-nexthop cleanup body, the
 * fib_info_cnt decrement and the final kfree are missing from this
 * excerpt -- confirm against the full source. */
143 void free_fib_info(struct fib_info *fi)
145 if (fi->fib_dead == 0) {
146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
149 change_nexthops(fi) {
153 } endfor_nexthops(fi);
/* Drop one tree reference on 'fi'; on the last reference, unlink it from
 * the info hash, the local-address hash and the per-device nexthop hash,
 * all under fib_info_lock.  NOTE(review): lines marking the object dead
 * and dropping the client ref appear to be missing from this excerpt. */
158 void fib_release_info(struct fib_info *fi)
160 spin_lock_bh(&fib_info_lock);
161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
168 hlist_del(&nh->nh_hash);
169 } endfor_nexthops(fi)
173 spin_unlock_bh(&fib_info_lock);
/* Compare the nexthop arrays of two fib_infos field by field, masking
 * RTNH_F_DEAD out of the flags comparison so liveness does not affect
 * equality.  NOTE(review): the mismatch/success return statements and
 * the 'onh++' advance are missing from this excerpt. */
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
178 const struct fib_nh *onh = ofi->fib_nh;
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
187 #ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 } endfor_nexthops(fi);
/* Fold a device ifindex into a DEVINDEX_HASHSIZE-bucket index by xoring
 * three DEVINDEX_HASHBITS-wide slices of the value and masking. */
197 static inline unsigned int fib_devindex_hashfn(unsigned int val)
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
/* Hash a fib_info by its identifying fields (nexthop count, protocol,
 * preferred source, priority, and each nexthop's oif) into the current
 * fib_info_hash table size. */
206 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
208 unsigned int mask = (fib_hash_size - 1);
209 unsigned int val = fi->fib_nhs;
211 val ^= fi->fib_protocol;
212 val ^= (__force u32)fi->fib_prefsrc;
213 val ^= fi->fib_priority;
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
/* Final avalanche: mix in shifted copies before masking to bucket. */
218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
/* Search fib_info_hash for an existing fib_info equal to 'nfi': same
 * namespace, nexthop count, protocol, prefsrc, priority, metrics, flags
 * (ignoring RTNH_F_DEAD) and nexthop contents (nh_comp).  Presumably
 * returns the match or NULL -- the return statements are missing from
 * this excerpt. */
221 static struct fib_info *fib_find_info(const struct fib_info *nfi)
223 struct hlist_head *head;
224 struct hlist_node *node;
228 hash = fib_info_hashfn(nfi);
229 head = &fib_info_hash[hash];
231 hlist_for_each_entry(fi, node, head, fib_hash) {
232 if (fi->fib_net != nfi->fib_net)
234 if (fi->fib_nhs != nfi->fib_nhs)
236 if (nfi->fib_protocol == fi->fib_protocol &&
237 nfi->fib_prefsrc == fi->fib_prefsrc &&
238 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
249 /* Check, that the gateway is already configured.
250    Used only by redirect accept routine.
/* Scan the per-device nexthop hash for a live (non-RTNH_F_DEAD) nexthop
 * on 'dev'.  NOTE(review): the comparison of nh_gw against 'gw' and the
 * scope check are missing from this excerpt, as are the return values.
 * Uses the non-_bh spin_lock variant, matching the original source. */
253 int ip_fib_check_default(__be32 gw, struct net_device *dev)
255 struct hlist_head *head;
256 struct hlist_node *node;
260 spin_lock(&fib_info_lock);
262 hash = fib_devindex_hashfn(dev->ifindex);
263 head = &fib_info_devhash[hash];
264 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev &&
267 !(nh->nh_flags&RTNH_F_DEAD)) {
268 spin_unlock(&fib_info_lock);
273 spin_unlock(&fib_info_lock);
/* Compute a worst-case netlink message size for dumping 'fi': the rtmsg
 * header, the fixed per-route attributes, the nested metrics, and (when
 * there are nexthops) a nested RTA_MULTIPATH attribute sized per hop. */
278 static inline size_t fib_nlmsg_size(struct fib_info *fi)
280 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
281 + nla_total_size(4) /* RTA_TABLE */
282 + nla_total_size(4) /* RTA_DST */
283 + nla_total_size(4) /* RTA_PRIORITY */
284 + nla_total_size(4); /* RTA_PREFSRC */
286 /* space for nested metrics */
287 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
290 /* Also handles the special case fib_nhs == 1 */
292 /* each nexthop is packed in an attribute */
293 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
295 /* may contain flow and gateway attribute */
296 nhsize += 2 * nla_total_size(4);
298 /* all nexthops are packed in a nested attribute */
299 payload += nla_total_size(fi->fib_nhs * nhsize);
/* Broadcast a route add/delete event to RTNLGRP_IPV4_ROUTE listeners:
 * allocate an skb sized by fib_nlmsg_size(), fill it via fib_dump_info()
 * and hand it to rtnl_notify(); on failure report via rtnl_set_sk_err().
 * NOTE(review): allocation-failure and error-goto lines are missing from
 * this excerpt. */
305 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
306 int dst_len, u32 tb_id, struct nl_info *info,
307 unsigned int nlm_flags)
310 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
313 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
317 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
318 fa->fa_type, fa->fa_scope, key, dst_len,
319 fa->fa_tos, fa->fa_info, nlm_flags);
321 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
322 WARN_ON(err == -EMSGSIZE);
326 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
327 info->nlh, GFP_KERNEL);
330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
333 /* Return the first fib alias matching TOS with
334  * priority less than or equal to PRIO.
/* Walk the alias list (sorted by descending TOS) and return the first
 * entry with fa_tos == tos whose priority condition matches; the
 * early-exit on fa_tos < tos and the return statements are missing from
 * this excerpt. */
336 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
339 struct fib_alias *fa;
340 list_for_each_entry(fa, fah, fa_list) {
341 if (fa->fa_tos > tos)
343 if (fa->fa_info->fib_priority >= prio ||
/* Probe ARP state of the first nexthop's gateway to decide whether this
 * route looks dead, tracking the best "last resort" candidate seen so
 * far via *last_resort/*last_idx.  NOTE(review): neigh_release and the
 * return paths are missing from this excerpt. */
351 int fib_detect_death(struct fib_info *fi, int order,
352 struct fib_info **last_resort, int *last_idx, int dflt)
355 int state = NUD_NONE;
357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
359 state = n->nud_state;
362 if (state==NUD_REACHABLE)
364 if ((state&NUD_VALID) && order != dflt)
366 if ((state&NUD_VALID) ||
367 (*last_idx<0 && order > dflt)) {
374 #ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Count the rtnexthop entries in a netlink RTA_MULTIPATH payload.
 * Returns 0 if trailing bytes remain (malformed configuration). */
376 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
380 while (rtnh_ok(rtnh, remaining)) {
382 rtnh = rtnh_next(rtnh, &remaining);
385 /* leftover implies invalid nexthop configuration, discard it */
386 return remaining > 0 ? 0 : nhs;
/* Populate fi's nexthop array from an RTA_MULTIPATH payload: per hop,
 * copy flags (low byte from rtnh, upper bits from fc_flags), oif and
 * weight, then pull optional RTA_GATEWAY / RTA_FLOW sub-attributes.
 * NOTE(review): the error returns and final success return are missing
 * from this excerpt. */
389 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
390 int remaining, struct fib_config *cfg)
392 change_nexthops(fi) {
395 if (!rtnh_ok(rtnh, remaining))
398 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
399 nh->nh_oif = rtnh->rtnh_ifindex;
/* rtnh_hops is stored as weight-1 on the wire; undo that here. */
400 nh->nh_weight = rtnh->rtnh_hops + 1;
402 attrlen = rtnh_attrlen(rtnh);
404 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
406 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
407 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
408 #ifdef CONFIG_NET_CLS_ROUTE
409 nla = nla_find(attrs, attrlen, RTA_FLOW);
410 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
414 rtnh = rtnh_next(rtnh, &remaining);
415 } endfor_nexthops(fi);
/* Decide whether the user-supplied route config 'cfg' matches the
 * nexthop(s) of an existing fib_info: priority first, then either the
 * single oif/gw pair or, for multipath, each rtnexthop entry (ifindex,
 * gateway, and with CONFIG_NET_CLS_ROUTE the flow classid).
 * NOTE(review): the match/mismatch return statements are missing from
 * this excerpt. */
422 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
424 #ifdef CONFIG_IP_ROUTE_MULTIPATH
425 struct rtnexthop *rtnh;
429 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
432 if (cfg->fc_oif || cfg->fc_gw) {
433 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
434 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
439 #ifdef CONFIG_IP_ROUTE_MULTIPATH
440 if (cfg->fc_mp == NULL)
444 remaining = cfg->fc_mp_len;
449 if (!rtnh_ok(rtnh, remaining))
452 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
455 attrlen = rtnh_attrlen(rtnh);
457 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
459 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
460 if (nla && nla_get_be32(nla) != nh->nh_gw)
462 #ifdef CONFIG_NET_CLS_ROUTE
463 nla = nla_find(attrs, attrlen, RTA_FLOW);
464 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
469 rtnh = rtnh_next(rtnh, &remaining);
470 } endfor_nexthops(fi);
480 Semantics of nexthop is very messy by historical reasons.
481 We have to take into account, that:
482 a) gateway can be actually local interface address,
483 so that gatewayed route is direct.
484 b) gateway must be on-link address, possibly
485 described not by an ifaddr, but also by a direct route.
486 c) If both gateway and interface are specified, they should not
488 d) If we use tunnel routes, gateway could be not on-link.
490 Attempt to reconcile all of these (alas, self-contradictory) conditions
491 results in pretty ugly and hairy code with obscure logic.
493 I chose to generalize it instead, so that the size
494 of code does not increase practically, but it becomes
496 Every prefix is assigned a "scope" value: "host" is local address,
497 "link" is direct route,
498 [ ... "site" ... "interior" ... ]
499 and "universe" is true gateway route with global meaning.
501 Every prefix refers to a set of "nexthop"s (gw, oif),
502 where gw must have narrower scope. This recursion stops
503 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
504 which means that gw is forced to be on link.
506 Code is still hairy, but now it is apparently logically
507 consistent and very flexible. F.e. as by-product it allows
508 to co-exist in peace independent exterior and interior
511 Normally it looks like the following.
513 {universe prefix} -> (gw, oif) [scope link]
515 |-> {link prefix} -> (gw, oif) [scope local]
517 |-> {local prefix} (terminal node)
/* Validate one nexthop of a route being created and resolve its device
 * and scope.  Three cases: RTNH_F_ONLINK (gateway forced on-link via the
 * given oif), a gateway that must itself be reachable by a narrower-
 * scope FIB lookup, and no gateway at all (direct route via oif).
 * NOTE(review): many error-return lines and the fl/flowi setup are
 * missing from this excerpt -- confirm against the full source. */
520 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
526 net = cfg->fc_nlinfo.nl_net;
528 struct fib_result res;
530 #ifdef CONFIG_IP_ROUTE_PERVASIVE
531 if (nh->nh_flags&RTNH_F_PERVASIVE)
534 if (nh->nh_flags&RTNH_F_ONLINK) {
535 struct net_device *dev;
537 if (cfg->fc_scope >= RT_SCOPE_LINK)
539 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
541 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
543 if (!(dev->flags&IFF_UP))
547 nh->nh_scope = RT_SCOPE_LINK;
/* Recursive lookup for the gateway must use a strictly narrower scope. */
555 .scope = cfg->fc_scope + 1,
561 /* It is not necessary, but requires a bit of thinking */
562 if (fl.fl4_scope < RT_SCOPE_LINK)
563 fl.fl4_scope = RT_SCOPE_LINK;
564 if ((err = fib_lookup(net, &fl, &res)) != 0)
568 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
570 nh->nh_scope = res.scope;
571 nh->nh_oif = FIB_RES_OIF(res);
572 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
574 dev_hold(nh->nh_dev);
576 if (!(nh->nh_dev->flags & IFF_UP))
/* No gateway: direct route, device comes straight from nh_oif. */
583 struct in_device *in_dev;
585 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
588 in_dev = inetdev_by_index(net, nh->nh_oif);
591 if (!(in_dev->dev->flags&IFF_UP)) {
595 nh->nh_dev = in_dev->dev;
596 dev_hold(nh->nh_dev);
597 nh->nh_scope = RT_SCOPE_HOST;
/* Hash a preferred local source address into the laddr table by xoring
 * shifted copies of the value and masking to the current table size. */
603 static inline unsigned int fib_laddr_hashfn(__be32 val)
605 unsigned int mask = (fib_hash_size - 1);
607 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
/* Allocate a zeroed hash table: kzalloc for up to a page, otherwise
 * whole zeroed pages via __get_free_pages. */
610 static struct hlist_head *fib_hash_alloc(int bytes)
612 if (bytes <= PAGE_SIZE)
613 return kzalloc(bytes, GFP_KERNEL);
615 return (struct hlist_head *)
616 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
/* Free a table from fib_hash_alloc(), choosing kfree vs free_pages by
 * the same size threshold.  (The kfree branch line is missing from this
 * excerpt; the original tolerates a NULL hash.) */
619 static void fib_hash_free(struct hlist_head *hash, int bytes)
624 if (bytes <= PAGE_SIZE)
627 free_pages((unsigned long) hash, get_order(bytes));
/* Rehash every fib_info from the old info and laddr tables into the
 * freshly allocated larger tables, swap the table pointers under
 * fib_info_lock, then free the old tables.
 * NOTE(review): 'bytes' uses sizeof(struct hlist_head *); this matches
 * the allocation in fib_create_info, but pointer-size vs struct-size
 * looks suspect -- verify against the upstream fix history. */
630 static void fib_hash_move(struct hlist_head *new_info_hash,
631 struct hlist_head *new_laddrhash,
632 unsigned int new_size)
634 struct hlist_head *old_info_hash, *old_laddrhash;
635 unsigned int old_size = fib_hash_size;
636 unsigned int i, bytes;
638 spin_lock_bh(&fib_info_lock);
639 old_info_hash = fib_info_hash;
640 old_laddrhash = fib_info_laddrhash;
/* New size must be published first: fib_info_hashfn() masks with it. */
641 fib_hash_size = new_size;
643 for (i = 0; i < old_size; i++) {
644 struct hlist_head *head = &fib_info_hash[i];
645 struct hlist_node *node, *n;
648 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
649 struct hlist_head *dest;
650 unsigned int new_hash;
652 hlist_del(&fi->fib_hash);
654 new_hash = fib_info_hashfn(fi);
655 dest = &new_info_hash[new_hash];
656 hlist_add_head(&fi->fib_hash, dest);
659 fib_info_hash = new_info_hash;
661 for (i = 0; i < old_size; i++) {
662 struct hlist_head *lhead = &fib_info_laddrhash[i];
663 struct hlist_node *node, *n;
666 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
667 struct hlist_head *ldest;
668 unsigned int new_hash;
670 hlist_del(&fi->fib_lhash);
672 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
673 ldest = &new_laddrhash[new_hash];
674 hlist_add_head(&fi->fib_lhash, ldest);
677 fib_info_laddrhash = new_laddrhash;
679 spin_unlock_bh(&fib_info_lock);
681 bytes = old_size * sizeof(struct hlist_head *);
682 fib_hash_free(old_info_hash, bytes);
683 fib_hash_free(old_laddrhash, bytes);
/* Build (or find and reuse) a fib_info from a route configuration:
 * validate type/scope, grow the hash tables if needed, allocate the
 * fib_info plus its nexthop array, parse metrics and nexthops, resolve
 * each nexthop (fib_check_nh), dedupe via fib_find_info(), and finally
 * link the new object into the info/laddr/devindex hashes.
 * NOTE(review): this excerpt drops many lines -- error gotos, the
 * metrics bounds check, refcount init, and the failure/cleanup tail.
 * Comments below describe only what the visible lines show. */
686 struct fib_info *fib_create_info(struct fib_config *cfg)
689 struct fib_info *fi = NULL;
690 struct fib_info *ofi;
692 struct net *net = cfg->fc_nlinfo.nl_net;
694 /* Fast check to catch the most weird cases */
695 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
698 #ifdef CONFIG_IP_ROUTE_MULTIPATH
700 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
/* Double the hash tables once the object count reaches the table size. */
707 if (fib_info_cnt >= fib_hash_size) {
708 unsigned int new_size = fib_hash_size << 1;
709 struct hlist_head *new_info_hash;
710 struct hlist_head *new_laddrhash;
715 bytes = new_size * sizeof(struct hlist_head *);
716 new_info_hash = fib_hash_alloc(bytes);
717 new_laddrhash = fib_hash_alloc(bytes);
718 if (!new_info_hash || !new_laddrhash) {
/* fib_hash_free tolerates NULL, so freeing both is safe here. */
719 fib_hash_free(new_info_hash, bytes);
720 fib_hash_free(new_laddrhash, bytes);
722 fib_hash_move(new_info_hash, new_laddrhash, new_size);
/* fib_info and its flexible nexthop array in one zeroed allocation. */
728 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
734 fi->fib_protocol = cfg->fc_protocol;
735 fi->fib_flags = cfg->fc_flags;
736 fi->fib_priority = cfg->fc_priority;
737 fi->fib_prefsrc = cfg->fc_prefsrc;
740 change_nexthops(fi) {
742 } endfor_nexthops(fi)
/* Copy RTA_METRICS sub-attributes into fib_metrics[] (1-based types). */
748 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
749 int type = nla_type(nla);
754 fi->fib_metrics[type - 1] = nla_get_u32(nla);
760 #ifdef CONFIG_IP_ROUTE_MULTIPATH
761 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
/* Multipath config must agree with any top-level oif/gw/flow given. */
764 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
766 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
768 #ifdef CONFIG_NET_CLS_ROUTE
769 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
/* Single-nexthop case: fill the one nexthop straight from cfg. */
776 struct fib_nh *nh = fi->fib_nh;
778 nh->nh_oif = cfg->fc_oif;
779 nh->nh_gw = cfg->fc_gw;
780 nh->nh_flags = cfg->fc_flags;
781 #ifdef CONFIG_NET_CLS_ROUTE
782 nh->nh_tclassid = cfg->fc_flow;
784 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Error-type routes (blackhole etc.) must not carry nexthop data. */
789 if (fib_props[cfg->fc_type].error) {
790 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
795 if (cfg->fc_scope > RT_SCOPE_HOST)
798 if (cfg->fc_scope == RT_SCOPE_HOST) {
799 struct fib_nh *nh = fi->fib_nh;
801 /* Local address is added. */
802 if (nhs != 1 || nh->nh_gw)
804 nh->nh_scope = RT_SCOPE_NOWHERE;
805 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
807 if (nh->nh_dev == NULL)
810 change_nexthops(fi) {
811 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
813 } endfor_nexthops(fi)
/* prefsrc must be a local address (or the destination of an RTN_LOCAL). */
816 if (fi->fib_prefsrc) {
817 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
818 fi->fib_prefsrc != cfg->fc_dst)
819 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
/* Identical fib_info already exists: reuse it instead of linking ours. */
824 if ((ofi = fib_find_info(fi)) != NULL) {
832 atomic_inc(&fi->fib_clntref);
833 spin_lock_bh(&fib_info_lock);
834 hlist_add_head(&fi->fib_hash,
835 &fib_info_hash[fib_info_hashfn(fi)]);
836 if (fi->fib_prefsrc) {
837 struct hlist_head *head;
839 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
840 hlist_add_head(&fi->fib_lhash, head);
842 change_nexthops(fi) {
843 struct hlist_head *head;
848 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
849 head = &fib_info_devhash[hash];
850 hlist_add_head(&nh->nh_hash, head);
851 } endfor_nexthops(fi)
852 spin_unlock_bh(&fib_info_lock);
867 /* Note! fib_semantic_match intentionally uses RCU list functions. */
/* Given a list of aliases matching a prefix, pick the first usable one
 * (TOS and scope checks), select a live nexthop honoring flp->oif, and
 * fill in *res, taking a client reference on the chosen fib_info.
 * NOTE(review): continue/goto lines, the nh_sel assignment for the
 * single-path case, and several returns are missing from this excerpt. */
868 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
869 struct fib_result *res, __be32 zone, __be32 mask,
872 struct fib_alias *fa;
875 list_for_each_entry_rcu(fa, head, fa_list) {
879 fa->fa_tos != flp->fl4_tos)
882 if (fa->fa_scope < flp->fl4_scope)
885 fa->fa_state |= FA_S_ACCESSED;
887 err = fib_props[fa->fa_type].error;
889 struct fib_info *fi = fa->fa_info;
891 if (fi->fib_flags & RTNH_F_DEAD)
894 switch (fa->fa_type) {
901 if (nh->nh_flags&RTNH_F_DEAD)
903 if (!flp->oif || flp->oif == nh->nh_oif)
906 #ifdef CONFIG_IP_ROUTE_MULTIPATH
907 if (nhsel < fi->fib_nhs) {
920 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
930 res->prefixlen = prefixlen;
931 res->nh_sel = nh_sel;
932 res->type = fa->fa_type;
933 res->scope = fa->fa_scope;
934 res->fi = fa->fa_info;
/* Caller owns a reference on res->fi; fib_info_put when done. */
935 atomic_inc(&res->fi->fib_clntref);
939 /* Find appropriate source address to this destination */
/* Delegate to inet_select_addr() using the result's device, gateway and
 * scope when the route carries no explicit preferred source. */
941 __be32 __fib_res_prefsrc(struct fib_result *res)
943 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
/* Serialize one route into a netlink message: rtmsg header, RTA_TABLE/
 * DST/PRIORITY/PREFSRC attributes, nested metrics, then either the
 * single-nexthop attributes or a nested RTA_MULTIPATH with one
 * rtnexthop per hop.  Returns nlmsg_end()'s length on success, or
 * -EMSGSIZE via the nla_put_failure path (cancel lines partly missing
 * from this excerpt). */
946 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
947 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
948 struct fib_info *fi, unsigned int flags)
950 struct nlmsghdr *nlh;
953 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
957 rtm = nlmsg_data(nlh);
958 rtm->rtm_family = AF_INET;
959 rtm->rtm_dst_len = dst_len;
960 rtm->rtm_src_len = 0;
962 rtm->rtm_table = tb_id;
963 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
964 rtm->rtm_type = type;
965 rtm->rtm_flags = fi->fib_flags;
966 rtm->rtm_scope = scope;
967 rtm->rtm_protocol = fi->fib_protocol;
969 if (rtm->rtm_dst_len)
970 NLA_PUT_BE32(skb, RTA_DST, dst);
972 if (fi->fib_priority)
973 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
975 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
976 goto nla_put_failure;
979 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
/* Single nexthop: flat RTA_GATEWAY / RTA_OIF / RTA_FLOW attributes. */
981 if (fi->fib_nhs == 1) {
982 if (fi->fib_nh->nh_gw)
983 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
985 if (fi->fib_nh->nh_oif)
986 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
987 #ifdef CONFIG_NET_CLS_ROUTE
988 if (fi->fib_nh[0].nh_tclassid)
989 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
992 #ifdef CONFIG_IP_ROUTE_MULTIPATH
993 if (fi->fib_nhs > 1) {
994 struct rtnexthop *rtnh;
997 mp = nla_nest_start(skb, RTA_MULTIPATH);
999 goto nla_put_failure;
1002 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1004 goto nla_put_failure;
1006 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
/* Wire format stores weight-1 in rtnh_hops. */
1007 rtnh->rtnh_hops = nh->nh_weight - 1;
1008 rtnh->rtnh_ifindex = nh->nh_oif;
1011 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1012 #ifdef CONFIG_NET_CLS_ROUTE
1013 if (nh->nh_tclassid)
1014 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1016 /* length of rtnetlink header + attributes */
1017 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1018 } endfor_nexthops(fi);
1020 nla_nest_end(skb, mp);
1023 return nlmsg_end(skb, nlh);
1026 nlmsg_cancel(skb, nlh);
1032    - local address disappeared -> we must delete all the entries
1034    - device went down -> we must shutdown all nexthops going via it.
/* Mark every fib_info whose preferred source equals the vanished local
 * address (in this netns) as RTNH_F_DEAD.  Presumably returns the count
 * of routes killed -- the counter lines are missing from this excerpt. */
1036 int fib_sync_down_addr(struct net *net, __be32 local)
1039 unsigned int hash = fib_laddr_hashfn(local);
1040 struct hlist_head *head = &fib_info_laddrhash[hash];
1041 struct hlist_node *node;
1042 struct fib_info *fi;
1044 if (fib_info_laddrhash == NULL || local == 0)
1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
1048 if (fi->fib_net != net)
1050 if (fi->fib_prefsrc == local) {
1051 fi->fib_flags |= RTNH_F_DEAD;
/* Device went down: walk its nexthops in fib_info_devhash and mark the
 * affected ones RTNH_F_DEAD (subtracting multipath power under
 * fib_multipath_lock); if every nexthop of a fib_info dies, mark the
 * whole fib_info dead.  NOTE(review): the 'force' scope adjustment and
 * return-count lines are missing from this excerpt. */
1058 int fib_sync_down_dev(struct net_device *dev, int force)
1061 int scope = RT_SCOPE_NOWHERE;
1062 struct fib_info *prev_fi = NULL;
1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064 struct hlist_head *head = &fib_info_devhash[hash];
1065 struct hlist_node *node;
1071 hlist_for_each_entry(nh, node, head, nh_hash) {
1072 struct fib_info *fi = nh->nh_parent;
1075 BUG_ON(!fi->fib_nhs);
/* prev_fi skips re-processing a fib_info with several hops on 'dev'. */
1076 if (nh->nh_dev != dev || fi == prev_fi)
1080 change_nexthops(fi) {
1081 if (nh->nh_flags&RTNH_F_DEAD)
1083 else if (nh->nh_dev == dev &&
1084 nh->nh_scope != scope) {
1085 nh->nh_flags |= RTNH_F_DEAD;
1086 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1087 spin_lock_bh(&fib_multipath_lock);
1088 fi->fib_power -= nh->nh_power;
1090 spin_unlock_bh(&fib_multipath_lock);
1094 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1095 if (force > 1 && nh->nh_dev == dev) {
1100 } endfor_nexthops(fi)
1101 if (dead == fi->fib_nhs) {
1102 fi->fib_flags |= RTNH_F_DEAD;
1110 #ifdef CONFIG_IP_ROUTE_MULTIPATH

1113    Dead device goes up. We wake up dead nexthops.
1114    It takes sense only on multipath routes.
/* Re-enable nexthops on 'dev' that were marked RTNH_F_DEAD: clear the
 * flag under fib_multipath_lock and, per fib_info, clear the dead flag
 * on the route itself.  NOTE(review): the alive counter, fib_power
 * reset and return value are missing from this excerpt. */
1117 int fib_sync_up(struct net_device *dev)
1119 struct fib_info *prev_fi;
1121 struct hlist_head *head;
1122 struct hlist_node *node;
1126 if (!(dev->flags&IFF_UP))
1130 hash = fib_devindex_hashfn(dev->ifindex);
1131 head = &fib_info_devhash[hash];
1134 hlist_for_each_entry(nh, node, head, nh_hash) {
1135 struct fib_info *fi = nh->nh_parent;
1138 BUG_ON(!fi->fib_nhs);
1139 if (nh->nh_dev != dev || fi == prev_fi)
1144 change_nexthops(fi) {
1145 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1149 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1151 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1154 spin_lock_bh(&fib_multipath_lock);
1156 nh->nh_flags &= ~RTNH_F_DEAD;
1157 spin_unlock_bh(&fib_multipath_lock);
1158 } endfor_nexthops(fi)
1161 fi->fib_flags &= ~RTNH_F_DEAD;
1170    The algorithm is suboptimal, but it provides really
1171    fair weighted route distribution.
/* Weighted nexthop selection: when the fib_info's power budget is
 * exhausted, recharge every live nexthop's power from its weight; then
 * draw a pseudo-random threshold w (jiffies mod fib_power) and pick the
 * first live nexthop whose remaining power covers it, storing the index
 * in res->nh_sel.  NOTE(review): the power decrement on the chosen hop
 * and the dead-route fallback assignments are missing from this
 * excerpt. */
1174 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1176 struct fib_info *fi = res->fi;
1179 spin_lock_bh(&fib_multipath_lock);
1180 if (fi->fib_power <= 0) {
1182 change_nexthops(fi) {
1183 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1184 power += nh->nh_weight;
1185 nh->nh_power = nh->nh_weight;
1187 } endfor_nexthops(fi);
1188 fi->fib_power = power;
1190 spin_unlock_bh(&fib_multipath_lock);
1191 /* Race condition: route has just become dead. */
1198 /* w should be random number [0..fi->fib_power-1],
1199    it is pretty bad approximation.
/* jiffies as entropy source: cheap, deterministic-ish, admittedly poor. */
1202 w = jiffies % fi->fib_power;
1204 change_nexthops(fi) {
1205 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1206 if ((w -= nh->nh_power) <= 0) {
1209 res->nh_sel = nhsel;
1210 spin_unlock_bh(&fib_multipath_lock);
1214 } endfor_nexthops(fi);
1216 /* Race condition: route has just become dead. */
1218 spin_unlock_bh(&fib_multipath_lock);