2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 #include <net/netlink.h>
47 #include <net/nexthop.h>
49 #include "fib_lookup.h"
/* Debug-print hook: the macro expands to nothing. */
51 #define FSprintk(a...)
/* Lock taken in this file around every walk or update of the hash tables below. */
53 static DEFINE_SPINLOCK(fib_info_lock);
/* Bucket array of fib_info entries (linked via fib_hash), indexed by fib_info_hashfn(). */
54 static struct hlist_head *fib_info_hash;
/* Bucket array of fib_info entries (linked via fib_lhash), indexed by fib_laddr_hashfn(fib_prefsrc). */
55 static struct hlist_head *fib_info_laddrhash;
/*
 * Bucket count of the two arrays above. The hash functions mask with
 * (fib_hash_size - 1), so it is presumably kept a power of two -- confirm.
 */
56 static unsigned int fib_hash_size;
/* Compared against fib_hash_size in fib_create_info() to decide when the tables are grown. */
57 static unsigned int fib_info_cnt;
/*
 * Fixed-size table of nexthops (struct fib_nh linked via nh_hash), indexed by
 * fib_devindex_hashfn() of the nexthop device's ifindex.
 */
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * `nhsel` (index) and `nh` (current nexthop) and then emit a for-loop header;
 * the caller supplies the loop body and must close the scope with
 * endfor_nexthops(fi). for_nexthops() gives a const `nh`, change_nexthops()
 * a writable one.
 */
63 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Lock taken in this file around updates of fib_power / nh_power. */
65 static DEFINE_SPINLOCK(fib_multipath_lock);
/* Multipath build: visit all (fi)->fib_nhs nexthops. */
67 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
68 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
70 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
71 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
73 #else /* CONFIG_IP_ROUTE_MULTIPATH */
/* Non-multipath build: the loop runs exactly once, on the first nexthop. */
75 /* Hope, that gcc will optimize it to get rid of dummy loop */
77 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
78 for (nhsel=0; nhsel < 1; nhsel++)
80 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
81 for (nhsel=0; nhsel < 1; nhsel++)
83 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
/* Closes the brace scope opened by for_nexthops() / change_nexthops(). */
85 #define endfor_nexthops(fi) }
/*
 * Per-route-type property table. Elsewhere in this file it is indexed by a
 * route type (cfg->fc_type, fa->fa_type) and supplies the .error and .scope
 * values associated with that type.
 * NOTE(review): the array is sized RTA_MAX + 1, yet the entries are labelled
 * with RTN_* route types and the index is a route type; RTN_MAX + 1 looks
 * like the intended bound -- confirm.
 * NOTE(review): the struct header and a number of initializer lines are not
 * visible in this copy of the file.
 */
92 } fib_props[RTA_MAX + 1] = {
95 .scope = RT_SCOPE_NOWHERE,
99 .scope = RT_SCOPE_UNIVERSE,
103 .scope = RT_SCOPE_HOST,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
111 .scope = RT_SCOPE_LINK,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
131 .scope = RT_SCOPE_UNIVERSE,
135 .scope = RT_SCOPE_NOWHERE,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
144 /* Release a nexthop info record */
/*
 * @fi: the fib_info being released.
 * A fib_info whose fib_dead flag is still 0 is reported with a printk
 * ("Freeing alive fib_info"). The function then walks every nexthop.
 * NOTE(review): the braces, the body of the nexthop loop and the tail of the
 * function are not visible in this copy; what is done per nexthop and how
 * the memory is returned cannot be confirmed from here.
 */
146 void free_fib_info(struct fib_info *fi)
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
152 change_nexthops(fi) {
156 } endfor_nexthops(fi);
/*
 * Drop one tree reference (fib_treeref) on @fi; @fi may be NULL.
 * The whole operation runs under fib_info_lock with bottom halves disabled.
 * When the counter reaches zero the entry is unlinked from fib_info_hash
 * (fib_hash), from the local-address hash (fib_lhash), and each nexthop is
 * unlinked from the device hash (nh_hash).
 * NOTE(review): some lines are missing here (the embedded numbering skips);
 * guards around the fib_lhash / nh_hash unlinks and the final disposal of
 * @fi are presumably on those lines -- confirm against the full file.
 */
161 void fib_release_info(struct fib_info *fi)
163 spin_lock_bh(&fib_info_lock);
/* Pre-decrement: the body runs only for the last tree reference. */
164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
176 spin_unlock_bh(&fib_info_lock);
/*
 * Compare the nexthops of @fi against those of @ofi.
 * A pair of nexthops differs when any of oif, gw, scope, (multipath builds)
 * weight, (CONFIG_NET_CLS_ROUTE builds) tclassid differ, or when their flags
 * differ in any bit other than RTNH_F_DEAD.
 * fib_find_info() treats a return value of 0 as "nexthops match".
 * NOTE(review): the loop opener, the #endif lines, the return statements and
 * the advance of `onh` are not visible in this copy.
 */
179 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
181 const struct fib_nh *onh = ofi->fib_nh;
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187 #ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
190 #ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
196 } endfor_nexthops(fi);
200 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
205 val ^= fi->fib_protocol;
206 val ^= fi->fib_prefsrc;
207 val ^= fi->fib_priority;
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
/*
 * Look for an already-registered fib_info equivalent to @nfi.
 * Only the bucket selected by fib_info_hashfn(@nfi) is searched. A candidate
 * is considered equivalent when nexthop count, protocol, prefsrc, priority
 * and the whole metrics array are equal, the flags agree in every bit except
 * RTNH_F_DEAD, and (if there are nexthops) nh_comp() returns 0.
 * NOTE(review): some declarations and the statements taken on match /
 * mismatch / end-of-list are not visible in this copy.
 */
212 static struct fib_info *fib_find_info(const struct fib_info *nfi)
214 struct hlist_head *head;
215 struct hlist_node *node;
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
222 hlist_for_each_entry(fi, node, head, fib_hash) {
/* Cheap pre-filter before the full comparison below. */
223 if (fi->fib_nhs != nfi->fib_nhs)
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
238 static inline unsigned int fib_devindex_hashfn(unsigned int val)
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
247 /* Check that the gateway is already configured.
248 Used only by the redirect accept routine. */
/*
 * @gw:  gateway address being checked.
 * @dev: device the gateway is expected to be reachable through.
 * Walks, under fib_info_lock, the device-hash bucket for @dev->ifindex and
 * looks for a nexthop on @dev that is not flagged RTNH_F_DEAD.
 * NOTE(review): the line between the two visible conditions is missing;
 * presumably it compares the nexthop gateway with @gw. The return values are
 * also on lines not shown -- confirm against the full file.
 */
251 int ip_fib_check_default(u32 gw, struct net_device *dev)
253 struct hlist_head *head;
254 struct hlist_node *node;
258 spin_lock(&fib_info_lock);
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
/* Match found: the lock is dropped before leaving the loop. */
266 spin_unlock(&fib_info_lock);
/* No match in the bucket. */
271 spin_unlock(&fib_info_lock);
/*
 * Build a netlink message describing route alias @fa and hand it to
 * rtnl_notify() for the RTNLGRP_IPV4_ROUTE group.
 * @event:   netlink message type passed through to fib_dump_info().
 * @key:     destination passed to fib_dump_info() as dst.
 * @dst_len: destination prefix length.
 * @tb_id:   routing table id.
 * @info:    requester context; supplies the pid and, when info->nlh is set,
 *           the sequence number (0 otherwise).
 * The skb is sized for a struct rtmsg plus 256 bytes of payload.
 * NOTE(review): the error checks between the visible calls are not shown;
 * rtnl_set_sk_err() is presumably reached only on failure -- confirm.
 */
276 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
277 int dst_len, u32 tb_id, struct nl_info *info)
280 int payload = sizeof(struct rtmsg) + 256;
281 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
284 skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
288 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
289 fa->fa_type, fa->fa_scope, key, dst_len,
290 fa->fa_tos, fa->fa_info, 0);
296 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
297 info->nlh, GFP_KERNEL);
300 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
303 /* Return the first fib alias matching TOS with
304 * priority less than or equal to PRIO. */
/*
 * @fah:  list of fib_alias entries (linked via fa_list); it is walked in
 *        list order.
 * @tos:  TOS value compared against each alias' fa_tos.
 * @prio: priority compared against each alias' fa_info->fib_priority.
 * NOTE(review): this copy is cut in the middle of the second condition and
 * the statements selected by both conditions are missing, so the exact
 * selection rule cannot be confirmed from here.
 */
306 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
309 struct fib_alias *fa;
310 list_for_each_entry(fa, fah, fa_list) {
311 if (fa->fa_tos > tos)
313 if (fa->fa_info->fib_priority >= prio ||
/*
 * Classify a candidate route by the neighbour state of its first gateway.
 * @fi:          candidate; the ARP table is queried for fi->fib_nh[0].nh_gw
 *               on fi->fib_dev.
 * @order:       position of the candidate, compared against *@dflt.
 * @last_resort: out-parameter, see note.
 * @last_idx:    in/out index, read here in the final condition.
 * @dflt:        currently selected default index.
 * `state` starts as NUD_NONE and is taken from the neighbour entry's
 * nud_state when the lookup result is used.
 * NOTE(review): the statements guarded by each visible condition (returns
 * and the writes to *last_resort / *last_idx) and the neighbour release are
 * not visible in this copy.
 */
321 int fib_detect_death(struct fib_info *fi, int order,
322 struct fib_info **last_resort, int *last_idx, int *dflt)
325 int state = NUD_NONE;
327 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
329 state = n->nud_state;
332 if (state==NUD_REACHABLE)
334 if ((state&NUD_VALID) && order != *dflt)
336 if ((state&NUD_VALID) ||
337 (*last_idx<0 && order > *dflt)) {
344 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Count the rtnexthop records in a multipath attribute payload.
 *
 * @rtnh:      first record of the payload.
 * @remaining: number of payload bytes starting at @rtnh.
 *
 * Records are consumed one by one for as long as rtnh_ok() accepts the
 * current record within the bytes that are left. If bytes are left over
 * once the walk stops, the payload is treated as malformed and 0 is
 * returned; otherwise the number of records is returned.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
/*
 * Fill the nexthops of @fi from a multipath attribute payload.
 * @fi:        fib_info whose nexthop array is written.
 * @rtnh:      first rtnexthop record of the payload.
 * @remaining: payload bytes starting at @rtnh.
 * @cfg:       route configuration; its fc_flags supply the flag bits above
 *             the low byte.
 * One record is consumed per nexthop of @fi; each record is first checked
 * with rtnh_ok().
 * NOTE(review): the return statements and some declarations / #endif lines
 * are not visible in this copy.
 */
359 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
360 int remaining, struct fib_config *cfg)
362 change_nexthops(fi) {
365 if (!rtnh_ok(rtnh, remaining))
/* Low 8 flag bits come from the record, the rest from the route config. */
368 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
369 nh->nh_oif = rtnh->rtnh_ifindex;
/* Stored weight is hops + 1; fib_dump_info() emits weight - 1 again. */
370 nh->nh_weight = rtnh->rtnh_hops + 1;
372 attrlen = rtnh_attrlen(rtnh);
374 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
/* Optional per-nexthop attributes; an absent attribute yields 0. */
376 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
377 nh->nh_gw = nla ? nla_get_u32(nla) : 0;
378 #ifdef CONFIG_NET_CLS_ROUTE
379 nla = nla_find(attrs, attrlen, RTA_FLOW);
380 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
384 rtnh = rtnh_next(rtnh, &remaining);
385 } endfor_nexthops(fi);
/*
 * Check whether the nexthop description in @cfg matches fib_info @fi.
 * Visible checks, in order:
 *  - a non-zero cfg->fc_priority must equal fi->fib_priority;
 *  - if cfg gives an oif and/or a gateway, each one given must equal that of
 *    the first nexthop of @fi;
 *  - (multipath builds) with a multipath payload, every rtnexthop record is
 *    compared with the corresponding nexthop: a non-zero ifindex, and the
 *    RTA_GATEWAY / RTA_FLOW attributes when present, must agree.
 * NOTE(review): all return statements and several declarations are not
 * visible in this copy, so the return convention cannot be confirmed here.
 */
392 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
394 #ifdef CONFIG_IP_ROUTE_MULTIPATH
395 struct rtnexthop *rtnh;
399 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
402 if (cfg->fc_oif || cfg->fc_gw) {
403 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
404 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
409 #ifdef CONFIG_IP_ROUTE_MULTIPATH
410 if (cfg->fc_mp == NULL)
414 remaining = cfg->fc_mp_len;
419 if (!rtnh_ok(rtnh, remaining))
422 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
425 attrlen = rtnh_attrlen(rtnh);
427 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
429 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
430 if (nla && nla_get_u32(nla) != nh->nh_gw)
432 #ifdef CONFIG_NET_CLS_ROUTE
433 nla = nla_find(attrs, attrlen, RTA_FLOW);
434 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
439 rtnh = rtnh_next(rtnh, &remaining);
440 } endfor_nexthops(fi);
450 The semantics of a nexthop are very messy for historical reasons.
451 We have to take into account that:
452 a) gateway can be actually local interface address,
453 so that gatewayed route is direct.
454 b) gateway must be on-link address, possibly
455 described not by an ifaddr, but also by a direct route.
456 c) If both gateway and interface are specified, they should not contradict each other.
458 d) If we use tunnel routes, gateway could be not on-link.
460 Attempt to reconcile all of these (alas, self-contradictory) conditions
461 results in pretty ugly and hairy code with obscure logic.
463 I chose to generalize it instead, so that the size
464 of the code practically does not increase, but it becomes much more general.
466 Every prefix is assigned a "scope" value: "host" is local address,
467 "link" is direct route,
468 [ ... "site" ... "interior" ... ]
469 and "universe" is true gateway route with global meaning.
471 Every prefix refers to a set of "nexthop"s (gw, oif),
472 where gw must have narrower scope. This recursion stops
473 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474 which means that gw is forced to be on link.
476 The code is still hairy, but now it is apparently logically
477 consistent and very flexible. E.g., as a by-product it allows
478 independent exterior and interior routing processes to co-exist in peace.
481 Normally it looks as follows.
483 {universe prefix} -> (gw, oif) [scope link]
485 |-> {link prefix} -> (gw, oif) [scope local]
487 |-> {local prefix} (terminal node)
/*
 * Validate one nexthop of @fi against route configuration @cfg and fill in
 * its nh_scope, nh_oif and nh_dev. fib_create_info() calls this once per
 * nexthop and treats a non-zero return as failure.
 * Three cases are visible:
 *  1. RTNH_F_ONLINK nexthop: checks on cfg->fc_scope, on the gateway address
 *     type (must be RTN_UNICAST) and on the output device (must exist and be
 *     IFF_UP); the nexthop scope becomes RT_SCOPE_LINK.
 *  2. Otherwise a FIB lookup is made with a scope of at least RT_SCOPE_LINK;
 *     the result must be RTN_UNICAST or RTN_LOCAL and supplies scope, oif and
 *     device, and the device must be IFF_UP.
 *  3. A branch that resolves the in_device from nh_oif, requires it to be
 *     IFF_UP and sets the nexthop scope to RT_SCOPE_HOST.
 * A reference is taken with dev_hold() on the device stored in nh_dev.
 * NOTE(review): the last parameter line, the conditions selecting cases 1-3,
 * every return statement and the cleanup paths are not visible in this copy.
 */
490 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
496 struct fib_result res;
498 #ifdef CONFIG_IP_ROUTE_PERVASIVE
499 if (nh->nh_flags&RTNH_F_PERVASIVE)
502 if (nh->nh_flags&RTNH_F_ONLINK) {
503 struct net_device *dev;
505 if (cfg->fc_scope >= RT_SCOPE_LINK)
507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
509 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
511 if (!(dev->flags&IFF_UP))
515 nh->nh_scope = RT_SCOPE_LINK;
/* Lookup key: scope is one step past the scope of the route being added. */
523 .scope = cfg->fc_scope + 1,
529 /* It is not necessary, but requires a bit of thinking */
530 if (fl.fl4_scope < RT_SCOPE_LINK)
531 fl.fl4_scope = RT_SCOPE_LINK;
532 if ((err = fib_lookup(&fl, &res)) != 0)
536 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
538 nh->nh_scope = res.scope;
539 nh->nh_oif = FIB_RES_OIF(res);
540 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
542 dev_hold(nh->nh_dev);
544 if (!(nh->nh_dev->flags & IFF_UP))
551 struct in_device *in_dev;
553 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
556 in_dev = inetdev_by_index(nh->nh_oif);
559 if (!(in_dev->dev->flags&IFF_UP)) {
563 nh->nh_dev = in_dev->dev;
564 dev_hold(nh->nh_dev);
565 nh->nh_scope = RT_SCOPE_HOST;
571 static inline unsigned int fib_laddr_hashfn(u32 val)
573 unsigned int mask = (fib_hash_size - 1);
575 return (val ^ (val >> 7) ^ (val >> 14)) & mask;
578 static struct hlist_head *fib_hash_alloc(int bytes)
580 if (bytes <= PAGE_SIZE)
581 return kmalloc(bytes, GFP_KERNEL);
583 return (struct hlist_head *)
584 __get_free_pages(GFP_KERNEL, get_order(bytes));
587 static void fib_hash_free(struct hlist_head *hash, int bytes)
592 if (bytes <= PAGE_SIZE)
595 free_pages((unsigned long) hash, get_order(bytes));
598 static void fib_hash_move(struct hlist_head *new_info_hash,
599 struct hlist_head *new_laddrhash,
600 unsigned int new_size)
602 struct hlist_head *old_info_hash, *old_laddrhash;
603 unsigned int old_size = fib_hash_size;
604 unsigned int i, bytes;
606 spin_lock_bh(&fib_info_lock);
607 old_info_hash = fib_info_hash;
608 old_laddrhash = fib_info_laddrhash;
609 fib_hash_size = new_size;
611 for (i = 0; i < old_size; i++) {
612 struct hlist_head *head = &fib_info_hash[i];
613 struct hlist_node *node, *n;
616 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
617 struct hlist_head *dest;
618 unsigned int new_hash;
620 hlist_del(&fi->fib_hash);
622 new_hash = fib_info_hashfn(fi);
623 dest = &new_info_hash[new_hash];
624 hlist_add_head(&fi->fib_hash, dest);
627 fib_info_hash = new_info_hash;
629 for (i = 0; i < old_size; i++) {
630 struct hlist_head *lhead = &fib_info_laddrhash[i];
631 struct hlist_node *node, *n;
634 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
635 struct hlist_head *ldest;
636 unsigned int new_hash;
638 hlist_del(&fi->fib_lhash);
640 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
641 ldest = &new_laddrhash[new_hash];
642 hlist_add_head(&fi->fib_lhash, ldest);
645 fib_info_laddrhash = new_laddrhash;
647 spin_unlock_bh(&fib_info_lock);
649 bytes = old_size * sizeof(struct hlist_head *);
650 fib_hash_free(old_info_hash, bytes);
651 fib_hash_free(old_laddrhash, bytes);
/*
 * Build (or reuse) the fib_info described by route configuration @cfg.
 * Visible steps, in order:
 *  - sanity check of cfg->fc_scope against the scope recorded in fib_props[]
 *    for cfg->fc_type;
 *  - (multipath builds) count the nexthops in cfg->fc_mp;
 *  - grow both fib_info hash tables when fib_info_cnt has reached
 *    fib_hash_size;
 *  - allocate a zeroed fib_info with room for `nhs` nexthops and copy
 *    protocol, flags, priority, prefsrc and metrics from @cfg;
 *  - fill the nexthop(s) either from the multipath payload or from the
 *    single oif/gw/flags/flow in @cfg;
 *  - validate the nexthops and the preferred source address;
 *  - reuse an equivalent existing fib_info if fib_find_info() finds one,
 *    otherwise link the new one into the hash tables under fib_info_lock.
 * NOTE(review): many lines are missing in this copy (declarations, the
 * statements selected by most conditions, error labels, return statements),
 * so the error codes and the exact cleanup cannot be confirmed from here.
 */
654 struct fib_info *fib_create_info(struct fib_config *cfg)
657 struct fib_info *fi = NULL;
658 struct fib_info *ofi;
661 /* Fast check to catch the most weird cases */
/* NOTE(review): cfg->fc_type indexes fib_props[] with no visible range check -- confirm it is validated by the caller. */
662 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
665 #ifdef CONFIG_IP_ROUTE_MULTIPATH
667 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
672 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
673 if (cfg->fc_mp_alg) {
674 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
675 cfg->fc_mp_alg > IP_MP_ALG_MAX)
/* Table is full: double the bucket count and rehash. */
681 if (fib_info_cnt >= fib_hash_size) {
/* NOTE(review): 0 << 1 stays 0; the initial-size case is presumably handled on lines not shown. */
682 unsigned int new_size = fib_hash_size << 1;
683 struct hlist_head *new_info_hash;
684 struct hlist_head *new_laddrhash;
/* NOTE(review): element size is taken from a pointer type; same byte count as struct hlist_head as long as that struct holds one pointer. */
689 bytes = new_size * sizeof(struct hlist_head *);
690 new_info_hash = fib_hash_alloc(bytes);
691 new_laddrhash = fib_hash_alloc(bytes);
/* If either allocation failed, release whichever one succeeded. */
692 if (!new_info_hash || !new_laddrhash) {
693 fib_hash_free(new_info_hash, bytes);
694 fib_hash_free(new_laddrhash, bytes);
/* fib_hash_alloc() does not clear the memory, so zero both arrays first. */
696 memset(new_info_hash, 0, bytes);
697 memset(new_laddrhash, 0, bytes);
699 fib_hash_move(new_info_hash, new_laddrhash, new_size);
/* One allocation holds the fib_info and its `nhs` trailing nexthops. */
706 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
711 fi->fib_protocol = cfg->fc_protocol;
712 fi->fib_flags = cfg->fc_flags;
713 fi->fib_priority = cfg->fc_priority;
714 fi->fib_prefsrc = cfg->fc_prefsrc;
717 change_nexthops(fi) {
719 } endfor_nexthops(fi)
/* Copy route metrics from the nested attributes in cfg->fc_mx. */
725 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
726 int type = nla->nla_type;
/* NOTE(review): any range check on `type` is on lines not shown -- confirm. */
731 fi->fib_metrics[type - 1] = nla_get_u32(nla);
737 #ifdef CONFIG_IP_ROUTE_MULTIPATH
738 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
/* Top-level oif/gw/flow, when given, must agree with the first nexthop. */
741 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
743 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
745 #ifdef CONFIG_NET_CLS_ROUTE
746 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
/* Single-nexthop case: take everything from @cfg. */
753 struct fib_nh *nh = fi->fib_nh;
755 nh->nh_oif = cfg->fc_oif;
756 nh->nh_gw = cfg->fc_gw;
757 nh->nh_flags = cfg->fc_flags;
758 #ifdef CONFIG_NET_CLS_ROUTE
759 nh->nh_tclassid = cfg->fc_flow;
761 #ifdef CONFIG_IP_ROUTE_MULTIPATH
766 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
767 fi->fib_mp_alg = cfg->fc_mp_alg;
/* Route types that carry an error code are checked for gateway, oif and multipath data. */
770 if (fib_props[cfg->fc_type].error) {
771 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
776 if (cfg->fc_scope > RT_SCOPE_HOST)
779 if (cfg->fc_scope == RT_SCOPE_HOST) {
780 struct fib_nh *nh = fi->fib_nh;
782 /* Local address is added. */
783 if (nhs != 1 || nh->nh_gw)
785 nh->nh_scope = RT_SCOPE_NOWHERE;
786 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
788 if (nh->nh_dev == NULL)
/* Otherwise every nexthop is validated and resolved by fib_check_nh(). */
791 change_nexthops(fi) {
792 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
794 } endfor_nexthops(fi)
/*
 * A preferred source must be a local address, except when this is the
 * RTN_LOCAL route whose destination is that very address.
 */
797 if (fi->fib_prefsrc) {
798 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
799 fi->fib_prefsrc != cfg->fc_dst)
800 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
/* An equivalent fib_info already exists. */
805 if ((ofi = fib_find_info(fi)) != NULL) {
/* New entry: take a client reference and publish it in the hash tables. */
813 atomic_inc(&fi->fib_clntref);
814 spin_lock_bh(&fib_info_lock);
815 hlist_add_head(&fi->fib_hash,
816 &fib_info_hash[fib_info_hashfn(fi)]);
817 if (fi->fib_prefsrc) {
818 struct hlist_head *head;
820 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
821 hlist_add_head(&fi->fib_lhash, head);
823 change_nexthops(fi) {
824 struct hlist_head *head;
/* NOTE(review): nh->nh_dev is dereferenced here; a NULL check is presumably on the lines not shown. */
829 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
830 head = &fib_info_devhash[hash];
831 hlist_add_head(&nh->nh_hash, head);
832 } endfor_nexthops(fi)
833 spin_unlock_bh(&fib_info_lock);
848 /* Note! fib_semantic_match intentionally uses RCU list functions. */
/*
 * Scan the alias list @head for a route usable for flow @flp and describe it
 * in @res.
 * Visible filtering: TOS is compared with flp->fl4_tos, the alias scope with
 * flp->fl4_scope, aliases whose fib_info is flagged RTNH_F_DEAD are tested,
 * and nexthops are tested for RTNH_F_DEAD and for agreement with flp->oif
 * (0 means "any").
 * Every alias that passes the TOS/scope filter gets FA_S_ACCESSED set.
 * On a match @res receives prefixlen, the selected nexthop index, type,
 * scope and the fib_info, and a client reference (fib_clntref) is taken on
 * that fib_info.
 * NOTE(review): the remaining parameters, the switch cases, the control flow
 * between the visible lines and the return values are not shown in this copy.
 */
849 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
850 struct fib_result *res, __u32 zone, __u32 mask,
853 struct fib_alias *fa;
856 list_for_each_entry_rcu(fa, head, fa_list) {
860 fa->fa_tos != flp->fl4_tos)
863 if (fa->fa_scope < flp->fl4_scope)
866 fa->fa_state |= FA_S_ACCESSED;
/* Error code recorded for this route type in fib_props[]. */
868 err = fib_props[fa->fa_type].error;
870 struct fib_info *fi = fa->fa_info;
872 if (fi->fib_flags & RTNH_F_DEAD)
875 switch (fa->fa_type) {
882 if (nh->nh_flags&RTNH_F_DEAD)
884 if (!flp->oif || flp->oif == nh->nh_oif)
887 #ifdef CONFIG_IP_ROUTE_MULTIPATH
888 if (nhsel < fi->fib_nhs) {
901 printk(KERN_DEBUG "impossible 102\n");
910 res->prefixlen = prefixlen;
911 res->nh_sel = nh_sel;
912 res->type = fa->fa_type;
913 res->scope = fa->fa_scope;
914 res->fi = fa->fa_info;
915 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
/*
 * NOTE(review): for prefixlen == 0 the shift count is 32, which is undefined
 * for a 32-bit operand; the mask is also built in host byte order -- confirm
 * the byte order of `zone` and whether prefixlen 0 can reach this line.
 */
917 res->network = zone &
918 (0xFFFFFFFF >> (32 - prefixlen));
920 atomic_inc(&res->fi->fib_clntref);
924 /* Find appropriate source address to this destination */
926 u32 __fib_res_prefsrc(struct fib_result *res)
928 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
/*
 * Serialise one route into netlink message form inside @skb.
 * @skb:     buffer the message is appended to.
 * @pid:     netlink pid placed in the message header.
 * @seq:     sequence number placed in the message header.
 * @event:   message type.
 * @tb_id:   routing table id; written to rtm_table and also emitted as an
 *           RTA_TABLE attribute.
 * @type:    route type (rtm_type).
 * @scope:   route scope (rtm_scope).
 * @dst:     destination, emitted as RTA_DST when @dst_len is non-zero.
 * @dst_len: destination prefix length.
 * @tos:     route TOS (its use is on a line not shown).
 * @fi:      fib_info supplying flags, protocol, priority, metrics, prefsrc
 *           and the nexthops.
 * @flags:   netlink header flags.
 * Returns the result of nlmsg_end() on success; the failure path calls
 * nlmsg_cancel() and returns its result.
 * A single nexthop is emitted as top-level RTA_GATEWAY / RTA_OIF / RTA_FLOW
 * attributes; (multipath builds) more than one nexthop is emitted as a
 * nested RTA_MULTIPATH attribute holding one rtnexthop record per nexthop.
 * NOTE(review): the declarations, the NULL checks before the visible gotos
 * and the nla_put_failure label itself are not shown in this copy.
 */
931 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
932 u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
933 struct fib_info *fi, unsigned int flags)
935 struct nlmsghdr *nlh;
938 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
942 rtm = nlmsg_data(nlh);
943 rtm->rtm_family = AF_INET;
944 rtm->rtm_dst_len = dst_len;
945 rtm->rtm_src_len = 0;
/* NOTE(review): rtm_table is presumably narrower than the u32 tb_id, which would explain the additional RTA_TABLE attribute -- confirm. */
947 rtm->rtm_table = tb_id;
948 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
949 rtm->rtm_type = type;
950 rtm->rtm_flags = fi->fib_flags;
951 rtm->rtm_scope = scope;
952 rtm->rtm_protocol = fi->fib_protocol;
954 if (rtm->rtm_dst_len)
955 NLA_PUT_U32(skb, RTA_DST, dst);
957 if (fi->fib_priority)
958 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
960 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
961 goto nla_put_failure;
964 NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
966 if (fi->fib_nhs == 1) {
967 if (fi->fib_nh->nh_gw)
968 NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
970 if (fi->fib_nh->nh_oif)
971 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
972 #ifdef CONFIG_NET_CLS_ROUTE
973 if (fi->fib_nh[0].nh_tclassid)
974 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
977 #ifdef CONFIG_IP_ROUTE_MULTIPATH
978 if (fi->fib_nhs > 1) {
979 struct rtnexthop *rtnh;
982 mp = nla_nest_start(skb, RTA_MULTIPATH);
984 goto nla_put_failure;
987 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
989 goto nla_put_failure;
/* Inverse of fib_get_nhs(): low flag byte only, and hops = weight - 1. */
991 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
992 rtnh->rtnh_hops = nh->nh_weight - 1;
993 rtnh->rtnh_ifindex = nh->nh_oif;
996 NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
997 #ifdef CONFIG_NET_CLS_ROUTE
999 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1001 /* length of rtnetlink header + attributes */
1002 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1003 } endfor_nexthops(fi);
1005 nla_nest_end(skb, mp);
1008 return nlmsg_end(skb, nlh);
1011 return nlmsg_cancel(skb, nlh);
1016 - local address disappeared -> we must delete all the entries
1018 - device went down -> we must shutdown all nexthops going via it.
/*
 * Mark routes dead after a local address disappeared and/or a device went
 * down.
 * @local: local address that went away (0 to skip this part). Every fib_info
 *         in the matching local-address bucket whose fib_prefsrc equals
 *         @local gets RTNH_F_DEAD set in fib_flags.
 * @dev:   device that went down. The device-hash bucket for dev->ifindex is
 *         walked; for each fib_info owning a nexthop on @dev, live nexthops
 *         on @dev whose scope differs from `scope` are flagged RTNH_F_DEAD.
 *         When the count of dead nexthops equals fib_nhs the fib_info itself
 *         is flagged RTNH_F_DEAD.
 * @force: values above 1 select an extra branch for nexthops on @dev in
 *         multipath builds (its body is not shown).
 * `prev_fi` is compared in the loop so that a fib_info seen in the previous
 * iteration is not processed again.
 * NOTE(review): the condition guarding the device part, the updates of
 * `scope`, `dead`, `prev_fi` and the return value are on lines not visible
 * in this copy.
 */
1021 int fib_sync_down(u32 local, struct net_device *dev, int force)
1024 int scope = RT_SCOPE_NOWHERE;
/* The table pointer is tested first: it may not be allocated yet. */
1029 if (local && fib_info_laddrhash) {
1030 unsigned int hash = fib_laddr_hashfn(local);
1031 struct hlist_head *head = &fib_info_laddrhash[hash];
1032 struct hlist_node *node;
1033 struct fib_info *fi;
1035 hlist_for_each_entry(fi, node, head, fib_lhash) {
1036 if (fi->fib_prefsrc == local) {
1037 fi->fib_flags |= RTNH_F_DEAD;
1044 struct fib_info *prev_fi = NULL;
1045 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1046 struct hlist_head *head = &fib_info_devhash[hash];
1047 struct hlist_node *node;
1050 hlist_for_each_entry(nh, node, head, nh_hash) {
1051 struct fib_info *fi = nh->nh_parent;
1054 BUG_ON(!fi->fib_nhs);
/* Other devices can share the bucket; skip them and repeated parents. */
1055 if (nh->nh_dev != dev || fi == prev_fi)
/* Inner `nh` (declared by the macro) shadows the bucket iterator. */
1059 change_nexthops(fi) {
1060 if (nh->nh_flags&RTNH_F_DEAD)
1062 else if (nh->nh_dev == dev &&
1063 nh->nh_scope != scope) {
1064 nh->nh_flags |= RTNH_F_DEAD;
1065 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Remove this nexthop's share from the multipath power budget. */
1066 spin_lock_bh(&fib_multipath_lock);
1067 fi->fib_power -= nh->nh_power;
1069 spin_unlock_bh(&fib_multipath_lock);
1073 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1074 if (force > 1 && nh->nh_dev == dev) {
1079 } endfor_nexthops(fi)
1080 if (dead == fi->fib_nhs) {
1081 fi->fib_flags |= RTNH_F_DEAD;
1090 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1093 A dead device comes back up. We wake up its dead nexthops.
1094 This makes sense only for multipath routes.
/*
 * Revive nexthops after @dev came up.
 * Tests IFF_UP on @dev first. Then walks the device-hash bucket for
 * dev->ifindex; for each fib_info owning a nexthop on @dev, every nexthop is
 * examined: ones not flagged RTNH_F_DEAD, ones without a device or with a
 * device that is not up, and ones not on @dev (or when @dev has no
 * in_device) are each tested by a visible condition; for the remaining ones
 * RTNH_F_DEAD is cleared under fib_multipath_lock. RTNH_F_DEAD is also
 * cleared on the fib_info itself.
 * NOTE(review): the statements selected by the visible conditions, the
 * condition guarding the fib_flags update, the `prev_fi` bookkeeping and the
 * return value are not visible in this copy.
 */
1097 int fib_sync_up(struct net_device *dev)
1099 struct fib_info *prev_fi;
1101 struct hlist_head *head;
1102 struct hlist_node *node;
1106 if (!(dev->flags&IFF_UP))
1110 hash = fib_devindex_hashfn(dev->ifindex);
1111 head = &fib_info_devhash[hash];
1114 hlist_for_each_entry(nh, node, head, nh_hash) {
1115 struct fib_info *fi = nh->nh_parent;
1118 BUG_ON(!fi->fib_nhs);
/* Other devices can share the bucket; skip them and repeated parents. */
1119 if (nh->nh_dev != dev || fi == prev_fi)
/* Inner `nh` (declared by the macro) shadows the bucket iterator. */
1124 change_nexthops(fi) {
1125 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1129 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1131 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1134 spin_lock_bh(&fib_multipath_lock);
1136 nh->nh_flags &= ~RTNH_F_DEAD;
1137 spin_unlock_bh(&fib_multipath_lock);
1138 } endfor_nexthops(fi)
1141 fi->fib_flags &= ~RTNH_F_DEAD;
1150 The algorithm is suboptimal, but it provides really
1151 fair weighted route distribution.
/*
 * Pick one nexthop of the multipath route in @res and store its index in
 * res->nh_sel. @flp is not referenced in the lines visible here.
 *
 * Everything runs under fib_multipath_lock.
 * Phase 1 (only when fi->fib_power <= 0): every live nexthop gets its
 * nh_power reset to its nh_weight, and fib_power is set to the sum.
 * Phase 2: a value w = jiffies % fib_power is drawn; the live nexthops that
 * still have nh_power are walked, subtracting nh_power from w, and the first
 * one that brings w to zero or below is selected.
 * Both "Race condition" comments mark paths where the route turned out to
 * have no usable nexthop.
 * NOTE(review): the declarations, the statements on the race paths and the
 * bookkeeping done when a nexthop is selected are not visible in this copy.
 * The "w should be random number" comment is not terminated in this copy;
 * nothing has been added between it and the next comment terminator.
 */
1154 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1156 struct fib_info *fi = res->fi;
1159 spin_lock_bh(&fib_multipath_lock);
1160 if (fi->fib_power <= 0) {
1162 change_nexthops(fi) {
1163 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1164 power += nh->nh_weight;
1165 nh->nh_power = nh->nh_weight;
1167 } endfor_nexthops(fi);
1168 fi->fib_power = power;
1170 spin_unlock_bh(&fib_multipath_lock);
1171 /* Race condition: route has just become dead. */
1178 /* w should be random number [0..fi->fib_power-1],
1179 it is pretty bad approximation.
1182 w = jiffies % fi->fib_power;
1184 change_nexthops(fi) {
1185 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1186 if ((w -= nh->nh_power) <= 0) {
1189 res->nh_sel = nhsel;
1190 spin_unlock_bh(&fib_multipath_lock);
1194 } endfor_nexthops(fi);
1196 /* Race condition: route has just become dead. */
1198 spin_unlock_bh(&fib_multipath_lock);