2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
/* Debug helper: passes its arguments to printk() prefixed with KERN_DEBUG. */
50 #define FFprint(a...) printk(KERN_DEBUG a)
/*
 * Kernel-side netlink socket for FIB lookups: assigned in
 * nl_fib_lookup_init() and passed to netlink_unicast() in nl_fib_input().
 */
52 static struct sock *fibnl;
/*
 * Build without CONFIG_IP_MULTIPLE_TABLES: the local and main tables are
 * reachable through two global pointers, and the table hash has exactly one
 * bucket.
 */
54 #ifndef CONFIG_IP_MULTIPLE_TABLES
56 struct fib_table *ip_fib_local_table;
57 struct fib_table *ip_fib_main_table;
59 #define FIB_TABLE_HASHSZ 1
60 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Create the local and main FIB tables with fib_hash_init() and link both
 * into bucket 0 of fib_table_hash using the RCU list-add primitive.
 * NOTE(review): each fib_hash_init() result is dereferenced on the very next
 * line with no NULL check in between -- confirm allocation cannot fail here.
 */
62 static void __init fib4_rules_init(void)
64 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
65 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
66 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
67 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
/*
 * Second definition of the table hash, with 256 buckets.
 * NOTE(review): presumably this is the branch taken when
 * CONFIG_IP_MULTIPLE_TABLES is set; the directive separating it from the
 * single-bucket definition is not visible in this listing.
 */
71 #define FIB_TABLE_HASHSZ 256
72 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Obtain the FIB table for the given id.
 * Visible steps: look the id up with fib_get_table(), build a table with
 * fib_hash_init(id), then publish it in fib_table_hash with the RCU list-add
 * primitive. The bucket index is the id masked with FIB_TABLE_HASHSZ - 1.
 * NOTE(review): the checks between these steps (reuse of an existing table,
 * allocation failure) and the return statement are not visible in this
 * listing -- presumably an existing table is returned instead of creating a
 * second one; confirm against the full source.
 */
74 struct fib_table *fib_new_table(u32 id)
81 tb = fib_get_table(id);
84 tb = fib_hash_init(id);
87 h = id & (FIB_TABLE_HASHSZ - 1);
88 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
/*
 * Find a FIB table by id: mask the id with FIB_TABLE_HASHSZ - 1 to pick a
 * bucket of fib_table_hash, then walk that bucket with the RCU list iterator
 * comparing each table's tb_id against the requested id.
 * NOTE(review): the body of the match branch and the not-found return are
 * not visible in this listing.
 */
92 struct fib_table *fib_get_table(u32 id)
95 struct hlist_node *node;
100 h = id & (FIB_TABLE_HASHSZ - 1);
102 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
103 if (tb->tb_id == id) {
111 #endif /* CONFIG_IP_MULTIPLE_TABLES */
/*
 * Visit every bucket of fib_table_hash and call the tb_flush() method of
 * each table found, summing the returned values into 'flushed'.
 * NOTE(review): what is done with 'flushed' after the loop is not visible in
 * this listing.
 */
113 static void fib_flush(void)
116 struct fib_table *tb;
117 struct hlist_node *node;
120 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
121 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
122 flushed += tb->tb_flush(tb);
130 * Find the first device with a given source address.
/*
 * Look 'addr' up as a destination in the RT_TABLE_LOCAL table and, when the
 * result is of type RTN_LOCAL, take the device from it via FIB_RES_DEV().
 * 'dev' starts out as NULL. The visible tests cover a missing table, a
 * failed lookup and a non-local result type.
 * NOTE(review): the statements guarded by those tests, any reference
 * counting on the device, and the return statement are not visible in this
 * listing.
 */
133 struct net_device * ip_dev_find(__be32 addr)
135 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
136 struct fib_result res;
137 struct net_device *dev = NULL;
138 struct fib_table *local_table;
140 #ifdef CONFIG_IP_MULTIPLE_TABLES
144 local_table = fib_get_table(RT_TABLE_LOCAL);
145 if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
147 if (res.type != RTN_LOCAL)
149 dev = FIB_RES_DEV(res);
/*
 * Classify an IPv4 address as a route type (an RTN_xxx value).
 * Addresses matching ZERONET() or BADCLASS() are reported as RTN_BROADCAST
 * without consulting any table. RTN_MULTICAST is a second early result; its
 * guarding test is not visible in this listing. Otherwise the address is
 * looked up as a destination in the RT_TABLE_LOCAL table.
 * 'ret' is initialised to RTN_BROADCAST.
 * NOTE(review): the handling of a missing local table and of the lookup
 * result is not visible here.
 */
158 unsigned inet_addr_type(__be32 addr)
160 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
161 struct fib_result res;
162 unsigned ret = RTN_BROADCAST;
163 struct fib_table *local_table;
165 if (ZERONET(addr) || BADCLASS(addr))
166 return RTN_BROADCAST;
168 return RTN_MULTICAST;
170 #ifdef CONFIG_IP_MULTIPLE_TABLES
174 local_table = fib_get_table(RT_TABLE_LOCAL);
177 if (!local_table->tb_lookup(local_table, &fl, &res)) {
185 /* Given (packet source, input interface) and optional (dst, oif, tos):
186 - (main) check that the source is valid, i.e. not a broadcast or one of our local addresses.
188 - figure out on which "logical" interface this packet arrived
189 and calculate the "specific destination" address.
190 - check that the packet arrived from the expected physical interface. */
/*
 * Validate the source of a received packet against the FIB.
 *
 * Outputs written through pointers:
 *   spec_dst - set from FIB_RES_PREFSRC() of a unicast lookup result, or from
 *              inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE) on another path
 *   itag     - handed to fib_combine_itag() together with the lookup result
 *
 * Visible flow: per-device state is read through __in_dev_get_rcu()
 * (whether the device has no addresses, and its IN_DEV_RPFILTER setting);
 * a first fib_lookup() is done; for a unicast result whose device is 'dev'
 * (or, with CONFIG_IP_ROUTE_MULTIPATH, whose route has more than one next
 * hop) 'ret' is derived from the next-hop scope. A second fib_lookup() is
 * done with fl.oif set to dev->ifindex.
 * NOTE(review): the flowi initialiser, the branch targets, the use of
 * 'no_addr' and 'rpf', and the return statements are not visible in this
 * listing.
 */
193 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
194 struct net_device *dev, __be32 *spec_dst, u32 *itag)
196 struct in_device *in_dev;
197 struct flowi fl = { .nl_u = { .ip4_u =
202 struct fib_result res;
208 in_dev = __in_dev_get_rcu(dev);
210 no_addr = in_dev->ifa_list == NULL;
211 rpf = IN_DEV_RPFILTER(in_dev);
218 if (fib_lookup(&fl, &res))
220 if (res.type != RTN_UNICAST)
222 *spec_dst = FIB_RES_PREFSRC(res);
223 fib_combine_itag(itag, &res);
224 #ifdef CONFIG_IP_ROUTE_MULTIPATH
225 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
227 if (FIB_RES_DEV(res) == dev)
/* ret is 1 when the next hop's scope is RT_SCOPE_HOST or greater, else 0. */
230 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Second lookup, this time constrained to the receiving device. */
239 fl.oif = dev->ifindex;
242 if (fib_lookup(&fl, &res) == 0) {
243 if (res.type == RTN_UNICAST) {
244 *spec_dst = FIB_RES_PREFSRC(res);
245 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
254 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
/*
 * Return the IPv4 address held in a generic sockaddr by viewing it as a
 * sockaddr_in. The address family is not checked here; callers do that.
 */
264 static inline __be32 sk_extract_addr(struct sockaddr *addr)
266 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
/*
 * Write one netlink attribute with a 4-byte payload into a buffer.
 *   mx    - start of the attribute buffer
 *   len   - byte offset in the buffer at which to write
 *   type  - value stored in nla_type
 *   value - 32-bit payload
 * Returns len advanced by nla_total_size(4), i.e. the offset for the next
 * attribute. No bounds check is done; the caller sizes the buffer.
 */
269 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
273 nla = (struct nlattr *) ((char *) mx + len);
274 nla->nla_type = type;
275 nla->nla_len = nla_attr_size(4);
276 *(u32 *) nla_data(nla) = value;
278 return len + nla_total_size(4);
/*
 * Translate a 'struct rtentry' (the argument of the SIOCADDRT / SIOCDELRT
 * ioctls) into a 'struct fib_config'.
 *   cmd - the ioctl command; SIOCDELRT is treated specially in places
 *   rt  - route description already copied in by the caller
 *   cfg - output, zeroed first and then filled in
 * Returns -EAFNOSUPPORT when the destination (or a non-empty netmask) is not
 * AF_INET; other return paths are not visible in this listing.
 * When any of RTF_MTU, RTF_WINDOW or RTF_IRTT is set, a metrics buffer is
 * allocated with kzalloc(); ip_rt_ioctl() carries a comment marking where it
 * deals with that allocation.
 * NOTE(review): several guards, error returns and assignments (for example
 * to fc_dst, fc_gw and fc_mx) are missing from this listing; the inline notes
 * below describe only what is visible.
 */
281 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
282 struct fib_config *cfg)
287 memset(cfg, 0, sizeof(*cfg));
289 if (rt->rt_dst.sa_family != AF_INET)
290 return -EAFNOSUPPORT;
293 * Check mask for validity:
294 * a) it must be contiguous.
295 * b) destination must have all host bits clear.
296 * c) if application forgot to set correct family (AF_INET),
297 * reject request unless it is absolutely clear i.e.
298 * both family and mask are zero.
301 addr = sk_extract_addr(&rt->rt_dst);
/* The netmask is only examined for routes without RTF_HOST. */
302 if (!(rt->rt_flags & RTF_HOST)) {
303 __be32 mask = sk_extract_addr(&rt->rt_genmask);
305 if (rt->rt_genmask.sa_family != AF_INET) {
306 if (mask || rt->rt_genmask.sa_family)
307 return -EAFNOSUPPORT;
310 if (bad_mask(mask, addr))
313 plen = inet_mask_len(mask);
316 cfg->fc_dst_len = plen;
/* Every command other than delete is marked create, with protocol RTPROT_BOOT. */
319 if (cmd != SIOCDELRT) {
320 cfg->fc_nlflags = NLM_F_CREATE;
321 cfg->fc_protocol = RTPROT_BOOT;
/* fc_priority is the ioctl metric minus one. */
325 cfg->fc_priority = rt->rt_metric - 1;
327 if (rt->rt_flags & RTF_REJECT) {
328 cfg->fc_scope = RT_SCOPE_HOST;
329 cfg->fc_type = RTN_UNREACHABLE;
/* RT_SCOPE_NOWHERE acts as a "not decided yet" marker; see the test near the end. */
333 cfg->fc_scope = RT_SCOPE_NOWHERE;
334 cfg->fc_type = RTN_UNICAST;
338 struct net_device *dev;
339 char devname[IFNAMSIZ];
/* Copy at most IFNAMSIZ-1 bytes of the user-supplied name, then force termination. */
341 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
344 devname[IFNAMSIZ-1] = 0;
/* A ':' in the name is searched for; how 'colon' is used is not visible here. */
345 colon = strchr(devname, ':');
348 dev = __dev_get_by_name(&init_net, devname);
351 cfg->fc_oif = dev->ifindex;
353 struct in_ifaddr *ifa;
354 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* Search the device's addresses for one whose label equals devname; its local address becomes the preferred source. */
358 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
359 if (strcmp(ifa->ifa_label, devname) == 0)
363 cfg->fc_prefsrc = ifa->ifa_local;
367 addr = sk_extract_addr(&rt->rt_gateway);
368 if (rt->rt_gateway.sa_family == AF_INET && addr) {
/* A gateway flagged RTF_GATEWAY that classifies as unicast widens the scope to universe. */
370 if (rt->rt_flags & RTF_GATEWAY &&
371 inet_addr_type(addr) == RTN_UNICAST)
372 cfg->fc_scope = RT_SCOPE_UNIVERSE;
375 if (cmd == SIOCDELRT)
378 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
/* Scope still undecided at this point falls back to link scope. */
381 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
382 cfg->fc_scope = RT_SCOPE_LINK;
384 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
/* Room for at most three 4-byte attributes, one per flag tested below. */
388 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
392 if (rt->rt_flags & RTF_MTU)
/* NOTE(review): the 40 is presumably IPv4 plus TCP header size, turning an MTU into an MSS -- confirm. */
393 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
395 if (rt->rt_flags & RTF_WINDOW)
396 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
398 if (rt->rt_flags & RTF_IRTT)
/* The initial RTT is stored multiplied by 8. */
399 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
402 cfg->fc_mx_len = len;
409 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
/*
 * Entry point for the routing ioctls.
 *   cmd - ioctl number; SIOCADDRT and SIOCDELRT share one code path
 *   arg - user pointer to a struct rtentry
 * The caller must hold CAP_NET_ADMIN. The rtentry is copied from user space,
 * converted with rtentry_to_fib_config(), and applied: a delete looks the
 * table up with fib_get_table() and calls tb_delete(); an add obtains the
 * table with fib_new_table() and calls tb_insert().
 * NOTE(review): error returns, locking and the cleanup after the final
 * comment are not visible in this listing.
 */
412 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
414 struct fib_config cfg;
419 case SIOCADDRT: /* Add a route */
420 case SIOCDELRT: /* Delete a route */
421 if (!capable(CAP_NET_ADMIN))
424 if (copy_from_user(&rt, arg, sizeof(rt)))
428 err = rtentry_to_fib_config(cmd, &rt, &cfg);
430 struct fib_table *tb;
432 if (cmd == SIOCDELRT) {
433 tb = fib_get_table(cfg.fc_table);
435 err = tb->tb_delete(tb, &cfg);
439 tb = fib_new_table(cfg.fc_table);
441 err = tb->tb_insert(tb, &cfg);
446 /* allocated by rtentry_to_fib_config() */
/*
 * Netlink attribute policy for IPv4 route messages, indexed by attribute
 * type. rtm_to_fib_config() passes it to nlmsg_validate().
 * Most entries require a 32-bit value; RTA_METRICS is a nested attribute.
 */
455 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
456 [RTA_DST] = { .type = NLA_U32 },
457 [RTA_SRC] = { .type = NLA_U32 },
458 [RTA_IIF] = { .type = NLA_U32 },
459 [RTA_OIF] = { .type = NLA_U32 },
460 [RTA_GATEWAY] = { .type = NLA_U32 },
461 [RTA_PRIORITY] = { .type = NLA_U32 },
462 [RTA_PREFSRC] = { .type = NLA_U32 },
463 [RTA_METRICS] = { .type = NLA_NESTED },
/* Only a length is given here, no type; presumably a minimum payload size -- confirm against the nla_policy documentation. */
464 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
465 [RTA_PROTOINFO] = { .type = NLA_U32 },
466 [RTA_FLOW] = { .type = NLA_U32 },
/*
 * Convert a route netlink message into a 'struct fib_config'.
 *   skb - buffer the message arrived in; the sender pid is read from it
 *   nlh - netlink header of the message
 *   cfg - output, zeroed and then filled in
 * The message is first validated against rtm_ipv4_policy. Fixed fields are
 * copied from the rtmsg header, the message flags go to fc_nlflags, and the
 * sender pid and header pointer are recorded in fc_nlinfo. A route type
 * above RTN_MAX is tested for. The attributes are then walked one by one.
 * fc_mx and fc_mp are set to point at attribute payloads (nla_data), not at
 * copies. fc_table is first taken from the header and is assigned again from
 * an attribute inside the loop.
 * NOTE(review): the case labels of the switch and the error and return paths
 * are not visible in this listing; which attribute feeds which field is
 * suggested only by the assignment targets.
 */
469 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
470 struct fib_config *cfg)
476 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
480 memset(cfg, 0, sizeof(*cfg));
482 rtm = nlmsg_data(nlh);
483 cfg->fc_dst_len = rtm->rtm_dst_len;
484 cfg->fc_tos = rtm->rtm_tos;
485 cfg->fc_table = rtm->rtm_table;
486 cfg->fc_protocol = rtm->rtm_protocol;
487 cfg->fc_scope = rtm->rtm_scope;
488 cfg->fc_type = rtm->rtm_type;
489 cfg->fc_flags = rtm->rtm_flags;
490 cfg->fc_nlflags = nlh->nlmsg_flags;
492 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
493 cfg->fc_nlinfo.nlh = nlh;
495 if (cfg->fc_type > RTN_MAX) {
500 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
501 switch (nla_type(attr)) {
503 cfg->fc_dst = nla_get_be32(attr);
506 cfg->fc_oif = nla_get_u32(attr);
509 cfg->fc_gw = nla_get_be32(attr);
512 cfg->fc_priority = nla_get_u32(attr);
515 cfg->fc_prefsrc = nla_get_be32(attr);
518 cfg->fc_mx = nla_data(attr);
519 cfg->fc_mx_len = nla_len(attr);
522 cfg->fc_mp = nla_data(attr);
523 cfg->fc_mp_len = nla_len(attr);
526 cfg->fc_flow = nla_get_u32(attr);
529 cfg->fc_table = nla_get_u32(attr);
/*
 * rtnetlink handler registered for RTM_DELROUTE in ip_fib_init().
 * Parses the message with rtm_to_fib_config(), looks up the target table
 * with fib_get_table() (existing tables only) and calls its tb_delete().
 * NOTE(review): the error checks between these steps and the return are not
 * visible in this listing.
 */
539 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
541 struct fib_config cfg;
542 struct fib_table *tb;
545 err = rtm_to_fib_config(skb, nlh, &cfg);
549 tb = fib_get_table(cfg.fc_table);
555 err = tb->tb_delete(tb, &cfg);
/*
 * rtnetlink handler registered for RTM_NEWROUTE in ip_fib_init().
 * Parses the message with rtm_to_fib_config(), obtains the target table with
 * fib_new_table() and calls its tb_insert().
 * NOTE(review): the error checks between these steps and the return are not
 * visible in this listing.
 */
560 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
562 struct fib_config cfg;
563 struct fib_table *tb;
566 err = rtm_to_fib_config(skb, nlh, &cfg);
570 tb = fib_new_table(cfg.fc_table);
576 err = tb->tb_insert(tb, &cfg);
/*
 * rtnetlink dump callback registered for RTM_GETROUTE in ip_fib_init().
 * When the request carries at least a full rtmsg whose rtm_flags include
 * RTM_F_CLONED, the dump is delegated to ip_rt_dump(). Otherwise the table
 * hash is walked from bucket s_h onward and each table's tb_dump() is called;
 * a negative return from tb_dump() is tested for.
 * NOTE(review): s_h and s_e look like resume positions kept across calls,
 * but where they are loaded and stored is not visible in this listing.
 */
581 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
584 unsigned int e = 0, s_e;
585 struct fib_table *tb;
586 struct hlist_node *node;
589 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
590 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
591 return ip_rt_dump(skb, cb);
/* s_e is reset to 0 each time the loop advances to the next bucket. */
596 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
598 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
/* Clear every callback argument slot from index 2 to the end before dumping a table. */
602 memset(&cb->args[2], 0, sizeof(cb->args) -
603 2 * sizeof(cb->args[0]));
604 if (tb->tb_dump(tb, skb, cb) < 0)
618 /* Prepare and feed an intra-kernel routing request.
619 Ideally this would be a netlink message, but netlink
620 may not be configured, so we feed the request directly
621 to the FIB engine. This is legal because all such events occur
622 only when netlink is already locked. */
/*
 * Insert or delete one kernel-generated route on behalf of an interface
 * address.
 *   cmd     - RTM_NEWROUTE inserts; any other value deletes
 *   type    - route type; RTN_UNICAST goes to the main table, everything
 *             else to the local table
 *   dst     - destination of the route (its assignment into cfg is not
 *             visible in this listing)
 *   dst_len - prefix length
 *   ifa     - address supplying the preferred source and output interface
 * The route is created with protocol RTPROT_KERNEL and the netlink flags
 * NLM_F_CREATE | NLM_F_APPEND. Scope is RT_SCOPE_HOST for RTN_LOCAL routes
 * and RT_SCOPE_LINK for all other types.
 * The results of tb_insert() and tb_delete() are not captured.
 */
625 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
627 struct fib_table *tb;
628 struct fib_config cfg = {
629 .fc_protocol = RTPROT_KERNEL,
632 .fc_dst_len = dst_len,
633 .fc_prefsrc = ifa->ifa_local,
634 .fc_oif = ifa->ifa_dev->dev->ifindex,
635 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
638 if (type == RTN_UNICAST)
639 tb = fib_new_table(RT_TABLE_MAIN);
641 tb = fib_new_table(RT_TABLE_LOCAL);
646 cfg.fc_table = tb->tb_id;
648 if (type != RTN_LOCAL)
649 cfg.fc_scope = RT_SCOPE_LINK;
651 cfg.fc_scope = RT_SCOPE_HOST;
653 if (cmd == RTM_NEWROUTE)
654 tb->tb_insert(tb, &cfg);
656 tb->tb_delete(tb, &cfg);
/*
 * Install the routes implied by an interface address, using fib_magic().
 * 'prim' is the address whose local address and device are used for those
 * routes; it starts as 'ifa' itself and, for an IFA_F_SECONDARY address, is
 * looked up with inet_ifa_byprefix() (a debug message covers the case where
 * that lookup yields NULL).
 * Routes added, in order:
 *   - a /32 RTN_LOCAL route for the address itself
 *   - a /32 broadcast route for an explicitly assigned broadcast address
 *     other than 255.255.255.255
 *   - for a non-secondary address with a non-zero-net prefix that is either
 *     different from the address or shorter than /32: the prefix route
 *     (RTN_LOCAL on a loopback device, RTN_UNICAST otherwise), plus, for
 *     prefixes shorter than /31, /32 broadcast routes for the network address
 *     and for the all-ones host address
 * NOTE(review): the statement guarded by the IFF_UP test is not visible in
 * this listing; presumably the function stops there when the device is down.
 */
659 void fib_add_ifaddr(struct in_ifaddr *ifa)
661 struct in_device *in_dev = ifa->ifa_dev;
662 struct net_device *dev = in_dev->dev;
663 struct in_ifaddr *prim = ifa;
664 __be32 mask = ifa->ifa_mask;
665 __be32 addr = ifa->ifa_local;
666 __be32 prefix = ifa->ifa_address&mask;
668 if (ifa->ifa_flags&IFA_F_SECONDARY) {
669 prim = inet_ifa_byprefix(in_dev, prefix, mask);
671 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
676 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
678 if (!(dev->flags&IFF_UP))
681 /* Add broadcast address, if it is explicitly assigned. */
682 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
683 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
685 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
686 (prefix != addr || ifa->ifa_prefixlen < 32)) {
687 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
688 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
690 /* Add network-specific broadcasts, when it makes sense */
691 if (ifa->ifa_prefixlen < 31) {
692 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
693 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
/*
 * Remove the routes that were installed for an interface address.
 * 'brd' is the all-ones host address of the prefix and 'any' is the network
 * address. For a non-secondary address the prefix route is deleted first
 * (RTN_LOCAL on loopback, RTN_UNICAST otherwise); inet_ifa_byprefix() is
 * used to find a primary address, with a debug message for a NULL result.
 * The device's address list is then scanned, comparing each remaining
 * address against this one's local address, broadcast address, 'brd' and
 * 'any'. Broadcast routes for the broadcast address, 'brd' and 'any', and
 * the /32 local route, are deleted through fib_magic(); the local route
 * deletion is guarded by the LOCAL_OK bit of 'ok'. If the address no longer
 * classifies as RTN_LOCAL afterwards, fib_sync_down() is called for it.
 * NOTE(review): the definition of the 'ok' bits, the guards on the broadcast
 * deletions and the branch structure around the primary lookup are not
 * visible in this listing.
 */
698 static void fib_del_ifaddr(struct in_ifaddr *ifa)
700 struct in_device *in_dev = ifa->ifa_dev;
701 struct net_device *dev = in_dev->dev;
702 struct in_ifaddr *ifa1;
703 struct in_ifaddr *prim = ifa;
704 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
705 __be32 any = ifa->ifa_address&ifa->ifa_mask;
712 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
713 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
714 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
716 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
718 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
723 /* Deletion is more complicated than add.
724 We should take care of not to delete too much :-)
726 Scan address list to be sure that addresses are really gone.
729 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
730 if (ifa->ifa_local == ifa1->ifa_local)
732 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
734 if (brd == ifa1->ifa_broadcast)
736 if (any == ifa1->ifa_broadcast)
741 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
743 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
745 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
746 if (!(ok&LOCAL_OK)) {
747 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
749 /* Check, that this local address finally disappeared. */
750 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
751 /* And the last, but not the least thing.
752 We must flush stray FIB entries.
754 First of all, we scan fib_info list searching
755 for stray nexthop entries, then ignite fib_flush.
757 if (fib_sync_down(ifa->ifa_local, NULL, 0))
/*
 * Perform one FIB lookup for the netlink lookup service and write the
 * outcome back into the request structure.
 *   frn - request and reply; fl_mark, fl_addr and fl_scope are read, while
 *         tb_id, err, prefixlen, nh_sel, type and scope are written
 *   tb  - table to search
 * NOTE(review): whether 'tb' is checked for NULL, and under which condition
 * the result fields are copied, is not visible in this listing.
 */
767 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
770 struct fib_result res;
771 struct flowi fl = { .mark = frn->fl_mark,
772 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
774 .scope = frn->fl_scope } } };
776 #ifdef CONFIG_IP_MULTIPLE_TABLES
784 frn->tb_id = tb->tb_id;
785 frn->err = tb->tb_lookup(tb, &fl, &res);
788 frn->prefixlen = res.prefixlen;
789 frn->nh_sel = res.nh_sel;
790 frn->type = res.type;
791 frn->scope = res.scope;
/*
 * Input callback of the FIB lookup netlink socket (see nl_fib_lookup_init()).
 * The message length is tested three ways: the buffer must hold a netlink
 * header, must hold the whole message, and the message must be large enough
 * for a struct fib_result_nl. The buffer is then cloned, the request inside
 * the clone is answered in place by nl_fib_lookup() using the table named by
 * tb_id_in, and the clone is sent back to the originating pid as a unicast
 * from the kernel (pid 0, no destination group) without blocking.
 * NOTE(review): the statements taken when the length tests or skb_clone()
 * fail are not visible in this listing.
 */
798 static void nl_fib_input(struct sk_buff *skb)
800 struct fib_result_nl *frn;
801 struct nlmsghdr *nlh;
802 struct fib_table *tb;
805 nlh = nlmsg_hdr(skb);
806 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
807 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
810 skb = skb_clone(skb, GFP_KERNEL);
/* Re-read the header: from here on 'skb' refers to the clone. */
813 nlh = nlmsg_hdr(skb);
815 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
816 tb = fib_get_table(frn->tb_id_in);
818 nl_fib_lookup(frn, tb);
820 pid = NETLINK_CB(skb).pid; /* pid of sending process */
821 NETLINK_CB(skb).pid = 0; /* from kernel */
822 NETLINK_CB(skb).dst_group = 0; /* unicast */
823 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
/*
 * Create the kernel netlink socket for NETLINK_FIB_LOOKUP in init_net, with
 * nl_fib_input() as its input callback, and store it in 'fibnl'.
 * NOTE(review): the result of netlink_kernel_create() is stored without a
 * visible NULL check.
 */
826 static void nl_fib_lookup_init(void)
828 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
829 nl_fib_input, NULL, THIS_MODULE);
/*
 * Take down FIB state associated with a device by calling
 * fib_sync_down(0, dev, force). Callers in this file pass force values 0, 1
 * and 2.
 * NOTE(review): the statement executed when fib_sync_down() returns non-zero,
 * and anything after it, is not visible in this listing.
 */
832 static void fib_disable_ip(struct net_device *dev, int force)
834 if (fib_sync_down(0, dev, force))
/*
 * Notifier callback for IPv4 address events (hooked up through
 * fib_inetaddr_notifier). 'ptr' is the struct in_ifaddr concerned.
 * Visible behaviour: with CONFIG_IP_ROUTE_MULTIPATH, fib_sync_up() is called
 * for the address's device; when the device's address list has become empty,
 * fib_disable_ip() is called with force 1.
 * NOTE(review): the switch on 'event', the calls made per event and the
 * return value are not visible in this listing.
 */
840 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
842 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
847 #ifdef CONFIG_IP_ROUTE_MULTIPATH
848 fib_sync_up(ifa->ifa_dev->dev);
854 if (ifa->ifa_dev->ifa_list == NULL) {
855 /* Last address was deleted from this interface.
858 fib_disable_ip(ifa->ifa_dev->dev, 1);
/*
 * Notifier callback for network device events (hooked up through
 * fib_netdev_notifier). 'ptr' is the struct net_device concerned.
 * Devices outside init_net are tested for first. NETDEV_UNREGISTER leads to
 * fib_disable_ip(dev, 2); another path iterates over the device's addresses
 * (the loop ends with endfor_ifa) and another calls fib_disable_ip(dev, 0).
 * NETDEV_CHANGEMTU is also handled.
 * NOTE(review): the remaining case labels, the loop body and the return
 * values are not visible in this listing.
 */
867 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
869 struct net_device *dev = ptr;
870 struct in_device *in_dev = __in_dev_get_rtnl(dev);
872 if (dev->nd_net != &init_net)
875 if (event == NETDEV_UNREGISTER) {
876 fib_disable_ip(dev, 2);
887 } endfor_ifa(in_dev);
888 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 fib_disable_ip(dev, 0);
896 case NETDEV_CHANGEMTU:
/* Notifier block routing IPv4 address events to fib_inetaddr_event(); registered in ip_fib_init(). */
904 static struct notifier_block fib_inetaddr_notifier = {
905 .notifier_call =fib_inetaddr_event,
/* Notifier block routing network device events to fib_netdev_event(); registered in ip_fib_init(). */
908 static struct notifier_block fib_netdev_notifier = {
909 .notifier_call =fib_netdev_event,
/*
 * One-time initialisation of the IPv4 FIB frontend:
 *   - initialise every bucket of fib_table_hash
 *   - register the device and address notifiers
 *   - create the FIB lookup netlink socket
 *   - register the rtnetlink handlers: RTM_NEWROUTE and RTM_DELROUTE each get
 *     a handler in the third argument, RTM_GETROUTE gets inet_dump_fib in the
 *     fourth (presumably the "do" and "dump" slots -- confirm against the
 *     rtnl_register() prototype)
 * NOTE(review): some statements of this function are not visible in this
 * listing.
 */
912 void __init ip_fib_init(void)
916 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
917 INIT_HLIST_HEAD(&fib_table_hash[i]);
921 register_netdevice_notifier(&fib_netdev_notifier);
922 register_inetaddr_notifier(&fib_inetaddr_notifier);
923 nl_fib_lookup_init();
925 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
926 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
927 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
/* Symbols made available to other kernel code: address classification and device-by-address lookup. */
930 EXPORT_SYMBOL(inet_addr_type);
931 EXPORT_SYMBOL(ip_dev_find);