2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
/* Debug helper: passes its arguments to printk() at KERN_DEBUG level. */
50 #define FFprint(a...) printk(KERN_DEBUG a)
/* Netlink socket created in nl_fib_lookup_init(); nl_fib_input() passes it
 * to netlink_unicast() when sending lookup replies. */
52 static struct sock *fibnl;
/* The declarations below depend on CONFIG_IP_MULTIPLE_TABLES.
 * NOTE(review): FIB_TABLE_HASHSZ and fib_table_hash appear twice (1 bucket
 * and 256 buckets); presumably an #else that is not visible in this listing
 * separates the two variants -- confirm against the full file. */
54 #ifndef CONFIG_IP_MULTIPLE_TABLES
/* Global pointers to the local and main tables; assigned in ip_fib_init(). */
56 struct fib_table *ip_fib_local_table;
57 struct fib_table *ip_fib_main_table;
/* Single-bucket variant of the table hash. */
59 #define FIB_TABLE_HASHSZ 1
60 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/* 256-bucket variant; fib_new_table()/fib_get_table() index it with
 * id & (FIB_TABLE_HASHSZ - 1). */
64 #define FIB_TABLE_HASHSZ 256
65 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * fib_new_table - look up routing table @id, creating it when needed.
 *
 * Visible steps: look the table up with fib_get_table(), build one with
 * fib_hash_init(), then link it at the head of its hash bucket.
 * NOTE(review): the conditionals between these steps and the return
 * statements are not visible in this listing; presumably creation only
 * happens when the lookup finds nothing -- confirm.
 */
67 struct fib_table *fib_new_table(u32 id)
74 tb = fib_get_table(id);
77 tb = fib_hash_init(id);
/* Bucket index is the low bits of the id (FIB_TABLE_HASHSZ is a power of two). */
80 h = id & (FIB_TABLE_HASHSZ - 1);
/* RCU-safe insertion; fib_get_table() walks the bucket with the _rcu iterator. */
81 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
/*
 * fib_get_table - find the routing table whose tb_id equals @id.
 *
 * Hashes @id to a bucket and walks that bucket with the RCU list iterator,
 * comparing tb_id against @id.
 * NOTE(review): locking around the walk and the return statements are not
 * visible in this listing.
 */
85 struct fib_table *fib_get_table(u32 id)
88 struct hlist_node *node;
/* Same bucket computation as fib_new_table(). */
93 h = id & (FIB_TABLE_HASHSZ - 1);
95 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
96 if (tb->tb_id == id) {
104 #endif /* CONFIG_IP_MULTIPLE_TABLES */
/*
 * fib_flush - call the tb_flush method of every table in fib_table_hash.
 *
 * Walks all hash buckets and sums the values returned by tb_flush() into
 * 'flushed'.
 * NOTE(review): what is done with 'flushed' afterwards is not visible in
 * this listing.
 */
106 static void fib_flush(void)
109 struct fib_table *tb;
110 struct hlist_node *node;
113 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
114 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
115 flushed += tb->tb_flush(tb);
123 * Find the first device with a given source address.
/*
 * ip_dev_find - find the device that owns address @addr.
 *
 * Builds a flow key with @addr as the destination, looks it up in the local
 * table, and takes the device from the result when the route type is
 * RTN_LOCAL. 'dev' starts out NULL.
 * NOTE(review): the failure branches, any reference counting on the device
 * and the return statement are not visible in this listing.
 */
126 struct net_device * ip_dev_find(__be32 addr)
128 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
129 struct fib_result res;
130 struct net_device *dev = NULL;
132 #ifdef CONFIG_IP_MULTIPLE_TABLES
/* A missing local table and a non-zero tb_lookup() result are tested together. */
136 if (!ip_fib_local_table ||
137 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
139 if (res.type != RTN_LOCAL)
141 dev = FIB_RES_DEV(res);
/*
 * inet_addr_type - classify @addr as an RTN_* route type.
 *
 * Addresses matching ZERONET() or BADCLASS() are reported as RTN_BROADCAST.
 * Otherwise the address is looked up in the local table; 'ret' defaults to
 * RTN_BROADCAST.
 * NOTE(review): the condition guarding the RTN_MULTICAST return and the
 * handling of the lookup result are not visible in this listing.
 */
150 unsigned inet_addr_type(__be32 addr)
152 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
153 struct fib_result res;
154 unsigned ret = RTN_BROADCAST;
156 if (ZERONET(addr) || BADCLASS(addr))
157 return RTN_BROADCAST;
159 return RTN_MULTICAST;
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
165 if (ip_fib_local_table) {
/* tb_lookup() result is negated, so the branch is taken on a zero return. */
167 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
176 /* Given (packet source, input interface) and optional (dst, oif, tos):
177 - (main) check, that source is valid i.e. not broadcast or our local
179 - figure out what "logical" interface this packet arrived
180 and calculate "specific destination" address.
181 - check, that packet arrived from expected physical interface.
/*
 * fib_validate_source - check the source address of a received packet.
 *
 * @src, @dst, @tos, @oif: addressing information of the packet.
 * @dev:      device the packet arrived on.
 * @spec_dst: output, receives the chosen "specific destination" address.
 * @itag:     output, updated through fib_combine_itag().
 *
 * Visible flow: read the device's IPv4 state (whether it has any address,
 * and its rp_filter setting), run a FIB lookup, and require a unicast
 * result. If the route's device matches @dev (or, with multipath, the
 * route has more than one nexthop) the result is derived from the nexthop
 * scope. Otherwise a second lookup is done with the output interface
 * pinned to @dev.
 * NOTE(review): the flow-key initialiser, the error/goto paths and the
 * return statements are not visible in this listing.
 */
184 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
185 struct net_device *dev, __be32 *spec_dst, u32 *itag)
187 struct in_device *in_dev;
188 struct flowi fl = { .nl_u = { .ip4_u =
193 struct fib_result res;
199 in_dev = __in_dev_get_rcu(dev);
/* no_addr: the interface has no IPv4 address configured. */
201 no_addr = in_dev->ifa_list == NULL;
202 rpf = IN_DEV_RPFILTER(in_dev);
209 if (fib_lookup(&fl, &res))
211 if (res.type != RTN_UNICAST)
213 *spec_dst = FIB_RES_PREFSRC(res);
214 fib_combine_itag(itag, &res);
215 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* With multipath, a route with several nexthops is accepted as well. */
216 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
218 if (FIB_RES_DEV(res) == dev)
/* ret is 1 when the nexthop scope is RT_SCOPE_HOST or greater, else 0. */
221 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Second attempt: restrict the lookup to routes through @dev. */
230 fl.oif = dev->ifindex;
233 if (fib_lookup(&fl, &res) == 0) {
234 if (res.type == RTN_UNICAST) {
235 *spec_dst = FIB_RES_PREFSRC(res);
236 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Take an address from @dev with inet_select_addr(). */
245 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
/*
 * sk_extract_addr - return the IPv4 address stored in a generic sockaddr.
 *
 * Reinterprets @addr as a struct sockaddr_in and returns sin_addr.s_addr.
 * The address family is not checked here; callers test sa_family themselves.
 */
255 static inline __be32 sk_extract_addr(struct sockaddr *addr)
257 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
/*
 * put_rtax - write one 32-bit netlink attribute into a metrics buffer.
 *
 * @mx:    start of the buffer.
 * @len:   byte offset in @mx at which the attribute is written.
 * @type:  attribute type stored in nla_type.
 * @value: 32-bit payload.
 *
 * Returns the offset for the next attribute: @len + nla_total_size(4).
 * No bounds check is done here; the caller must size @mx accordingly.
 */
260 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
/* Byte arithmetic on mx to reach the attribute at offset len. */
264 nla = (struct nlattr *) ((char *) mx + len);
265 nla->nla_type = type;
266 nla->nla_len = nla_attr_size(4);
267 *(u32 *) nla_data(nla) = value;
269 return len + nla_total_size(4);
/*
 * rtentry_to_fib_config - translate a legacy struct rtentry into a
 * struct fib_config.
 *
 * @cmd: ioctl command; compared against SIOCDELRT below.
 * @rt:  route description. rt->rt_dev is read with copy_from_user(), so it
 *       is a user-space pointer.
 * @cfg: output; zeroed first, then filled in.
 *
 * Visible error returns: -EAFNOSUPPORT when the destination family is not
 * AF_INET, or when the netmask family is not AF_INET and either the mask or
 * the family is non-zero.
 * When any of RTF_MTU/RTF_WINDOW/RTF_IRTT is set, a metrics buffer is
 * allocated with kzalloc() and its used length is stored in cfg->fc_mx_len.
 * NOTE(review): many lines of this function (other error returns, the
 * conditions guarding several assignments, the stores to fc_dst, fc_gw and
 * fc_mx) are not visible in this listing; the comments describe only the
 * visible statements.
 */
272 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 struct fib_config *cfg)
278 memset(cfg, 0, sizeof(*cfg));
280 if (rt->rt_dst.sa_family != AF_INET)
281 return -EAFNOSUPPORT;
284 * Check mask for validity:
285 * a) it must be contiguous.
286 * b) destination must have all host bits clear.
287 * c) if application forgot to set correct family (AF_INET),
288 * reject request unless it is absolutely clear i.e.
289 * both family and mask are zero.
292 addr = sk_extract_addr(&rt->rt_dst);
/* Only non-host routes carry a netmask that needs validating. */
293 if (!(rt->rt_flags & RTF_HOST)) {
294 __be32 mask = sk_extract_addr(&rt->rt_genmask);
296 if (rt->rt_genmask.sa_family != AF_INET) {
297 if (mask || rt->rt_genmask.sa_family)
298 return -EAFNOSUPPORT;
301 if (bad_mask(mask, addr))
304 plen = inet_mask_len(mask);
307 cfg->fc_dst_len = plen;
/* Everything except a delete is issued with NLM_F_CREATE and marked RTPROT_BOOT. */
310 if (cmd != SIOCDELRT) {
311 cfg->fc_nlflags = NLM_F_CREATE;
312 cfg->fc_protocol = RTPROT_BOOT;
/* The FIB priority is the rtentry metric minus one. */
316 cfg->fc_priority = rt->rt_metric - 1;
318 if (rt->rt_flags & RTF_REJECT) {
319 cfg->fc_scope = RT_SCOPE_HOST;
320 cfg->fc_type = RTN_UNREACHABLE;
/* RT_SCOPE_NOWHERE is a placeholder; it is replaced further down. */
324 cfg->fc_scope = RT_SCOPE_NOWHERE;
325 cfg->fc_type = RTN_UNICAST;
329 struct net_device *dev;
330 char devname[IFNAMSIZ];
/* Copy at most IFNAMSIZ-1 bytes of the name, then force NUL termination. */
332 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
335 devname[IFNAMSIZ-1] = 0;
/* Look for a ':' in the name (alias labels are compared against ifa_label below). */
336 colon = strchr(devname, ':');
339 dev = __dev_get_by_name(&init_net, devname);
342 cfg->fc_oif = dev->ifindex;
344 struct in_ifaddr *ifa;
345 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* Find the address whose label equals devname; its local address becomes the preferred source. */
349 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 if (strcmp(ifa->ifa_label, devname) == 0)
354 cfg->fc_prefsrc = ifa->ifa_local;
358 addr = sk_extract_addr(&rt->rt_gateway);
359 if (rt->rt_gateway.sa_family == AF_INET && addr) {
/* A gateway route to a unicast address gets universe scope. */
361 if (rt->rt_flags & RTF_GATEWAY &&
362 inet_addr_type(addr) == RTN_UNICAST)
363 cfg->fc_scope = RT_SCOPE_UNIVERSE;
366 if (cmd == SIOCDELRT)
/* RTF_GATEWAY set but no gateway address recorded in cfg. */
369 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
/* Scope still unset: default to link scope. */
372 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 cfg->fc_scope = RT_SCOPE_LINK;
375 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
/* Room for up to three 4-byte attributes, one per flag handled below. */
379 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
383 if (rt->rt_flags & RTF_MTU)
/* MTU is stored as an advertised MSS: rt_mtu minus 40
 * (presumably 20-byte IP + 20-byte TCP headers -- confirm). */
384 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386 if (rt->rt_flags & RTF_WINDOW)
387 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
389 if (rt->rt_flags & RTF_IRTT)
/* The initial RTT is stored shifted left by 3 bits. */
390 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
393 cfg->fc_mx_len = len;
400 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
/*
 * ip_rt_ioctl - handle the SIOCADDRT and SIOCDELRT routing ioctls.
 *
 * @cmd: ioctl command.
 * @arg: user-space pointer to a struct rtentry.
 *
 * Requires CAP_NET_ADMIN, copies the rtentry from user space, converts it
 * with rtentry_to_fib_config(), and then calls tb_delete() on an existing
 * table or tb_insert() on a table obtained from fib_new_table().
 * NOTE(review): the error codes, locking and the release of the metrics
 * buffer are not visible in this listing.
 */
403 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405 struct fib_config cfg;
410 case SIOCADDRT: /* Add a route */
411 case SIOCDELRT: /* Delete a route */
412 if (!capable(CAP_NET_ADMIN))
415 if (copy_from_user(&rt, arg, sizeof(rt)))
419 err = rtentry_to_fib_config(cmd, &rt, &cfg);
421 struct fib_table *tb;
423 if (cmd == SIOCDELRT) {
/* Deleting never creates a table: plain lookup only. */
424 tb = fib_get_table(cfg.fc_table);
426 err = tb->tb_delete(tb, &cfg);
/* Adding may need to create the table first. */
430 tb = fib_new_table(cfg.fc_table);
432 err = tb->tb_insert(tb, &cfg);
437 /* allocated by rtentry_to_fib_config() */
/*
 * Netlink attribute policy for IPv4 route messages; passed to
 * nlmsg_validate() in rtm_to_fib_config(). Most attributes are declared as
 * 32-bit values, RTA_METRICS is a nested attribute, and RTA_MULTIPATH is
 * given a length of sizeof(struct rtnexthop).
 */
446 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
447 [RTA_DST] = { .type = NLA_U32 },
448 [RTA_SRC] = { .type = NLA_U32 },
449 [RTA_IIF] = { .type = NLA_U32 },
450 [RTA_OIF] = { .type = NLA_U32 },
451 [RTA_GATEWAY] = { .type = NLA_U32 },
452 [RTA_PRIORITY] = { .type = NLA_U32 },
453 [RTA_PREFSRC] = { .type = NLA_U32 },
454 [RTA_METRICS] = { .type = NLA_NESTED },
455 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
456 [RTA_PROTOINFO] = { .type = NLA_U32 },
457 [RTA_FLOW] = { .type = NLA_U32 },
/*
 * rtm_to_fib_config - translate an rtnetlink route message into a
 * struct fib_config.
 *
 * @skb: received message buffer; supplies the sender pid.
 * @nlh: netlink header of the message.
 * @cfg: output; zeroed first, then filled in.
 *
 * The message is validated against rtm_ipv4_policy, the fixed rtmsg header
 * fields are copied, and each attribute is then stored in the matching cfg
 * field. fc_mx and fc_mp point into the message itself (nla_data()); no
 * copy is made.
 * NOTE(review): the case labels of the attribute switch and the error
 * returns are not visible in this listing.
 */
460 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
461 struct fib_config *cfg)
467 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
471 memset(cfg, 0, sizeof(*cfg));
473 rtm = nlmsg_data(nlh);
474 cfg->fc_dst_len = rtm->rtm_dst_len;
475 cfg->fc_tos = rtm->rtm_tos;
476 cfg->fc_table = rtm->rtm_table;
477 cfg->fc_protocol = rtm->rtm_protocol;
478 cfg->fc_scope = rtm->rtm_scope;
479 cfg->fc_type = rtm->rtm_type;
480 cfg->fc_flags = rtm->rtm_flags;
481 cfg->fc_nlflags = nlh->nlmsg_flags;
/* Record the requester's pid and the request header in fc_nlinfo. */
483 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
484 cfg->fc_nlinfo.nlh = nlh;
/* Route types above RTN_MAX are singled out here. */
486 if (cfg->fc_type > RTN_MAX) {
491 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
492 switch (nla_type(attr)) {
494 cfg->fc_dst = nla_get_be32(attr);
497 cfg->fc_oif = nla_get_u32(attr);
500 cfg->fc_gw = nla_get_be32(attr);
503 cfg->fc_priority = nla_get_u32(attr);
506 cfg->fc_prefsrc = nla_get_be32(attr);
509 cfg->fc_mx = nla_data(attr);
510 cfg->fc_mx_len = nla_len(attr);
513 cfg->fc_mp = nla_data(attr);
514 cfg->fc_mp_len = nla_len(attr);
517 cfg->fc_flow = nla_get_u32(attr);
/* A table id given as an attribute overwrites the one from rtm_table. */
520 cfg->fc_table = nla_get_u32(attr);
/*
 * inet_rtm_delroute - rtnetlink handler registered for RTM_DELROUTE.
 *
 * Parses the request with rtm_to_fib_config(), looks up the target table
 * with fib_get_table() (no table is created), and calls its tb_delete().
 * NOTE(review): the error checks between these steps are not visible in
 * this listing.
 */
530 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
532 struct fib_config cfg;
533 struct fib_table *tb;
536 err = rtm_to_fib_config(skb, nlh, &cfg);
540 tb = fib_get_table(cfg.fc_table);
546 err = tb->tb_delete(tb, &cfg);
/*
 * inet_rtm_newroute - rtnetlink handler registered for RTM_NEWROUTE.
 *
 * Parses the request with rtm_to_fib_config(), obtains the target table
 * with fib_new_table(), and calls its tb_insert().
 * NOTE(review): the error checks between these steps are not visible in
 * this listing.
 */
551 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
553 struct fib_config cfg;
554 struct fib_table *tb;
557 err = rtm_to_fib_config(skb, nlh, &cfg);
561 tb = fib_new_table(cfg.fc_table);
567 err = tb->tb_insert(tb, &cfg);
/*
 * inet_dump_fib - rtnetlink dump callback registered for RTM_GETROUTE.
 *
 * When the request carries a full rtmsg with RTM_F_CLONED set, the dump is
 * delegated to ip_rt_dump(). Otherwise every table in fib_table_hash is
 * dumped through its tb_dump() method, starting at bucket s_h.
 * NOTE(review): how s_h/s_e are loaded from and saved to cb->args, and
 * what happens when tb_dump() returns a negative value, are not visible in
 * this listing.
 */
572 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
575 unsigned int e = 0, s_e;
576 struct fib_table *tb;
577 struct hlist_node *node;
580 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
581 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
582 return ip_rt_dump(skb, cb);
/* s_e is reset to 0 after each bucket, so only the first bucket visited uses its initial value. */
587 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
589 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
/* Clear cb->args[2] and everything after it before calling tb_dump(). */
593 memset(&cb->args[2], 0, sizeof(cb->args) -
594 2 * sizeof(cb->args[0]));
595 if (tb->tb_dump(tb, skb, cb) < 0)
609 /* Prepare and feed intra-kernel routing request.
610 Really, it should be netlink message, but :-( netlink
611 can be not configured, so that we feed it directly
612 to fib engine. It is legal, because all events occur
613 only when netlink is already locked.
/*
 * fib_magic - add or delete a kernel-generated route for an interface
 * address.
 *
 * @cmd:     RTM_NEWROUTE inserts the route; any other value deletes it.
 * @type:    route type; selects the table and the scope.
 * @dst:     destination prefix.
 * @dst_len: prefix length.
 * @ifa:     address supplying the preferred source (ifa_local) and the
 *           output interface.
 *
 * RTN_UNICAST routes go to RT_TABLE_MAIN, all other types to
 * RT_TABLE_LOCAL. RTN_LOCAL routes get host scope, everything else link
 * scope. The return values of tb_insert()/tb_delete() are ignored.
 * NOTE(review): some initialisers of cfg (presumably type and dst) and the
 * check on the table pointer are not visible in this listing.
 */
616 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
618 struct fib_table *tb;
619 struct fib_config cfg = {
620 .fc_protocol = RTPROT_KERNEL,
623 .fc_dst_len = dst_len,
624 .fc_prefsrc = ifa->ifa_local,
625 .fc_oif = ifa->ifa_dev->dev->ifindex,
626 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
629 if (type == RTN_UNICAST)
630 tb = fib_new_table(RT_TABLE_MAIN);
632 tb = fib_new_table(RT_TABLE_LOCAL);
637 cfg.fc_table = tb->tb_id;
639 if (type != RTN_LOCAL)
640 cfg.fc_scope = RT_SCOPE_LINK;
642 cfg.fc_scope = RT_SCOPE_HOST;
644 if (cmd == RTM_NEWROUTE)
645 tb->tb_insert(tb, &cfg);
647 tb->tb_delete(tb, &cfg);
/*
 * fib_add_ifaddr - install the routes implied by interface address @ifa.
 *
 * Always adds a /32 local route for the address. The remaining routes are
 * added after the IFF_UP test: an explicitly assigned broadcast address,
 * the prefix route for a primary address (RTN_LOCAL on loopback devices,
 * RTN_UNICAST otherwise), and the two network broadcast addresses when the
 * prefix is shorter than /31.
 * For a secondary address the primary address of the same prefix is looked
 * up and passed to fib_magic() instead.
 * NOTE(review): the early-return statements are not visible in this
 * listing.
 */
650 void fib_add_ifaddr(struct in_ifaddr *ifa)
652 struct in_device *in_dev = ifa->ifa_dev;
653 struct net_device *dev = in_dev->dev;
654 struct in_ifaddr *prim = ifa;
655 __be32 mask = ifa->ifa_mask;
656 __be32 addr = ifa->ifa_local;
657 __be32 prefix = ifa->ifa_address&mask;
659 if (ifa->ifa_flags&IFA_F_SECONDARY) {
660 prim = inet_ifa_byprefix(in_dev, prefix, mask);
662 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
667 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
669 if (!(dev->flags&IFF_UP))
672 /* Add broadcast address, if it is explicitly assigned. */
673 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
674 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
/* Prefix route: primary addresses only, non-zero network, and not a /32 whose prefix equals the address itself. */
676 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
677 (prefix != addr || ifa->ifa_prefixlen < 32)) {
678 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
679 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
681 /* Add network-specific broadcasts, when it makes sense */
682 if (ifa->ifa_prefixlen < 31) {
/* Both the all-zeros-host and all-ones-host addresses of the prefix. */
683 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
684 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
/*
 * fib_del_ifaddr - remove the routes that belonged to interface address
 * @ifa.
 *
 * For a primary address the prefix route is deleted first; the other
 * visible branch looks up the primary address of the same prefix. The
 * remaining addresses of the device are then scanned and compared with
 * this address's local address, its broadcast address and the two
 * computed network broadcasts (brd, any). The /32 broadcast routes are
 * deleted, and the /32 local route is deleted when LOCAL_OK is not set in
 * 'ok'. If the address is no longer local afterwards, fib_sync_down() is
 * called for it.
 * NOTE(review): the statements that set the 'ok' flags, the conditions
 * guarding the broadcast deletions and the final flush are not visible in
 * this listing.
 */
689 static void fib_del_ifaddr(struct in_ifaddr *ifa)
691 struct in_device *in_dev = ifa->ifa_dev;
692 struct net_device *dev = in_dev->dev;
693 struct in_ifaddr *ifa1;
694 struct in_ifaddr *prim = ifa;
/* brd: prefix with all host bits set; any: prefix with all host bits clear. */
695 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
696 __be32 any = ifa->ifa_address&ifa->ifa_mask;
703 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
704 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
705 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
707 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
709 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
714 /* Deletion is more complicated than add.
715 We should take care of not to delete too much :-)
717 Scan address list to be sure that addresses are really gone.
720 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
721 if (ifa->ifa_local == ifa1->ifa_local)
723 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
725 if (brd == ifa1->ifa_broadcast)
727 if (any == ifa1->ifa_broadcast)
732 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
734 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
736 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
737 if (!(ok&LOCAL_OK)) {
738 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
740 /* Check, that this local address finally disappeared. */
741 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
742 /* And the last, but not the least thing.
743 We must flush stray FIB entries.
745 First of all, we scan fib_info list searching
746 for stray nexthop entries, then ignite fib_flush.
748 if (fib_sync_down(ifa->ifa_local, NULL, 0))
/*
 * nl_fib_lookup - run one FIB lookup for a NETLINK_FIB_LOOKUP request.
 *
 * @frn: request/reply structure; the fl_* fields are inputs, while tb_id,
 *       err, prefixlen, nh_sel, type and scope are filled in here.
 * @tb:  table to search.
 *
 * Builds a flow key from the request, calls tb_lookup() and copies the
 * outcome back into @frn.
 * NOTE(review): the guards around these statements (e.g. whether @tb may
 * be NULL, and whether the result fields are only copied on success) are
 * not visible in this listing.
 */
758 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
761 struct fib_result res;
762 struct flowi fl = { .mark = frn->fl_mark,
763 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
765 .scope = frn->fl_scope } } };
767 #ifdef CONFIG_IP_MULTIPLE_TABLES
775 frn->tb_id = tb->tb_id;
776 frn->err = tb->tb_lookup(tb, &fl, &res);
779 frn->prefixlen = res.prefixlen;
780 frn->nh_sel = res.nh_sel;
781 frn->type = res.type;
782 frn->scope = res.scope;
/*
 * nl_fib_input - input callback of the NETLINK_FIB_LOOKUP socket.
 *
 * Checks that the buffer holds a complete netlink message large enough
 * for a struct fib_result_nl, performs the lookup in the table named by
 * tb_id_in, and unicasts the same skb back to the sender with the result
 * filled in.
 * NOTE(review): the action taken on a malformed message is not visible in
 * this listing.
 */
789 static void nl_fib_input(struct sk_buff *skb)
791 struct fib_result_nl *frn;
792 struct nlmsghdr *nlh;
793 struct fib_table *tb;
796 nlh = nlmsg_hdr(skb);
/* Three length checks: room for a header, the full message present, payload large enough for *frn. */
797 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
798 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
803 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
804 tb = fib_get_table(frn->tb_id_in);
/* The reply is written in place into the request payload. */
806 nl_fib_lookup(frn, tb);
808 pid = NETLINK_CB(skb).pid; /* pid of sending process */
809 NETLINK_CB(skb).pid = 0; /* from kernel */
810 NETLINK_CB(skb).dst_group = 0; /* unicast */
811 netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
/*
 * nl_fib_lookup_init - create the kernel-side NETLINK_FIB_LOOKUP socket in
 * init_net with nl_fib_input() as its input handler, and store it in
 * fibnl. The result of netlink_kernel_create() is not checked in the
 * visible lines.
 */
814 static void nl_fib_lookup_init(void)
816 fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
817 nl_fib_input, NULL, THIS_MODULE);
/*
 * fib_disable_ip - take down the routes that use @dev.
 *
 * Calls fib_sync_down() for the device, passing @force through unchanged
 * (callers in this file pass 0, 1 or 2).
 * NOTE(review): the statement executed when fib_sync_down() returns
 * non-zero, and anything after it, is not visible in this listing.
 */
820 static void fib_disable_ip(struct net_device *dev, int force)
822 if (fib_sync_down(0, dev, force))
/*
 * fib_inetaddr_event - inetaddr notifier callback (see
 * fib_inetaddr_notifier).
 *
 * @ptr is the struct in_ifaddr the event refers to. In the visible lines,
 * fib_sync_up() is called for the address's device when multipath routing
 * is configured, and fib_disable_ip(dev, 1) is called once the device has
 * no addresses left.
 * NOTE(review): the switch on @event, the calls that add/remove the
 * address routes and the return value are not visible in this listing.
 */
828 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
830 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
835 #ifdef CONFIG_IP_ROUTE_MULTIPATH
836 fib_sync_up(ifa->ifa_dev->dev);
842 if (ifa->ifa_dev->ifa_list == NULL) {
843 /* Last address was deleted from this interface.
846 fib_disable_ip(ifa->ifa_dev->dev, 1);
/*
 * fib_netdev_event - netdevice notifier callback (see fib_netdev_notifier).
 *
 * @ptr is the struct net_device the event refers to. Devices outside
 * init_net are tested for first. NETDEV_UNREGISTER leads to
 * fib_disable_ip(dev, 2); another visible path calls
 * fib_disable_ip(dev, 0); NETDEV_CHANGEMTU has its own case.
 * NOTE(review): the remaining case labels, the body of the per-address
 * loop closed by endfor_ifa() and the return values are not visible in
 * this listing.
 */
855 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
857 struct net_device *dev = ptr;
858 struct in_device *in_dev = __in_dev_get_rtnl(dev);
860 if (dev->nd_net != &init_net)
863 if (event == NETDEV_UNREGISTER) {
864 fib_disable_ip(dev, 2);
875 } endfor_ifa(in_dev);
876 #ifdef CONFIG_IP_ROUTE_MULTIPATH
882 fib_disable_ip(dev, 0);
884 case NETDEV_CHANGEMTU:
/* Notifier block for IPv4 address events; registered in ip_fib_init(). */
892 static struct notifier_block fib_inetaddr_notifier = {
893 .notifier_call =fib_inetaddr_event,
/* Notifier block for network device events; registered in ip_fib_init(). */
896 static struct notifier_block fib_netdev_notifier = {
897 .notifier_call =fib_netdev_event,
/*
 * ip_fib_init - boot-time initialisation of the IPv4 FIB frontend.
 *
 * Initialises every bucket of fib_table_hash, creates the local and main
 * tables when CONFIG_IP_MULTIPLE_TABLES is not set, registers the device
 * and address notifiers, creates the FIB lookup netlink socket, and
 * registers the rtnetlink route handlers.
 * NOTE(review): the other branch of the #ifndef is not visible in this
 * listing.
 */
900 void __init ip_fib_init(void)
904 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
905 INIT_HLIST_HEAD(&fib_table_hash[i]);
906 #ifndef CONFIG_IP_MULTIPLE_TABLES
/* Both tables are linked into bucket 0. */
907 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
908 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
909 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
910 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
915 register_netdevice_notifier(&fib_netdev_notifier);
916 register_inetaddr_notifier(&fib_inetaddr_notifier);
917 nl_fib_lookup_init();
/* NEWROUTE and DELROUTE get a handler in the third argument; GETROUTE passes NULL there and inet_dump_fib as the fourth. */
919 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
920 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
921 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
/* Symbols exported for use by other kernel modules. */
924 EXPORT_SYMBOL(inet_addr_type);
925 EXPORT_SYMBOL(ip_dev_find);