2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/init.h>
38 #include <linux/list.h>
41 #include <net/protocol.h>
42 #include <net/route.h>
47 #include <net/ip_fib.h>
48 #include <net/rtnetlink.h>
/* Debug-print helper: forwards its arguments to printk() at KERN_DEBUG level. */
50 #define FFprint(a...) printk(KERN_DEBUG a)
/*
 * FIB table bookkeeping. Without CONFIG_IP_MULTIPLE_TABLES the local and
 * main tables are reachable through two global pointers and the table hash
 * has a single bucket.
 */
52 #ifndef CONFIG_IP_MULTIPLE_TABLES
54 struct fib_table *ip_fib_local_table;
55 struct fib_table *ip_fib_main_table;
57 #define FIB_TABLE_HASHSZ 1
58 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Second definition: 256 buckets. NOTE(review): presumably this is the
 * CONFIG_IP_MULTIPLE_TABLES variant sitting behind an #else -- confirm.
 */
62 #define FIB_TABLE_HASHSZ 256
63 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * fib_new_table - obtain the FIB table for @id.
 * Consults fib_get_table(), builds a table with fib_hash_init() and links
 * it into fib_table_hash.
 * NOTE(review): presumably an existing table is returned as-is and creation
 * only happens on a lookup miss -- confirm against the full body.
 */
65 struct fib_table *fib_new_table(u32 id)
72 tb = fib_get_table(id);
75 tb = fib_hash_init(id);
/* Bucket index: the id masked with FIB_TABLE_HASHSZ - 1. */
78 h = id & (FIB_TABLE_HASHSZ - 1);
/* RCU-aware insertion at the head of the selected bucket. */
79 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
/*
 * fib_get_table - look up the FIB table whose tb_id equals @id.
 * Walks the hash bucket selected by id & (FIB_TABLE_HASHSZ - 1) with the
 * RCU list iterator and tests each entry's tb_id.
 */
83 struct fib_table *fib_get_table(u32 id)
86 struct hlist_node *node;
91 h = id & (FIB_TABLE_HASHSZ - 1);
93 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 if (tb->tb_id == id) {
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
/*
 * fib_flush - call tb_flush() on every table in every hash bucket,
 * accumulating the return values in 'flushed'.
 */
104 static void fib_flush(void)
107 struct fib_table *tb;
108 struct hlist_node *node;
111 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 flushed += tb->tb_flush(tb);
121 * Find the first device with a given source address.
/*
 * ip_dev_find - resolve @addr to a net_device through the local FIB table.
 * @addr is placed in the flow key as the destination address. A device is
 * taken from the lookup result only when the result type is RTN_LOCAL;
 * 'dev' starts out as NULL.
 */
124 struct net_device * ip_dev_find(__be32 addr)
126 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 struct fib_result res;
128 struct net_device *dev = NULL;
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
/* True when there is no local table or tb_lookup() returns non-zero. */
134 if (!ip_fib_local_table ||
135 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
/* Only RTN_LOCAL results name a device of ours. */
137 if (res.type != RTN_LOCAL)
139 dev = FIB_RES_DEV(res);
/*
 * inet_addr_type - classify @addr as an RTN_* route type.
 * ZERONET()/BADCLASS() addresses are reported as RTN_BROADCAST. Otherwise
 * the local FIB table, when present, is consulted with @addr as the
 * destination; 'ret' is initialised to RTN_BROADCAST.
 */
148 unsigned inet_addr_type(__be32 addr)
150 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 struct fib_result res;
152 unsigned ret = RTN_BROADCAST;
154 if (ZERONET(addr) || BADCLASS(addr))
155 return RTN_BROADCAST;
/* NOTE(review): presumably guarded by a multicast-address test -- confirm. */
157 return RTN_MULTICAST;
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
163 if (ip_fib_local_table) {
165 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175 - (main) check, that source is valid i.e. not broadcast or our local
177 - figure out what "logical" interface this packet arrived
178 and calculate "specific destination" address.
179 - check, that packet arrived from expected physical interface.
/*
 * fib_validate_source - check the source address of a packet received on @dev.
 * @src, @dst, @tos, @oif: values describing the packet/flow being checked.
 * @dev:      device the packet arrived on.
 * @spec_dst: out, receives the "specific destination" address.
 * @itag:     out, updated through fib_combine_itag().
 *
 * Performs a FIB lookup and requires an RTN_UNICAST result. If the route's
 * device does not match @dev, a second lookup is made with the output
 * interface pinned to @dev.
 */
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 struct net_device *dev, __be32 *spec_dst, u32 *itag)
185 struct in_device *in_dev;
186 struct flowi fl = { .nl_u = { .ip4_u =
191 struct fib_result res;
197 in_dev = __in_dev_get_rcu(dev);
/* no_addr: the interface has no IPv4 address configured. */
199 no_addr = in_dev->ifa_list == NULL;
/* rpf: the device's IN_DEV_RPFILTER setting. */
200 rpf = IN_DEV_RPFILTER(in_dev);
/* First lookup; a non-zero return from fib_lookup() is the failure case. */
207 if (fib_lookup(&fl, &res))
209 if (res.type != RTN_UNICAST)
211 *spec_dst = FIB_RES_PREFSRC(res);
212 fib_combine_itag(itag, &res);
/*
 * Device match test: the route's device equals @dev; the multipath build
 * additionally accepts any route with more than one nexthop.
 */
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
216 if (FIB_RES_DEV(res) == dev)
/* ret is 1 when the nexthop scope is RT_SCOPE_HOST or narrower, else 0. */
219 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Second lookup, now constrained to leave through the arrival device. */
228 fl.oif = dev->ifindex;
231 if (fib_lookup(&fl, &res) == 0) {
232 if (res.type == RTN_UNICAST) {
233 *spec_dst = FIB_RES_PREFSRC(res);
234 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Fallback: choose an address of @dev with universe scope. */
243 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
/*
 * sk_extract_addr - view @addr as a struct sockaddr_in and return its
 * sin_addr.s_addr field. No address-family check is made here.
 */
253 static inline __be32 sk_extract_addr(struct sockaddr *addr)
255 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
/*
 * put_rtax - write one 4-byte netlink attribute into a metrics buffer.
 * @mx:    start of the buffer.
 * @len:   byte offset in @mx at which the attribute is written.
 * @type:  value stored in nla_type.
 * @value: 32-bit payload.
 *
 * Returns the offset just past the new attribute, len + nla_total_size(4).
 * No bounds check is done; the caller must have sized @mx accordingly.
 */
258 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
262 nla = (struct nlattr *) ((char *) mx + len);
263 nla->nla_type = type;
264 nla->nla_len = nla_attr_size(4);
265 *(u32 *) nla_data(nla) = value;
267 return len + nla_total_size(4);
/*
 * rtentry_to_fib_config - translate an ioctl-style struct rtentry into a
 * struct fib_config.
 * @cmd: the ioctl command; SIOCDELRT is treated differently from the rest.
 * @rt:  route description; rt->rt_dev is read with copy_from_user().
 * @cfg: output, zeroed first and then filled in.
 *
 * Returns -EAFNOSUPPORT for a non-AF_INET destination or an inconsistent
 * netmask family. When any of RTF_MTU/RTF_WINDOW/RTF_IRTT is set, a metrics
 * buffer is allocated with kzalloc() and its used length stored in
 * cfg->fc_mx_len.
 */
270 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
271 struct fib_config *cfg)
276 memset(cfg, 0, sizeof(*cfg));
278 if (rt->rt_dst.sa_family != AF_INET)
279 return -EAFNOSUPPORT;
282 * Check mask for validity:
283 * a) it must be contiguous.
284 * b) destination must have all host bits clear.
285 * c) if application forgot to set correct family (AF_INET),
286 * reject request unless it is absolutely clear i.e.
287 * both family and mask are zero.
290 addr = sk_extract_addr(&rt->rt_dst);
/* Non-host routes: derive the prefix length from rt_genmask. */
291 if (!(rt->rt_flags & RTF_HOST)) {
292 __be32 mask = sk_extract_addr(&rt->rt_genmask);
294 if (rt->rt_genmask.sa_family != AF_INET) {
295 if (mask || rt->rt_genmask.sa_family)
296 return -EAFNOSUPPORT;
299 if (bad_mask(mask, addr))
302 plen = inet_mask_len(mask);
305 cfg->fc_dst_len = plen;
/* Everything except a delete creates the route, tagged RTPROT_BOOT. */
308 if (cmd != SIOCDELRT) {
309 cfg->fc_nlflags = NLM_F_CREATE;
310 cfg->fc_protocol = RTPROT_BOOT;
/* fc_priority is rt_metric minus one. */
314 cfg->fc_priority = rt->rt_metric - 1;
/* RTF_REJECT becomes an RTN_UNREACHABLE entry with host scope. */
316 if (rt->rt_flags & RTF_REJECT) {
317 cfg->fc_scope = RT_SCOPE_HOST;
318 cfg->fc_type = RTN_UNREACHABLE;
/* RT_SCOPE_NOWHERE is a placeholder here; it is replaced by RT_SCOPE_LINK further down if still set. */
322 cfg->fc_scope = RT_SCOPE_NOWHERE;
323 cfg->fc_type = RTN_UNICAST;
327 struct net_device *dev;
328 char devname[IFNAMSIZ];
/* Copy at most IFNAMSIZ-1 bytes of the user-supplied name, then force NUL termination. */
330 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
333 devname[IFNAMSIZ-1] = 0;
/* NOTE(review): a ':' presumably marks an alias label such as eth0:1 -- confirm. */
334 colon = strchr(devname, ':');
337 dev = __dev_get_by_name(devname);
340 cfg->fc_oif = dev->ifindex;
342 struct in_ifaddr *ifa;
343 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/* Find the address whose label equals the requested name; its local address becomes the preferred source. */
347 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
348 if (strcmp(ifa->ifa_label, devname) == 0)
352 cfg->fc_prefsrc = ifa->ifa_local;
356 addr = sk_extract_addr(&rt->rt_gateway);
357 if (rt->rt_gateway.sa_family == AF_INET && addr) {
/* A unicast gateway together with RTF_GATEWAY widens the scope to universe. */
359 if (rt->rt_flags & RTF_GATEWAY &&
360 inet_addr_type(addr) == RTN_UNICAST)
361 cfg->fc_scope = RT_SCOPE_UNIVERSE;
364 if (cmd == SIOCDELRT)
/* RTF_GATEWAY without a gateway address in cfg->fc_gw. */
367 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
370 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
371 cfg->fc_scope = RT_SCOPE_LINK;
373 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
/* Room for at most three 4-byte attributes, one per flag tested below. */
377 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
/* NOTE(review): the 40 is presumably IPv4 + TCP header bytes (MTU to MSS) -- confirm. */
381 if (rt->rt_flags & RTF_MTU)
382 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
384 if (rt->rt_flags & RTF_WINDOW)
385 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
/* rt_irtt is stored shifted left by three bits. */
387 if (rt->rt_flags & RTF_IRTT)
388 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
391 cfg->fc_mx_len = len;
398 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
/*
 * ip_rt_ioctl - entry point for the SIOCADDRT / SIOCDELRT routing ioctls.
 * @cmd: ioctl command.
 * @arg: user pointer to a struct rtentry.
 *
 * Requires CAP_NET_ADMIN. The rtentry is copied in from user space and
 * converted with rtentry_to_fib_config(). A delete then goes to tb_delete()
 * on the table found by fib_get_table(); otherwise tb_insert() is called on
 * the table returned by fib_new_table().
 */
401 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
403 struct fib_config cfg;
408 case SIOCADDRT: /* Add a route */
409 case SIOCDELRT: /* Delete a route */
410 if (!capable(CAP_NET_ADMIN))
413 if (copy_from_user(&rt, arg, sizeof(rt)))
417 err = rtentry_to_fib_config(cmd, &rt, &cfg);
419 struct fib_table *tb;
421 if (cmd == SIOCDELRT) {
/* Deleting never creates a table: lookup only. */
422 tb = fib_get_table(cfg.fc_table);
424 err = tb->tb_delete(tb, &cfg);
/* Adding may create the table on demand. */
428 tb = fib_new_table(cfg.fc_table);
430 err = tb->tb_insert(tb, &cfg);
435 /* allocated by rtentry_to_fib_config() */
/*
 * Netlink attribute policy for IPv4 route messages, indexed by RTA_* type.
 * It is passed to nlmsg_validate() by rtm_to_fib_config().
 * NOTE(review): rtm_to_fib_config() also reads a u32 attribute into
 * cfg->fc_table; if that attribute is RTA_TABLE it has no entry here, so its
 * length would not be checked by this policy -- confirm.
 */
444 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
445 [RTA_DST] = { .type = NLA_U32 },
446 [RTA_SRC] = { .type = NLA_U32 },
447 [RTA_IIF] = { .type = NLA_U32 },
448 [RTA_OIF] = { .type = NLA_U32 },
449 [RTA_GATEWAY] = { .type = NLA_U32 },
450 [RTA_PRIORITY] = { .type = NLA_U32 },
451 [RTA_PREFSRC] = { .type = NLA_U32 },
452 [RTA_METRICS] = { .type = NLA_NESTED },
/* Only a length is given for multipath: sizeof(struct rtnexthop). */
453 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
454 [RTA_PROTOINFO] = { .type = NLA_U32 },
455 [RTA_FLOW] = { .type = NLA_U32 },
456 [RTA_MP_ALGO] = { .type = NLA_U32 },
/*
 * rtm_to_fib_config - build a struct fib_config from a netlink route message.
 * @skb: buffer carrying the message; the sender pid is read from its
 *       netlink control block.
 * @nlh: netlink message header; the payload starts with a struct rtmsg.
 * @cfg: output, zeroed and then filled from the rtmsg header fields and
 *       from the attributes that follow it.
 *
 * The message is validated against rtm_ipv4_policy before anything is read.
 */
459 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
460 struct fib_config *cfg)
466 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
470 memset(cfg, 0, sizeof(*cfg));
/* Fixed header fields. */
472 rtm = nlmsg_data(nlh);
473 cfg->fc_dst_len = rtm->rtm_dst_len;
474 cfg->fc_tos = rtm->rtm_tos;
475 cfg->fc_table = rtm->rtm_table;
476 cfg->fc_protocol = rtm->rtm_protocol;
477 cfg->fc_scope = rtm->rtm_scope;
478 cfg->fc_type = rtm->rtm_type;
479 cfg->fc_flags = rtm->rtm_flags;
480 cfg->fc_nlflags = nlh->nlmsg_flags;
/* Remember the requester (pid and original header) in fc_nlinfo. */
482 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
483 cfg->fc_nlinfo.nlh = nlh;
/* Route types above RTN_MAX are singled out here. */
485 if (cfg->fc_type > RTN_MAX) {
/* Walk the attributes that follow the rtmsg header, dispatching on nla_type. */
490 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
491 switch (attr->nla_type) {
493 cfg->fc_dst = nla_get_be32(attr);
496 cfg->fc_oif = nla_get_u32(attr);
499 cfg->fc_gw = nla_get_be32(attr);
502 cfg->fc_priority = nla_get_u32(attr);
505 cfg->fc_prefsrc = nla_get_be32(attr);
/* fc_mx and fc_mp point into the message itself; nothing is copied. */
508 cfg->fc_mx = nla_data(attr);
509 cfg->fc_mx_len = nla_len(attr);
512 cfg->fc_mp = nla_data(attr);
513 cfg->fc_mp_len = nla_len(attr);
516 cfg->fc_flow = nla_get_u32(attr);
519 cfg->fc_mp_alg = nla_get_u32(attr);
/* An attribute value replaces the table id taken from rtm_table above. */
522 cfg->fc_table = nla_get_u32(attr);
/*
 * inet_rtm_delroute - handler for a route-delete netlink message.
 * Parses the message with rtm_to_fib_config(), finds the table with
 * fib_get_table() (no table is created) and calls its tb_delete() method.
 */
532 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
534 struct fib_config cfg;
535 struct fib_table *tb;
538 err = rtm_to_fib_config(skb, nlh, &cfg);
542 tb = fib_get_table(cfg.fc_table);
548 err = tb->tb_delete(tb, &cfg);
/*
 * inet_rtm_newroute - handler for a route-add netlink message.
 * Parses the message with rtm_to_fib_config(), obtains the table through
 * fib_new_table() and calls its tb_insert() method.
 */
553 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
555 struct fib_config cfg;
556 struct fib_table *tb;
559 err = rtm_to_fib_config(skb, nlh, &cfg);
563 tb = fib_new_table(cfg.fc_table);
569 err = tb->tb_insert(tb, &cfg);
/*
 * inet_dump_fib - netlink dump callback for IPv4 routes.
 * A request whose payload is at least a struct rtmsg with RTM_F_CLONED set
 * is handed to ip_rt_dump(). Otherwise the hash buckets are walked starting
 * at s_h and tb_dump() is called on tables.
 * NOTE(review): s_h / s_e look like resume positions kept in cb->args --
 * their initialisation is not shown, confirm.
 */
574 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
577 unsigned int e = 0, s_e;
578 struct fib_table *tb;
579 struct hlist_node *node;
582 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
583 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
584 return ip_rt_dump(skb, cb);
/* s_e is reset to 0 after each bucket. */
589 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
591 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
/* Clear cb->args[2] and everything after it. */
595 memset(&cb->args[2], 0, sizeof(cb->args) -
596 2 * sizeof(cb->args[0]));
597 if (tb->tb_dump(tb, skb, cb) < 0)
611 /* Prepare and feed an intra-kernel routing request.
612 Really, it should be a netlink message, but :-( netlink
613 may not be configured, so we feed it directly
614 to the fib engine. This is legal, because all events occur
615 only when netlink is already locked.
/*
 * fib_magic - add or delete one kernel-generated route for an interface address.
 * @cmd:     RTM_NEWROUTE inserts; any other value deletes.
 * @type:    route type; RTN_UNICAST selects RT_TABLE_MAIN, other types
 *           RT_TABLE_LOCAL.
 * @dst:     destination prefix.
 * @dst_len: prefix length.
 * @ifa:     address supplying the preferred source (ifa_local) and the
 *           output interface index.
 *
 * The return value of tb_insert()/tb_delete() is not examined.
 */
618 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
620 struct fib_table *tb;
621 struct fib_config cfg = {
622 .fc_protocol = RTPROT_KERNEL,
625 .fc_dst_len = dst_len,
626 .fc_prefsrc = ifa->ifa_local,
627 .fc_oif = ifa->ifa_dev->dev->ifindex,
628 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
631 if (type == RTN_UNICAST)
632 tb = fib_new_table(RT_TABLE_MAIN);
634 tb = fib_new_table(RT_TABLE_LOCAL);
639 cfg.fc_table = tb->tb_id;
/* RTN_LOCAL routes get host scope, every other type link scope. */
641 if (type != RTN_LOCAL)
642 cfg.fc_scope = RT_SCOPE_LINK;
644 cfg.fc_scope = RT_SCOPE_HOST;
646 if (cmd == RTM_NEWROUTE)
647 tb->tb_insert(tb, &cfg);
649 tb->tb_delete(tb, &cfg);
/*
 * fib_add_ifaddr - install the routes implied by a new interface address.
 * Through fib_magic() this adds a /32 RTN_LOCAL route for the address, an
 * RTN_BROADCAST route for an explicitly assigned broadcast address, the
 * prefix route, and the two network broadcast routes where applicable.
 * For a secondary address the primary address of the same prefix is passed
 * to fib_magic() instead of @ifa.
 */
652 void fib_add_ifaddr(struct in_ifaddr *ifa)
654 struct in_device *in_dev = ifa->ifa_dev;
655 struct net_device *dev = in_dev->dev;
656 struct in_ifaddr *prim = ifa;
657 __be32 mask = ifa->ifa_mask;
658 __be32 addr = ifa->ifa_local;
659 __be32 prefix = ifa->ifa_address&mask;
/* Secondary address: find the primary that shares its prefix. */
661 if (ifa->ifa_flags&IFA_F_SECONDARY) {
662 prim = inet_ifa_byprefix(in_dev, prefix, mask);
664 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
/* Host route for the address itself. */
669 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
/* The routes below are only considered when the device is up. */
671 if (!(dev->flags&IFF_UP))
674 /* Add broadcast address, if it is explicitly assigned. */
675 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
676 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
/* Prefix route: primary addresses only; RTN_LOCAL on loopback devices, RTN_UNICAST otherwise. */
678 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
679 (prefix != addr || ifa->ifa_prefixlen < 32)) {
680 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
681 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
683 /* Add network specific broadcasts, when it makes sense */
684 if (ifa->ifa_prefixlen < 31) {
685 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
686 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
/*
 * fib_del_ifaddr - remove the routes that belonged to a deleted interface address.
 * 'brd' is the all-ones broadcast of the prefix and 'any' its network
 * address. A primary address has its prefix route deleted first; for a
 * secondary address the primary of the same prefix is looked up instead.
 * The device's remaining addresses are then compared against the local
 * address, the assigned broadcast, 'brd' and 'any', and the broadcast and
 * local routes are deleted through fib_magic(); the local route is guarded
 * by the LOCAL_OK bit of 'ok'. If the address is no longer RTN_LOCAL
 * afterwards, fib_sync_down() is called for it.
 */
691 static void fib_del_ifaddr(struct in_ifaddr *ifa)
693 struct in_device *in_dev = ifa->ifa_dev;
694 struct net_device *dev = in_dev->dev;
695 struct in_ifaddr *ifa1;
696 struct in_ifaddr *prim = ifa;
697 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
698 __be32 any = ifa->ifa_address&ifa->ifa_mask;
705 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
706 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
707 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
709 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
711 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
716 /* Deletion is more complicated than add.
717 We should take care of not to delete too much :-)
719 Scan address list to be sure that addresses are really gone.
722 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
723 if (ifa->ifa_local == ifa1->ifa_local)
725 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
727 if (brd == ifa1->ifa_broadcast)
729 if (any == ifa1->ifa_broadcast)
734 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
736 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
738 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
739 if (!(ok&LOCAL_OK)) {
740 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
742 /* Check, that this local address finally disappeared. */
743 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
744 /* And the last, but not the least thing.
745 We must flush stray FIB entries.
747 First of all, we scan fib_info list searching
748 for stray nexthop entries, then ignite fib_flush.
750 if (fib_sync_down(ifa->ifa_local, NULL, 0))
/*
 * nl_fib_lookup - run one FIB lookup for a NETLINK_FIB_LOOKUP request.
 * @frn: request/reply record; the fl_* fields describe the flow, and
 *       tb_id, err, prefixlen, nh_sel, type and scope are written back.
 * @tb:  table to query.
 */
760 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
763 struct fib_result res;
764 struct flowi fl = { .mark = frn->fl_mark,
765 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
767 .scope = frn->fl_scope } } };
769 #ifdef CONFIG_IP_MULTIPLE_TABLES
/* Report which table was used, then store the lookup status in frn->err. */
777 frn->tb_id = tb->tb_id;
778 frn->err = tb->tb_lookup(tb, &fl, &res);
/* Copy the interesting parts of the result into the reply. */
781 frn->prefixlen = res.prefixlen;
782 frn->nh_sel = res.nh_sel;
783 frn->type = res.type;
784 frn->scope = res.scope;
/*
 * nl_fib_input - input handler of the NETLINK_FIB_LOOKUP kernel socket.
 * Dequeues one skb from the socket's receive queue, checks that it holds a
 * complete netlink message large enough for a struct fib_result_nl,
 * performs the lookup in place and sends the same skb back to the
 * requesting pid as a unicast.
 */
791 static void nl_fib_input(struct sock *sk, int len)
793 struct sk_buff *skb = NULL;
794 struct nlmsghdr *nlh = NULL;
795 struct fib_result_nl *frn;
797 struct fib_table *tb;
799 skb = skb_dequeue(&sk->sk_receive_queue);
803 nlh = nlmsg_hdr(skb);
/*
 * Size checks: the skb must hold a netlink header, must be at least as long
 * as the message claims, and the message must have room for the payload.
 */
804 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
805 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
810 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
811 tb = fib_get_table(frn->tb_id_in);
813 nl_fib_lookup(frn, tb);
815 pid = NETLINK_CB(skb).pid; /* pid of sending process */
816 NETLINK_CB(skb).pid = 0; /* from kernel */
817 NETLINK_CB(skb).dst_group = 0; /* unicast */
818 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
/*
 * nl_fib_lookup_init - create the NETLINK_FIB_LOOKUP kernel socket with
 * nl_fib_input() as its input handler.
 */
821 static void nl_fib_lookup_init(void)
823 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
/*
 * fib_disable_ip - take down the routes that use @dev.
 * Calls fib_sync_down(0, dev, force) and tests its return value.
 * NOTE(review): presumably a non-zero return leads to fib_flush() -- confirm.
 */
827 static void fib_disable_ip(struct net_device *dev, int force)
829 if (fib_sync_down(0, dev, force))
/*
 * fib_inetaddr_event - inetaddr notifier callback.
 * @ptr is the struct in_ifaddr the event refers to. In multipath builds
 * fib_sync_up() is called for the address's device. When the device's
 * address list has become empty, fib_disable_ip() is called with force 1.
 */
835 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
837 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
842 #ifdef CONFIG_IP_ROUTE_MULTIPATH
843 fib_sync_up(ifa->ifa_dev->dev);
849 if (ifa->ifa_dev->ifa_list == NULL) {
850 /* Last address was deleted from this interface.
853 fib_disable_ip(ifa->ifa_dev->dev, 1);
/*
 * fib_netdev_event - netdevice notifier callback.
 * @ptr is the affected struct net_device. NETDEV_UNREGISTER calls
 * fib_disable_ip() with force 2; another path calls it with force 0.
 * NETDEV_CHANGEMTU is also handled.
 */
862 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
864 struct net_device *dev = ptr;
865 struct in_device *in_dev = __in_dev_get_rtnl(dev);
867 if (event == NETDEV_UNREGISTER) {
868 fib_disable_ip(dev, 2);
879 } endfor_ifa(in_dev);
880 #ifdef CONFIG_IP_ROUTE_MULTIPATH
886 fib_disable_ip(dev, 0);
888 case NETDEV_CHANGEMTU:
/* Notifier block that routes inetaddr events to fib_inetaddr_event(). */
896 static struct notifier_block fib_inetaddr_notifier = {
897 .notifier_call =fib_inetaddr_event,
/* Notifier block that routes netdevice events to fib_netdev_event(). */
900 static struct notifier_block fib_netdev_notifier = {
901 .notifier_call =fib_netdev_event,
/*
 * ip_fib_init - boot-time initialisation of the FIB frontend.
 * Initialises every hash bucket, creates the local and main tables when
 * CONFIG_IP_MULTIPLE_TABLES is off, registers the netdevice and inetaddr
 * notifiers, sets up the FIB lookup netlink socket and registers the
 * rtnetlink route handlers.
 */
904 void __init ip_fib_init(void)
908 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
909 INIT_HLIST_HEAD(&fib_table_hash[i]);
/* Single-bucket configuration: both tables are linked into bucket 0. */
910 #ifndef CONFIG_IP_MULTIPLE_TABLES
911 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
912 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
913 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
914 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
919 register_netdevice_notifier(&fib_netdev_notifier);
920 register_inetaddr_notifier(&fib_inetaddr_notifier);
921 nl_fib_lookup_init();
/* NEWROUTE/DELROUTE register only a third argument, GETROUTE only a fourth (the dump callback). */
923 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
924 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
925 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
/* Symbols made available to modules. */
928 EXPORT_SYMBOL(inet_addr_type);
929 EXPORT_SYMBOL(ip_dev_find);