2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/module.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/capability.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/inetdevice.h>
33 #include <linux/netdevice.h>
34 #include <linux/if_addr.h>
35 #include <linux/if_arp.h>
36 #include <linux/skbuff.h>
37 #include <linux/netlink.h>
38 #include <linux/init.h>
39 #include <linux/list.h>
42 #include <net/protocol.h>
43 #include <net/route.h>
48 #include <net/ip_fib.h>
/* Debug helper: passes its arguments to printk() prefixed with KERN_DEBUG. */
50 #define FFprint(a...) printk(KERN_DEBUG a)
52 #ifndef CONFIG_IP_MULTIPLE_TABLES
/*
 * Without CONFIG_IP_MULTIPLE_TABLES: global pointers to the local and main
 * tables (both are assigned in ip_fib_init()) and a one-bucket table hash.
 */
54 struct fib_table *ip_fib_local_table;
55 struct fib_table *ip_fib_main_table;
57 #define FIB_TABLE_HASHSZ 1
58 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * Second variant of the table hash, with 256 buckets.
 * NOTE(review): the #else separating the two variants is not visible in
 * this excerpt; presumably this is the CONFIG_IP_MULTIPLE_TABLES case.
 */
62 #define FIB_TABLE_HASHSZ 256
63 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
/*
 * fib_new_table - obtain the FIB table with the given id.
 * @id: table identifier.
 *
 * The visible code first asks fib_get_table() for the table, then builds one
 * with fib_hash_init(id) and links it into fib_table_hash with
 * hlist_add_head_rcu().
 * NOTE(review): the checks between these steps and the return statement are
 * not visible in this excerpt; presumably an existing table is returned
 * as-is and a new one is only created on a miss - confirm.
 */
65 struct fib_table *fib_new_table(u32 id)
72 tb = fib_get_table(id);
75 tb = fib_hash_init(id);
/* Bucket index is the low bits of the id (FIB_TABLE_HASHSZ - 1 is the mask). */
78 h = id & (FIB_TABLE_HASHSZ - 1);
79 hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
/*
 * fib_get_table - look up an existing FIB table by id.
 * @id: table identifier.
 *
 * Selects a bucket of fib_table_hash with id & (FIB_TABLE_HASHSZ - 1) and
 * walks it with hlist_for_each_entry_rcu(), comparing each tb_id with @id.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
83 struct fib_table *fib_get_table(u32 id)
86 struct hlist_node *node;
91 h = id & (FIB_TABLE_HASHSZ - 1);
93 hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
94 if (tb->tb_id == id) {
102 #endif /* CONFIG_IP_MULTIPLE_TABLES */
/*
 * fib_flush - invoke the tb_flush() hook of every registered table.
 *
 * Iterates over all FIB_TABLE_HASHSZ buckets of fib_table_hash and adds each
 * table's tb_flush() return value to 'flushed'.
 * NOTE(review): what is done with 'flushed' afterwards is not visible here.
 */
104 static void fib_flush(void)
107 struct fib_table *tb;
108 struct hlist_node *node;
111 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
112 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
113 flushed += tb->tb_flush(tb);
121 * Find the first device with a given source address.
/*
 * ip_dev_find - find a device through a local-table lookup of an address.
 * @addr: IPv4 address (network byte order), used as the lookup destination.
 *
 * Looks @addr up in ip_fib_local_table; the device is taken from
 * FIB_RES_DEV(res) after the result type has been compared with RTN_LOCAL.
 * 'dev' starts out as NULL.
 * NOTE(review): the bodies of the two checks, any reference counting and the
 * return statement are not visible in this excerpt.
 */
124 struct net_device * ip_dev_find(__be32 addr)
126 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
127 struct fib_result res;
128 struct net_device *dev = NULL;
130 #ifdef CONFIG_IP_MULTIPLE_TABLES
/* A missing local table or a failed lookup is handled by the same branch. */
134 if (!ip_fib_local_table ||
135 ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
137 if (res.type != RTN_LOCAL)
139 dev = FIB_RES_DEV(res);
/*
 * inet_addr_type - classify an IPv4 address as an RTN_* route type.
 * @addr: IPv4 address in network byte order.
 *
 * Addresses matching ZERONET() or BADCLASS() are reported as RTN_BROADCAST
 * without any table lookup. RTN_MULTICAST is returned by a check whose
 * condition is not visible in this excerpt. Otherwise the address is looked
 * up in ip_fib_local_table; 'ret' defaults to RTN_BROADCAST.
 * NOTE(review): how the lookup result updates 'ret' is not visible here.
 */
148 unsigned inet_addr_type(__be32 addr)
150 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
151 struct fib_result res;
152 unsigned ret = RTN_BROADCAST;
154 if (ZERONET(addr) || BADCLASS(addr))
155 return RTN_BROADCAST;
157 return RTN_MULTICAST;
159 #ifdef CONFIG_IP_MULTIPLE_TABLES
163 if (ip_fib_local_table) {
165 if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
174 /* Given (packet source, input interface) and optional (dst, oif, tos):
175 - (main) check that the source is valid, i.e. not broadcast or our local address.
177 - figure out which "logical" interface this packet arrived on
178 and calculate the "specific destination" address.
179 - check that the packet arrived from the expected physical interface.
/*
 * fib_validate_source - check the source address of a received packet.
 * @src, @dst, @tos, @oif: packet/flow parameters.
 *	NOTE(review): the flowi initializer that consumes them is only
 *	partly visible in this excerpt.
 * @dev:      device the packet arrived on.
 * @spec_dst: output; set from FIB_RES_PREFSRC(res) on the lookup paths, or
 *            from inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE).
 * @itag:     output; handed to fib_combine_itag() together with the result.
 *
 * Reads the device's in_device under RCU accessors, records whether it has
 * no addresses (no_addr) and its rp_filter setting (rpf), and performs up to
 * two fib_lookup() calls.
 * NOTE(review): error paths and return values are not visible here.
 */
182 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
183 struct net_device *dev, __be32 *spec_dst, u32 *itag)
185 struct in_device *in_dev;
186 struct flowi fl = { .nl_u = { .ip4_u =
191 struct fib_result res;
197 in_dev = __in_dev_get_rcu(dev);
199 no_addr = in_dev->ifa_list == NULL;
200 rpf = IN_DEV_RPFILTER(in_dev);
207 if (fib_lookup(&fl, &res))
209 if (res.type != RTN_UNICAST)
211 *spec_dst = FIB_RES_PREFSRC(res);
212 fib_combine_itag(itag, &res);
/*
 * Device check: with multipath routing a result that has more than one
 * nexthop (fib_nhs > 1) also satisfies the condition.
 */
213 #ifdef CONFIG_IP_ROUTE_MULTIPATH
214 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
216 if (FIB_RES_DEV(res) == dev)
/* ret is 1 when the nexthop scope is RT_SCOPE_HOST or numerically above it. */
219 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
/* Second lookup, with the output interface pinned to the receiving device. */
228 fl.oif = dev->ifindex;
231 if (fib_lookup(&fl, &res) == 0) {
232 if (res.type == RTN_UNICAST) {
233 *spec_dst = FIB_RES_PREFSRC(res);
234 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
243 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
253 #ifndef CONFIG_IP_NOSIOCRT
/*
 * sk_extract_addr - read the IPv4 address out of a generic sockaddr.
 * @addr: socket address, reinterpreted as struct sockaddr_in.
 *
 * Returns sin_addr.s_addr unchanged. The sa_family field is not checked
 * here; callers in this file test it separately.
 */
255 static inline __be32 sk_extract_addr(struct sockaddr *addr)
257 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
/*
 * put_rtax - append one 4-byte attribute to a metrics buffer.
 * @mx:    start of the attribute buffer.
 * @len:   byte offset within @mx at which the attribute is written.
 * @type:  value stored in nla_type.
 * @value: 32-bit payload.
 *
 * Returns @len + nla_total_size(4), which callers feed back in as the next
 * @len. No bounds check is done here; the caller sizes the buffer.
 */
260 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
/* Byte-wise pointer arithmetic: the attribute header lives at mx + len. */
264 nla = (struct nlattr *) ((char *) mx + len);
265 nla->nla_type = type;
266 nla->nla_len = nla_attr_size(4);
267 *(u32 *) nla_data(nla) = value;
269 return len + nla_total_size(4);
/*
 * rtentry_to_fib_config - translate an ioctl rtentry into a fib_config.
 * @cmd: ioctl command; only compared against SIOCDELRT here.
 * @rt:  route description (the caller copies it in from user space).
 * @cfg: output; zeroed first, then filled field by field.
 *
 * Returns -EAFNOSUPPORT when the destination family is not AF_INET, or when
 * the netmask family is not AF_INET and either the mask or its family field
 * is non-zero.
 * NOTE(review): the remaining return statements are not visible in this
 * excerpt. A metrics buffer is allocated with kzalloc() when any of
 * RTF_MTU/RTF_WINDOW/RTF_IRTT is set; the store into cfg and the ownership
 * hand-over are not visible here (ip_rt_ioctl() carries a comment
 * "allocated by rtentry_to_fib_config()").
 */
272 static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
273 struct fib_config *cfg)
278 memset(cfg, 0, sizeof(*cfg));
280 if (rt->rt_dst.sa_family != AF_INET)
281 return -EAFNOSUPPORT;
284 * Check mask for validity:
285 * a) it must be contiguous.
286 * b) destination must have all host bits clear.
287 * c) if application forgot to set correct family (AF_INET),
288 * reject request unless it is absolutely clear i.e.
289 * both family and mask are zero.
292 addr = sk_extract_addr(&rt->rt_dst);
/* The netmask is only examined for routes without RTF_HOST. */
293 if (!(rt->rt_flags & RTF_HOST)) {
294 __be32 mask = sk_extract_addr(&rt->rt_genmask);
296 if (rt->rt_genmask.sa_family != AF_INET) {
297 if (mask || rt->rt_genmask.sa_family)
298 return -EAFNOSUPPORT;
301 if (bad_mask(mask, addr))
304 plen = inet_mask_len(mask);
307 cfg->fc_dst_len = plen;
/* Everything except a delete is flagged NLM_F_CREATE with RTPROT_BOOT. */
310 if (cmd != SIOCDELRT) {
311 cfg->fc_nlflags = NLM_F_CREATE;
312 cfg->fc_protocol = RTPROT_BOOT;
/*
 * Priority is the user metric minus one.
 * NOTE(review): the guard around this assignment is not visible here.
 */
316 cfg->fc_priority = rt->rt_metric - 1;
/*
 * RTF_REJECT routes become RTN_UNREACHABLE with host scope. The other
 * visible assignment uses RTN_UNICAST with RT_SCOPE_NOWHERE, which acts as
 * a placeholder: it is replaced further down by RT_SCOPE_UNIVERSE or
 * RT_SCOPE_LINK.
 */
318 if (rt->rt_flags & RTF_REJECT) {
319 cfg->fc_scope = RT_SCOPE_HOST;
320 cfg->fc_type = RTN_UNREACHABLE;
324 cfg->fc_scope = RT_SCOPE_NOWHERE;
325 cfg->fc_type = RTN_UNICAST;
329 struct net_device *dev;
330 char devname[IFNAMSIZ];
/*
 * Copy at most IFNAMSIZ-1 bytes of the user-supplied device name, then
 * force NUL termination in the last byte.
 */
332 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
335 devname[IFNAMSIZ-1] = 0;
/*
 * NOTE(review): presumably a ':' marks an address label ("dev:alias");
 * the lines that act on 'colon' are not visible in this excerpt.
 */
336 colon = strchr(devname, ':');
339 dev = __dev_get_by_name(devname);
342 cfg->fc_oif = dev->ifindex;
344 struct in_ifaddr *ifa;
345 struct in_device *in_dev = __in_dev_get_rtnl(dev);
/*
 * Search the device's address list for an entry whose label equals
 * devname; its local address becomes the preferred source.
 */
349 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
350 if (strcmp(ifa->ifa_label, devname) == 0)
354 cfg->fc_prefsrc = ifa->ifa_local;
358 addr = sk_extract_addr(&rt->rt_gateway);
359 if (rt->rt_gateway.sa_family == AF_INET && addr) {
/* A unicast gateway on an RTF_GATEWAY route widens the scope to universe. */
361 if (rt->rt_flags & RTF_GATEWAY &&
362 inet_addr_type(addr) == RTN_UNICAST)
363 cfg->fc_scope = RT_SCOPE_UNIVERSE;
366 if (cmd == SIOCDELRT)
369 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
/* Scope still at the placeholder value: fall back to link scope. */
372 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
373 cfg->fc_scope = RT_SCOPE_LINK;
375 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
/* Room for three 4-byte attributes, one per flag tested below. */
379 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
/* RTF_MTU is stored as RTAX_ADVMSS, using rt_mtu - 40. */
383 if (rt->rt_flags & RTF_MTU)
384 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
386 if (rt->rt_flags & RTF_WINDOW)
387 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
/* rt_irtt is shifted left by 3 before being stored as RTAX_RTT. */
389 if (rt->rt_flags & RTF_IRTT)
390 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
/* len is now the number of bytes written by the put_rtax() calls. */
393 cfg->fc_mx_len = len;
400 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
/*
 * ip_rt_ioctl - handle the SIOCADDRT / SIOCDELRT routing ioctls.
 * @cmd: ioctl command.
 * @arg: user-space pointer to a struct rtentry.
 *
 * Both commands require CAP_NET_ADMIN. The rtentry is copied from user
 * space and converted by rtentry_to_fib_config(). SIOCDELRT then uses
 * fib_get_table() + tb_delete(); the other visible path uses
 * fib_new_table() + tb_insert().
 * NOTE(review): locking, error codes, the NULL-table checks and the release
 * of the metrics buffer are not visible in this excerpt.
 */
403 int ip_rt_ioctl(unsigned int cmd, void __user *arg)
405 struct fib_config cfg;
410 case SIOCADDRT: /* Add a route */
411 case SIOCDELRT: /* Delete a route */
412 if (!capable(CAP_NET_ADMIN))
415 if (copy_from_user(&rt, arg, sizeof(rt)))
419 err = rtentry_to_fib_config(cmd, &rt, &cfg);
421 struct fib_table *tb;
/* Delete only consults an existing table; the other path may create one. */
423 if (cmd == SIOCDELRT) {
424 tb = fib_get_table(cfg.fc_table);
426 err = tb->tb_delete(tb, &cfg);
430 tb = fib_new_table(cfg.fc_table);
432 err = tb->tb_insert(tb, &cfg);
437 /* allocated by rtentry_to_fib_config() */
/*
 * Second definition of ip_rt_ioctl(), taking a plain (non-__user) pointer.
 * NOTE(review): presumably the variant built when CONFIG_IP_NOSIOCRT is
 * defined; the #else and the function body are not visible in this
 * excerpt - confirm.
 */
448 int ip_rt_ioctl(unsigned int cmd, void *arg)
/*
 * Netlink attribute policy for IPv4 route messages, indexed by RTA_*
 * attribute type. rtm_to_fib_config() passes it to nlmsg_validate().
 * Most entries are declared as NLA_U32; RTA_METRICS is a nested attribute
 * and RTA_MULTIPATH is described only by a length, sizeof(struct rtnexthop).
 */
455 struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
456 [RTA_DST] = { .type = NLA_U32 },
457 [RTA_SRC] = { .type = NLA_U32 },
458 [RTA_IIF] = { .type = NLA_U32 },
459 [RTA_OIF] = { .type = NLA_U32 },
460 [RTA_GATEWAY] = { .type = NLA_U32 },
461 [RTA_PRIORITY] = { .type = NLA_U32 },
462 [RTA_PREFSRC] = { .type = NLA_U32 },
463 [RTA_METRICS] = { .type = NLA_NESTED },
464 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
465 [RTA_PROTOINFO] = { .type = NLA_U32 },
466 [RTA_FLOW] = { .type = NLA_U32 },
467 [RTA_MP_ALGO] = { .type = NLA_U32 },
/*
 * rtm_to_fib_config - translate a netlink route message into a fib_config.
 * @skb: buffer carrying the message; supplies the sender pid through
 *       NETLINK_CB().
 * @nlh: netlink message header; the payload starts with a struct rtmsg.
 * @cfg: output; zeroed, then filled from the rtmsg header and attributes.
 *
 * The message is validated against rtm_ipv4_policy before anything is
 * copied. fc_mx and fc_mp are set to point into the message (nla_data()),
 * so no copy of those payloads is made here.
 * NOTE(review): the case labels of the attribute switch and the return
 * statements are not visible in this excerpt.
 */
470 static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
471 struct fib_config *cfg)
477 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
481 memset(cfg, 0, sizeof(*cfg));
/* Fixed-size part: copy the rtmsg header fields one to one. */
483 rtm = nlmsg_data(nlh);
484 cfg->fc_dst_len = rtm->rtm_dst_len;
485 cfg->fc_tos = rtm->rtm_tos;
486 cfg->fc_table = rtm->rtm_table;
487 cfg->fc_protocol = rtm->rtm_protocol;
488 cfg->fc_scope = rtm->rtm_scope;
489 cfg->fc_type = rtm->rtm_type;
490 cfg->fc_flags = rtm->rtm_flags;
491 cfg->fc_nlflags = nlh->nlmsg_flags;
493 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
494 cfg->fc_nlinfo.nlh = nlh;
/* Variable part: walk the attributes that follow the rtmsg header. */
496 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
497 switch (attr->nla_type) {
499 cfg->fc_dst = nla_get_be32(attr);
502 cfg->fc_oif = nla_get_u32(attr);
505 cfg->fc_gw = nla_get_be32(attr);
508 cfg->fc_priority = nla_get_u32(attr);
511 cfg->fc_prefsrc = nla_get_be32(attr);
514 cfg->fc_mx = nla_data(attr);
515 cfg->fc_mx_len = nla_len(attr);
518 cfg->fc_mp = nla_data(attr);
519 cfg->fc_mp_len = nla_len(attr);
522 cfg->fc_flow = nla_get_u32(attr);
525 cfg->fc_mp_alg = nla_get_u32(attr);
/* An attribute can override the table id taken from rtm_table above. */
528 cfg->fc_table = nla_get_u32(attr);
/*
 * inet_rtm_delroute - netlink handler that deletes a route.
 * @skb: buffer carrying the request.
 * @nlh: netlink message header of the request.
 * @arg: not referenced in the visible lines.
 *
 * Converts the message with rtm_to_fib_config(), looks up the existing
 * table with fib_get_table() (no table is created on this path) and calls
 * its tb_delete() hook.
 * NOTE(review): the error checks between the steps and the return statement
 * are not visible in this excerpt.
 */
538 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
540 struct fib_config cfg;
541 struct fib_table *tb;
544 err = rtm_to_fib_config(skb, nlh, &cfg);
548 tb = fib_get_table(cfg.fc_table);
554 err = tb->tb_delete(tb, &cfg);
/*
 * inet_rtm_newroute - netlink handler that inserts a route.
 * @skb: buffer carrying the request.
 * @nlh: netlink message header of the request.
 * @arg: not referenced in the visible lines.
 *
 * Converts the message with rtm_to_fib_config(), obtains the table through
 * fib_new_table() and calls its tb_insert() hook.
 * NOTE(review): the error checks between the steps and the return statement
 * are not visible in this excerpt.
 */
559 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
561 struct fib_config cfg;
562 struct fib_table *tb;
565 err = rtm_to_fib_config(skb, nlh, &cfg);
569 tb = fib_new_table(cfg.fc_table);
575 err = tb->tb_insert(tb, &cfg);
/*
 * inet_dump_fib - netlink dump callback for the FIB tables.
 * @skb: buffer being filled with dump records.
 * @cb:  dump state; cb->nlh is the request, cb->args carries the position.
 *
 * A request whose payload is at least a struct rtmsg and has RTM_F_CLONED
 * set is handed to ip_rt_dump() instead. Otherwise the table hash is walked
 * from bucket s_h onwards and each table's tb_dump() hook is called.
 * NOTE(review): how s_h/s_e are loaded from and saved to cb->args, and the
 * return value, are not visible in this excerpt.
 */
580 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
583 unsigned int e = 0, s_e;
584 struct fib_table *tb;
585 struct hlist_node *node;
588 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
589 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
590 return ip_rt_dump(skb, cb);
/* s_e is reset to 0 after each bucket, so it only applies to the first. */
595 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
597 hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
/* Zero cb->args[2] and everything after it; args[0] and args[1] are kept. */
601 memset(&cb->args[2], 0, sizeof(cb->args) -
602 2 * sizeof(cb->args[0]));
603 if (tb->tb_dump(tb, skb, cb) < 0)
617 /* Prepare and feed an intra-kernel routing request.
618 Really, it should be a netlink message, but :-( netlink
619 may not be configured, so we feed it directly
620 to the fib engine. This is legal because all events occur
621 only when netlink is already locked.
/*
 * fib_magic - insert or delete one kernel-generated route for an address.
 * @cmd:     RTM_NEWROUTE selects tb_insert(); the other visible call is
 *           tb_delete().
 * @type:    route type; it selects both the table and the scope (below).
 * @dst:     destination prefix.
 *	NOTE(review): the initializer line that stores it is not visible in
 *	this excerpt.
 * @dst_len: prefix length, stored in fc_dst_len.
 * @ifa:     address supplying the preferred source (ifa_local) and the
 *           output interface (ifindex of its device).
 *
 * Routes are tagged RTPROT_KERNEL with NLM_F_CREATE | NLM_F_APPEND.
 * The return values of tb_insert()/tb_delete() are not used.
 */
624 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
626 struct fib_table *tb;
627 struct fib_config cfg = {
628 .fc_protocol = RTPROT_KERNEL,
631 .fc_dst_len = dst_len,
632 .fc_prefsrc = ifa->ifa_local,
633 .fc_oif = ifa->ifa_dev->dev->ifindex,
634 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
/* RTN_UNICAST goes to the main table; the other assignment uses the local table. */
637 if (type == RTN_UNICAST)
638 tb = fib_new_table(RT_TABLE_MAIN);
640 tb = fib_new_table(RT_TABLE_LOCAL);
645 cfg.fc_table = tb->tb_id;
/* Anything that is not RTN_LOCAL gets link scope; the other assignment is host scope. */
647 if (type != RTN_LOCAL)
648 cfg.fc_scope = RT_SCOPE_LINK;
650 cfg.fc_scope = RT_SCOPE_HOST;
652 if (cmd == RTM_NEWROUTE)
653 tb->tb_insert(tb, &cfg);
655 tb->tb_delete(tb, &cfg);
/*
 * fib_add_ifaddr - install the routes implied by a new interface address.
 * @ifa: the address being added.
 *
 * Through fib_magic() this adds, in order: a /32 RTN_LOCAL route for the
 * address itself; an RTN_BROADCAST route for an explicitly assigned
 * broadcast address other than 255.255.255.255; the prefix route
 * (RTN_LOCAL on a loopback device, RTN_UNICAST otherwise); and, for
 * prefixes shorter than /31, broadcast routes for the network address and
 * the all-ones host address.
 * For a secondary address the routes refer to the primary address found by
 * inet_ifa_byprefix().
 * NOTE(review): the early returns (prim == NULL, device not IFF_UP) are not
 * visible in this excerpt.
 */
658 void fib_add_ifaddr(struct in_ifaddr *ifa)
660 struct in_device *in_dev = ifa->ifa_dev;
661 struct net_device *dev = in_dev->dev;
662 struct in_ifaddr *prim = ifa;
663 __be32 mask = ifa->ifa_mask;
664 __be32 addr = ifa->ifa_local;
665 __be32 prefix = ifa->ifa_address&mask;
667 if (ifa->ifa_flags&IFA_F_SECONDARY) {
668 prim = inet_ifa_byprefix(in_dev, prefix, mask);
670 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
675 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
677 if (!(dev->flags&IFF_UP))
680 /* Add broadcast address, if it is explicitly assigned. */
681 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
682 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
/*
 * Prefix route: skipped for a zero network, for secondary addresses, and
 * for a /32 whose prefix equals the address itself.
 */
684 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
685 (prefix != addr || ifa->ifa_prefixlen < 32)) {
686 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
687 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
689 /* Add network-specific broadcasts, when that makes sense */
690 if (ifa->ifa_prefixlen < 31) {
691 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
692 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
/*
 * fib_del_ifaddr - remove the routes that belonged to a deleted address.
 * @ifa: the address being removed.
 *
 * For a non-secondary address the prefix route is deleted first; the other
 * visible path looks up the primary address with inet_ifa_byprefix().
 * The device's remaining addresses are then scanned and compared with the
 * local address, the assigned broadcast, the all-ones broadcast (brd) and
 * the network address (any). The broadcast and local routes are deleted
 * through fib_magic(); the local route is guarded by !(ok & LOCAL_OK).
 * If the address is no longer RTN_LOCAL afterwards, fib_sync_down() is
 * called for it.
 * NOTE(review): the 'ok' bookkeeping inside the scan, the guards on the
 * broadcast deletions and the final flush call are not visible in this
 * excerpt.
 */
697 static void fib_del_ifaddr(struct in_ifaddr *ifa)
699 struct in_device *in_dev = ifa->ifa_dev;
700 struct net_device *dev = in_dev->dev;
701 struct in_ifaddr *ifa1;
702 struct in_ifaddr *prim = ifa;
/* brd: address with all host bits set; any: address with all host bits clear. */
703 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
704 __be32 any = ifa->ifa_address&ifa->ifa_mask;
711 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
712 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
713 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
715 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
717 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
722 /* Deletion is more complicated than add.
723 We should take care of not to delete too much :-)
725 Scan address list to be sure that addresses are really gone.
728 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
729 if (ifa->ifa_local == ifa1->ifa_local)
731 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
733 if (brd == ifa1->ifa_broadcast)
735 if (any == ifa1->ifa_broadcast)
740 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
742 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
744 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
745 if (!(ok&LOCAL_OK)) {
746 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
748 /* Check, that this local address finally disappeared. */
749 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
750 /* And the last, but not the least thing.
751 We must flush stray FIB entries.
753 First of all, we scan fib_info list searching
754 for stray nexthop entries, then ignite fib_flush.
756 if (fib_sync_down(ifa->ifa_local, NULL, 0))
/*
 * nl_fib_lookup - run one FIB lookup described by a fib_result_nl.
 * @frn: request and reply in one structure: fl_mark, fl_addr and fl_scope
 *       are read; tb_id, err, prefixlen, nh_sel, type and scope are written.
 * @tb:  table to query.
 *
 * frn->err receives the return value of the table's tb_lookup() hook.
 * NOTE(review): the guards around the table dereference and around the
 * result copy are not visible in this excerpt.
 */
766 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
769 struct fib_result res;
770 struct flowi fl = { .mark = frn->fl_mark,
771 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
773 .scope = frn->fl_scope } } };
777 frn->tb_id = tb->tb_id;
778 frn->err = tb->tb_lookup(tb, &fl, &res);
/* Copy the result fields back into the reply structure. */
781 frn->prefixlen = res.prefixlen;
782 frn->nh_sel = res.nh_sel;
783 frn->type = res.type;
784 frn->scope = res.scope;
/*
 * nl_fib_input - input callback of the NETLINK_FIB_LOOKUP kernel socket.
 * @sk:  the kernel netlink socket.
 * @len: not referenced in the visible lines.
 *
 * Dequeues one skb, checks that it holds a complete netlink message with a
 * struct fib_result_nl payload, performs the lookup in place with
 * nl_fib_lookup() and sends the same skb back to the pid taken from
 * nlmsg_pid.
 * NOTE(review): the skb_dequeue() result is dereferenced on the next visible
 * line and no NULL check is visible in this excerpt - confirm that an empty
 * queue cannot occur here. Likewise the fib_get_table() result is passed on
 * without a visible check.
 */
790 static void nl_fib_input(struct sock *sk, int len)
792 struct sk_buff *skb = NULL;
793 struct nlmsghdr *nlh = NULL;
794 struct fib_result_nl *frn;
796 struct fib_table *tb;
798 skb = skb_dequeue(&sk->sk_receive_queue);
799 nlh = (struct nlmsghdr *)skb->data;
/*
 * Length checks: room for a header, the buffer covers the claimed message
 * length, and the message is large enough for a fib_result_nl payload.
 */
800 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
801 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
806 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
807 tb = fib_get_table(frn->tb_id_in);
809 nl_fib_lookup(frn, tb);
811 pid = nlh->nlmsg_pid; /* pid of sending process */
812 NETLINK_CB(skb).pid = 0; /* from kernel */
813 NETLINK_CB(skb).dst_group = 0; /* unicast */
814 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
/*
 * nl_fib_lookup_init - create the NETLINK_FIB_LOOKUP kernel socket with
 * nl_fib_input() as its input callback. The return value of
 * netlink_kernel_create() is not used in the visible line.
 */
817 static void nl_fib_lookup_init(void)
819 netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
/*
 * fib_disable_ip - take down the routes that use a device.
 * @dev:   device being disabled.
 * @force: passed through to fib_sync_down(); callers in this file use 0, 1
 *         and 2.
 *
 * Calls fib_sync_down(0, dev, force).
 * NOTE(review): the action taken when it returns non-zero, and any further
 * cleanup, are not visible in this excerpt.
 */
822 static void fib_disable_ip(struct net_device *dev, int force)
824 if (fib_sync_down(0, dev, force))
/*
 * fib_inetaddr_event - notifier callback for interface address events.
 * @this:  the notifier block (fib_inetaddr_notifier uses this callback).
 * @event: event code.
 * @ptr:   the struct in_ifaddr the event refers to.
 *
 * Visible actions: with CONFIG_IP_ROUTE_MULTIPATH, fib_sync_up() is called
 * on the address's device; when the device's address list has become empty,
 * fib_disable_ip(dev, 1) is called.
 * NOTE(review): the switch on @event, its case labels and the return value
 * are not visible in this excerpt.
 */
830 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
832 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
837 #ifdef CONFIG_IP_ROUTE_MULTIPATH
838 fib_sync_up(ifa->ifa_dev->dev);
844 if (ifa->ifa_dev->ifa_list == NULL) {
845 /* Last address was deleted from this interface.
848 fib_disable_ip(ifa->ifa_dev->dev, 1);
/*
 * fib_netdev_event - notifier callback for network device events.
 * @this:  the notifier block (fib_netdev_notifier uses this callback).
 * @event: NETDEV_* event code.
 * @ptr:   the struct net_device the event refers to.
 *
 * NETDEV_UNREGISTER is handled first with fib_disable_ip(dev, 2). The other
 * visible actions are an address-list walk ending in endfor_ifa(), a
 * fib_disable_ip(dev, 0) call and a NETDEV_CHANGEMTU case.
 * NOTE(review): the remaining case labels, the loop body and the return
 * value are not visible in this excerpt.
 */
857 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
859 struct net_device *dev = ptr;
860 struct in_device *in_dev = __in_dev_get_rtnl(dev);
862 if (event == NETDEV_UNREGISTER) {
863 fib_disable_ip(dev, 2);
874 } endfor_ifa(in_dev);
875 #ifdef CONFIG_IP_ROUTE_MULTIPATH
881 fib_disable_ip(dev, 0);
883 case NETDEV_CHANGEMTU:
/* Notifier block for address events; registered in ip_fib_init(). */
891 static struct notifier_block fib_inetaddr_notifier = {
892 .notifier_call =fib_inetaddr_event,
/* Notifier block for device events; registered in ip_fib_init(). */
895 static struct notifier_block fib_netdev_notifier = {
896 .notifier_call =fib_netdev_event,
/*
 * ip_fib_init - boot-time initialisation of the FIB frontend.
 *
 * Initialises every bucket of fib_table_hash. Without
 * CONFIG_IP_MULTIPLE_TABLES it creates the local and main tables with
 * fib_hash_init() and links both into bucket 0. It then registers the
 * netdevice and inetaddr notifiers and creates the lookup netlink socket.
 * NOTE(review): the other branch of the #ifndef is not visible in this
 * excerpt, and no check of the fib_hash_init() results is visible.
 */
899 void __init ip_fib_init(void)
903 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
904 INIT_HLIST_HEAD(&fib_table_hash[i]);
905 #ifndef CONFIG_IP_MULTIPLE_TABLES
906 ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
907 hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
908 ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
909 hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
914 register_netdevice_notifier(&fib_netdev_notifier);
915 register_inetaddr_notifier(&fib_inetaddr_notifier);
916 nl_fib_lookup_init();
/* Symbols exported from this file for use by other kernel code. */
919 EXPORT_SYMBOL(inet_addr_type);
920 EXPORT_SYMBOL(ip_dev_find);