2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
56 #include <linux/sysctl.h>
58 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
66 struct ipv4_devconf ipv4_devconf = {
68 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
75 static struct ipv4_devconf ipv4_devconf_dflt = {
77 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 },
89 [IFA_ADDRESS] = { .type = NLA_U32 },
90 [IFA_BROADCAST] = { .type = NLA_U32 },
91 [IFA_ANYCAST] = { .type = NLA_U32 },
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 static void devinet_sysctl_register(struct in_device *in_dev,
102 struct ipv4_devconf *p);
103 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
106 /* Locks all the inet devices. */
108 static struct in_ifaddr *inet_alloc_ifa(void)
110 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
113 INIT_RCU_HEAD(&ifa->rcu_head);
119 static void inet_rcu_free_ifa(struct rcu_head *head)
121 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
123 in_dev_put(ifa->ifa_dev);
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
129 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
132 void in_dev_finish_destroy(struct in_device *idev)
134 struct net_device *dev = idev->dev;
136 BUG_TRAP(!idev->ifa_list);
137 BUG_TRAP(!idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 idev, dev ? dev->name : "NIL");
144 printk("Freeing alive in_device %p\n", idev);
150 static struct in_device *inetdev_init(struct net_device *dev)
152 struct in_device *in_dev;
156 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
159 INIT_RCU_HEAD(&in_dev->rcu_head);
160 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
161 in_dev->cnf.sysctl = NULL;
163 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
165 /* Reference in_dev->dev */
168 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
172 /* Account for reference dev->ip_ptr (below) */
176 devinet_sysctl_register(in_dev, &in_dev->cnf);
178 ip_mc_init_dev(in_dev);
179 if (dev->flags & IFF_UP)
182 /* we can receive as soon as ip_ptr is set -- do this last */
183 rcu_assign_pointer(dev->ip_ptr, in_dev);
192 static void in_dev_rcu_put(struct rcu_head *head)
194 struct in_device *idev = container_of(head, struct in_device, rcu_head);
198 static void inetdev_destroy(struct in_device *in_dev)
200 struct in_ifaddr *ifa;
201 struct net_device *dev;
209 ip_mc_destroy_dev(in_dev);
211 while ((ifa = in_dev->ifa_list) != NULL) {
212 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
217 devinet_sysctl_unregister(&in_dev->cnf);
223 neigh_sysctl_unregister(in_dev->arp_parms);
225 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
228 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
231 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
234 for_primary_ifa(in_dev) {
235 if (inet_ifa_match(a, ifa)) {
236 if (!b || inet_ifa_match(b, ifa)) {
241 } endfor_ifa(in_dev);
246 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
247 int destroy, struct nlmsghdr *nlh, u32 pid)
249 struct in_ifaddr *promote = NULL;
250 struct in_ifaddr *ifa, *ifa1 = *ifap;
251 struct in_ifaddr *last_prim = in_dev->ifa_list;
252 struct in_ifaddr *prev_prom = NULL;
253 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
257 /* 1. Deleting primary ifaddr forces deletion of all secondaries
258 * unless alias promotion is set
261 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
262 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
264 while ((ifa = *ifap1) != NULL) {
265 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
266 ifa1->ifa_scope <= ifa->ifa_scope)
269 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
270 ifa1->ifa_mask != ifa->ifa_mask ||
271 !inet_ifa_match(ifa1->ifa_address, ifa)) {
272 ifap1 = &ifa->ifa_next;
278 *ifap1 = ifa->ifa_next;
280 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
281 blocking_notifier_call_chain(&inetaddr_chain,
293 *ifap = ifa1->ifa_next;
295 /* 3. Announce address deletion */
297 /* Send message first, then call notifier.
298 At first sight, the FIB update triggered by the notifier
299 will refer to an already deleted ifaddr, which could confuse
300 netlink listeners. That is not so: gated sees that the
301 route was deleted, and if it still thinks the ifaddr
302 is valid, it will try to restore the deleted routes... Grr.
303 So this order is correct.
305 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
306 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
311 prev_prom->ifa_next = promote->ifa_next;
312 promote->ifa_next = last_prim->ifa_next;
313 last_prim->ifa_next = promote;
316 promote->ifa_flags &= ~IFA_F_SECONDARY;
317 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
318 blocking_notifier_call_chain(&inetaddr_chain,
320 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
321 if (ifa1->ifa_mask != ifa->ifa_mask ||
322 !inet_ifa_match(ifa1->ifa_address, ifa))
332 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341 struct in_device *in_dev = ifa->ifa_dev;
342 struct in_ifaddr *ifa1, **ifap, **last_primary;
346 if (!ifa->ifa_local) {
351 ifa->ifa_flags &= ~IFA_F_SECONDARY;
352 last_primary = &in_dev->ifa_list;
354 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
355 ifap = &ifa1->ifa_next) {
356 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
357 ifa->ifa_scope <= ifa1->ifa_scope)
358 last_primary = &ifa1->ifa_next;
359 if (ifa1->ifa_mask == ifa->ifa_mask &&
360 inet_ifa_match(ifa1->ifa_address, ifa)) {
361 if (ifa1->ifa_local == ifa->ifa_local) {
365 if (ifa1->ifa_scope != ifa->ifa_scope) {
369 ifa->ifa_flags |= IFA_F_SECONDARY;
373 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
374 net_srandom(ifa->ifa_local);
378 ifa->ifa_next = *ifap;
381 /* Send message first, then call notifier.
382 Notifier will trigger FIB update, so that
383 listeners of netlink will know about new ifaddr */
384 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
385 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
390 static int inet_insert_ifa(struct in_ifaddr *ifa)
392 return __inet_insert_ifa(ifa, NULL, 0);
395 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
397 struct in_device *in_dev = __in_dev_get_rtnl(dev);
405 ipv4_devconf_setall(in_dev);
406 if (ifa->ifa_dev != in_dev) {
407 BUG_TRAP(!ifa->ifa_dev);
409 ifa->ifa_dev = in_dev;
411 if (LOOPBACK(ifa->ifa_local))
412 ifa->ifa_scope = RT_SCOPE_HOST;
413 return inet_insert_ifa(ifa);
416 struct in_device *inetdev_by_index(int ifindex)
418 struct net_device *dev;
419 struct in_device *in_dev = NULL;
420 read_lock(&dev_base_lock);
421 dev = __dev_get_by_index(&init_net, ifindex);
423 in_dev = in_dev_get(dev);
424 read_unlock(&dev_base_lock);
428 /* Called only from RTNL semaphored context. No locks. */
430 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
435 for_primary_ifa(in_dev) {
436 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
438 } endfor_ifa(in_dev);
442 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
444 struct net *net = skb->sk->sk_net;
445 struct nlattr *tb[IFA_MAX+1];
446 struct in_device *in_dev;
447 struct ifaddrmsg *ifm;
448 struct in_ifaddr *ifa, **ifap;
453 if (net != &init_net)
456 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
460 ifm = nlmsg_data(nlh);
461 in_dev = inetdev_by_index(ifm->ifa_index);
462 if (in_dev == NULL) {
467 __in_dev_put(in_dev);
469 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
470 ifap = &ifa->ifa_next) {
472 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
475 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
478 if (tb[IFA_ADDRESS] &&
479 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
480 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
483 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
487 err = -EADDRNOTAVAIL;
492 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
494 struct nlattr *tb[IFA_MAX+1];
495 struct in_ifaddr *ifa;
496 struct ifaddrmsg *ifm;
497 struct net_device *dev;
498 struct in_device *in_dev;
501 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
505 ifm = nlmsg_data(nlh);
506 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
511 dev = __dev_get_by_index(&init_net, ifm->ifa_index);
517 in_dev = __in_dev_get_rtnl(dev);
518 if (in_dev == NULL) {
523 ifa = inet_alloc_ifa();
526 * A potential indev allocation can be left alive, it stays
527 * assigned to its device and is destroyed with it.
533 ipv4_devconf_setall(in_dev);
536 if (tb[IFA_ADDRESS] == NULL)
537 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
539 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
540 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
541 ifa->ifa_flags = ifm->ifa_flags;
542 ifa->ifa_scope = ifm->ifa_scope;
543 ifa->ifa_dev = in_dev;
545 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
546 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
548 if (tb[IFA_BROADCAST])
549 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
552 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
555 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
557 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
565 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
567 struct net *net = skb->sk->sk_net;
568 struct in_ifaddr *ifa;
572 if (net != &init_net)
575 ifa = rtm_to_ifaddr(nlh);
579 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
583 * Determine a default network mask, based on the IP address.
586 static __inline__ int inet_abc_len(__be32 addr)
588 int rc = -1; /* Something else, probably a multicast. */
593 __u32 haddr = ntohl(addr);
595 if (IN_CLASSA(haddr))
597 else if (IN_CLASSB(haddr))
599 else if (IN_CLASSC(haddr))
607 int devinet_ioctl(unsigned int cmd, void __user *arg)
610 struct sockaddr_in sin_orig;
611 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
612 struct in_device *in_dev;
613 struct in_ifaddr **ifap = NULL;
614 struct in_ifaddr *ifa = NULL;
615 struct net_device *dev;
618 int tryaddrmatch = 0;
621 * Fetch the caller's info block into kernel space
624 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
626 ifr.ifr_name[IFNAMSIZ - 1] = 0;
628 /* save original address for comparison */
629 memcpy(&sin_orig, sin, sizeof(*sin));
631 colon = strchr(ifr.ifr_name, ':');
636 dev_load(&init_net, ifr.ifr_name);
640 case SIOCGIFADDR: /* Get interface address */
641 case SIOCGIFBRDADDR: /* Get the broadcast address */
642 case SIOCGIFDSTADDR: /* Get the destination address */
643 case SIOCGIFNETMASK: /* Get the netmask for the interface */
644 /* Note that these ioctls will not sleep,
645 so that we do not impose a lock.
646 One day we will be forced to put shlock here (I mean SMP)
648 tryaddrmatch = (sin_orig.sin_family == AF_INET);
649 memset(sin, 0, sizeof(*sin));
650 sin->sin_family = AF_INET;
655 if (!capable(CAP_NET_ADMIN))
658 case SIOCSIFADDR: /* Set interface address (and family) */
659 case SIOCSIFBRDADDR: /* Set the broadcast address */
660 case SIOCSIFDSTADDR: /* Set the destination address */
661 case SIOCSIFNETMASK: /* Set the netmask for the interface */
663 if (!capable(CAP_NET_ADMIN))
666 if (sin->sin_family != AF_INET)
677 if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
683 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
685 /* Matthias Andree */
686 /* compare label and address (4.4BSD style) */
687 /* note: we only do this for a limited set of ioctls
688 and only if the original address family was AF_INET.
689 This is checked above. */
690 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
691 ifap = &ifa->ifa_next) {
692 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
693 sin_orig.sin_addr.s_addr ==
699 /* we didn't get a match, maybe the application is
700 4.3BSD-style and passed in junk so we fall back to
701 comparing just the label */
703 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
704 ifap = &ifa->ifa_next)
705 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
710 ret = -EADDRNOTAVAIL;
711 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
715 case SIOCGIFADDR: /* Get interface address */
716 sin->sin_addr.s_addr = ifa->ifa_local;
719 case SIOCGIFBRDADDR: /* Get the broadcast address */
720 sin->sin_addr.s_addr = ifa->ifa_broadcast;
723 case SIOCGIFDSTADDR: /* Get the destination address */
724 sin->sin_addr.s_addr = ifa->ifa_address;
727 case SIOCGIFNETMASK: /* Get the netmask for the interface */
728 sin->sin_addr.s_addr = ifa->ifa_mask;
733 ret = -EADDRNOTAVAIL;
737 if (!(ifr.ifr_flags & IFF_UP))
738 inet_del_ifa(in_dev, ifap, 1);
741 ret = dev_change_flags(dev, ifr.ifr_flags);
744 case SIOCSIFADDR: /* Set interface address (and family) */
746 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
751 if ((ifa = inet_alloc_ifa()) == NULL)
754 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
756 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
759 if (ifa->ifa_local == sin->sin_addr.s_addr)
761 inet_del_ifa(in_dev, ifap, 0);
762 ifa->ifa_broadcast = 0;
763 ifa->ifa_anycast = 0;
766 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
768 if (!(dev->flags & IFF_POINTOPOINT)) {
769 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
770 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
771 if ((dev->flags & IFF_BROADCAST) &&
772 ifa->ifa_prefixlen < 31)
773 ifa->ifa_broadcast = ifa->ifa_address |
776 ifa->ifa_prefixlen = 32;
777 ifa->ifa_mask = inet_make_mask(32);
779 ret = inet_set_ifa(dev, ifa);
782 case SIOCSIFBRDADDR: /* Set the broadcast address */
784 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
785 inet_del_ifa(in_dev, ifap, 0);
786 ifa->ifa_broadcast = sin->sin_addr.s_addr;
787 inet_insert_ifa(ifa);
791 case SIOCSIFDSTADDR: /* Set the destination address */
793 if (ifa->ifa_address == sin->sin_addr.s_addr)
796 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
799 inet_del_ifa(in_dev, ifap, 0);
800 ifa->ifa_address = sin->sin_addr.s_addr;
801 inet_insert_ifa(ifa);
804 case SIOCSIFNETMASK: /* Set the netmask for the interface */
807 * The mask we set must be legal.
810 if (bad_mask(sin->sin_addr.s_addr, 0))
813 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
814 __be32 old_mask = ifa->ifa_mask;
815 inet_del_ifa(in_dev, ifap, 0);
816 ifa->ifa_mask = sin->sin_addr.s_addr;
817 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
819 /* See if current broadcast address matches
820 * with current netmask, then recalculate
821 * the broadcast address. Otherwise it's a
822 * funny address, so don't touch it since
823 * the user seems to know what (s)he's doing...
825 if ((dev->flags & IFF_BROADCAST) &&
826 (ifa->ifa_prefixlen < 31) &&
827 (ifa->ifa_broadcast ==
828 (ifa->ifa_local|~old_mask))) {
829 ifa->ifa_broadcast = (ifa->ifa_local |
830 ~sin->sin_addr.s_addr);
832 inet_insert_ifa(ifa);
842 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
846 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
848 struct in_device *in_dev = __in_dev_get_rtnl(dev);
849 struct in_ifaddr *ifa;
853 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
856 for (; ifa; ifa = ifa->ifa_next) {
861 if (len < (int) sizeof(ifr))
863 memset(&ifr, 0, sizeof(struct ifreq));
865 strcpy(ifr.ifr_name, ifa->ifa_label);
867 strcpy(ifr.ifr_name, dev->name);
869 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
870 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
873 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
877 buf += sizeof(struct ifreq);
878 len -= sizeof(struct ifreq);
879 done += sizeof(struct ifreq);
885 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
888 struct in_device *in_dev;
891 in_dev = __in_dev_get_rcu(dev);
895 for_primary_ifa(in_dev) {
896 if (ifa->ifa_scope > scope)
898 if (!dst || inet_ifa_match(dst, ifa)) {
899 addr = ifa->ifa_local;
903 addr = ifa->ifa_local;
904 } endfor_ifa(in_dev);
911 /* Non-loopback addresses on loopback should be preferred
912 in this case. It is important that lo is the first interface
915 read_lock(&dev_base_lock);
917 for_each_netdev(&init_net, dev) {
918 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
921 for_primary_ifa(in_dev) {
922 if (ifa->ifa_scope != RT_SCOPE_LINK &&
923 ifa->ifa_scope <= scope) {
924 addr = ifa->ifa_local;
925 goto out_unlock_both;
927 } endfor_ifa(in_dev);
930 read_unlock(&dev_base_lock);
936 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
937 __be32 local, int scope)
944 (local == ifa->ifa_local || !local) &&
945 ifa->ifa_scope <= scope) {
946 addr = ifa->ifa_local;
951 same = (!local || inet_ifa_match(local, ifa)) &&
952 (!dst || inet_ifa_match(dst, ifa));
956 /* Is the selected addr in the dst subnet? */
957 if (inet_ifa_match(addr, ifa))
959 /* No, then can we use new local src? */
960 if (ifa->ifa_scope <= scope) {
961 addr = ifa->ifa_local;
964 /* search for large dst subnet for addr */
968 } endfor_ifa(in_dev);
970 return same? addr : 0;
974 * Confirm that local IP address exists using wildcards:
975 * - dev: only on this interface, 0=any interface
976 * - dst: only in the same subnet as dst, 0=any dst
977 * - local: address, 0=autoselect the local address
978 * - scope: maximum allowed scope value for the local address
980 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
983 struct in_device *in_dev;
987 if ((in_dev = __in_dev_get_rcu(dev)))
988 addr = confirm_addr_indev(in_dev, dst, local, scope);
994 read_lock(&dev_base_lock);
996 for_each_netdev(&init_net, dev) {
997 if ((in_dev = __in_dev_get_rcu(dev))) {
998 addr = confirm_addr_indev(in_dev, dst, local, scope);
1004 read_unlock(&dev_base_lock);
1013 int register_inetaddr_notifier(struct notifier_block *nb)
1015 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1018 int unregister_inetaddr_notifier(struct notifier_block *nb)
1020 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1023 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1024 * alias numbering and to create unique labels if possible.
1026 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1028 struct in_ifaddr *ifa;
1031 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1032 char old[IFNAMSIZ], *dot;
1034 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1035 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1038 dot = strchr(old, ':');
1040 sprintf(old, ":%d", named);
1043 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1044 strcat(ifa->ifa_label, dot);
1046 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1051 /* Called only under RTNL semaphore */
1053 static int inetdev_event(struct notifier_block *this, unsigned long event,
1056 struct net_device *dev = ptr;
1057 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1059 if (dev->nd_net != &init_net)
1065 if (event == NETDEV_REGISTER) {
1066 in_dev = inetdev_init(dev);
1068 return notifier_from_errno(-ENOMEM);
1069 if (dev->flags & IFF_LOOPBACK) {
1070 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1071 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1078 case NETDEV_REGISTER:
1079 printk(KERN_DEBUG "inetdev_event: bug\n");
1085 if (dev->flags & IFF_LOOPBACK) {
1086 struct in_ifaddr *ifa;
1087 if ((ifa = inet_alloc_ifa()) != NULL) {
1089 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1090 ifa->ifa_prefixlen = 8;
1091 ifa->ifa_mask = inet_make_mask(8);
1092 in_dev_hold(in_dev);
1093 ifa->ifa_dev = in_dev;
1094 ifa->ifa_scope = RT_SCOPE_HOST;
1095 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1096 inet_insert_ifa(ifa);
1104 case NETDEV_CHANGEMTU:
1107 /* MTU fell below 68, disable IP */
1108 case NETDEV_UNREGISTER:
1109 inetdev_destroy(in_dev);
1111 case NETDEV_CHANGENAME:
1112 /* Do not notify about label change, this event is
1113 * not interesting to applications using netlink.
1115 inetdev_changename(dev, in_dev);
1117 #ifdef CONFIG_SYSCTL
1118 devinet_sysctl_unregister(&in_dev->cnf);
1119 neigh_sysctl_unregister(in_dev->arp_parms);
1120 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1121 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1122 devinet_sysctl_register(in_dev, &in_dev->cnf);
1130 static struct notifier_block ip_netdev_notifier = {
1131 .notifier_call =inetdev_event,
1134 static inline size_t inet_nlmsg_size(void)
1136 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1137 + nla_total_size(4) /* IFA_ADDRESS */
1138 + nla_total_size(4) /* IFA_LOCAL */
1139 + nla_total_size(4) /* IFA_BROADCAST */
1140 + nla_total_size(4) /* IFA_ANYCAST */
1141 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1144 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1145 u32 pid, u32 seq, int event, unsigned int flags)
1147 struct ifaddrmsg *ifm;
1148 struct nlmsghdr *nlh;
1150 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1154 ifm = nlmsg_data(nlh);
1155 ifm->ifa_family = AF_INET;
1156 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1157 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1158 ifm->ifa_scope = ifa->ifa_scope;
1159 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1161 if (ifa->ifa_address)
1162 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1165 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1167 if (ifa->ifa_broadcast)
1168 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1170 if (ifa->ifa_anycast)
1171 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1173 if (ifa->ifa_label[0])
1174 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1176 return nlmsg_end(skb, nlh);
1179 nlmsg_cancel(skb, nlh);
1183 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1185 struct net *net = skb->sk->sk_net;
1187 struct net_device *dev;
1188 struct in_device *in_dev;
1189 struct in_ifaddr *ifa;
1190 int s_ip_idx, s_idx = cb->args[0];
1192 if (net != &init_net)
1195 s_ip_idx = ip_idx = cb->args[1];
1197 for_each_netdev(&init_net, dev) {
1202 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1205 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1206 ifa = ifa->ifa_next, ip_idx++) {
1207 if (ip_idx < s_ip_idx)
1209 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1211 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1220 cb->args[1] = ip_idx;
1225 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1228 struct sk_buff *skb;
1229 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1232 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1236 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1238 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1239 WARN_ON(err == -EMSGSIZE);
1243 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1246 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1249 #ifdef CONFIG_SYSCTL
1251 static void devinet_copy_dflt_conf(int i)
1253 struct net_device *dev;
1255 read_lock(&dev_base_lock);
1256 for_each_netdev(&init_net, dev) {
1257 struct in_device *in_dev;
1259 in_dev = __in_dev_get_rcu(dev);
1260 if (in_dev && !test_bit(i, in_dev->cnf.state))
1261 in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1264 read_unlock(&dev_base_lock);
1267 static int devinet_conf_proc(ctl_table *ctl, int write,
1268 struct file* filp, void __user *buffer,
1269 size_t *lenp, loff_t *ppos)
1271 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1274 struct ipv4_devconf *cnf = ctl->extra1;
1275 int i = (int *)ctl->data - cnf->data;
1277 set_bit(i, cnf->state);
1279 if (cnf == &ipv4_devconf_dflt)
1280 devinet_copy_dflt_conf(i);
1286 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1287 void __user *oldval, size_t __user *oldlenp,
1288 void __user *newval, size_t newlen)
1290 struct ipv4_devconf *cnf;
1291 int *valp = table->data;
1295 if (!newval || !newlen)
1298 if (newlen != sizeof(int))
1301 if (get_user(new, (int __user *)newval))
1307 if (oldval && oldlenp) {
1310 if (get_user(len, oldlenp))
1314 if (len > table->maxlen)
1315 len = table->maxlen;
1316 if (copy_to_user(oldval, valp, len))
1318 if (put_user(len, oldlenp))
1325 cnf = table->extra1;
1326 i = (int *)table->data - cnf->data;
1328 set_bit(i, cnf->state);
1330 if (cnf == &ipv4_devconf_dflt)
1331 devinet_copy_dflt_conf(i);
1336 void inet_forward_change(void)
1338 struct net_device *dev;
1339 int on = IPV4_DEVCONF_ALL(FORWARDING);
1341 IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1342 IPV4_DEVCONF_DFLT(FORWARDING) = on;
1344 read_lock(&dev_base_lock);
1345 for_each_netdev(&init_net, dev) {
1346 struct in_device *in_dev;
1348 in_dev = __in_dev_get_rcu(dev);
1350 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1353 read_unlock(&dev_base_lock);
1358 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1359 struct file* filp, void __user *buffer,
1360 size_t *lenp, loff_t *ppos)
1362 int *valp = ctl->data;
1364 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1366 if (write && *valp != val) {
1367 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1368 inet_forward_change();
1369 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1376 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1377 struct file* filp, void __user *buffer,
1378 size_t *lenp, loff_t *ppos)
1380 int *valp = ctl->data;
1382 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1384 if (write && *valp != val)
1390 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1391 void __user *oldval, size_t __user *oldlenp,
1392 void __user *newval, size_t newlen)
1394 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1404 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1406 .ctl_name = NET_IPV4_CONF_ ## attr, \
1408 .data = ipv4_devconf.data + \
1409 NET_IPV4_CONF_ ## attr - 1, \
1410 .maxlen = sizeof(int), \
1412 .proc_handler = proc, \
1413 .strategy = sysctl, \
1414 .extra1 = &ipv4_devconf, \
1417 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1418 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1419 devinet_conf_sysctl)
1421 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1422 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1423 devinet_conf_sysctl)
1425 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1426 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1428 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1429 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1430 ipv4_doint_and_flush_strategy)
1432 static struct devinet_sysctl_table {
1433 struct ctl_table_header *sysctl_header;
1434 ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1435 ctl_table devinet_dev[2];
1436 ctl_table devinet_conf_dir[2];
1437 ctl_table devinet_proto_dir[2];
1438 ctl_table devinet_root_dir[2];
1439 } devinet_sysctl = {
1441 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1442 devinet_sysctl_forward,
1443 devinet_conf_sysctl),
1444 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1446 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1447 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1448 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1449 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1450 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1451 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1452 "accept_source_route"),
1453 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1454 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1455 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1456 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1457 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1458 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1459 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1460 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1461 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1463 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1464 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1465 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1466 "force_igmp_version"),
1467 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1468 "promote_secondaries"),
1472 .ctl_name = NET_PROTO_CONF_ALL,
1475 .child = devinet_sysctl.devinet_vars,
1478 .devinet_conf_dir = {
1480 .ctl_name = NET_IPV4_CONF,
1483 .child = devinet_sysctl.devinet_dev,
1486 .devinet_proto_dir = {
1488 .ctl_name = NET_IPV4,
1491 .child = devinet_sysctl.devinet_conf_dir,
1494 .devinet_root_dir = {
1496 .ctl_name = CTL_NET,
1499 .child = devinet_sysctl.devinet_proto_dir,
1504 static void devinet_sysctl_register(struct in_device *in_dev,
1505 struct ipv4_devconf *p)
1508 struct net_device *dev = in_dev ? in_dev->dev : NULL;
1509 struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1511 char *dev_name = NULL;
1515 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1516 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1517 t->devinet_vars[i].extra1 = p;
1521 dev_name = dev->name;
1522 t->devinet_dev[0].ctl_name = dev->ifindex;
1524 dev_name = "default";
1525 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1529 * Make a copy of dev_name, because '.procname' is regarded as const
1530 * by sysctl and we wouldn't want anyone to change it under our feet
1531 * (see SIOCSIFNAME).
1533 dev_name = kstrdup(dev_name, GFP_KERNEL);
1537 t->devinet_dev[0].procname = dev_name;
1538 t->devinet_dev[0].child = t->devinet_vars;
1539 t->devinet_conf_dir[0].child = t->devinet_dev;
1540 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1541 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1543 t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1544 if (!t->sysctl_header)
1558 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1561 struct devinet_sysctl_table *t = p->sysctl;
1563 unregister_sysctl_table(t->sysctl_header);
1564 kfree(t->devinet_dev[0].procname);
1570 void __init devinet_init(void)
1572 register_gifconf(PF_INET, inet_gifconf);
1573 register_netdevice_notifier(&ip_netdev_notifier);
1575 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1576 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1577 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1578 #ifdef CONFIG_SYSCTL
1579 devinet_sysctl.sysctl_header =
1580 register_sysctl_table(devinet_sysctl.devinet_root_dir);
1581 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1585 EXPORT_SYMBOL(in_dev_finish_destroy);
1586 EXPORT_SYMBOL(inet_select_addr);
1587 EXPORT_SYMBOL(inetdev_by_index);
1588 EXPORT_SYMBOL(register_inetaddr_notifier);
1589 EXPORT_SYMBOL(unregister_inetaddr_notifier);