2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
56 #include <linux/sysctl.h>
58 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
66 struct ipv4_devconf ipv4_devconf = {
68 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
75 static struct ipv4_devconf ipv4_devconf_dflt = {
77 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 },
89 [IFA_ADDRESS] = { .type = NLA_U32 },
90 [IFA_BROADCAST] = { .type = NLA_U32 },
91 [IFA_ANYCAST] = { .type = NLA_U32 },
92 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
104 static inline void devinet_sysctl_register(struct in_device *idev)
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
112 /* Locks all the inet devices. */
/*
 * Allocate a new in_ifaddr. The structure comes back zero-filled from
 * kzalloc() (GFP_KERNEL) and its rcu_head is initialised.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this extract -- confirm against the full source.
 */
114 static struct in_ifaddr *inet_alloc_ifa(void)
116 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr that
 * embeds @head and drops the reference it holds on its in_device.
 * NOTE(review): the release of the ifaddr memory itself is presumably on a
 * line missing from this extract -- confirm.
 */
125 static void inet_rcu_free_ifa(struct rcu_head *head)
127 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
129 in_dev_put(ifa->ifa_dev);
/*
 * Release @ifa in a deferred fashion: inet_rcu_free_ifa() is queued on the
 * address's rcu_head through call_rcu() rather than being run directly.
 */
133 static inline void inet_free_ifa(struct in_ifaddr *ifa)
135 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final destruction step for an in_device (exported symbol).
 * Sanity-checks that both the address list and the multicast list are
 * already empty. With NET_REFCNT_DEBUG defined, logs the in_device pointer
 * and the device name ("NIL" when there is no device).
 * NOTE(review): the condition guarding the "Freeing alive in_device" message
 * and the actual freeing are not visible in this extract.
 */
138 void in_dev_finish_destroy(struct in_device *idev)
140 struct net_device *dev = idev->dev;
142 BUG_TRAP(!idev->ifa_list);
143 BUG_TRAP(!idev->mc_list);
144 #ifdef NET_REFCNT_DEBUG
145 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
146 idev, dev ? dev->name : "NIL");
150 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 per-device state (in_device) for @dev.
 *
 * Visible steps, in order:
 *  - allocate a zeroed in_device and initialise its rcu_head;
 *  - seed its configuration from ipv4_devconf_dflt and clear cnf.sysctl;
 *  - allocate ARP neighbour parameters from arp_tbl;
 *  - register the sysctl entries and initialise multicast state;
 *  - publish the result through dev->ip_ptr with rcu_assign_pointer(),
 *    deliberately last because reception can start once ip_ptr is set.
 *
 * NOTE(review): error handling, the reference counting hinted at by the
 * inline comments, the IFF_UP action and the return value are on lines
 * missing from this extract.
 */
156 static struct in_device *inetdev_init(struct net_device *dev)
158 struct in_device *in_dev;
162 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165 INIT_RCU_HEAD(&in_dev->rcu_head);
166 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
167 in_dev->cnf.sysctl = NULL;
169 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
171 /* Reference in_dev->dev */
173 /* Account for reference dev->ip_ptr (below) */
176 devinet_sysctl_register(in_dev);
177 ip_mc_init_dev(in_dev);
178 if (dev->flags & IFF_UP)
181 /* we can receive as soon as ip_ptr is set -- do this last */
182 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(). Recovers the in_device that
 * embeds @head.
 * NOTE(review): the action taken on idev is not visible in this extract;
 * the name suggests a reference drop -- confirm.
 */
191 static void in_dev_rcu_put(struct rcu_head *head)
193 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state attached to a device.
 *
 * Visible steps, in order: destroy the multicast state, delete every address
 * still on ifa_list (inet_del_ifa() with destroy == 0), unregister the
 * sysctl entries, release the ARP neighbour parameters, and finally queue
 * in_dev_rcu_put() through call_rcu().
 * NOTE(review): the handling of @dev and of each removed ifa inside the
 * loop is on lines missing from this extract.
 */
197 static void inetdev_destroy(struct in_device *in_dev)
199 struct in_ifaddr *ifa;
200 struct net_device *dev;
208 ip_mc_destroy_dev(in_dev);
210 while ((ifa = in_dev->ifa_list) != NULL) {
211 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
217 devinet_sysctl_unregister(in_dev);
218 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Walk the primary addresses of @in_dev looking for one whose subnet
 * contains @a and, when @b is non-zero, also contains @b.
 * NOTE(review): the return statements are not visible in this extract;
 * presumably non-zero on a match and zero otherwise -- confirm.
 */
224 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 for_primary_ifa(in_dev) {
228 if (inet_ifa_match(a, ifa)) {
229 if (!b || inet_ifa_match(b, ifa)) {
234 } endfor_ifa(in_dev);
/*
 * Unlink the address referenced by @ifap from @in_dev and announce the
 * removal.
 *
 * When the address is a primary (IFA_F_SECONDARY clear), the entries after
 * it are scanned for secondaries with the same mask that match its subnet.
 * Per the in-body note these are deleted too, unless promotion is enabled
 * (IN_DEV_PROMOTE_SECONDARIES). Each removal is announced with an
 * RTM_DELADDR netlink message first and the inetaddr_chain notifier second.
 *
 * A promoted secondary is re-linked right after last_prim, has
 * IFA_F_SECONDARY cleared and is announced with RTM_NEWADDR.
 *
 * @nlh and @pid are handed through to rtmsg_ifa().
 * NOTE(review): the use of @destroy, the selection of the promote candidate
 * and the body of the final loop are on lines missing from this extract.
 */
239 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
240 int destroy, struct nlmsghdr *nlh, u32 pid)
242 struct in_ifaddr *promote = NULL;
243 struct in_ifaddr *ifa, *ifa1 = *ifap;
244 struct in_ifaddr *last_prim = in_dev->ifa_list;
245 struct in_ifaddr *prev_prom = NULL;
246 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250 /* 1. Deleting primary ifaddr forces deletion all secondaries
251 * unless alias promotion is set
254 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
255 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
257 while ((ifa = *ifap1) != NULL) {
258 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
259 ifa1->ifa_scope <= ifa->ifa_scope)
262 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
263 ifa1->ifa_mask != ifa->ifa_mask ||
264 !inet_ifa_match(ifa1->ifa_address, ifa)) {
265 ifap1 = &ifa->ifa_next;
271 *ifap1 = ifa->ifa_next;
273 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
274 blocking_notifier_call_chain(&inetaddr_chain,
286 *ifap = ifa1->ifa_next;
288 /* 3. Announce address deletion */
290 /* Send message first, then call notifier.
291 At first sight, FIB update triggered by notifier
292 will refer to already deleted ifaddr, that could confuse
293 netlink listeners. It is not true: look, gated sees
294 that route deleted and if it still thinks that ifaddr
295 is valid, it will try to restore deleted routes... Grr.
296 So that, this order is correct.
298 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
299 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
304 prev_prom->ifa_next = promote->ifa_next;
305 promote->ifa_next = last_prim->ifa_next;
306 last_prim->ifa_next = promote;
309 promote->ifa_flags &= ~IFA_F_SECONDARY;
310 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
311 blocking_notifier_call_chain(&inetaddr_chain,
313 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
314 if (ifa1->ifa_mask != ifa->ifa_mask ||
315 !inet_ifa_match(ifa1->ifa_address, ifa))
/*
 * Convenience wrapper around __inet_del_ifa() for callers that are not
 * answering a netlink request: no nlmsghdr (NULL) and pid 0 are passed.
 */
325 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Insert @ifa into the address list of its device (ifa->ifa_dev) and
 * announce it.
 *
 * The address starts out as a primary (IFA_F_SECONDARY cleared). While
 * scanning the existing list, last_primary tracks the slot after the last
 * primary whose scope value is not below the new address's scope. An
 * existing entry with the same mask that matches the subnet appears to turn
 * the new address into a secondary (IFA_F_SECONDARY set), after checks for
 * an identical local address and for a scope mismatch.
 *
 * For a primary, net_srandom() is fed the local address. The address is
 * then linked in and announced: RTM_NEWADDR netlink message first, NETDEV_UP
 * notifier second, so netlink listeners learn about the address before the
 * FIB update the notifier triggers.
 *
 * NOTE(review): the outcome of the !ifa_local, duplicate-address and
 * scope-mismatch checks, and the return value, are on lines missing from
 * this extract.
 */
331 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 struct in_device *in_dev = ifa->ifa_dev;
335 struct in_ifaddr *ifa1, **ifap, **last_primary;
339 if (!ifa->ifa_local) {
344 ifa->ifa_flags &= ~IFA_F_SECONDARY;
345 last_primary = &in_dev->ifa_list;
347 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
348 ifap = &ifa1->ifa_next) {
349 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
350 ifa->ifa_scope <= ifa1->ifa_scope)
351 last_primary = &ifa1->ifa_next;
352 if (ifa1->ifa_mask == ifa->ifa_mask &&
353 inet_ifa_match(ifa1->ifa_address, ifa)) {
354 if (ifa1->ifa_local == ifa->ifa_local) {
358 if (ifa1->ifa_scope != ifa->ifa_scope) {
362 ifa->ifa_flags |= IFA_F_SECONDARY;
366 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
367 net_srandom(ifa->ifa_local);
371 ifa->ifa_next = *ifap;
374 /* Send message first, then call notifier.
375 Notifier will trigger FIB update, so that
376 listeners of netlink will know about new ifaddr */
377 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
378 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that are not
 * answering a netlink request: no nlmsghdr (NULL) and pid 0 are passed.
 * Returns whatever __inet_insert_ifa() returns.
 */
383 static int inet_insert_ifa(struct in_ifaddr *ifa)
385 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Bind @ifa to the in_device of @dev and insert it.
 *
 * The in_device is looked up with __in_dev_get_rtnl(). If @ifa is not yet
 * bound to it, @ifa must not be bound to any other device (BUG_TRAP) and is
 * attached. Loopback local addresses are forced to RT_SCOPE_HOST.
 * Returns the result of inet_insert_ifa().
 * NOTE(review): the handling of a missing in_device and any reference
 * taken when attaching are on lines missing from this extract.
 */
388 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
390 struct in_device *in_dev = __in_dev_get_rtnl(dev);
398 ipv4_devconf_setall(in_dev);
399 if (ifa->ifa_dev != in_dev) {
400 BUG_TRAP(!ifa->ifa_dev);
402 ifa->ifa_dev = in_dev;
404 if (LOOPBACK(ifa->ifa_local))
405 ifa->ifa_scope = RT_SCOPE_HOST;
406 return inet_insert_ifa(ifa);
/*
 * Find the in_device of the interface with index @ifindex in init_net
 * (exported symbol). The device lookup and in_dev_get() run under the
 * dev_base_lock read lock; in_dev starts out NULL.
 * NOTE(review): the NULL check on dev and the return statement are not
 * visible in this extract; presumably in_dev is returned -- confirm.
 */
409 struct in_device *inetdev_by_index(int ifindex)
411 struct net_device *dev;
412 struct in_device *in_dev = NULL;
413 read_lock(&dev_base_lock);
414 dev = __dev_get_by_index(&init_net, ifindex);
416 in_dev = in_dev_get(dev);
417 read_unlock(&dev_base_lock);
421 /* Called only from RTNL semaphored context. No locks. */
/*
 * Search the primary addresses of @in_dev for one whose mask equals the
 * requested mask and whose subnet contains @prefix.
 * NOTE(review): the remaining parameter(s) and the return statements are
 * on lines missing from this extract.
 */
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
428 for_primary_ifa(in_dev) {
429 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
431 } endfor_ifa(in_dev);
/*
 * rtnetlink handler for RTM_DELADDR (registered in devinet_init()).
 *
 * The message is parsed against ifa_ipv4_policy and the in_device is
 * resolved from ifm->ifa_index. The address list is then walked, comparing
 * each entry with the attributes that were supplied: IFA_LOCAL, IFA_LABEL,
 * and IFA_ADDRESS together with the prefix length. The entry that passes is
 * removed via __inet_del_ifa() with destroy == 1, passing the request header
 * and the sender's netlink pid.
 *
 * NOTE(review): the result of the init_net check, the error paths and the
 * return statements are not visible here; -EADDRNOTAVAIL is presumably the
 * result when no entry matched -- confirm.
 */
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
437 struct net *net = skb->sk->sk_net;
438 struct nlattr *tb[IFA_MAX+1];
439 struct in_device *in_dev;
440 struct ifaddrmsg *ifm;
441 struct in_ifaddr *ifa, **ifap;
446 if (net != &init_net)
449 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 ifm = nlmsg_data(nlh);
454 in_dev = inetdev_by_index(ifm->ifa_index);
455 if (in_dev == NULL) {
460 __in_dev_put(in_dev);
462 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463 ifap = &ifa->ifa_next) {
465 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
468 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
471 if (tb[IFA_ADDRESS] &&
472 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
476 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 err = -EADDRNOTAVAIL;
485 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
487 struct nlattr *tb[IFA_MAX+1];
488 struct in_ifaddr *ifa;
489 struct ifaddrmsg *ifm;
490 struct net_device *dev;
491 struct in_device *in_dev;
494 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 ifm = nlmsg_data(nlh);
499 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
504 dev = __dev_get_by_index(&init_net, ifm->ifa_index);
510 in_dev = __in_dev_get_rtnl(dev);
511 if (in_dev == NULL) {
516 ifa = inet_alloc_ifa();
519 * A potential indev allocation can be left alive, it stays
520 * assigned to its device and is destroyed with it.
526 ipv4_devconf_setall(in_dev);
529 if (tb[IFA_ADDRESS] == NULL)
530 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
532 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
533 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
534 ifa->ifa_flags = ifm->ifa_flags;
535 ifa->ifa_scope = ifm->ifa_scope;
536 ifa->ifa_dev = in_dev;
538 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
539 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
541 if (tb[IFA_BROADCAST])
542 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
545 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
548 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
550 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink handler for RTM_NEWADDR (registered in devinet_init()).
 * Builds an in_ifaddr from the request with rtm_to_ifaddr() and inserts it
 * through __inet_insert_ifa(), passing the request header and the sender's
 * netlink pid.
 * NOTE(review): the result of the init_net check and the handling of a
 * failed rtm_to_ifaddr() are on lines missing from this extract.
 */
558 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
560 struct net *net = skb->sk->sk_net;
561 struct in_ifaddr *ifa;
565 if (net != &init_net)
568 ifa = rtm_to_ifaddr(nlh);
572 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576 * Determine a default network mask, based on the IP address.
/*
 * Classful prefix-length helper. The result starts at -1, which the inline
 * comment reserves for addresses outside the tested classes. @addr is
 * converted to host byte order and tested against IN_CLASSA, IN_CLASSB and
 * IN_CLASSC in turn.
 * NOTE(review): the value assigned for each class and any special case
 * handled before the conversion are on lines missing from this extract.
 */
579 static __inline__ int inet_abc_len(__be32 addr)
581 int rc = -1; /* Something else, probably a multicast. */
586 __u32 haddr = ntohl(addr);
588 if (IN_CLASSA(haddr))
590 else if (IN_CLASSB(haddr))
592 else if (IN_CLASSC(haddr))
600 int devinet_ioctl(unsigned int cmd, void __user *arg)
603 struct sockaddr_in sin_orig;
604 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
605 struct in_device *in_dev;
606 struct in_ifaddr **ifap = NULL;
607 struct in_ifaddr *ifa = NULL;
608 struct net_device *dev;
611 int tryaddrmatch = 0;
614 * Fetch the caller's info block into kernel space
617 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
619 ifr.ifr_name[IFNAMSIZ - 1] = 0;
621 /* save original address for comparison */
622 memcpy(&sin_orig, sin, sizeof(*sin));
624 colon = strchr(ifr.ifr_name, ':');
629 dev_load(&init_net, ifr.ifr_name);
633 case SIOCGIFADDR: /* Get interface address */
634 case SIOCGIFBRDADDR: /* Get the broadcast address */
635 case SIOCGIFDSTADDR: /* Get the destination address */
636 case SIOCGIFNETMASK: /* Get the netmask for the interface */
637 /* Note that these ioctls will not sleep,
638 so that we do not impose a lock.
639 One day we will be forced to put shlock here (I mean SMP)
641 tryaddrmatch = (sin_orig.sin_family == AF_INET);
642 memset(sin, 0, sizeof(*sin));
643 sin->sin_family = AF_INET;
648 if (!capable(CAP_NET_ADMIN))
651 case SIOCSIFADDR: /* Set interface address (and family) */
652 case SIOCSIFBRDADDR: /* Set the broadcast address */
653 case SIOCSIFDSTADDR: /* Set the destination address */
654 case SIOCSIFNETMASK: /* Set the netmask for the interface */
656 if (!capable(CAP_NET_ADMIN))
659 if (sin->sin_family != AF_INET)
670 if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
676 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
678 /* Matthias Andree */
679 /* compare label and address (4.4BSD style) */
680 /* note: we only do this for a limited set of ioctls
681 and only if the original address family was AF_INET.
682 This is checked above. */
683 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
684 ifap = &ifa->ifa_next) {
685 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
686 sin_orig.sin_addr.s_addr ==
692 /* we didn't get a match, maybe the application is
693 4.3BSD-style and passed in junk so we fall back to
694 comparing just the label */
696 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
697 ifap = &ifa->ifa_next)
698 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
703 ret = -EADDRNOTAVAIL;
704 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
708 case SIOCGIFADDR: /* Get interface address */
709 sin->sin_addr.s_addr = ifa->ifa_local;
712 case SIOCGIFBRDADDR: /* Get the broadcast address */
713 sin->sin_addr.s_addr = ifa->ifa_broadcast;
716 case SIOCGIFDSTADDR: /* Get the destination address */
717 sin->sin_addr.s_addr = ifa->ifa_address;
720 case SIOCGIFNETMASK: /* Get the netmask for the interface */
721 sin->sin_addr.s_addr = ifa->ifa_mask;
726 ret = -EADDRNOTAVAIL;
730 if (!(ifr.ifr_flags & IFF_UP))
731 inet_del_ifa(in_dev, ifap, 1);
734 ret = dev_change_flags(dev, ifr.ifr_flags);
737 case SIOCSIFADDR: /* Set interface address (and family) */
739 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
744 if ((ifa = inet_alloc_ifa()) == NULL)
747 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
749 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
752 if (ifa->ifa_local == sin->sin_addr.s_addr)
754 inet_del_ifa(in_dev, ifap, 0);
755 ifa->ifa_broadcast = 0;
756 ifa->ifa_anycast = 0;
759 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
761 if (!(dev->flags & IFF_POINTOPOINT)) {
762 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
763 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
764 if ((dev->flags & IFF_BROADCAST) &&
765 ifa->ifa_prefixlen < 31)
766 ifa->ifa_broadcast = ifa->ifa_address |
769 ifa->ifa_prefixlen = 32;
770 ifa->ifa_mask = inet_make_mask(32);
772 ret = inet_set_ifa(dev, ifa);
775 case SIOCSIFBRDADDR: /* Set the broadcast address */
777 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
778 inet_del_ifa(in_dev, ifap, 0);
779 ifa->ifa_broadcast = sin->sin_addr.s_addr;
780 inet_insert_ifa(ifa);
784 case SIOCSIFDSTADDR: /* Set the destination address */
786 if (ifa->ifa_address == sin->sin_addr.s_addr)
789 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
792 inet_del_ifa(in_dev, ifap, 0);
793 ifa->ifa_address = sin->sin_addr.s_addr;
794 inet_insert_ifa(ifa);
797 case SIOCSIFNETMASK: /* Set the netmask for the interface */
800 * The mask we set must be legal.
803 if (bad_mask(sin->sin_addr.s_addr, 0))
806 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
807 __be32 old_mask = ifa->ifa_mask;
808 inet_del_ifa(in_dev, ifap, 0);
809 ifa->ifa_mask = sin->sin_addr.s_addr;
810 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
812 /* See if current broadcast address matches
813 * with current netmask, then recalculate
814 * the broadcast address. Otherwise it's a
815 * funny address, so don't touch it since
816 * the user seems to know what (s)he's doing...
818 if ((dev->flags & IFF_BROADCAST) &&
819 (ifa->ifa_prefixlen < 31) &&
820 (ifa->ifa_broadcast ==
821 (ifa->ifa_local|~old_mask))) {
822 ifa->ifa_broadcast = (ifa->ifa_local |
823 ~sin->sin_addr.s_addr);
825 inet_insert_ifa(ifa);
835 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * gifconf callback for PF_INET (registered in devinet_init()).
 *
 * For each address of @dev an ifreq is zeroed, given a name (the address
 * label or the device name) and an AF_INET sockaddr, and copied to the user
 * buffer @buf. After each copy @buf advances, @len shrinks and the running
 * total `done` grows by sizeof(struct ifreq). A remaining @len smaller than
 * one ifreq is checked before filling the next entry.
 *
 * NOTE(review): the condition choosing between label and device name, the
 * address value stored, the handling of a failed copy_to_user() and the
 * return value are on lines missing from this extract.
 */
839 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
841 struct in_device *in_dev = __in_dev_get_rtnl(dev);
842 struct in_ifaddr *ifa;
846 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
849 for (; ifa; ifa = ifa->ifa_next) {
854 if (len < (int) sizeof(ifr))
856 memset(&ifr, 0, sizeof(struct ifreq));
858 strcpy(ifr.ifr_name, ifa->ifa_label);
860 strcpy(ifr.ifr_name, dev->name);
862 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
863 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
866 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870 buf += sizeof(struct ifreq);
871 len -= sizeof(struct ifreq);
872 done += sizeof(struct ifreq);
878 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
881 struct in_device *in_dev;
884 in_dev = __in_dev_get_rcu(dev);
888 for_primary_ifa(in_dev) {
889 if (ifa->ifa_scope > scope)
891 if (!dst || inet_ifa_match(dst, ifa)) {
892 addr = ifa->ifa_local;
896 addr = ifa->ifa_local;
897 } endfor_ifa(in_dev);
904 /* Not loopback addresses on loopback should be preferred
905 in this case. It is important that lo is the first interface
908 read_lock(&dev_base_lock);
910 for_each_netdev(&init_net, dev) {
911 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
914 for_primary_ifa(in_dev) {
915 if (ifa->ifa_scope != RT_SCOPE_LINK &&
916 ifa->ifa_scope <= scope) {
917 addr = ifa->ifa_local;
918 goto out_unlock_both;
920 } endfor_ifa(in_dev);
923 read_unlock(&dev_base_lock);
929 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
930 __be32 local, int scope)
937 (local == ifa->ifa_local || !local) &&
938 ifa->ifa_scope <= scope) {
939 addr = ifa->ifa_local;
944 same = (!local || inet_ifa_match(local, ifa)) &&
945 (!dst || inet_ifa_match(dst, ifa));
949 /* Is the selected addr into dst subnet? */
950 if (inet_ifa_match(addr, ifa))
952 /* No, then can we use new local src? */
953 if (ifa->ifa_scope <= scope) {
954 addr = ifa->ifa_local;
957 /* search for large dst subnet for addr */
961 } endfor_ifa(in_dev);
963 return same? addr : 0;
967 * Confirm that local IP address exists using wildcards:
968 * - dev: only on this interface, 0=any interface
969 * - dst: only in the same subnet as dst, 0=any dst
970 * - local: address, 0=autoselect the local address
971 * - scope: maximum allowed scope value for the local address
/*
 * Confirm a local address, optionally restricted to one device.
 *
 * Two lookups are visible: one on the in_device of @dev, and one that walks
 * every device in init_net under the dev_base_lock read lock. Both delegate
 * the per-device check to confirm_addr_indev() with the same @dst, @local
 * and @scope.
 * NOTE(review): the condition selecting between the two lookups, the loop
 * exit and the return statement are on lines missing from this extract.
 */
973 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
976 struct in_device *in_dev;
980 if ((in_dev = __in_dev_get_rcu(dev)))
981 addr = confirm_addr_indev(in_dev, dst, local, scope);
987 read_lock(&dev_base_lock);
989 for_each_netdev(&init_net, dev) {
990 if ((in_dev = __in_dev_get_rcu(dev))) {
991 addr = confirm_addr_indev(in_dev, dst, local, scope);
997 read_unlock(&dev_base_lock);
/*
 * Add @nb to inetaddr_chain, the blocking notifier chain that is called
 * with NETDEV_UP / NETDEV_DOWN when IPv4 addresses are inserted or deleted.
 * Returns the result of blocking_notifier_chain_register().
 */
1006 int register_inetaddr_notifier(struct notifier_block *nb)
1008 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove @nb from inetaddr_chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1011 int unregister_inetaddr_notifier(struct notifier_block *nb)
1013 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1016 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1017 * alias numbering and to create unique labels if possible.
/*
 * Rewrite the label of every address on @in_dev after @dev was renamed.
 *
 * For each address the old label is saved, the label is overwritten with
 * the new device name, and an alias suffix is re-attached. The suffix is
 * located with strchr(old, ':'); a ":<n>" suffix built with sprintf() is
 * also visible. If name plus suffix fit in IFNAMSIZ the suffix is appended,
 * otherwise it is written over the tail of the label so the result still
 * ends inside the buffer.
 * NOTE(review): the conditions around the `named` counter and the lookup of
 * the suffix are on lines missing from this extract.
 */
1019 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1021 struct in_ifaddr *ifa;
1024 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1025 char old[IFNAMSIZ], *dot;
1027 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1028 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 dot = strchr(old, ':');
1033 sprintf(old, ":%d", named);
1036 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1037 strcat(ifa->ifa_label, dot);
1039 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1044 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * Visible behaviour:
 *  - NETDEV_REGISTER seen in the first check: create the in_device with
 *    inetdev_init(); -ENOMEM is reported through notifier_from_errno().
 *    Loopback devices get NOXFRM and NOPOLICY set to 1.
 *  - NETDEV_REGISTER reaching the switch is logged as a bug.
 *  - On a loopback device, an INADDR_LOOPBACK address with prefix length 8
 *    and RT_SCOPE_HOST is allocated, bound to in_dev (taking a reference
 *    with in_dev_hold()) and inserted.
 *  - NETDEV_UNREGISTER: destroy the in_device.
 *  - NETDEV_CHANGENAME: relabel addresses and re-register the sysctl entries
 *    under the new name.
 *
 * NOTE(review): several case labels, conditions and the return statements
 * are on lines missing from this extract, so the exact mapping of events to
 * actions should be confirmed against the full source.
 */
1046 static int inetdev_event(struct notifier_block *this, unsigned long event,
1049 struct net_device *dev = ptr;
1050 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1052 if (dev->nd_net != &init_net)
1058 if (event == NETDEV_REGISTER) {
1059 in_dev = inetdev_init(dev);
1061 return notifier_from_errno(-ENOMEM);
1062 if (dev->flags & IFF_LOOPBACK) {
1063 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1064 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1071 case NETDEV_REGISTER:
1072 printk(KERN_DEBUG "inetdev_event: bug\n");
1078 if (dev->flags & IFF_LOOPBACK) {
1079 struct in_ifaddr *ifa;
1080 if ((ifa = inet_alloc_ifa()) != NULL) {
1082 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083 ifa->ifa_prefixlen = 8;
1084 ifa->ifa_mask = inet_make_mask(8);
1085 in_dev_hold(in_dev);
1086 ifa->ifa_dev = in_dev;
1087 ifa->ifa_scope = RT_SCOPE_HOST;
1088 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089 inet_insert_ifa(ifa);
1097 case NETDEV_CHANGEMTU:
1100 /* MTU fell below 68, disable IP */
1101 case NETDEV_UNREGISTER:
1102 inetdev_destroy(in_dev);
1104 case NETDEV_CHANGENAME:
1105 /* Do not notify about label change, this event is
1106 * not interesting to applications using netlink.
1108 inetdev_changename(dev, in_dev);
1110 devinet_sysctl_unregister(in_dev);
1111 devinet_sysctl_register(in_dev);
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered from devinet_init() via register_netdevice_notifier().
 */
1118 static struct notifier_block ip_netdev_notifier = {
1119 .notifier_call =inetdev_event,
/*
 * Upper bound on the size of one address netlink message: the aligned
 * ifaddrmsg header plus room for four 4-byte attributes (IFA_ADDRESS,
 * IFA_LOCAL, IFA_BROADCAST, IFA_ANYCAST) and an IFNAMSIZ-byte IFA_LABEL.
 * rtmsg_ifa() sizes its skb with this value, so it has to cover every
 * attribute inet_fill_ifaddr() may emit.
 */
1122 static inline size_t inet_nlmsg_size(void)
1124 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1125 + nla_total_size(4) /* IFA_ADDRESS */
1126 + nla_total_size(4) /* IFA_LOCAL */
1127 + nla_total_size(4) /* IFA_BROADCAST */
1128 + nla_total_size(4) /* IFA_ANYCAST */
1129 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialise @ifa into @skb as one netlink address message.
 *
 * The header is added with nlmsg_put() using @pid, @seq, @event and @flags.
 * The ifaddrmsg is filled with family AF_INET, the prefix length, the flags
 * (always with IFA_F_PERMANENT or-ed in), the scope and the interface index
 * of the owning device. IFA_ADDRESS, IFA_BROADCAST and IFA_ANYCAST are
 * emitted only when non-zero, and IFA_LABEL only when the label is not
 * empty.
 *
 * Returns the result of nlmsg_end() on success. nlmsg_cancel() is the
 * visible failure path.
 * NOTE(review): the condition on IFA_LOCAL, the failure labels and the
 * error return values are on lines missing from this extract.
 */
1132 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1133 u32 pid, u32 seq, int event, unsigned int flags)
1135 struct ifaddrmsg *ifm;
1136 struct nlmsghdr *nlh;
1138 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1142 ifm = nlmsg_data(nlh);
1143 ifm->ifa_family = AF_INET;
1144 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1145 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1146 ifm->ifa_scope = ifa->ifa_scope;
1147 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1149 if (ifa->ifa_address)
1150 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1153 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1155 if (ifa->ifa_broadcast)
1156 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1158 if (ifa->ifa_anycast)
1159 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1161 if (ifa->ifa_label[0])
1162 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1164 return nlmsg_end(skb, nlh);
1167 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink dump callback for RTM_GETADDR (registered in devinet_init()).
 *
 * Resumable dump over every device in init_net and every address on each
 * device. cb->args[0] and cb->args[1] hold the device and address positions
 * to resume from; the address position is written back to cb->args[1].
 * Each address is emitted with inet_fill_ifaddr() as an RTM_NEWADDR message
 * flagged NLM_F_MULTI, and a result <= 0 from it is checked.
 * NOTE(review): the device-index bookkeeping, the loop exits and the
 * return value are on lines missing from this extract.
 */
1171 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1173 struct net *net = skb->sk->sk_net;
1175 struct net_device *dev;
1176 struct in_device *in_dev;
1177 struct in_ifaddr *ifa;
1178 int s_ip_idx, s_idx = cb->args[0];
1180 if (net != &init_net)
1183 s_ip_idx = ip_idx = cb->args[1];
1185 for_each_netdev(&init_net, dev) {
1190 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1193 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1194 ifa = ifa->ifa_next, ip_idx++) {
1195 if (ip_idx < s_ip_idx)
1197 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1208 cb->args[1] = ip_idx;
/*
 * Broadcast an address change (@event) for @ifa to the RTNLGRP_IPV4_IFADDR
 * netlink group.
 *
 * The sequence number is taken from @nlh when one is given, else 0. An skb
 * sized by inet_nlmsg_size() is allocated with GFP_KERNEL, filled by
 * inet_fill_ifaddr() and sent with rtnl_notify(). -EMSGSIZE from the fill
 * step triggers a WARN_ON because it would mean inet_nlmsg_size() is wrong.
 * rtnl_set_sk_err() records an error on the group's listeners.
 * NOTE(review): the conditions under which rtnl_set_sk_err() is reached are
 * on lines missing from this extract.
 */
1213 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1216 struct sk_buff *skb;
1217 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1220 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1224 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1227 WARN_ON(err == -EMSGSIZE);
1231 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1234 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1237 #ifdef CONFIG_SYSCTL
/*
 * Propagate default setting number @i to existing devices.
 * Under the dev_base_lock read lock, every device in init_net that has an
 * in_device receives ipv4_devconf_dflt.data[i], unless bit @i is set in that
 * device's cnf.state, in which case its current value is kept.
 */
1239 static void devinet_copy_dflt_conf(int i)
1241 struct net_device *dev;
1243 read_lock(&dev_base_lock);
1244 for_each_netdev(&init_net, dev) {
1245 struct in_device *in_dev;
1247 in_dev = __in_dev_get_rcu(dev);
1248 if (in_dev && !test_bit(i, in_dev->cnf.state))
1249 in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1252 read_unlock(&dev_base_lock);
/*
 * Apply a change of the "all" FORWARDING setting.
 * The "all" ACCEPT_REDIRECTS value becomes the inverse of the new forwarding
 * value, the default FORWARDING value is set to match it, and FORWARDING is
 * then set on the devices of init_net under the dev_base_lock read lock.
 * NOTE(review): the check guarding the per-device update and anything done
 * after the loop are on lines missing from this extract.
 */
1255 static void inet_forward_change(void)
1257 struct net_device *dev;
1258 int on = IPV4_DEVCONF_ALL(FORWARDING);
1260 IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1261 IPV4_DEVCONF_DFLT(FORWARDING) = on;
1263 read_lock(&dev_base_lock);
1264 for_each_netdev(&init_net, dev) {
1265 struct in_device *in_dev;
1267 in_dev = __in_dev_get_rcu(dev);
1269 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1272 read_unlock(&dev_base_lock);
/*
 * proc handler for the integer per-device configuration entries.
 * The read/write itself is delegated to proc_dointvec(). Afterwards the
 * entry's index is derived from the offset of ctl->data inside cnf->data
 * (cnf comes from ctl->extra1), that index is marked in cnf->state, and a
 * change to the default table is pushed to devices with
 * devinet_copy_dflt_conf().
 * NOTE(review): the condition guarding the post-processing (presumably a
 * write) and the return statement are on lines missing from this extract.
 */
1277 static int devinet_conf_proc(ctl_table *ctl, int write,
1278 struct file* filp, void __user *buffer,
1279 size_t *lenp, loff_t *ppos)
1281 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1284 struct ipv4_devconf *cnf = ctl->extra1;
1285 int i = (int *)ctl->data - cnf->data;
1287 set_bit(i, cnf->state);
1289 if (cnf == &ipv4_devconf_dflt)
1290 devinet_copy_dflt_conf(i);
/*
 * sysctl(2) strategy routine for the integer per-device configuration
 * entries.
 *
 * A new value is required, must be exactly sizeof(int) bytes, and is
 * fetched with get_user(). When the caller asked for the old value, at most
 * table->maxlen bytes of the current value are copied out and the length
 * written back. Afterwards the entry's index is derived from the offset of
 * table->data inside cnf->data, marked in cnf->state, and a change to the
 * default table is pushed to devices with devinet_copy_dflt_conf().
 * NOTE(review): the error codes, the store of the new value and the return
 * statements are on lines missing from this extract.
 */
1296 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1297 void __user *oldval, size_t __user *oldlenp,
1298 void __user *newval, size_t newlen)
1300 struct ipv4_devconf *cnf;
1301 int *valp = table->data;
1305 if (!newval || !newlen)
1308 if (newlen != sizeof(int))
1311 if (get_user(new, (int __user *)newval))
1317 if (oldval && oldlenp) {
1320 if (get_user(len, oldlenp))
1324 if (len > table->maxlen)
1325 len = table->maxlen;
1326 if (copy_to_user(oldval, valp, len))
1328 if (put_user(len, oldlenp))
1335 cnf = table->extra1;
1336 i = (int *)table->data - cnf->data;
1338 set_bit(i, cnf->state);
1340 if (cnf == &ipv4_devconf_dflt)
1341 devinet_copy_dflt_conf(i);
/*
 * proc handler for the "forwarding" entries (and "ip_forward").
 * The read/write is delegated to proc_dointvec(). When a write changed the
 * value, a change to the "all" entry is applied with inet_forward_change(),
 * and an entry that is neither "all" nor "default" takes a separate branch.
 * NOTE(review): the capture of the previous value `val`, the body of the
 * per-device branch and the return statement are on lines missing from this
 * extract.
 */
1346 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347 struct file* filp, void __user *buffer,
1348 size_t *lenp, loff_t *ppos)
1350 int *valp = ctl->data;
1352 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 if (write && *valp != val) {
1355 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1356 inet_forward_change();
1357 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
/*
 * proc handler that delegates to proc_dointvec() and then performs an extra
 * action when a write actually changed the value.
 * NOTE(review): that action is on a line missing from this extract; the
 * function name suggests a cache flush -- confirm. The capture of the
 * previous value `val` and the return statement are also not visible.
 */
1364 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1365 struct file* filp, void __user *buffer,
1366 size_t *lenp, loff_t *ppos)
1368 int *valp = ctl->data;
1370 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1372 if (write && *valp != val)
/*
 * sysctl(2) strategy counterpart of ipv4_doint_and_flush(): the value
 * handling is delegated to devinet_conf_sysctl().
 * NOTE(review): what happens with `ret` afterwards is on lines missing from
 * this extract.
 */
1378 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1379 void __user *oldval, size_t __user *oldlenp,
1380 void __user *newval, size_t newlen)
1382 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1392 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1394 .ctl_name = NET_IPV4_CONF_ ## attr, \
1396 .data = ipv4_devconf.data + \
1397 NET_IPV4_CONF_ ## attr - 1, \
1398 .maxlen = sizeof(int), \
1400 .proc_handler = proc, \
1401 .strategy = sysctl, \
1402 .extra1 = &ipv4_devconf, \
1405 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1406 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1407 devinet_conf_sysctl)
1409 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1410 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1411 devinet_conf_sysctl)
1413 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1414 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1416 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1417 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1418 ipv4_doint_and_flush_strategy)
1420 static struct devinet_sysctl_table {
1421 struct ctl_table_header *sysctl_header;
1422 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1424 } devinet_sysctl = {
1426 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1427 devinet_sysctl_forward,
1428 devinet_conf_sysctl),
1429 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1431 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1432 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1433 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1434 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1435 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1436 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1437 "accept_source_route"),
1438 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1439 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1440 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1441 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1442 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1443 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1444 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1445 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1446 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1448 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1449 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1450 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1451 "force_igmp_version"),
1452 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1453 "promote_secondaries"),
/*
 * Register the net/ipv4/conf/<dev_name> sysctl directory for the
 * configuration block @p.
 *
 * The template table devinet_sysctl is duplicated with kmemdup(). Every
 * entry's data pointer is rebased from ipv4_devconf onto @p and its extra1
 * is pointed at @p. The fourth path component (index DEVINET_CTL_PATH_DEV)
 * is filled in with a private copy of @dev_name and with @ctl_name, and the
 * table is registered with register_sysctl_paths().
 * NOTE(review): the allocation-failure paths, the store into p->sysctl and
 * the cleanup labels are on lines missing from this extract.
 */
1457 static void __devinet_sysctl_register(char *dev_name, int ctl_name,
1458 struct ipv4_devconf *p)
1461 struct devinet_sysctl_table *t;
1463 #define DEVINET_CTL_PATH_DEV 3
1465 struct ctl_path devinet_ctl_path[] = {
1466 { .procname = "net", .ctl_name = CTL_NET, },
1467 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1468 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1469 { /* to be set */ },
1473 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1477 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1478 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1479 t->devinet_vars[i].extra1 = p;
1483 * Make a copy of dev_name, because '.procname' is regarded as const
1484 * by sysctl and we wouldn't want anyone to change it under our feet
1485 * (see SIOCSIFNAME).
1487 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1492 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1494 t->sysctl_header = register_sysctl_paths(devinet_ctl_path,
1496 if (!t->sysctl_header)
/*
 * Undo __devinet_sysctl_register() for @cnf: the table stored in cnf->sysctl
 * is unregistered through its sysctl header.
 * NOTE(review): the NULL check, the reset of cnf->sysctl and the freeing of
 * the table are presumably on lines missing from this extract -- confirm.
 */
1510 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1512 struct devinet_sysctl_table *t = cnf->sysctl;
1518 unregister_sysctl_table(t->sysctl_header);
/*
 * Register all sysctl entries of one device: first the ARP neighbour
 * parameters under "ipv4", then the per-device conf directory, which is
 * named after the device and uses the interface index as ctl_name.
 */
1523 static void devinet_sysctl_register(struct in_device *idev)
1525 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1526 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1527 __devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
/*
 * Remove all sysctl entries of one device, in the reverse order of
 * devinet_sysctl_register(): the per-device conf directory first, then the
 * ARP neighbour parameters.
 */
1531 static void devinet_sysctl_unregister(struct in_device *idev)
1533 __devinet_sysctl_unregister(&idev->cnf);
1534 neigh_sysctl_unregister(idev->arp_parms);
1538 static struct ctl_table ctl_forward_entry[] = {
1540 .ctl_name = NET_IPV4_FORWARD,
1541 .procname = "ip_forward",
1542 .data = &ipv4_devconf.data[
1543 NET_IPV4_CONF_FORWARDING - 1],
1544 .maxlen = sizeof(int),
1546 .proc_handler = devinet_sysctl_forward,
1547 .strategy = devinet_conf_sysctl,
1548 .extra1 = &ipv4_devconf,
1553 static __initdata struct ctl_path net_ipv4_path[] = {
1554 { .procname = "net", .ctl_name = CTL_NET, },
1555 { .procname = "ipv4", .ctl_name = NET_IPV4, },
/*
 * Boot-time initialisation of the IPv4 device layer.
 *
 * Registers the PF_INET gifconf callback, the netdevice notifier and the
 * rtnetlink handlers: RTM_NEWADDR and RTM_DELADDR as doit handlers,
 * RTM_GETADDR as a dump handler. With CONFIG_SYSCTL it also registers the
 * "all" and "default" conf directories and the net/ipv4 "ip_forward" entry.
 */
1559 void __init devinet_init(void)
1561 register_gifconf(PF_INET, inet_gifconf);
1562 register_netdevice_notifier(&ip_netdev_notifier);
1564 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1565 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1566 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1567 #ifdef CONFIG_SYSCTL
1568 __devinet_sysctl_register("all", NET_PROTO_CONF_ALL,
1570 __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
1571 &ipv4_devconf_dflt);
1572 register_sysctl_paths(net_ipv4_path, ctl_forward_entry);
1576 EXPORT_SYMBOL(in_dev_finish_destroy);
1577 EXPORT_SYMBOL(inet_select_addr);
1578 EXPORT_SYMBOL(inetdev_by_index);
1579 EXPORT_SYMBOL(register_inetaddr_notifier);
1580 EXPORT_SYMBOL(unregister_inetaddr_notifier);