2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
54 #include <linux/sysctl.h>
56 #include <linux/kmod.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
// IPv4 device configuration table that devinet_init_net installs as
// devconf_all for the initial namespace (and duplicates for other namespaces).
// Slots are indexed by NET_IPV4_CONF_<attr> - 1. The visible initializers set
// accept_redirects, send_redirects, secure_redirects and shared_media to 1.
// NOTE(review): the member designator and closing lines of this initializer
// are not visible in this listing.
65 static struct ipv4_devconf ipv4_devconf = {
67 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
// Default IPv4 device configuration table that devinet_init_net installs as
// devconf_dflt; inetdev_init copies the namespace default into every new
// in_device. Same visible settings as ipv4_devconf, plus accept_source_route
// set to 1.
// NOTE(review): the member designator and closing lines of this initializer
// are not visible in this listing.
74 static struct ipv4_devconf ipv4_devconf_dflt = {
76 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
// Accessor for attribute attr in the default device configuration of network
// namespace net: applies IPV4_DEVCONF to the object that
// net->ipv4.devconf_dflt points to. The expansion is used as an lvalue in
// inet_forward_change and has its address taken in devinet_sysctl_forward.
84 #define IPV4_DEVCONF_DFLT(net, attr) \
85 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
// Netlink attribute policy passed to nlmsg_parse by inet_rtm_deladdr and
// rtm_to_ifaddr. IFA_LOCAL, IFA_ADDRESS and IFA_BROADCAST are declared as
// NLA_U32; IFA_LABEL is an NLA_STRING with .len of IFNAMSIZ - 1.
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 [IFA_LOCAL] = { .type = NLA_U32 },
89 [IFA_ADDRESS] = { .type = NLA_U32 },
90 [IFA_BROADCAST] = { .type = NLA_U32 },
91 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
// Forward declarations for helpers defined later in this file, plus the
// blocking notifier chain (inetaddr_chain) on which address up/down events
// are published.
// NOTE(review): the sysctl register/unregister helpers appear here both as
// prototypes and as inline definition headers; presumably a preprocessor
// conditional that is not visible in this listing selects between them, and
// the inline bodies are not visible either - confirm against the full file.
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
103 static inline void devinet_sysctl_register(struct in_device *idev)
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
111 /* Locks all the inet devices. */
// Allocate a zero-filled struct in_ifaddr with kzalloc(GFP_KERNEL) and
// initialise its embedded rcu_head.
// NOTE(review): the allocation-failure check and the return statement are not
// visible in this listing; callers in this file compare the result with NULL.
113 static struct in_ifaddr *inet_alloc_ifa(void)
115 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
118 INIT_RCU_HEAD(&ifa->rcu_head);
// RCU callback queued by inet_free_ifa. Recovers the in_ifaddr that embeds
// head and drops the reference it holds on its in_device via in_dev_put.
// NOTE(review): the statement that releases the in_ifaddr memory itself is
// not visible in this listing.
124 static void inet_rcu_free_ifa(struct rcu_head *head)
126 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
128 in_dev_put(ifa->ifa_dev);
// Schedule release of ifa through call_rcu, with inet_rcu_free_ifa as the
// callback, rather than releasing it immediately.
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
134 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
// Final teardown step for an in_device (exported symbol). Asserts through
// BUG_TRAP that the address list and the multicast list are both empty, and
// when NET_REFCNT_DEBUG is defined logs the in_device pointer and the device
// name (or NIL when there is no device).
// NOTE(review): the condition guarding the Freeing alive message and the
// statements that actually release the device reference and memory are not
// visible in this listing.
137 void in_dev_finish_destroy(struct in_device *idev)
139 struct net_device *dev = idev->dev;
141 BUG_TRAP(!idev->ifa_list);
142 BUG_TRAP(!idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145 idev, dev ? dev->name : "NIL");
149 printk("Freeing alive in_device %p\n", idev);
// Create the IPv4 per-device state (struct in_device) for dev.
// Visible steps, in order:
//  - allocate a zeroed in_device and initialise its rcu_head;
//  - copy the namespace default configuration (devconf_dflt) into cnf and
//    clear cnf.sysctl so the copy does not share a sysctl table pointer;
//  - allocate ARP neighbour parameters for the device;
//  - register the per-device sysctl entries and initialise multicast state;
//  - publish the in_device through dev->ip_ptr with rcu_assign_pointer, done
//    last because packets can be received as soon as ip_ptr is set.
// NOTE(review): the error paths, the reference-count statements announced by
// the two reference comments, the action taken when IFF_UP is set and the
// return statement are not visible in this listing.
155 static struct in_device *inetdev_init(struct net_device *dev)
157 struct in_device *in_dev;
161 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
164 INIT_RCU_HEAD(&in_dev->rcu_head);
165 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166 sizeof(in_dev->cnf));
167 in_dev->cnf.sysctl = NULL;
169 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
171 /* Reference in_dev->dev */
173 /* Account for reference dev->ip_ptr (below) */
176 devinet_sysctl_register(in_dev);
177 ip_mc_init_dev(in_dev);
178 if (dev->flags & IFF_UP)
181 /* we can receive as soon as ip_ptr is set -- do this last */
182 rcu_assign_pointer(dev->ip_ptr, in_dev);
// RCU callback queued by inetdev_destroy. Recovers the in_device that embeds
// head.
// NOTE(review): the statement acting on idev is not visible in this listing;
// the function name suggests it drops a reference - confirm.
191 static void in_dev_rcu_put(struct rcu_head *head)
193 struct in_device *idev = container_of(head, struct in_device, rcu_head);
// Tear down the IPv4 state of a device.
// Visible steps: destroy multicast state, delete addresses from the head of
// ifa_list until the list is empty (inet_del_ifa called with destroy == 0),
// unregister the sysctl entries, release the ARP neighbour parameters, and
// queue in_dev_rcu_put through call_rcu.
// NOTE(review): several statements (use of the dev local, what happens to
// each removed ifa inside the loop, clearing of dev->ip_ptr) are not visible
// in this listing.
197 static void inetdev_destroy(struct in_device *in_dev)
199 struct in_ifaddr *ifa;
200 struct net_device *dev;
208 ip_mc_destroy_dev(in_dev);
210 while ((ifa = in_dev->ifa_list) != NULL) {
211 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
217 devinet_sysctl_unregister(in_dev);
218 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
// Walk the primary addresses of in_dev looking for one whose subnet contains
// address a and, when b is non-zero, also contains address b.
// NOTE(review): the statements executed on a match and the return values are
// not visible in this listing.
224 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 for_primary_ifa(in_dev) {
228 if (inet_ifa_match(a, ifa)) {
229 if (!b || inet_ifa_match(b, ifa)) {
234 } endfor_ifa(in_dev);
// Remove the address that *ifap points at from the address list of in_dev.
//   in_dev   - device state owning the list
//   ifap     - address of the link (list head or some ifa_next) holding the
//              entry to delete
//   destroy  - NOTE(review): its use is not visible in this listing
//   nlh, pid - forwarded to rtmsg_ifa for the netlink notifications
// Visible behaviour: when the entry is a primary address, the entries after
// it are scanned; secondaries with the same mask and subnet are unlinked and
// announced (RTM_DELADDR plus a notifier call), other entries are skipped.
// The entry itself is then unlinked and announced with RTM_DELADDR followed
// by a NETDEV_DOWN notifier call. A promoted entry is relinked after
// last_prim, loses IFA_F_SECONDARY and is announced with RTM_NEWADDR.
// NOTE(review): many statements (selection of promote, updates of last_prim
// and prev_prom, the conditions around promotion, the final loop body) are
// not visible in this listing; statement order matters here, so the code is
// left untouched.
239 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
240 int destroy, struct nlmsghdr *nlh, u32 pid)
242 struct in_ifaddr *promote = NULL;
243 struct in_ifaddr *ifa, *ifa1 = *ifap;
244 struct in_ifaddr *last_prim = in_dev->ifa_list;
245 struct in_ifaddr *prev_prom = NULL;
246 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250 /* 1. Deleting primary ifaddr forces deletion of all secondaries
251 * unless alias promotion is set
254 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
255 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
257 while ((ifa = *ifap1) != NULL) {
258 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
259 ifa1->ifa_scope <= ifa->ifa_scope)
262 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
263 ifa1->ifa_mask != ifa->ifa_mask ||
264 !inet_ifa_match(ifa1->ifa_address, ifa)) {
265 ifap1 = &ifa->ifa_next;
271 *ifap1 = ifa->ifa_next;
273 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
274 blocking_notifier_call_chain(&inetaddr_chain,
286 *ifap = ifa1->ifa_next;
288 /* 3. Announce address deletion */
290 /* Send message first, then call notifier.
291 At first sight, FIB update triggered by notifier
292 will refer to already deleted ifaddr, that could confuse
293 netlink listeners. It is not true: look, gated sees
294 that route deleted and if it still thinks that ifaddr
295 is valid, it will try to restore deleted routes... Grr.
296 So that, this order is correct.
298 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
299 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
304 prev_prom->ifa_next = promote->ifa_next;
305 promote->ifa_next = last_prim->ifa_next;
306 last_prim->ifa_next = promote;
309 promote->ifa_flags &= ~IFA_F_SECONDARY;
310 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
311 blocking_notifier_call_chain(&inetaddr_chain,
313 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
314 if (ifa1->ifa_mask != ifa->ifa_mask ||
315 !inet_ifa_match(ifa1->ifa_address, ifa))
// Convenience wrapper around __inet_del_ifa for callers that are not handling
// a netlink request: passes a NULL netlink header and pid 0.
325 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
// Insert ifa into the address list of its device (ifa->ifa_dev).
//   ifa      - fully populated address to insert
//   nlh, pid - forwarded to rtmsg_ifa for the netlink notification
// Visible behaviour: the secondary flag is cleared first, then the list is
// walked. last_primary tracks the link after the last primary whose scope is
// at least that of ifa. If an existing entry has the same mask and covers
// the same subnet, equal local addresses and differing scopes are handled
// specially (those branches are not visible here), otherwise ifa is marked
// IFA_F_SECONDARY. A primary address seeds the network random generator
// with its local address. After linking, RTM_NEWADDR is sent before the
// NETDEV_UP notifier runs so that netlink listeners see the address before
// the FIB update.
// NOTE(review): the handling of a zero ifa_local, the duplicate and scope
// error returns, the final link assignment and the return value are not
// visible in this listing.
331 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334 struct in_device *in_dev = ifa->ifa_dev;
335 struct in_ifaddr *ifa1, **ifap, **last_primary;
339 if (!ifa->ifa_local) {
344 ifa->ifa_flags &= ~IFA_F_SECONDARY;
345 last_primary = &in_dev->ifa_list;
347 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
348 ifap = &ifa1->ifa_next) {
349 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
350 ifa->ifa_scope <= ifa1->ifa_scope)
351 last_primary = &ifa1->ifa_next;
352 if (ifa1->ifa_mask == ifa->ifa_mask &&
353 inet_ifa_match(ifa1->ifa_address, ifa)) {
354 if (ifa1->ifa_local == ifa->ifa_local) {
358 if (ifa1->ifa_scope != ifa->ifa_scope) {
362 ifa->ifa_flags |= IFA_F_SECONDARY;
366 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
367 net_srandom(ifa->ifa_local);
371 ifa->ifa_next = *ifap;
374 /* Send message first, then call notifier.
375 Notifier will trigger FIB update, so that
376 listeners of netlink will know about new ifaddr */
377 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
378 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
// Convenience wrapper around __inet_insert_ifa for callers that are not
// handling a netlink request: passes a NULL netlink header and pid 0, and
// returns the result of __inet_insert_ifa unchanged.
383 static int inet_insert_ifa(struct in_ifaddr *ifa)
385 return __inet_insert_ifa(ifa, NULL, 0);
// Attach ifa to the IPv4 state of dev and insert it into the address list.
// Marks every devconf entry of the device as explicitly set
// (ipv4_devconf_setall), binds ifa to the in_device when it is not bound yet
// (BUG_TRAP fires if it is bound to a different one), forces host scope for
// loopback addresses, then returns the result of inet_insert_ifa.
// NOTE(review): the handling of a missing in_device and the reference taken
// when binding are not visible in this listing.
388 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
390 struct in_device *in_dev = __in_dev_get_rtnl(dev);
398 ipv4_devconf_setall(in_dev);
399 if (ifa->ifa_dev != in_dev) {
400 BUG_TRAP(!ifa->ifa_dev);
402 ifa->ifa_dev = in_dev;
404 if (ipv4_is_loopback(ifa->ifa_local))
405 ifa->ifa_scope = RT_SCOPE_HOST;
406 return inet_insert_ifa(ifa);
// Look up the in_device of the interface with index ifindex in namespace net
// (exported symbol). The lookup runs under the dev_base_lock read lock and
// the in_device is obtained with in_dev_get; in_dev starts out as NULL.
// NOTE(review): the check on dev and the return statement are not visible in
// this listing; inet_rtm_deladdr calls __in_dev_put on the result, which
// suggests the returned in_device carries a reference - confirm.
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
411 struct net_device *dev;
412 struct in_device *in_dev = NULL;
413 read_lock(&dev_base_lock);
414 dev = __dev_get_by_index(net, ifindex);
416 in_dev = in_dev_get(dev);
417 read_unlock(&dev_base_lock);
// Search the primary addresses of in_dev for one whose netmask equals mask
// and whose subnet contains prefix.
// NOTE(review): the remaining parameter list and the return statements are
// not visible in this listing.
421 /* Called only from RTNL semaphored context. No locks. */
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
428 for_primary_ifa(in_dev) {
429 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
431 } endfor_ifa(in_dev);
// rtnetlink handler for RTM_DELADDR (registered in devinet_init).
// Parses the request against ifa_ipv4_policy, finds the in_device by the
// interface index in the ifaddrmsg, then walks its address list. An entry is
// passed over when a supplied IFA_LOCAL differs from its local address, when
// a supplied IFA_LABEL differs from its label, or when a supplied IFA_ADDRESS
// does not match its prefix length and subnet. The first entry that passes
// all filters is removed with __inet_del_ifa (destroy == 1), passing the
// request header and the sender pid. err is set to -EADDRNOTAVAIL after the
// loop.
// NOTE(review): the declaration of err, the error exits and the return
// statements are not visible in this listing.
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
437 struct net *net = sock_net(skb->sk);
438 struct nlattr *tb[IFA_MAX+1];
439 struct in_device *in_dev;
440 struct ifaddrmsg *ifm;
441 struct in_ifaddr *ifa, **ifap;
446 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450 ifm = nlmsg_data(nlh);
451 in_dev = inetdev_by_index(net, ifm->ifa_index);
452 if (in_dev == NULL) {
457 __in_dev_put(in_dev);
459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460 ifap = &ifa->ifa_next) {
462 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468 if (tb[IFA_ADDRESS] &&
469 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477 err = -EADDRNOTAVAIL;
// Build a new in_ifaddr from an RTM_NEWADDR netlink request.
// Parses the message against ifa_ipv4_policy, checks that the prefix length
// is at most 32 and that IFA_LOCAL is present, resolves the device by
// interface index and obtains its in_device, then allocates the address and
// fills it in: prefix length, mask derived from the prefix length, flags,
// scope, owning device, local address, peer address (IFA_ADDRESS falls back
// to IFA_LOCAL when absent) and optional broadcast address. The label is
// taken from IFA_LABEL with nla_strlcpy, or copied from the device name.
// NOTE(review): the error exits, the handling of a missing in_device and the
// return statements are not visible in this listing.
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
484 struct nlattr *tb[IFA_MAX+1];
485 struct in_ifaddr *ifa;
486 struct ifaddrmsg *ifm;
487 struct net_device *dev;
488 struct in_device *in_dev;
491 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495 ifm = nlmsg_data(nlh);
497 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500 dev = __dev_get_by_index(net, ifm->ifa_index);
505 in_dev = __in_dev_get_rtnl(dev);
510 ifa = inet_alloc_ifa();
513 * A potential indev allocation can be left alive, it stays
514 * assigned to its device and is destroy with it.
518 ipv4_devconf_setall(in_dev);
521 if (tb[IFA_ADDRESS] == NULL)
522 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
524 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526 ifa->ifa_flags = ifm->ifa_flags;
527 ifa->ifa_scope = ifm->ifa_scope;
528 ifa->ifa_dev = in_dev;
530 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
533 if (tb[IFA_BROADCAST])
534 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
539 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
// rtnetlink handler for RTM_NEWADDR (registered in devinet_init).
// Converts the request into an in_ifaddr with rtm_to_ifaddr and inserts it
// with __inet_insert_ifa, passing the request header and the sender pid so
// the resulting notification can refer to the request.
// NOTE(review): the check of the rtm_to_ifaddr result is not visible in this
// listing.
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
549 struct net *net = sock_net(skb->sk);
550 struct in_ifaddr *ifa;
554 ifa = rtm_to_ifaddr(net, nlh);
558 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
562 * Determine a default network mask, based on the IP address.
// Classify addr (network byte order) by the classful A/B/C rules. rc starts
// at -1, the value for anything that is none of the tested cases; callers in
// devinet_ioctl treat a negative result as an invalid address. The zero
// network is tested first, then the address is converted to host order for
// the IN_CLASSA / IN_CLASSB / IN_CLASSC tests.
// NOTE(review): the values assigned in each branch and the return statement
// are not visible in this listing; devinet_ioctl uses the result as a prefix
// length.
565 static __inline__ int inet_abc_len(__be32 addr)
567 int rc = -1; /* Something else, probably a multicast. */
569 if (ipv4_is_zeronet(addr))
572 __u32 haddr = ntohl(addr);
574 if (IN_CLASSA(haddr))
576 else if (IN_CLASSB(haddr))
578 else if (IN_CLASSC(haddr))
// Handle the IPv4 interface-address ioctls for namespace net.
//   cmd - one of the SIOCGIF* / SIOCSIF* requests handled below
//   arg - user pointer to a struct ifreq
// Visible flow:
//  1. Copy the ifreq from user space, force NUL termination of the name and
//     save the original sockaddr for later address matching.
//  2. Locate a colon in the name (alias syntax) and call dev_load.
//  3. Validate the command: get requests clear the sockaddr and remember
//     whether the caller supplied an AF_INET address; set requests require
//     CAP_NET_ADMIN and an AF_INET address.
//  4. Find the device by name and, if it has IPv4 state, the matching
//     address: first by label plus address, then by label alone.
//  5. Perform the request. Set requests that change an existing address
//     remove it from the list, modify it and insert it again.
//  6. Copy the ifreq back to user space; -EFAULT if that fails.
// NOTE(review): the ifr declaration, locking, break/goto statements, several
// conditions and the return statements are not visible in this listing.
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 struct sockaddr_in sin_orig;
590 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591 struct in_device *in_dev;
592 struct in_ifaddr **ifap = NULL;
593 struct in_ifaddr *ifa = NULL;
594 struct net_device *dev;
597 int tryaddrmatch = 0;
600 * Fetch the caller's info block into kernel space
603 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
605 ifr.ifr_name[IFNAMSIZ - 1] = 0;
607 /* save original address for comparison */
608 memcpy(&sin_orig, sin, sizeof(*sin));
610 colon = strchr(ifr.ifr_name, ':');
615 dev_load(net, ifr.ifr_name);
619 case SIOCGIFADDR: /* Get interface address */
620 case SIOCGIFBRDADDR: /* Get the broadcast address */
621 case SIOCGIFDSTADDR: /* Get the destination address */
622 case SIOCGIFNETMASK: /* Get the netmask for the interface */
623 /* Note that these ioctls will not sleep,
624 so that we do not impose a lock.
625 One day we will be forced to put shlock here (I mean SMP)
627 tryaddrmatch = (sin_orig.sin_family == AF_INET);
628 memset(sin, 0, sizeof(*sin));
629 sin->sin_family = AF_INET;
634 if (!capable(CAP_NET_ADMIN))
637 case SIOCSIFADDR: /* Set interface address (and family) */
638 case SIOCSIFBRDADDR: /* Set the broadcast address */
639 case SIOCSIFDSTADDR: /* Set the destination address */
640 case SIOCSIFNETMASK: /* Set the netmask for the interface */
642 if (!capable(CAP_NET_ADMIN))
645 if (sin->sin_family != AF_INET)
656 if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
662 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
664 /* Matthias Andree */
665 /* compare label and address (4.4BSD style) */
666 /* note: we only do this for a limited set of ioctls
667 and only if the original address family was AF_INET.
668 This is checked above. */
669 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr ==
678 /* we didn't get a match, maybe the application is
679 4.3BSD-style and passed in junk so we fall back to
680 comparing just the label */
682 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683 ifap = &ifa->ifa_next)
684 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
// Only SIOCSIFADDR and SIOCSIFFLAGS are allowed to proceed without an
// existing address entry.
689 ret = -EADDRNOTAVAIL;
690 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
694 case SIOCGIFADDR: /* Get interface address */
695 sin->sin_addr.s_addr = ifa->ifa_local;
698 case SIOCGIFBRDADDR: /* Get the broadcast address */
699 sin->sin_addr.s_addr = ifa->ifa_broadcast;
702 case SIOCGIFDSTADDR: /* Get the destination address */
703 sin->sin_addr.s_addr = ifa->ifa_address;
706 case SIOCGIFNETMASK: /* Get the netmask for the interface */
707 sin->sin_addr.s_addr = ifa->ifa_mask;
712 ret = -EADDRNOTAVAIL;
716 if (!(ifr.ifr_flags & IFF_UP))
717 inet_del_ifa(in_dev, ifap, 1);
720 ret = dev_change_flags(dev, ifr.ifr_flags);
723 case SIOCSIFADDR: /* Set interface address (and family) */
725 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
730 if ((ifa = inet_alloc_ifa()) == NULL)
733 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
735 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738 if (ifa->ifa_local == sin->sin_addr.s_addr)
740 inet_del_ifa(in_dev, ifap, 0);
741 ifa->ifa_broadcast = 0;
745 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
// Devices that are not point-to-point get a classful prefix and mask; the
// other assignment pair below uses a full 32-bit prefix.
747 if (!(dev->flags & IFF_POINTOPOINT)) {
748 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750 if ((dev->flags & IFF_BROADCAST) &&
751 ifa->ifa_prefixlen < 31)
752 ifa->ifa_broadcast = ifa->ifa_address |
755 ifa->ifa_prefixlen = 32;
756 ifa->ifa_mask = inet_make_mask(32);
758 ret = inet_set_ifa(dev, ifa);
761 case SIOCSIFBRDADDR: /* Set the broadcast address */
763 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764 inet_del_ifa(in_dev, ifap, 0);
765 ifa->ifa_broadcast = sin->sin_addr.s_addr;
766 inet_insert_ifa(ifa);
770 case SIOCSIFDSTADDR: /* Set the destination address */
772 if (ifa->ifa_address == sin->sin_addr.s_addr)
775 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778 inet_del_ifa(in_dev, ifap, 0);
779 ifa->ifa_address = sin->sin_addr.s_addr;
780 inet_insert_ifa(ifa);
783 case SIOCSIFNETMASK: /* Set the netmask for the interface */
786 * The mask we set must be legal.
789 if (bad_mask(sin->sin_addr.s_addr, 0))
792 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793 __be32 old_mask = ifa->ifa_mask;
794 inet_del_ifa(in_dev, ifap, 0);
795 ifa->ifa_mask = sin->sin_addr.s_addr;
796 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
798 /* See if current broadcast address matches
799 * with current netmask, then recalculate
800 * the broadcast address. Otherwise it's a
801 * funny address, so don't touch it since
802 * the user seems to know what (s)he's doing...
804 if ((dev->flags & IFF_BROADCAST) &&
805 (ifa->ifa_prefixlen < 31) &&
806 (ifa->ifa_broadcast ==
807 (ifa->ifa_local|~old_mask))) {
808 ifa->ifa_broadcast = (ifa->ifa_local |
809 ~sin->sin_addr.s_addr);
811 inet_insert_ifa(ifa);
821 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
// gifconf callback for PF_INET (registered in devinet_init).
//   dev - device whose IPv4 addresses are reported
//   buf - user buffer receiving struct ifreq records
//   len - space remaining in buf, in bytes
// For each address of the device a zeroed ifreq is built with a name (the
// address label or the device name), family AF_INET and an address, then
// copied to user space; buf, len and done advance by sizeof(struct ifreq)
// per record. A device without IPv4 state or without addresses takes the
// early exit at the top.
// NOTE(review): the ifr and done declarations, the condition choosing
// between label and device name, the address value stored, the handling
// when len is too small or the copy fails, and the return value are not
// visible in this listing.
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
827 struct in_device *in_dev = __in_dev_get_rtnl(dev);
828 struct in_ifaddr *ifa;
832 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835 for (; ifa; ifa = ifa->ifa_next) {
840 if (len < (int) sizeof(ifr))
842 memset(&ifr, 0, sizeof(struct ifreq));
844 strcpy(ifr.ifr_name, ifa->ifa_label);
846 strcpy(ifr.ifr_name, dev->name);
848 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
856 buf += sizeof(struct ifreq);
857 len -= sizeof(struct ifreq);
858 done += sizeof(struct ifreq);
// Choose a local source address (exported symbol).
//   dev   - preferred outgoing device
//   dst   - destination address, or 0 for no subnet preference
//   scope - largest acceptable address scope value
// First pass: the primary addresses of dev are examined. Entries whose scope
// value exceeds scope are passed over; an entry is taken when dst is 0 or
// lies in its subnet; the second addr assignment in the loop is a fallback
// whose guarding condition is not visible here.
// Second pass: under the dev_base_lock read lock, every device in the same
// namespace is scanned and the first primary address whose scope is not
// RT_SCOPE_LINK and does not exceed scope is taken.
// NOTE(review): the RCU locking, the exits between the two passes, the
// labels and the return statement are not visible in this listing.
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 struct in_device *in_dev;
868 struct net *net = dev_net(dev);
871 in_dev = __in_dev_get_rcu(dev);
875 for_primary_ifa(in_dev) {
876 if (ifa->ifa_scope > scope)
878 if (!dst || inet_ifa_match(dst, ifa)) {
879 addr = ifa->ifa_local;
883 addr = ifa->ifa_local;
884 } endfor_ifa(in_dev);
891 /* Not loopback addresses on loopback should be preferred
892 in this case. It is importnat that lo is the first interface
895 read_lock(&dev_base_lock);
897 for_each_netdev(net, dev) {
898 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901 for_primary_ifa(in_dev) {
902 if (ifa->ifa_scope != RT_SCOPE_LINK &&
903 ifa->ifa_scope <= scope) {
904 addr = ifa->ifa_local;
905 goto out_unlock_both;
907 } endfor_ifa(in_dev);
910 read_unlock(&dev_base_lock);
// Per-device helper for inet_confirm_addr: scans the addresses of in_dev for
// one compatible with dst, local and scope.
// Visible behaviour: a candidate local address is recorded in addr when its
// scope does not exceed scope; same records whether an entry satisfies both
// the local constraint and the dst constraint (a zero value means no
// constraint). The function returns addr when same is set and 0 otherwise.
// NOTE(review): the loop header, the declarations and several conditions and
// exits are not visible in this listing.
916 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
917 __be32 local, int scope)
924 (local == ifa->ifa_local || !local) &&
925 ifa->ifa_scope <= scope) {
926 addr = ifa->ifa_local;
931 same = (!local || inet_ifa_match(local, ifa)) &&
932 (!dst || inet_ifa_match(dst, ifa));
936 /* Is the selected addr into dst subnet? */
937 if (inet_ifa_match(addr, ifa))
939 /* No, then can we use new local src? */
940 if (ifa->ifa_scope <= scope) {
941 addr = ifa->ifa_local;
944 /* search for large dst subnet for addr */
948 } endfor_ifa(in_dev);
950 return same? addr : 0;
954 * Confirm that local IP address exists using wildcards:
955 * - in_dev: only on this interface, 0=any interface
956 * - dst: only in the same subnet as dst, 0=any dst
957 * - local: address, 0=autoselect the local address
958 * - scope: maximum allowed scope value for the local address
// Confirm that a local address exists, with zero values acting as wildcards.
// When scope is not RT_SCOPE_LINK the check is delegated to
// confirm_addr_indev on the given in_dev only. Otherwise every device in the
// namespace of in_dev->dev is scanned under the dev_base_lock read lock and
// confirm_addr_indev is applied to each one that has IPv4 state.
// NOTE(review): the addr and net declarations, the RCU locking, the loop
// exit on a hit and the return statement are not visible in this listing.
960 __be32 inet_confirm_addr(struct in_device *in_dev,
961 __be32 dst, __be32 local, int scope)
964 struct net_device *dev;
967 if (scope != RT_SCOPE_LINK)
968 return confirm_addr_indev(in_dev, dst, local, scope);
970 net = dev_net(in_dev->dev);
971 read_lock(&dev_base_lock);
973 for_each_netdev(net, dev) {
974 if ((in_dev = __in_dev_get_rcu(dev))) {
975 addr = confirm_addr_indev(in_dev, dst, local, scope);
981 read_unlock(&dev_base_lock);
// Add nb to inetaddr_chain, the blocking notifier chain on which this file
// publishes IPv4 address events (exported symbol). Returns the result of
// blocking_notifier_chain_register.
990 int register_inetaddr_notifier(struct notifier_block *nb)
992 return blocking_notifier_chain_register(&inetaddr_chain, nb);
// Remove nb from inetaddr_chain (exported symbol). Returns the result of
// blocking_notifier_chain_unregister.
995 int unregister_inetaddr_notifier(struct notifier_block *nb)
997 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1001 * alias numbering and to create unique labels if possible.
// Rewrite the label of every address on in_dev after the device was renamed.
// For each address the old label is saved, the label is overwritten with the
// new device name, and an alias suffix is re-attached: either the part of the
// old label starting at its colon, or a generated suffix of the form :N built
// from the named counter. The suffix is appended when it fits within
// IFNAMSIZ together with the device name, otherwise it is written over the
// tail of the label so that the result still fits. RTM_NEWADDR is sent for
// the address.
// NOTE(review): the named declaration, the conditions selecting between the
// colon suffix and the generated one, and any skip path for the first
// address are not visible in this listing.
1003 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1005 struct in_ifaddr *ifa;
1008 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1009 char old[IFNAMSIZ], *dot;
1011 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1012 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015 dot = strchr(old, ':');
1017 sprintf(old, ":%d", named);
1020 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1021 strcat(ifa->ifa_label, dot);
1023 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
// Netdevice notifier callback (installed through ip_netdev_notifier).
//   event - NETDEV_* event code
//   ptr   - the struct net_device the event refers to
// Visible behaviour:
//  - NETDEV_REGISTER is handled before the switch: inetdev_init creates the
//    IPv4 state, -ENOMEM is reported through notifier_from_errno on failure,
//    and loopback devices get NOXFRM and NOPOLICY set. Reaching
//    NETDEV_REGISTER inside the switch is logged as a bug.
//  - For a loopback device an address of INADDR_LOOPBACK with prefix length
//    8 and host scope is created and inserted (the enclosing case label is
//    not visible here).
//  - NETDEV_UNREGISTER destroys the IPv4 state.
//  - NETDEV_CHANGENAME renames the address labels and re-registers the
//    sysctl entries under the new name.
// NOTE(review): the third parameter, the switch header, several case labels,
// the NETDEV_CHANGEMTU condition and the return statements are not visible
// in this listing.
1030 /* Called only under RTNL semaphore */
1032 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035 struct net_device *dev = ptr;
1036 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1041 if (event == NETDEV_REGISTER) {
1042 in_dev = inetdev_init(dev);
1044 return notifier_from_errno(-ENOMEM);
1045 if (dev->flags & IFF_LOOPBACK) {
1046 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1047 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1054 case NETDEV_REGISTER:
1055 printk(KERN_DEBUG "inetdev_event: bug\n");
1061 if (dev->flags & IFF_LOOPBACK) {
1062 struct in_ifaddr *ifa;
1063 if ((ifa = inet_alloc_ifa()) != NULL) {
1065 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1066 ifa->ifa_prefixlen = 8;
1067 ifa->ifa_mask = inet_make_mask(8);
1068 in_dev_hold(in_dev);
1069 ifa->ifa_dev = in_dev;
1070 ifa->ifa_scope = RT_SCOPE_HOST;
1071 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1072 inet_insert_ifa(ifa);
1080 case NETDEV_CHANGEMTU:
1083 /* MTU fell below 68, disable IP */
1084 case NETDEV_UNREGISTER:
1085 inetdev_destroy(in_dev);
1087 case NETDEV_CHANGENAME:
1088 /* Do not notify about label change, this event is
1089 * not interesting to applications using netlink.
1091 inetdev_changename(dev, in_dev);
1093 devinet_sysctl_unregister(in_dev);
1094 devinet_sysctl_register(in_dev);
// Notifier block that routes netdevice events to inetdev_event; registered
// with register_netdevice_notifier in devinet_init.
1101 static struct notifier_block ip_netdev_notifier = {
1102 .notifier_call =inetdev_event,
// Size of the netlink message that rtmsg_ifa allocates for one address: the
// aligned ifaddrmsg header plus room for three 4-byte attributes
// (IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST) and an IFNAMSIZ-byte IFA_LABEL.
// Must stay in step with the attributes written by inet_fill_ifaddr;
// rtmsg_ifa warns when filling fails with -EMSGSIZE.
1105 static inline size_t inet_nlmsg_size(void)
1107 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1108 + nla_total_size(4) /* IFA_ADDRESS */
1109 + nla_total_size(4) /* IFA_LOCAL */
1110 + nla_total_size(4) /* IFA_BROADCAST */
1111 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
// Serialise one address into a netlink message appended to skb.
//   skb   - buffer receiving the message
//   ifa   - address to describe
//   pid, seq, event, flags - values passed to nlmsg_put for the header
// The ifaddrmsg header carries family AF_INET, the prefix length, the
// address flags with IFA_F_PERMANENT always added, the scope and the
// interface index of the owning device. IFA_ADDRESS, IFA_BROADCAST and
// IFA_LABEL are emitted only when the corresponding field is non-zero or
// non-empty. On success the result of nlmsg_end is returned; the message is
// cancelled with nlmsg_cancel on the failure path.
// NOTE(review): the check of the nlmsg_put result, the condition guarding
// IFA_LOCAL, the failure label and the failure return value are not visible
// in this listing.
1114 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1115 u32 pid, u32 seq, int event, unsigned int flags)
1117 struct ifaddrmsg *ifm;
1118 struct nlmsghdr *nlh;
1120 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1124 ifm = nlmsg_data(nlh);
1125 ifm->ifa_family = AF_INET;
1126 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1127 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1128 ifm->ifa_scope = ifa->ifa_scope;
1129 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1131 if (ifa->ifa_address)
1132 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1135 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1137 if (ifa->ifa_broadcast)
1138 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1140 if (ifa->ifa_label[0])
1141 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1143 return nlmsg_end(skb, nlh);
1146 nlmsg_cancel(skb, nlh);
// rtnetlink dump handler for RTM_GETADDR (registered in devinet_init).
// Resumable dump: cb->args[0] holds the device position and cb->args[1] the
// address position at which the previous call stopped. Every device of the
// namespace is visited; for devices with IPv4 state each address at or
// beyond the saved address position is written with inet_fill_ifaddr as an
// RTM_NEWADDR message flagged NLM_F_MULTI. A result of zero or less from
// inet_fill_ifaddr ends the pass. The address position is stored back into
// cb->args[1].
// NOTE(review): the idx and ip_idx declarations, the device skip logic, the
// sequence-number argument, the exit label and the return statement are not
// visible in this listing.
1150 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1152 struct net *net = sock_net(skb->sk);
1154 struct net_device *dev;
1155 struct in_device *in_dev;
1156 struct in_ifaddr *ifa;
1157 int s_ip_idx, s_idx = cb->args[0];
1159 s_ip_idx = ip_idx = cb->args[1];
1161 for_each_netdev(net, dev) {
1166 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1169 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1170 ifa = ifa->ifa_next, ip_idx++) {
1171 if (ip_idx < s_ip_idx)
1173 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1175 RTM_NEWADDR, NLM_F_MULTI) <= 0)
1184 cb->args[1] = ip_idx;
// Broadcast an address event to the RTNLGRP_IPV4_IFADDR netlink group.
//   event - RTM_NEWADDR or RTM_DELADDR as passed by the callers in this file
//   ifa   - address the event refers to
//   nlh   - originating request or NULL; supplies the sequence number
//           (0 when NULL)
//   pid   - NOTE(review): this parameter is on a declaration line not visible
//           in this listing; it is passed to inet_fill_ifaddr and rtnl_notify
// Allocates a message of inet_nlmsg_size bytes, fills it with
// inet_fill_ifaddr and hands it to rtnl_notify. A fill failure of -EMSGSIZE
// triggers WARN_ON because it means inet_nlmsg_size is wrong. The error is
// recorded on the group with rtnl_set_sk_err.
// NOTE(review): the allocation check, the error label and the conditions
// around rtnl_set_sk_err are not visible in this listing.
1189 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1192 struct sk_buff *skb;
1193 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1197 net = dev_net(ifa->ifa_dev->dev);
1198 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1202 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1204 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1205 WARN_ON(err == -EMSGSIZE);
1209 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1212 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1215 #ifdef CONFIG_SYSCTL
// Propagate entry i of the namespace default configuration to every device
// in net whose own entry i has not been explicitly set (its bit in cnf.state
// is clear). Runs under the dev_base_lock read lock.
// NOTE(review): any RCU locking around __in_dev_get_rcu is not visible in
// this listing.
1217 static void devinet_copy_dflt_conf(struct net *net, int i)
1219 struct net_device *dev;
1221 read_lock(&dev_base_lock);
1222 for_each_netdev(net, dev) {
1223 struct in_device *in_dev;
1225 in_dev = __in_dev_get_rcu(dev);
1226 if (in_dev && !test_bit(i, in_dev->cnf.state))
1227 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1230 read_unlock(&dev_base_lock);
// Apply a change of the namespace-wide forwarding switch.
// Reads the current all/FORWARDING value, sets all/ACCEPT_REDIRECTS to its
// logical inverse, copies the value into default/FORWARDING, then sets
// FORWARDING on the devices of the namespace under the dev_base_lock read
// lock.
// NOTE(review): the check that in_dev is non-NULL and any RCU locking or
// cache flush are not visible in this listing.
1233 static void inet_forward_change(struct net *net)
1235 struct net_device *dev;
1236 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1238 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1239 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1241 read_lock(&dev_base_lock);
1242 for_each_netdev(net, dev) {
1243 struct in_device *in_dev;
1245 in_dev = __in_dev_get_rcu(dev);
1247 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1250 read_unlock(&dev_base_lock);
// proc handler for ordinary devconf integers.
// Delegates the read or write to proc_dointvec. The entry index is derived
// from the distance between ctl->data and the start of the data array of the
// owning ipv4_devconf (ctl->extra1); the matching bit in cnf->state is set to
// mark the entry as explicitly configured. When the owning configuration is
// the namespace default (ctl->extra2 is the namespace), the value is also
// copied to the devices that have not set it themselves.
// NOTE(review): the condition under which the state update runs (presumably
// writes only) and the return statement are not visible in this listing.
1255 static int devinet_conf_proc(ctl_table *ctl, int write,
1256 struct file* filp, void __user *buffer,
1257 size_t *lenp, loff_t *ppos)
1259 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1262 struct ipv4_devconf *cnf = ctl->extra1;
1263 struct net *net = ctl->extra2;
1264 int i = (int *)ctl->data - cnf->data;
1266 set_bit(i, cnf->state);
1268 if (cnf == net->ipv4.devconf_dflt)
1269 devinet_copy_dflt_conf(net, i);
// Strategy routine for the devconf entries (the .strategy member of the
// table entries built by the DEVINET_SYSCTL_* macros).
// Visible behaviour: requires a new value of exactly sizeof(int) bytes and
// reads it with get_user; when the caller supplied oldval and oldlenp, the
// current value is copied out, truncated to table->maxlen, and the copied
// length is written back. Afterwards the entry is marked as explicitly set
// in cnf->state and, when the configuration is the namespace default, the
// value is propagated with devinet_copy_dflt_conf.
// NOTE(review): the declarations of net, new, len and i, the comparison
// against the old value, the store of the new value, the error codes and
// the return statements are not visible in this listing.
1275 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1276 void __user *oldval, size_t __user *oldlenp,
1277 void __user *newval, size_t newlen)
1279 struct ipv4_devconf *cnf;
1281 int *valp = table->data;
1285 if (!newval || !newlen)
1288 if (newlen != sizeof(int))
1291 if (get_user(new, (int __user *)newval))
1297 if (oldval && oldlenp) {
1300 if (get_user(len, oldlenp))
1304 if (len > table->maxlen)
1305 len = table->maxlen;
1306 if (copy_to_user(oldval, valp, len))
1308 if (put_user(len, oldlenp))
1315 cnf = table->extra1;
1316 net = table->extra2;
1317 i = (int *)table->data - cnf->data;
1319 set_bit(i, cnf->state);
1321 if (cnf == net->ipv4.devconf_dflt)
1322 devinet_copy_dflt_conf(net, i);
// proc handler for the forwarding entries.
// Performs the read or write with proc_dointvec. After a write that changed
// the value, the entry is identified by comparing its data pointer: the
// all/FORWARDING entry triggers inet_forward_change for the namespace, and
// an entry that is not default/FORWARDING takes a separate branch.
// NOTE(review): the val declaration holding the previous value, the body of
// the last branch and the return statement are not visible in this listing.
1327 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1328 struct file* filp, void __user *buffer,
1329 size_t *lenp, loff_t *ppos)
1331 int *valp = ctl->data;
1333 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1335 if (write && *valp != val) {
1336 struct net *net = ctl->extra2;
1338 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1339 inet_forward_change(net);
1340 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
// proc handler used by the DEVINET_SYSCTL_FLUSHING_ENTRY entries.
// Performs the read or write with proc_dointvec and then tests whether a
// write changed the value.
// NOTE(review): the val declaration, the action taken on a changed value
// (the function name suggests a flush - confirm) and the return statement
// are not visible in this listing.
1347 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1348 struct file* filp, void __user *buffer,
1349 size_t *lenp, loff_t *ppos)
1351 int *valp = ctl->data;
1353 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1355 if (write && *valp != val)
// Strategy counterpart of ipv4_doint_and_flush: starts by delegating to
// devinet_conf_sysctl with the same arguments.
// NOTE(review): the remaining call arguments, whatever follows the call and
// the return statement are not visible in this listing.
1361 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1362 void __user *oldval, size_t __user *oldlenp,
1363 void __user *newval, size_t newlen)
1365 int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
// Helper macros that build the ctl_table entries of the devinet_sysctl
// template.
//   DEVINET_SYSCTL_ENTRY          - generic entry: ctl_name is
//       NET_IPV4_CONF_<attr>, data points at slot NET_IPV4_CONF_<attr> - 1 of
//       the static ipv4_devconf, maxlen is sizeof(int), extra1 is
//       &ipv4_devconf, with the given proc handler and strategy routine.
//   DEVINET_SYSCTL_RW_ENTRY       - mode 0644 with the generic handlers.
//   DEVINET_SYSCTL_RO_ENTRY       - mode 0444 with the generic handlers.
//   DEVINET_SYSCTL_COMPLEX_ENTRY  - mode 0644 with caller-supplied handlers.
//   DEVINET_SYSCTL_FLUSHING_ENTRY - complex entry using ipv4_doint_and_flush
//       and ipv4_doint_and_flush_strategy.
// __devinet_sysctl_register later rebases data and extra1 of each copied
// entry onto the target ipv4_devconf.
// NOTE(review): some lines of DEVINET_SYSCTL_ENTRY (procname, mode, closing
// brace) are not visible in this listing.
1375 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1377 .ctl_name = NET_IPV4_CONF_ ## attr, \
1379 .data = ipv4_devconf.data + \
1380 NET_IPV4_CONF_ ## attr - 1, \
1381 .maxlen = sizeof(int), \
1383 .proc_handler = proc, \
1384 .strategy = sysctl, \
1385 .extra1 = &ipv4_devconf, \
1388 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1389 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1390 devinet_conf_sysctl)
1392 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1393 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1394 devinet_conf_sysctl)
1396 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1397 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1399 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1400 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1401 ipv4_doint_and_flush_strategy)
// Template sysctl table for one ipv4_devconf instance. It holds the header
// returned by sysctl registration and an array of __NET_IPV4_CONF_MAX entries
// (__devinet_sysctl_register touches all but the last element).
// __devinet_sysctl_register duplicates this template with kmemdup for every
// configuration it registers and rebases the entries onto the target
// structure.
// Entry kinds: forwarding uses devinet_sysctl_forward, mc_forwarding is
// read-only, the entries from accept_redirects through arp_accept are plain
// read-write integers, and disable_xfrm, disable_policy, force_igmp_version
// and promote_secondaries use the flushing handlers.
// NOTE(review): the dev_name member (used by __devinet_sysctl_register) and
// the designators and closing lines of the initializer are not visible in
// this listing.
1403 static struct devinet_sysctl_table {
1404 struct ctl_table_header *sysctl_header;
1405 struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1407 } devinet_sysctl = {
1409 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1410 devinet_sysctl_forward,
1411 devinet_conf_sysctl),
1412 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1414 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1415 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1416 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1417 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1418 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1419 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1420 "accept_source_route"),
1421 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1422 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1423 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1424 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1425 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1426 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1427 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1428 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1429 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1431 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1432 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1433 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1434 "force_igmp_version"),
1435 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1436 "promote_secondaries"),
// Register the sysctl directory net/ipv4/conf/<dev_name> for configuration p.
//   net      - namespace the table is registered in
//   dev_name - directory name (a device name, or all / default as passed by
//              devinet_init_net)
//   ctl_name - binary sysctl id used for the directory
//   p        - ipv4_devconf instance the entries operate on
// Steps: duplicate the devinet_sysctl template, shift the data pointer of
// each entry by the byte offset between p and the static ipv4_devconf so it
// addresses the same slot inside p, point extra1 at p and extra2 at net,
// duplicate dev_name so the directory name cannot change underneath sysctl,
// fill in the fourth path element (index DEVINET_CTL_PATH_DEV) and register
// the table.
// NOTE(review): the i declaration, the allocation failure paths, the store
// of the table pointer in p->sysctl, the cleanup labels and the return
// values are not visible in this listing.
1440 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1441 int ctl_name, struct ipv4_devconf *p)
1444 struct devinet_sysctl_table *t;
1446 #define DEVINET_CTL_PATH_DEV 3
1448 struct ctl_path devinet_ctl_path[] = {
1449 { .procname = "net", .ctl_name = CTL_NET, },
1450 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1451 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1452 { /* to be set */ },
1456 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1460 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1461 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1462 t->devinet_vars[i].extra1 = p;
1463 t->devinet_vars[i].extra2 = net;
1467 * Make a copy of dev_name, because '.procname' is regarded as const
1468 * by sysctl and we wouldn't want anyone to change it under our feet
1469 * (see SIOCSIFNAME).
1471 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1475 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1476 devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1478 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1480 if (!t->sysctl_header)
// Undo __devinet_sysctl_register for configuration cnf: fetch the table
// recorded in cnf->sysctl and unregister its sysctl header.
// NOTE(review): the NULL check, the clearing of cnf->sysctl and the release
// of the duplicated name and table are not visible in this listing.
1494 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1496 struct devinet_sysctl_table *t = cnf->sysctl;
1502 unregister_sysctl_table(t->sysctl_header);
// Register both sysctl trees of a device: the ARP neighbour parameters
// through neigh_sysctl_register, and the per-device IPv4 configuration under
// the device name with the interface index as binary sysctl id. The results
// of both calls are not examined on the visible lines.
1507 static void devinet_sysctl_register(struct in_device *idev)
1509 neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1510 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1511 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1512 idev->dev->ifindex, &idev->cnf);
// Remove the sysctl trees created by devinet_sysctl_register, in the reverse
// order: first the IPv4 configuration entries, then the neighbour parameters.
1515 static void devinet_sysctl_unregister(struct in_device *idev)
1517 __devinet_sysctl_unregister(&idev->cnf);
1518 neigh_sysctl_unregister(idev->arp_parms);
// Sysctl table holding the ip_forward entry, an alias for the FORWARDING slot
// of the static ipv4_devconf handled by devinet_sysctl_forward. extra1 and
// extra2 point at the static configuration and the initial namespace;
// devinet_init_net duplicates and repoints the table for other namespaces.
// NOTE(review): the mode member and the terminating entry are not visible in
// this listing.
1521 static struct ctl_table ctl_forward_entry[] = {
1523 .ctl_name = NET_IPV4_FORWARD,
1524 .procname = "ip_forward",
1525 .data = &ipv4_devconf.data[
1526 NET_IPV4_CONF_FORWARDING - 1],
1527 .maxlen = sizeof(int),
1529 .proc_handler = devinet_sysctl_forward,
1530 .strategy = devinet_conf_sysctl,
1531 .extra1 = &ipv4_devconf,
1532 .extra2 = &init_net,
// Sysctl path net/ipv4 under which devinet_init_net registers the ip_forward
// table.
// NOTE(review): the terminating path element is not visible in this listing.
1537 static __net_initdata struct ctl_path net_ipv4_path[] = {
1538 { .procname = "net", .ctl_name = CTL_NET, },
1539 { .procname = "ipv4", .ctl_name = NET_IPV4, },
// Per-namespace initialisation (the .init hook of devinet_ops).
// The initial namespace uses the static ipv4_devconf, ipv4_devconf_dflt and
// ctl_forward_entry objects directly. Any other namespace gets kmemdup copies
// of all three, with the copied ip_forward entry repointed at the new
// configuration and namespace. With CONFIG_SYSCTL the all and default
// directories and the ip_forward table are registered and the header is
// stored in net->ipv4.forw_hdr. Finally the two configurations are stored
// in net->ipv4.devconf_all and net->ipv4.devconf_dflt.
// The tail of the function unwinds a failed initialisation in reverse order;
// the comparisons against the static objects there guard objects that were
// not duplicated.
// NOTE(review): the err declaration, the failure checks, the labels, the
// release statements under the trailing conditions and the return statements
// are not visible in this listing.
1544 static __net_init int devinet_init_net(struct net *net)
1547 struct ipv4_devconf *all, *dflt;
1548 #ifdef CONFIG_SYSCTL
1549 struct ctl_table *tbl = ctl_forward_entry;
1550 struct ctl_table_header *forw_hdr;
1554 all = &ipv4_devconf;
1555 dflt = &ipv4_devconf_dflt;
1557 if (net != &init_net) {
1558 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1562 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1564 goto err_alloc_dflt;
1566 #ifdef CONFIG_SYSCTL
1567 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1571 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1572 tbl[0].extra1 = all;
1573 tbl[0].extra2 = net;
1577 #ifdef CONFIG_SYSCTL
1578 err = __devinet_sysctl_register(net, "all",
1579 NET_PROTO_CONF_ALL, all);
1583 err = __devinet_sysctl_register(net, "default",
1584 NET_PROTO_CONF_DEFAULT, dflt);
1589 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1590 if (forw_hdr == NULL)
1592 net->ipv4.forw_hdr = forw_hdr;
1595 net->ipv4.devconf_all = all;
1596 net->ipv4.devconf_dflt = dflt;
1599 #ifdef CONFIG_SYSCTL
1601 __devinet_sysctl_unregister(dflt);
1603 __devinet_sysctl_unregister(all);
1605 if (tbl != ctl_forward_entry)
1609 if (dflt != &ipv4_devconf_dflt)
1612 if (all != &ipv4_devconf)
// Per-namespace teardown (the .exit hook of devinet_ops).
// With CONFIG_SYSCTL the ip_forward table pointer is read from the header
// before the header is unregistered, then the default and all sysctl
// directories are unregistered. The two configuration structures are
// released with kfree.
// NOTE(review): the release of tbl is not visible in this listing.
1618 static __net_exit void devinet_exit_net(struct net *net)
1620 #ifdef CONFIG_SYSCTL
1621 struct ctl_table *tbl;
1623 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1624 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1625 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1626 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1629 kfree(net->ipv4.devconf_dflt);
1630 kfree(net->ipv4.devconf_all);
// Per-namespace operations for this file: devinet_init_net on namespace
// creation and devinet_exit_net on removal. Registered with
// register_pernet_subsys in devinet_init.
1633 static __net_initdata struct pernet_operations devinet_ops = {
1634 .init = devinet_init_net,
1635 .exit = devinet_exit_net,
// Boot-time initialisation of IPv4 device support: registers the
// per-namespace operations, the PF_INET gifconf callback, the netdevice
// notifier, and the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR and
// RTM_GETADDR (the last one as a dump handler only). The results of the
// registration calls are not examined on the visible lines.
1638 void __init devinet_init(void)
1640 register_pernet_subsys(&devinet_ops);
1642 register_gifconf(PF_INET, inet_gifconf);
1643 register_netdevice_notifier(&ip_netdev_notifier);
1645 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1646 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1647 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
// Functions from this file made available to other kernel code.
1650 EXPORT_SYMBOL(in_dev_finish_destroy);
1651 EXPORT_SYMBOL(inet_select_addr);
1652 EXPORT_SYMBOL(inetdev_by_index);
1653 EXPORT_SYMBOL(register_inetaddr_notifier);
1654 EXPORT_SYMBOL(unregister_inetaddr_notifier);