2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/rtnetlink.h>
52 #include <linux/init.h>
53 #include <linux/notifier.h>
54 #include <linux/inetdevice.h>
55 #include <linux/igmp.h>
57 #include <linux/sysctl.h>
59 #include <linux/kmod.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/netlink.h>
/*
 * Global IPv4 device configuration. The visible initializers enable
 * accept_redirects and secure_redirects. The sysctl template later in
 * this file points its .data fields at members of this structure.
 */
67 struct ipv4_devconf ipv4_devconf = {
68 .accept_redirects = 1,
70 .secure_redirects = 1,
/*
 * Default configuration template. inetdev_init() copies it with memcpy()
 * into the cnf of each in_device it creates, and devinet_init() passes it
 * to devinet_sysctl_register() with a NULL in_device. The visible
 * initializers enable accept_redirects, secure_redirects and
 * accept_source_route.
 */
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 .accept_redirects = 1,
77 .secure_redirects = 1,
79 .accept_source_route = 1,
/*
 * Netlink attribute policy for IPv4 address messages. It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr(). The four
 * address attributes are declared NLA_U32; IFA_LABEL is an NLA_STRING
 * with .len set to IFNAMSIZ - 1.
 */
82 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
83 [IFA_LOCAL] = { .type = NLA_U32 },
84 [IFA_ADDRESS] = { .type = NLA_U32 },
85 [IFA_BROADCAST] = { .type = NLA_U32 },
86 [IFA_ANYCAST] = { .type = NLA_U32 },
87 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
90 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
92 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
93 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
96 static void devinet_sysctl_register(struct in_device *in_dev,
97 struct ipv4_devconf *p);
98 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
101 /* Locks all the inet devices. */
/*
 * Allocate a zero-filled in_ifaddr with kzalloc(GFP_KERNEL) and
 * initialize its RCU head.
 * NOTE(review): presumably returns the new ifa, or NULL when the
 * allocation fails (callers test the result against NULL) -- confirm
 * against the full function body.
 */
103 static struct in_ifaddr *inet_alloc_ifa(void)
105 struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
108 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr that
 * embeds @head and calls in_dev_put() on the in_device it points to.
 * NOTE(review): the ifa itself is presumably freed here as well --
 * confirm against the full function body.
 */
114 static void inet_rcu_free_ifa(struct rcu_head *head)
116 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
118 in_dev_put(ifa->ifa_dev);
/*
 * Defer the release of @ifa: queue inet_rcu_free_ifa() on the ifa's RCU
 * head with call_rcu().
 */
122 static inline void inet_free_ifa(struct in_ifaddr *ifa)
124 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown step for an in_device.
 *
 * BUG_TRAP() checks that both the address list and the multicast list
 * are already empty. With NET_REFCNT_DEBUG defined, the in_device pointer
 * and the device name (or "NIL") are logged at KERN_DEBUG.
 *
 * NOTE(review): the condition guarding the "Freeing alive in_device"
 * message and the actual release of idev are not visible here --
 * presumably the message fires when the device was not marked dead;
 * confirm against the full function body.
 */
127 void in_dev_finish_destroy(struct in_device *idev)
129 struct net_device *dev = idev->dev;
131 BUG_TRAP(!idev->ifa_list);
132 BUG_TRAP(!idev->mc_list);
133 #ifdef NET_REFCNT_DEBUG
134 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
135 idev, dev ? dev->name : "NIL");
139 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 state (struct in_device) for @dev and attach it.
 *
 * Visible steps, in order:
 *  - allocate a zeroed in_device and initialize its RCU head;
 *  - seed in_dev->cnf from ipv4_devconf_dflt and clear cnf.sysctl;
 *  - allocate ARP neighbour parameters from arp_tbl;
 *  - register the neighbour sysctls and the devinet sysctls;
 *  - initialize multicast state with ip_mc_init_dev();
 *  - publish the structure through dev->ip_ptr with rcu_assign_pointer().
 *
 * NOTE(review): reference counting, the action taken when IFF_UP is set,
 * the error unwinding and the return value are not visible here.
 * Callers compare the result against NULL, so presumably it returns the
 * new in_device or NULL on failure -- confirm.
 */
145 struct in_device *inetdev_init(struct net_device *dev)
147 struct in_device *in_dev;
151 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
154 INIT_RCU_HEAD(&in_dev->rcu_head);
155 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
156 in_dev->cnf.sysctl = NULL;
158 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
160 /* Reference in_dev->dev */
163 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
164 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
167 /* Account for reference dev->ip_ptr (below) */
171 devinet_sysctl_register(in_dev, &in_dev->cnf);
173 ip_mc_init_dev(in_dev);
174 if (dev->flags & IFF_UP)
177 /* we can receive as soon as ip_ptr is set -- do this last */
178 rcu_assign_pointer(dev->ip_ptr, in_dev);
/*
 * RCU callback queued by inetdev_destroy(). Recovers the in_device that
 * embeds @head.
 * NOTE(review): presumably drops a reference on idev -- the statement
 * doing so is not visible here; confirm.
 */
187 static void in_dev_rcu_put(struct rcu_head *head)
189 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 *
 * Visible steps: destroy the multicast state, delete addresses from the
 * head of ifa_list with inet_del_ifa(..., 0) until the list is empty,
 * unregister the devinet and neighbour sysctls, release the ARP
 * parameters, and queue in_dev_rcu_put() on the in_device's RCU head.
 *
 * NOTE(review): the loopback device is special-cased, but the action
 * taken for it is not visible here. What else happens inside the address
 * loop and how dev->ip_ptr is cleared are not visible either -- confirm.
 */
193 static void inetdev_destroy(struct in_device *in_dev)
195 struct in_ifaddr *ifa;
196 struct net_device *dev;
201 if (dev == &loopback_dev)
206 ip_mc_destroy_dev(in_dev);
208 while ((ifa = in_dev->ifa_list) != NULL) {
209 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214 devinet_sysctl_unregister(&in_dev->cnf);
220 neigh_sysctl_unregister(in_dev->arp_parms);
222 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
225 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Iterate over @in_dev with for_primary_ifa(), looking for an address
 * that inet_ifa_match() accepts for @a and, when @b is non-zero, for @b
 * as well.
 * NOTE(review): presumably returns 1 on a match and 0 otherwise; the
 * return statements and any locking are not visible here -- confirm.
 */
228 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
231 for_primary_ifa(in_dev) {
232 if (inet_ifa_match(a, ifa)) {
233 if (!b || inet_ifa_match(b, ifa)) {
238 } endfor_ifa(in_dev);
/*
 * Unlink the address at *@ifap from @in_dev and announce the removal.
 *
 * @in_dev:  owning in_device.
 * @ifap:    address of the list link that points at the entry to remove.
 * @destroy: NOTE(review): its use is not visible here -- presumably it
 *           selects whether the entry is freed after unlinking; confirm.
 * @nlh:     originating netlink header, passed through to rtmsg_ifa().
 * @pid:     netlink pid, passed through to rtmsg_ifa().
 *
 * Visible behavior:
 *  - If the entry is a primary (IFA_F_SECONDARY clear), the entries after
 *    it are scanned. The scan tracks last_prim, skips entries that are
 *    not secondaries of the same mask and subnet, and unlinks matching
 *    secondaries, announcing each with RTM_DELADDR and the notifier
 *    chain. do_promote is read from IN_DEV_PROMOTE_SECONDARIES(); how it
 *    selects a promotion candidate is not visible here.
 *  - The entry itself is unlinked, then announced with RTM_DELADDR
 *    followed by the NETDEV_DOWN notifier.
 *  - A promoted secondary is re-linked after last_prim, loses
 *    IFA_F_SECONDARY and is announced with RTM_NEWADDR and the notifier
 *    chain. The remaining entries are then compared against the deleted
 *    address by mask and subnet (the action taken is not visible).
 *  - inetdev_destroy() is called when ifa_list is empty.
 *    NOTE(review): possibly only when @destroy is set -- confirm.
 */
243 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
244 int destroy, struct nlmsghdr *nlh, u32 pid)
246 struct in_ifaddr *promote = NULL;
247 struct in_ifaddr *ifa, *ifa1 = *ifap;
248 struct in_ifaddr *last_prim = in_dev->ifa_list;
249 struct in_ifaddr *prev_prom = NULL;
250 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
254 /* 1. Deleting primary ifaddr forces deletion of all secondaries
255 * unless alias promotion is set
258 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
259 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
261 while ((ifa = *ifap1) != NULL) {
262 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
263 ifa1->ifa_scope <= ifa->ifa_scope)
266 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
267 ifa1->ifa_mask != ifa->ifa_mask ||
268 !inet_ifa_match(ifa1->ifa_address, ifa)) {
269 ifap1 = &ifa->ifa_next;
275 *ifap1 = ifa->ifa_next;
277 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
278 blocking_notifier_call_chain(&inetaddr_chain,
290 *ifap = ifa1->ifa_next;
292 /* 3. Announce address deletion */
294 /* Send message first, then call notifier.
295 At first sight, FIB update triggered by notifier
296 will refer to already deleted ifaddr, that could confuse
297 netlink listeners. It is not true: look, gated sees
298 that route deleted and if it still thinks that ifaddr
299 is valid, it will try to restore deleted routes... Grr.
300 So that, this order is correct.
302 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
303 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
308 prev_prom->ifa_next = promote->ifa_next;
309 promote->ifa_next = last_prim->ifa_next;
310 last_prim->ifa_next = promote;
313 promote->ifa_flags &= ~IFA_F_SECONDARY;
314 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
315 blocking_notifier_call_chain(&inetaddr_chain,
317 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
318 if (ifa1->ifa_mask != ifa->ifa_mask ||
319 !inet_ifa_match(ifa1->ifa_address, ifa))
328 if (!in_dev->ifa_list)
329 inetdev_destroy(in_dev);
/*
 * Convenience wrapper around __inet_del_ifa() for callers without a
 * netlink request: passes a NULL header and pid 0.
 */
333 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
336 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa: address to insert; ifa_dev must already be set.
 * @nlh: originating netlink header, passed through to rtmsg_ifa().
 * The pid parameter (declared on a line not visible here) is likewise
 * passed through to rtmsg_ifa().
 *
 * Visible behavior:
 *  - an address with ifa_local == 0 takes a separate path (not visible);
 *  - the address starts out primary (IFA_F_SECONDARY cleared);
 *  - while walking the list, last_primary is advanced past every primary
 *    whose ifa_scope is >= the new address's scope;
 *  - an existing entry with the same mask in the same subnet is checked
 *    for an identical local address and for a differing scope (the
 *    outcome of either check is not visible); otherwise @ifa is marked
 *    IFA_F_SECONDARY;
 *  - a primary address is passed to net_srandom();
 *  - @ifa is linked in via *ifap, then RTM_NEWADDR is sent and the
 *    NETDEV_UP notifier chain is called.
 *
 * NOTE(review): presumably returns 0 on success and a negative errno for
 * rejected addresses -- the return statements are not visible; confirm.
 */
339 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
342 struct in_device *in_dev = ifa->ifa_dev;
343 struct in_ifaddr *ifa1, **ifap, **last_primary;
347 if (!ifa->ifa_local) {
352 ifa->ifa_flags &= ~IFA_F_SECONDARY;
353 last_primary = &in_dev->ifa_list;
355 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
356 ifap = &ifa1->ifa_next) {
357 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
358 ifa->ifa_scope <= ifa1->ifa_scope)
359 last_primary = &ifa1->ifa_next;
360 if (ifa1->ifa_mask == ifa->ifa_mask &&
361 inet_ifa_match(ifa1->ifa_address, ifa)) {
362 if (ifa1->ifa_local == ifa->ifa_local) {
366 if (ifa1->ifa_scope != ifa->ifa_scope) {
370 ifa->ifa_flags |= IFA_F_SECONDARY;
374 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
375 net_srandom(ifa->ifa_local);
379 ifa->ifa_next = *ifap;
382 /* Send message first, then call notifier.
383 Notifier will trigger FIB update, so that
384 listeners of netlink will know about new ifaddr */
385 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
386 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Convenience wrapper around __inet_insert_ifa() for callers without a
 * netlink request: passes a NULL header and pid 0 and returns its result.
 */
391 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 return __inet_insert_ifa(ifa, NULL, 0);
/*
 * Bind @ifa to @dev and insert it into the device's address list.
 *
 * Fetches the device's in_device with __in_dev_get_rtnl(); inetdev_init()
 * is called on a path whose condition is not visible here (presumably
 * when the device has no in_device yet). If @ifa is not already bound to
 * that in_device, BUG_TRAP() checks that it was unbound, and ifa_dev is
 * set. Addresses matching LOOPBACK() get RT_SCOPE_HOST.
 *
 * Returns the result of inet_insert_ifa().
 * NOTE(review): earlier error returns are not visible -- confirm.
 */
396 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 struct in_device *in_dev = __in_dev_get_rtnl(dev);
403 in_dev = inetdev_init(dev);
409 if (ifa->ifa_dev != in_dev) {
410 BUG_TRAP(!ifa->ifa_dev);
412 ifa->ifa_dev = in_dev;
414 if (LOOPBACK(ifa->ifa_local))
415 ifa->ifa_scope = RT_SCOPE_HOST;
416 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index @ifindex.
 * The device lookup and in_dev_get() both run under
 * read_lock(&dev_base_lock).
 * NOTE(review): presumably returns the in_device obtained from
 * in_dev_get(), or NULL when no such device exists -- the return
 * statement is not visible here; confirm.
 */
419 struct in_device *inetdev_by_index(int ifindex)
421 struct net_device *dev;
422 struct in_device *in_dev = NULL;
423 read_lock(&dev_base_lock);
424 dev = __dev_get_by_index(ifindex);
426 in_dev = in_dev_get(dev);
427 read_unlock(&dev_base_lock);
431 /* Called only from RTNL semaphored context. No locks. */
/*
 * Iterate over @in_dev with for_primary_ifa(), looking for an address
 * whose ifa_mask equals mask and which inet_ifa_match() accepts for
 * @prefix.
 * NOTE(review): the mask parameter is declared on a line not visible
 * here; presumably the matching ifa is returned, or NULL -- confirm.
 */
433 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
438 for_primary_ifa(in_dev) {
439 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
441 } endfor_ifa(in_dev);
/*
 * rtnetlink RTM_DELADDR handler (installed via inet_rtnetlink_table).
 *
 * Parses the request against ifa_ipv4_policy, resolves the in_device
 * from ifm->ifa_index with inetdev_by_index() and calls __in_dev_put()
 * on it. The address list is then scanned, comparing each entry with the
 * supplied IFA_LOCAL, IFA_LABEL and IFA_ADDRESS / prefix length. The
 * selected entry is removed with __inet_del_ifa(..., 1, ...), passing
 * the sender's netlink pid.
 *
 * err is set to -EADDRNOTAVAIL on a path after the scan.
 * NOTE(review): the statements that skip non-matching entries and the
 * return statements are not visible here -- confirm.
 */
445 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
447 struct nlattr *tb[IFA_MAX+1];
448 struct in_device *in_dev;
449 struct ifaddrmsg *ifm;
450 struct in_ifaddr *ifa, **ifap;
455 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
459 ifm = nlmsg_data(nlh);
460 in_dev = inetdev_by_index(ifm->ifa_index);
461 if (in_dev == NULL) {
466 __in_dev_put(in_dev);
468 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469 ifap = &ifa->ifa_next) {
471 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
474 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
477 if (tb[IFA_ADDRESS] &&
478 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
479 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
482 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
486 err = -EADDRNOTAVAIL;
/*
 * Build a new in_ifaddr from the netlink request @nlh (used by
 * inet_rtm_newaddr()).
 *
 * Visible behavior:
 *  - the message is parsed against ifa_ipv4_policy;
 *  - a prefix length above 32 or a missing IFA_LOCAL is rejected;
 *  - the device is looked up by ifm->ifa_index, and an in_device is
 *    created with inetdev_init() when the device has none;
 *  - IFA_ADDRESS defaults to IFA_LOCAL when absent;
 *  - prefix length, mask, flags, scope, owning in_device, local and peer
 *    address are copied into the new ifa; broadcast, anycast and label
 *    come from their attributes, with dev->name as the other label
 *    source.
 *
 * NOTE(review): the error paths, the conditions selecting the label
 * source and the return value are not visible here -- confirm.
 */
491 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
493 struct nlattr *tb[IFA_MAX+1];
494 struct in_ifaddr *ifa;
495 struct ifaddrmsg *ifm;
496 struct net_device *dev;
497 struct in_device *in_dev;
500 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
504 ifm = nlmsg_data(nlh);
505 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
510 dev = __dev_get_by_index(ifm->ifa_index);
516 in_dev = __in_dev_get_rtnl(dev);
517 if (in_dev == NULL) {
518 in_dev = inetdev_init(dev);
519 if (in_dev == NULL) {
525 ifa = inet_alloc_ifa();
528 * A potential indev allocation can be left alive, it stays
529 * assigned to its device and is destroy with it.
537 if (tb[IFA_ADDRESS] == NULL)
538 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
540 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
541 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
542 ifa->ifa_flags = ifm->ifa_flags;
543 ifa->ifa_scope = ifm->ifa_scope;
544 ifa->ifa_dev = in_dev;
546 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
547 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
549 if (tb[IFA_BROADCAST])
550 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
553 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
556 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
558 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * rtnetlink RTM_NEWADDR handler (installed via inet_rtnetlink_table).
 * Converts the request into an in_ifaddr with rtm_to_ifaddr() and returns
 * the result of __inet_insert_ifa(), passing the sender's netlink pid.
 * NOTE(review): handling of a failed rtm_to_ifaddr() is not visible
 * here -- confirm.
 */
566 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
568 struct in_ifaddr *ifa;
572 ifa = rtm_to_ifaddr(nlh);
576 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
580 * Determine a default network mask, based on the IP address.
/*
 * Classful (A/B/C) prefix length for @addr.
 * rc starts at -1, and devinet_ioctl() treats a negative result as an
 * invalid address. The address is converted to host order and tested
 * with IN_CLASSA(), IN_CLASSB() and IN_CLASSC().
 * NOTE(review): the values assigned per class (presumably 8, 16 and 24)
 * and any special-casing before the class tests are not visible here --
 * confirm.
 */
583 static __inline__ int inet_abc_len(__be32 addr)
585 int rc = -1; /* Something else, probably a multicast. */
590 __u32 haddr = ntohl(addr);
592 if (IN_CLASSA(haddr))
594 else if (IN_CLASSB(haddr))
596 else if (IN_CLASSC(haddr))
/*
 * Handler for the IPv4 interface-address ioctls: the SIOCGIFxxx and
 * SIOCSIFxxx variants for ADDR, BRDADDR, DSTADDR and NETMASK, plus
 * SIOCSIFFLAGS.
 *
 * @cmd: ioctl request number.
 * @arg: user pointer to a struct ifreq. It is copied in at the start and
 *       copied back with copy_to_user() on the final visible path.
 *
 * Outline of the visible code:
 *  - the interface name is NUL-terminated, the caller's sockaddr is saved
 *    in sin_orig, the name is searched for ':' and dev_load() is called;
 *  - "get" requests record in tryaddrmatch whether the caller passed an
 *    AF_INET address, then clear the output sockaddr and set AF_INET;
 *  - two groups of requests check CAP_NET_ADMIN, and the "set" address
 *    requests also check that the supplied family is AF_INET;
 *  - the device is looked up by name and its address list searched first
 *    by label plus address (4.4BSD style), then by label alone;
 *  - -EADDRNOTAVAIL is prepared for requests other than SIOCSIFADDR and
 *    SIOCSIFFLAGS that found no address entry;
 *  - "get" requests copy ifa_local, ifa_broadcast, ifa_address or
 *    ifa_mask into the reply;
 *  - the flags path deletes the matched entry when IFF_UP is being
 *    cleared and calls dev_change_flags();
 *  - SIOCSIFADDR validates the address with inet_abc_len(), allocates or
 *    reuses an entry, derives the classful prefix, mask and broadcast
 *    for non point-to-point devices (prefix 32 otherwise), and installs
 *    it with inet_set_ifa();
 *  - SIOCSIFBRDADDR, SIOCSIFDSTADDR and SIOCSIFNETMASK unlink the entry
 *    with inet_del_ifa(..., 0), update the field and re-insert it. The
 *    netmask case rejects masks that fail bad_mask() and recomputes the
 *    broadcast address when it matched the old mask.
 *
 * NOTE(review): locking, the switch statements' control flow and most
 * error returns are not visible here -- confirm against the full body.
 */
604 int devinet_ioctl(unsigned int cmd, void __user *arg)
607 struct sockaddr_in sin_orig;
608 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
609 struct in_device *in_dev;
610 struct in_ifaddr **ifap = NULL;
611 struct in_ifaddr *ifa = NULL;
612 struct net_device *dev;
615 int tryaddrmatch = 0;
618 * Fetch the caller's info block into kernel space
621 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
623 ifr.ifr_name[IFNAMSIZ - 1] = 0;
625 /* save original address for comparison */
626 memcpy(&sin_orig, sin, sizeof(*sin));
628 colon = strchr(ifr.ifr_name, ':');
633 dev_load(ifr.ifr_name);
637 case SIOCGIFADDR: /* Get interface address */
638 case SIOCGIFBRDADDR: /* Get the broadcast address */
639 case SIOCGIFDSTADDR: /* Get the destination address */
640 case SIOCGIFNETMASK: /* Get the netmask for the interface */
641 /* Note that these ioctls will not sleep,
642 so that we do not impose a lock.
643 One day we will be forced to put shlock here (I mean SMP)
645 tryaddrmatch = (sin_orig.sin_family == AF_INET);
646 memset(sin, 0, sizeof(*sin));
647 sin->sin_family = AF_INET;
652 if (!capable(CAP_NET_ADMIN))
655 case SIOCSIFADDR: /* Set interface address (and family) */
656 case SIOCSIFBRDADDR: /* Set the broadcast address */
657 case SIOCSIFDSTADDR: /* Set the destination address */
658 case SIOCSIFNETMASK: /* Set the netmask for the interface */
660 if (!capable(CAP_NET_ADMIN))
663 if (sin->sin_family != AF_INET)
674 if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
680 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
682 /* Matthias Andree */
683 /* compare label and address (4.4BSD style) */
684 /* note: we only do this for a limited set of ioctls
685 and only if the original address family was AF_INET.
686 This is checked above. */
687 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
688 ifap = &ifa->ifa_next) {
689 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
690 sin_orig.sin_addr.s_addr ==
696 /* we didn't get a match, maybe the application is
697 4.3BSD-style and passed in junk so we fall back to
698 comparing just the label */
700 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
701 ifap = &ifa->ifa_next)
702 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
707 ret = -EADDRNOTAVAIL;
708 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
712 case SIOCGIFADDR: /* Get interface address */
713 sin->sin_addr.s_addr = ifa->ifa_local;
716 case SIOCGIFBRDADDR: /* Get the broadcast address */
717 sin->sin_addr.s_addr = ifa->ifa_broadcast;
720 case SIOCGIFDSTADDR: /* Get the destination address */
721 sin->sin_addr.s_addr = ifa->ifa_address;
724 case SIOCGIFNETMASK: /* Get the netmask for the interface */
725 sin->sin_addr.s_addr = ifa->ifa_mask;
730 ret = -EADDRNOTAVAIL;
734 if (!(ifr.ifr_flags & IFF_UP))
735 inet_del_ifa(in_dev, ifap, 1);
738 ret = dev_change_flags(dev, ifr.ifr_flags);
741 case SIOCSIFADDR: /* Set interface address (and family) */
743 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
748 if ((ifa = inet_alloc_ifa()) == NULL)
751 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
753 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
756 if (ifa->ifa_local == sin->sin_addr.s_addr)
758 inet_del_ifa(in_dev, ifap, 0);
759 ifa->ifa_broadcast = 0;
760 ifa->ifa_anycast = 0;
763 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
765 if (!(dev->flags & IFF_POINTOPOINT)) {
766 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
767 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
768 if ((dev->flags & IFF_BROADCAST) &&
769 ifa->ifa_prefixlen < 31)
770 ifa->ifa_broadcast = ifa->ifa_address |
773 ifa->ifa_prefixlen = 32;
774 ifa->ifa_mask = inet_make_mask(32);
776 ret = inet_set_ifa(dev, ifa);
779 case SIOCSIFBRDADDR: /* Set the broadcast address */
781 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
782 inet_del_ifa(in_dev, ifap, 0);
783 ifa->ifa_broadcast = sin->sin_addr.s_addr;
784 inet_insert_ifa(ifa);
788 case SIOCSIFDSTADDR: /* Set the destination address */
790 if (ifa->ifa_address == sin->sin_addr.s_addr)
793 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
796 inet_del_ifa(in_dev, ifap, 0);
797 ifa->ifa_address = sin->sin_addr.s_addr;
798 inet_insert_ifa(ifa);
801 case SIOCSIFNETMASK: /* Set the netmask for the interface */
804 * The mask we set must be legal.
807 if (bad_mask(sin->sin_addr.s_addr, 0))
810 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
811 __be32 old_mask = ifa->ifa_mask;
812 inet_del_ifa(in_dev, ifap, 0);
813 ifa->ifa_mask = sin->sin_addr.s_addr;
814 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
816 /* See if current broadcast address matches
817 * with current netmask, then recalculate
818 * the broadcast address. Otherwise it's a
819 * funny address, so don't touch it since
820 * the user seems to know what (s)he's doing...
822 if ((dev->flags & IFF_BROADCAST) &&
823 (ifa->ifa_prefixlen < 31) &&
824 (ifa->ifa_broadcast ==
825 (ifa->ifa_local|~old_mask))) {
826 ifa->ifa_broadcast = (ifa->ifa_local |
827 ~sin->sin_addr.s_addr);
829 inet_insert_ifa(ifa);
839 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
/*
 * gifconf callback for PF_INET, registered by devinet_init().
 *
 * Does nothing further when @dev has no in_device or no addresses.
 * Otherwise, for each address, an ifreq is zeroed, named from the
 * address label or the device name, marked AF_INET and copied to @buf.
 * After each copy @buf advances, @len shrinks and done grows by
 * sizeof(struct ifreq). Iteration stops when fewer than sizeof(ifr)
 * bytes remain in @len.
 *
 * NOTE(review): handling of a NULL @buf, the condition choosing between
 * label and device name, and the return value (presumably done or
 * -EFAULT) are not visible here -- confirm.
 */
843 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
845 struct in_device *in_dev = __in_dev_get_rtnl(dev);
846 struct in_ifaddr *ifa;
850 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
853 for (; ifa; ifa = ifa->ifa_next) {
858 if (len < (int) sizeof(ifr))
860 memset(&ifr, 0, sizeof(struct ifreq));
862 strcpy(ifr.ifr_name, ifa->ifa_label);
864 strcpy(ifr.ifr_name, dev->name);
866 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
867 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
870 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
874 buf += sizeof(struct ifreq);
875 len -= sizeof(struct ifreq);
876 done += sizeof(struct ifreq);
/*
 * Pick a local address on @dev suitable for reaching @dst within @scope.
 *
 * First pass: iterate the primary addresses of @dev's in_device,
 * testing each against @scope (ifa_scope > scope). An address is taken
 * when @dst is 0 or inet_ifa_match() accepts @dst for it; another
 * visible assignment also records ifa_local as a candidate.
 *
 * Fallback: under read_lock(&dev_base_lock), walk every device on
 * dev_base and take the first primary address whose scope is not
 * RT_SCOPE_LINK and is <= @scope.
 *
 * NOTE(review): RCU locking, the conditions separating the two passes
 * and the return statement (presumably addr, 0 when nothing matched) are
 * not visible here -- confirm.
 */
882 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
885 struct in_device *in_dev;
888 in_dev = __in_dev_get_rcu(dev);
892 for_primary_ifa(in_dev) {
893 if (ifa->ifa_scope > scope)
895 if (!dst || inet_ifa_match(dst, ifa)) {
896 addr = ifa->ifa_local;
900 addr = ifa->ifa_local;
901 } endfor_ifa(in_dev);
908 /* Not loopback addresses on loopback should be preferred
909 in this case. It is importnat that lo is the first interface
912 read_lock(&dev_base_lock);
914 for (dev = dev_base; dev; dev = dev->next) {
915 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
918 for_primary_ifa(in_dev) {
919 if (ifa->ifa_scope != RT_SCOPE_LINK &&
920 ifa->ifa_scope <= scope) {
921 addr = ifa->ifa_local;
922 goto out_unlock_both;
924 } endfor_ifa(in_dev);
927 read_unlock(&dev_base_lock);
/*
 * Per-device helper for inet_confirm_addr(): search @in_dev for a local
 * address compatible with @dst, @local and @scope.
 *
 * Visible behavior: a candidate is taken when @local is 0 or equals
 * ifa_local and ifa_scope <= @scope. same records whether @local (if
 * non-zero) and @dst (if non-zero) both match the address's subnet.
 * Later entries are checked for whether the selected address lies in
 * their subnet, and a new local address may be adopted when its scope
 * permits.
 *
 * Returns the selected address when same is set, otherwise 0.
 * NOTE(review): much of the loop's control flow is not visible here --
 * confirm against the full function body.
 */
933 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
934 __be32 local, int scope)
941 (local == ifa->ifa_local || !local) &&
942 ifa->ifa_scope <= scope) {
943 addr = ifa->ifa_local;
948 same = (!local || inet_ifa_match(local, ifa)) &&
949 (!dst || inet_ifa_match(dst, ifa));
953 /* Is the selected addr into dst subnet? */
954 if (inet_ifa_match(addr, ifa))
956 /* No, then can we use new local src? */
957 if (ifa->ifa_scope <= scope) {
958 addr = ifa->ifa_local;
961 /* search for large dst subnet for addr */
965 } endfor_ifa(in_dev);
967 return same? addr : 0;
971 * Confirm that local IP address exists using wildcards:
972 * - dev: only on this interface, 0=any interface
973 * - dst: only in the same subnet as dst, 0=any dst
974 * - local: address, 0=autoselect the local address
975 * - scope: maximum allowed scope value for the local address
/*
 * Entry point for local-address confirmation.
 *
 * One visible path evaluates @dev's own in_device with
 * confirm_addr_indev(). The other walks every device on dev_base under
 * read_lock(&dev_base_lock) and evaluates each in_device in turn.
 *
 * NOTE(review): the test choosing between the two paths (presumably
 * whether @dev is NULL), the loop exit condition, RCU locking and the
 * return statement are not visible here -- confirm.
 */
977 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
980 struct in_device *in_dev;
984 if ((in_dev = __in_dev_get_rcu(dev)))
985 addr = confirm_addr_indev(in_dev, dst, local, scope);
991 read_lock(&dev_base_lock);
993 for (dev = dev_base; dev; dev = dev->next) {
994 if ((in_dev = __in_dev_get_rcu(dev))) {
995 addr = confirm_addr_indev(in_dev, dst, local, scope);
1001 read_unlock(&dev_base_lock);
/*
 * Add @nb to inetaddr_chain, the blocking notifier chain this file calls
 * when addresses are added or removed. Returns the result of
 * blocking_notifier_chain_register().
 */
1010 int register_inetaddr_notifier(struct notifier_block *nb)
1012 return blocking_notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove @nb from inetaddr_chain. Returns the result of
 * blocking_notifier_chain_unregister().
 */
1015 int unregister_inetaddr_notifier(struct notifier_block *nb)
1017 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1020 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1021 * alias numbering and to create unique labels if possible.
/*
 * Rewrite the label of every address on @in_dev after @dev was renamed.
 *
 * For each address the old label is saved in old, the label is
 * overwritten with dev->name, and a ':' suffix is located. A ":%d"
 * suffix built from named is written into old on a path whose condition
 * is not visible here. The suffix is appended with strcat() when
 * strlen(dot) + strlen(dev->name) is below IFNAMSIZ; otherwise it is
 * copied over the tail of the label.
 */
1023 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1025 struct in_ifaddr *ifa;
1028 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1029 char old[IFNAMSIZ], *dot;
1031 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1032 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
/*
 * NOTE(review): the label was overwritten with dev->name just above, so
 * this searches the new name rather than the saved copy in old. That
 * looks at odds with preserving the previous alias suffix -- confirm
 * whether strchr(old, ':') was intended.
 */
1035 dot = strchr(ifa->ifa_label, ':');
1037 sprintf(old, ":%d", named);
1040 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1041 strcat(ifa->ifa_label, dot);
1043 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1048 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (wired up through ip_netdev_notifier).
 * Keeps a device's IPv4 state in step with netdevice events.
 *
 * Visible behavior:
 *  - NETDEV_REGISTER is handled ahead of the main dispatch. An in_device
 *    is created with inetdev_init(), a panic() exists for the loopback
 *    failure case, and the loopback device gets cnf.no_xfrm and
 *    cnf.no_policy set.
 *  - Reaching "case NETDEV_REGISTER" in the dispatch only logs
 *    "inetdev_event: bug".
 *  - For the loopback device, an address INADDR_LOOPBACK with prefix
 *    length 8 and RT_SCOPE_HOST, labelled with dev->name, is inserted
 *    after taking a reference on in_dev.
 *  - NETDEV_UNREGISTER calls inetdev_destroy().
 *  - NETDEV_CHANGENAME renames the address labels and, under
 *    CONFIG_SYSCTL, re-registers the devinet and neighbour sysctl
 *    entries.
 *
 * NOTE(review): the event under which the loopback address is added,
 * the NETDEV_CHANGEMTU handling and the return value are not visible
 * here -- confirm against the full function body.
 */
1050 static int inetdev_event(struct notifier_block *this, unsigned long event,
1053 struct net_device *dev = ptr;
1054 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1059 if (event == NETDEV_REGISTER) {
1060 in_dev = inetdev_init(dev);
1062 panic("devinet: Failed to create loopback\n");
1063 if (dev == &loopback_dev) {
1064 in_dev->cnf.no_xfrm = 1;
1065 in_dev->cnf.no_policy = 1;
1072 case NETDEV_REGISTER:
1073 printk(KERN_DEBUG "inetdev_event: bug\n");
1079 if (dev == &loopback_dev) {
1080 struct in_ifaddr *ifa;
1081 if ((ifa = inet_alloc_ifa()) != NULL) {
1083 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084 ifa->ifa_prefixlen = 8;
1085 ifa->ifa_mask = inet_make_mask(8);
1086 in_dev_hold(in_dev);
1087 ifa->ifa_dev = in_dev;
1088 ifa->ifa_scope = RT_SCOPE_HOST;
1089 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090 inet_insert_ifa(ifa);
1098 case NETDEV_CHANGEMTU:
1101 /* MTU fell under 68, disable IP */
1102 case NETDEV_UNREGISTER:
1103 inetdev_destroy(in_dev);
1105 case NETDEV_CHANGENAME:
1106 /* Do not notify about label change, this event is
1107 * not interesting to applications using netlink.
1109 inetdev_changename(dev, in_dev);
1111 #ifdef CONFIG_SYSCTL
1112 devinet_sysctl_unregister(&in_dev->cnf);
1113 neigh_sysctl_unregister(in_dev->arp_parms);
1114 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1115 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1116 devinet_sysctl_register(in_dev, &in_dev->cnf);
/*
 * Notifier block that routes netdevice events to inetdev_event().
 * devinet_init() registers it with register_netdevice_notifier().
 */
1124 static struct notifier_block ip_netdev_notifier = {
1125 .notifier_call =inetdev_event,
/*
 * Buffer size rtmsg_ifa() requests for one address notification: the
 * aligned ifaddrmsg header plus room for four 4-byte attributes and an
 * IFNAMSIZ-byte label.
 */
1128 static inline size_t inet_nlmsg_size(void)
1130 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1131 + nla_total_size(4) /* IFA_ADDRESS */
1132 + nla_total_size(4) /* IFA_LOCAL */
1133 + nla_total_size(4) /* IFA_BROADCAST */
1134 + nla_total_size(4) /* IFA_ANYCAST */
1135 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
/*
 * Serialize @ifa into @skb as one netlink message.
 *
 * @skb:   destination buffer.
 * @ifa:   address to describe.
 * @pid:   netlink pid placed in the message header.
 * @seq:   sequence number placed in the message header.
 * @event: message type (callers pass RTM_NEWADDR or RTM_DELADDR).
 * @flags: netlink header flags.
 *
 * The ifaddrmsg header is filled from @ifa. IFA_F_PERMANENT is always
 * OR-ed into the reported flags, and the interface index comes from
 * ifa->ifa_dev->dev. IFA_ADDRESS, IFA_BROADCAST and IFA_ANYCAST are
 * emitted only when non-zero, and IFA_LABEL only when the label is
 * non-empty. IFA_LOCAL is emitted too (its guard, if any, is not
 * visible here).
 *
 * Returns the result of nlmsg_end() on success. A failure path cancels
 * the partially built message with nlmsg_cancel().
 * NOTE(review): the failure return value is not visible here;
 * rtmsg_ifa() checks for -EMSGSIZE -- confirm.
 */
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139 u32 pid, u32 seq, int event, unsigned int flags)
1141 struct ifaddrmsg *ifm;
1142 struct nlmsghdr *nlh;
1144 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1148 ifm = nlmsg_data(nlh);
1149 ifm->ifa_family = AF_INET;
1150 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152 ifm->ifa_scope = ifa->ifa_scope;
1153 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1155 if (ifa->ifa_address)
1156 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1159 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1161 if (ifa->ifa_broadcast)
1162 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1164 if (ifa->ifa_anycast)
1165 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1167 if (ifa->ifa_label[0])
1168 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1170 return nlmsg_end(skb, nlh);
1173 nlmsg_cancel(skb, nlh);
/*
 * rtnetlink RTM_GETADDR dump callback (installed via
 * inet_rtnetlink_table).
 *
 * Resume state is read from cb->args[0] (device index) and cb->args[1]
 * (address index within the device). Under read_lock(&dev_base_lock) the
 * devices on dev_base are walked. For each in_device, addresses at or
 * beyond the saved address index are emitted with inet_fill_ifaddr() as
 * RTM_NEWADDR / NLM_F_MULTI messages. A result <= 0 from
 * inet_fill_ifaddr() ends the pass. cb->args[1] is updated with the
 * current address index.
 *
 * NOTE(review): the handling of s_idx, the update of cb->args[0] and the
 * return value are not visible here -- confirm.
 */
1177 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1180 struct net_device *dev;
1181 struct in_device *in_dev;
1182 struct in_ifaddr *ifa;
1183 int s_ip_idx, s_idx = cb->args[0];
1185 s_ip_idx = ip_idx = cb->args[1];
1186 read_lock(&dev_base_lock);
1187 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1193 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1198 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1199 ifa = ifa->ifa_next, ip_idx++) {
1200 if (ip_idx < s_ip_idx)
1202 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1204 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1213 read_unlock(&dev_base_lock);
1215 cb->args[1] = ip_idx;
/*
 * Send an address-change notification for @ifa to the
 * RTNLGRP_IPV4_IFADDR netlink group.
 *
 * @event: RTM_NEWADDR or RTM_DELADDR.
 * @nlh:   originating request, or NULL; supplies the sequence number
 *         (0 when NULL) and is passed on to rtnl_notify().
 * The pid parameter (declared on a line not visible here) is used for
 * the message header and for rtnl_notify().
 *
 * An skb of inet_nlmsg_size() bytes is allocated with GFP_KERNEL and
 * filled by inet_fill_ifaddr(). -EMSGSIZE from the fill step triggers
 * WARN_ON(), since it would mean inet_nlmsg_size() is wrong.
 * rtnl_set_sk_err() records an error for the group.
 * NOTE(review): the conditions leading to the error path are not visible
 * here -- confirm.
 */
1220 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1223 struct sk_buff *skb;
1224 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1227 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1231 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1233 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1234 WARN_ON(err == -EMSGSIZE);
1238 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1241 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
/*
 * rtnetlink dispatch table for PF_INET. devinet_init() installs it in
 * rtnetlink_links[PF_INET]. The address handlers are defined in this
 * file; the route and rule handlers are defined elsewhere.
 */
1244 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1245 [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
1246 [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
1247 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
1248 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
1249 [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
1250 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1251 .dumpit = inet_dump_fib, },
1252 #ifdef CONFIG_IP_MULTIPLE_TABLES
1253 [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, },
1257 #ifdef CONFIG_SYSCTL
/*
 * Propagate a change of the global forwarding setting.
 *
 * Reads ipv4_devconf.forwarding, sets the global accept_redirects to its
 * logical negation, copies the value into ipv4_devconf_dflt.forwarding
 * and, under read_lock(&dev_base_lock), into cnf.forwarding of every
 * device's in_device.
 * NOTE(review): RCU locking and any follow-up action after the loop are
 * not visible here -- confirm.
 */
1259 void inet_forward_change(void)
1261 struct net_device *dev;
1262 int on = ipv4_devconf.forwarding;
1264 ipv4_devconf.accept_redirects = !on;
1265 ipv4_devconf_dflt.forwarding = on;
1267 read_lock(&dev_base_lock);
1268 for (dev = dev_base; dev; dev = dev->next) {
1269 struct in_device *in_dev;
1271 in_dev = __in_dev_get_rcu(dev);
1273 in_dev->cnf.forwarding = on;
1276 read_unlock(&dev_base_lock);
/*
 * proc handler for the "forwarding" sysctl entries.
 *
 * Delegates parsing to proc_dointvec(). On a write that changed the
 * value, inet_forward_change() is called if the entry written is the
 * global ipv4_devconf.forwarding. A further action is taken for entries
 * other than ipv4_devconf_dflt.forwarding.
 * NOTE(review): that action (presumably a route cache flush) and the
 * return statement are not visible here -- confirm.
 */
1281 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1282 struct file* filp, void __user *buffer,
1283 size_t *lenp, loff_t *ppos)
1285 int *valp = ctl->data;
1287 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1289 if (write && *valp != val) {
1290 if (valp == &ipv4_devconf.forwarding)
1291 inet_forward_change();
1292 else if (valp != &ipv4_devconf_dflt.forwarding)
/*
 * proc handler used by the disable_xfrm, disable_policy,
 * force_igmp_version and promote_secondaries entries.
 *
 * Delegates parsing to proc_dointvec() and takes a further action when a
 * write changed the value.
 * NOTE(review): that action (presumably a route cache flush, going by
 * the function name) and the return statement are not visible here --
 * confirm.
 */
1299 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1300 struct file* filp, void __user *buffer,
1301 size_t *lenp, loff_t *ppos)
1303 int *valp = ctl->data;
1305 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1307 if (write && *valp != val)
/*
 * Strategy routine paired with ipv4_doint_and_flush() in the sysctl
 * template.
 *
 * Visible checks: a new value must be supplied (newval and newlen
 * non-zero), its length must be sizeof(int), and it is fetched with
 * get_user(). When the caller also asks for the old value, the requested
 * length is read from oldlenp and clamped to table->maxlen. That many
 * bytes of the current value are copied out, and the length is written
 * back.
 *
 * NOTE(review): the return codes, the store of the new value and any
 * flush are not visible here -- confirm against the full function body.
 */
1313 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1314 void __user *oldval, size_t __user *oldlenp,
1315 void __user *newval, size_t newlen)
1317 int *valp = table->data;
1320 if (!newval || !newlen)
1323 if (newlen != sizeof(int))
1326 if (get_user(new, (int __user *)newval))
1332 if (oldval && oldlenp) {
1335 if (get_user(len, oldlenp))
1339 if (len > table->maxlen)
1340 len = table->maxlen;
1341 if (copy_to_user(oldval, valp, len))
1343 if (put_user(len, oldlenp))
/*
 * Template sysctl tree for the IPv4 per-device configuration.
 *
 * devinet_vars[] holds one entry per tunable, each pointing its .data at
 * the matching member of the global ipv4_devconf. The four two-element
 * directory arrays chain downward through .child:
 * root (CTL_NET) -> proto (NET_IPV4) -> conf (NET_IPV4_CONF) ->
 * dev (NET_PROTO_CONF_ALL) -> devinet_vars.
 *
 * devinet_init() registers this instance directly.
 * devinet_sysctl_register() clones it with kmemdup() for each device (and
 * for the defaults), rebases the .data pointers and re-links the .child
 * pointers onto the copy.
 *
 * Most entries use proc_dointvec. "forwarding" uses
 * devinet_sysctl_forward. The last four entries use ipv4_doint_and_flush
 * together with ipv4_doint_and_flush_strategy.
 */
1354 static struct devinet_sysctl_table {
1355 struct ctl_table_header *sysctl_header;
1356 ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1357 ctl_table devinet_dev[2];
1358 ctl_table devinet_conf_dir[2];
1359 ctl_table devinet_proto_dir[2];
1360 ctl_table devinet_root_dir[2];
1361 } devinet_sysctl = {
1364 .ctl_name = NET_IPV4_CONF_FORWARDING,
1365 .procname = "forwarding",
1366 .data = &ipv4_devconf.forwarding,
1367 .maxlen = sizeof(int),
1369 .proc_handler = &devinet_sysctl_forward,
1372 .ctl_name = NET_IPV4_CONF_MC_FORWARDING,
1373 .procname = "mc_forwarding",
1374 .data = &ipv4_devconf.mc_forwarding,
1375 .maxlen = sizeof(int),
1377 .proc_handler = &proc_dointvec,
1380 .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1381 .procname = "accept_redirects",
1382 .data = &ipv4_devconf.accept_redirects,
1383 .maxlen = sizeof(int),
1385 .proc_handler = &proc_dointvec,
1388 .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS,
1389 .procname = "secure_redirects",
1390 .data = &ipv4_devconf.secure_redirects,
1391 .maxlen = sizeof(int),
1393 .proc_handler = &proc_dointvec,
1396 .ctl_name = NET_IPV4_CONF_SHARED_MEDIA,
1397 .procname = "shared_media",
1398 .data = &ipv4_devconf.shared_media,
1399 .maxlen = sizeof(int),
1401 .proc_handler = &proc_dointvec,
1404 .ctl_name = NET_IPV4_CONF_RP_FILTER,
1405 .procname = "rp_filter",
1406 .data = &ipv4_devconf.rp_filter,
1407 .maxlen = sizeof(int),
1409 .proc_handler = &proc_dointvec,
1412 .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS,
1413 .procname = "send_redirects",
1414 .data = &ipv4_devconf.send_redirects,
1415 .maxlen = sizeof(int),
1417 .proc_handler = &proc_dointvec,
1420 .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1421 .procname = "accept_source_route",
1422 .data = &ipv4_devconf.accept_source_route,
1423 .maxlen = sizeof(int),
1425 .proc_handler = &proc_dointvec,
1428 .ctl_name = NET_IPV4_CONF_PROXY_ARP,
1429 .procname = "proxy_arp",
1430 .data = &ipv4_devconf.proxy_arp,
1431 .maxlen = sizeof(int),
1433 .proc_handler = &proc_dointvec,
1436 .ctl_name = NET_IPV4_CONF_MEDIUM_ID,
1437 .procname = "medium_id",
1438 .data = &ipv4_devconf.medium_id,
1439 .maxlen = sizeof(int),
1441 .proc_handler = &proc_dointvec,
1444 .ctl_name = NET_IPV4_CONF_BOOTP_RELAY,
1445 .procname = "bootp_relay",
1446 .data = &ipv4_devconf.bootp_relay,
1447 .maxlen = sizeof(int),
1449 .proc_handler = &proc_dointvec,
1452 .ctl_name = NET_IPV4_CONF_LOG_MARTIANS,
1453 .procname = "log_martians",
1454 .data = &ipv4_devconf.log_martians,
1455 .maxlen = sizeof(int),
1457 .proc_handler = &proc_dointvec,
1460 .ctl_name = NET_IPV4_CONF_TAG,
1462 .data = &ipv4_devconf.tag,
1463 .maxlen = sizeof(int),
1465 .proc_handler = &proc_dointvec,
1468 .ctl_name = NET_IPV4_CONF_ARPFILTER,
1469 .procname = "arp_filter",
1470 .data = &ipv4_devconf.arp_filter,
1471 .maxlen = sizeof(int),
1473 .proc_handler = &proc_dointvec,
1476 .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE,
1477 .procname = "arp_announce",
1478 .data = &ipv4_devconf.arp_announce,
1479 .maxlen = sizeof(int),
1481 .proc_handler = &proc_dointvec,
1484 .ctl_name = NET_IPV4_CONF_ARP_IGNORE,
1485 .procname = "arp_ignore",
1486 .data = &ipv4_devconf.arp_ignore,
1487 .maxlen = sizeof(int),
1489 .proc_handler = &proc_dointvec,
1492 .ctl_name = NET_IPV4_CONF_ARP_ACCEPT,
1493 .procname = "arp_accept",
1494 .data = &ipv4_devconf.arp_accept,
1495 .maxlen = sizeof(int),
1497 .proc_handler = &proc_dointvec,
1500 .ctl_name = NET_IPV4_CONF_NOXFRM,
1501 .procname = "disable_xfrm",
1502 .data = &ipv4_devconf.no_xfrm,
1503 .maxlen = sizeof(int),
1505 .proc_handler = &ipv4_doint_and_flush,
1506 .strategy = &ipv4_doint_and_flush_strategy,
1509 .ctl_name = NET_IPV4_CONF_NOPOLICY,
1510 .procname = "disable_policy",
1511 .data = &ipv4_devconf.no_policy,
1512 .maxlen = sizeof(int),
1514 .proc_handler = &ipv4_doint_and_flush,
1515 .strategy = &ipv4_doint_and_flush_strategy,
1518 .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1519 .procname = "force_igmp_version",
1520 .data = &ipv4_devconf.force_igmp_version,
1521 .maxlen = sizeof(int),
1523 .proc_handler = &ipv4_doint_and_flush,
1524 .strategy = &ipv4_doint_and_flush_strategy,
1527 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1528 .procname = "promote_secondaries",
1529 .data = &ipv4_devconf.promote_secondaries,
1530 .maxlen = sizeof(int),
1532 .proc_handler = &ipv4_doint_and_flush,
1533 .strategy = &ipv4_doint_and_flush_strategy,
1538 .ctl_name = NET_PROTO_CONF_ALL,
1541 .child = devinet_sysctl.devinet_vars,
1544 .devinet_conf_dir = {
1546 .ctl_name = NET_IPV4_CONF,
1549 .child = devinet_sysctl.devinet_dev,
1552 .devinet_proto_dir = {
1554 .ctl_name = NET_IPV4,
1557 .child = devinet_sysctl.devinet_conf_dir,
1560 .devinet_root_dir = {
1562 .ctl_name = CTL_NET,
1565 .child = devinet_sysctl.devinet_proto_dir,
/*
 * Create and register a private copy of the devinet sysctl tree for one
 * configuration block.
 *
 * @in_dev: owning in_device, or NULL when registering the defaults.
 * @p:      the ipv4_devconf the copy should expose.
 *
 * The template devinet_sysctl is cloned with kmemdup(). Each
 * devinet_vars[].data pointer (all but the last array slot) is shifted by
 * the distance between @p and the global ipv4_devconf. The per-device
 * directory is named after the device with ctl_name dev->ifindex, or
 * "default" with NET_PROTO_CONF_DEFAULT. The name is duplicated with
 * kstrdup() so the sysctl core owns a stable string. The .child pointers
 * are re-linked onto the copy before register_sysctl_table() is called.
 *
 * NOTE(review): the allocation-failure paths, the cleanup when
 * registration fails and the store into p->sysctl (which
 * devinet_sysctl_unregister() reads) are not visible here -- confirm.
 */
1570 static void devinet_sysctl_register(struct in_device *in_dev,
1571 struct ipv4_devconf *p)
1574 struct net_device *dev = in_dev ? in_dev->dev : NULL;
1575 struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1577 char *dev_name = NULL;
1581 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1582 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1586 dev_name = dev->name;
1587 t->devinet_dev[0].ctl_name = dev->ifindex;
1589 dev_name = "default";
1590 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1594 * Make a copy of dev_name, because '.procname' is regarded as const
1595 * by sysctl and we wouldn't want anyone to change it under our feet
1596 * (see SIOCSIFNAME).
1598 dev_name = kstrdup(dev_name, GFP_KERNEL);
1602 t->devinet_dev[0].procname = dev_name;
1603 t->devinet_dev[0].child = t->devinet_vars;
1604 t->devinet_conf_dir[0].child = t->devinet_dev;
1605 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1606 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1608 t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1609 if (!t->sysctl_header)
/*
 * Undo devinet_sysctl_register() for the configuration block @p:
 * unregister the table stored in p->sysctl and free the duplicated
 * directory name.
 * NOTE(review): the guard for a NULL p->sysctl, the reset of that
 * pointer and the release of the table itself are not visible here --
 * confirm.
 */
1623 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1626 struct devinet_sysctl_table *t = p->sysctl;
1628 unregister_sysctl_table(t->sysctl_header);
1629 kfree(t->devinet_dev[0].procname);
/*
 * Boot-time initialization of the IPv4 device layer. Registers the
 * PF_INET gifconf handler, the netdevice notifier and the PF_INET
 * rtnetlink table. Under CONFIG_SYSCTL it also registers the template
 * sysctl tree and a second tree for ipv4_devconf_dflt.
 */
1635 void __init devinet_init(void)
1637 register_gifconf(PF_INET, inet_gifconf);
1638 register_netdevice_notifier(&ip_netdev_notifier);
1639 rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1640 #ifdef CONFIG_SYSCTL
1641 devinet_sysctl.sysctl_header =
1642 register_sysctl_table(devinet_sysctl.devinet_root_dir);
1643 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1647 EXPORT_SYMBOL(in_dev_finish_destroy);
1648 EXPORT_SYMBOL(inet_select_addr);
1649 EXPORT_SYMBOL(inetdev_by_index);
1650 EXPORT_SYMBOL(register_inetaddr_notifier);
1651 EXPORT_SYMBOL(unregister_inetaddr_notifier);