2 * NET3 IP device support routines.
4 * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Derived from the IP parts of dev.c 1.0.19
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Mark Evans, <evansmp@uhura.aston.ac.uk>
17 * Alan Cox, <gw4pts@gw4pts.ampr.org>
18 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
21 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
23 * Cyrus Durgin: updated for kmod
24 * Matthias Andree: in devinet_ioctl, compare label and
25 * address (4.4BSD alias style support),
26 * fall back to comparing just the label
30 #include <linux/config.h>
32 #include <asm/uaccess.h>
33 #include <asm/system.h>
34 #include <linux/bitops.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/rtnetlink.h>
52 #include <linux/init.h>
53 #include <linux/notifier.h>
54 #include <linux/inetdevice.h>
55 #include <linux/igmp.h>
57 #include <linux/sysctl.h>
59 #include <linux/kmod.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
/*
 * Global IPv4 device configuration. The sysctl template later in this
 * file points its .data members at fields of this structure. The
 * initializer lines visible here enable accept_redirects and
 * secure_redirects.
 */
65 struct ipv4_devconf ipv4_devconf = {
66 .accept_redirects = 1,
68 .secure_redirects = 1,
/*
 * Default per-device configuration. inetdev_init() copies it into the
 * cnf member of each in_device it creates, and devinet_init() registers
 * it with sysctl through devinet_sysctl_register(NULL, ...).
 */
72 static struct ipv4_devconf ipv4_devconf_dflt = {
73 .accept_redirects = 1,
75 .secure_redirects = 1,
77 .accept_source_route = 1,
/* Forward declarations of helpers that are defined later in this file. */
80 static void rtmsg_ifa(int event, struct in_ifaddr *);
/*
 * Head of the notifier chain that is run with NETDEV_UP / NETDEV_DOWN
 * when an address is inserted or deleted. Listeners are added and removed
 * with register_inetaddr_notifier() / unregister_inetaddr_notifier().
 */
82 static struct notifier_block *inetaddr_chain;
83 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
86 static void devinet_sysctl_register(struct in_device *in_dev,
87 struct ipv4_devconf *p);
88 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
91 /* Locks all the inet devices. */
/*
 * Allocate an in_ifaddr with kmalloc(GFP_KERNEL), zero it and initialise
 * its RCU head.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing; callers compare the result against NULL.
 */
93 static struct in_ifaddr *inet_alloc_ifa(void)
95 struct in_ifaddr *ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
98 memset(ifa, 0, sizeof(*ifa));
99 INIT_RCU_HEAD(&ifa->rcu_head);
/*
 * RCU callback queued by inet_free_ifa(). Recovers the in_ifaddr from its
 * embedded rcu_head and drops the reference it holds on ifa->ifa_dev.
 * NOTE(review): presumably the in_ifaddr itself is freed afterwards; that
 * line is not visible in this listing.
 */
105 static void inet_rcu_free_ifa(struct rcu_head *head)
107 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
109 in_dev_put(ifa->ifa_dev);
/*
 * Release an in_ifaddr in a deferred fashion: its rcu_head is handed to
 * call_rcu() with inet_rcu_free_ifa() as the callback.
 */
113 static inline void inet_free_ifa(struct in_ifaddr *ifa)
115 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
/*
 * Final teardown step for an in_device (exported symbol).
 * Traps via BUG_TRAP if the address list or the multicast list is still
 * populated. When NET_REFCNT_DEBUG is defined, the in_device pointer and
 * the device name are logged at KERN_DEBUG level.
 * NOTE(review): the condition guarding the "Freeing alive" message and the
 * statements that actually release memory are not visible in this listing.
 */
118 void in_dev_finish_destroy(struct in_device *idev)
120 struct net_device *dev = idev->dev;
122 BUG_TRAP(!idev->ifa_list);
123 BUG_TRAP(!idev->mc_list);
124 #ifdef NET_REFCNT_DEBUG
125 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
126 idev, dev ? dev->name : "NIL");
130 printk("Freeing alive in_device %p\n", idev);
/*
 * Create the IPv4 state (struct in_device) for dev and attach it.
 *
 * Visible steps, in order: allocate and zero the structure, initialise its
 * RCU head, copy ipv4_devconf_dflt into in_dev->cnf and clear cnf.sysctl,
 * allocate ARP neighbour parameters from arp_tbl, register the neighbour
 * sysctl entries, publish the structure through dev->ip_ptr with
 * rcu_assign_pointer(), register the devinet sysctl entries, and set up
 * multicast state with ip_mc_init_dev().
 *
 * NOTE(review): the error paths, the reference-count updates announced by
 * the two inline comments, the action taken when the device is already
 * IFF_UP, and the return statement are not visible in this listing.
 * Callers appear to treat a NULL result as failure.
 */
136 struct in_device *inetdev_init(struct net_device *dev)
138 struct in_device *in_dev;
142 in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
145 memset(in_dev, 0, sizeof(*in_dev));
146 INIT_RCU_HEAD(&in_dev->rcu_head);
147 memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
148 in_dev->cnf.sysctl = NULL;
150 if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
152 /* Reference in_dev->dev */
155 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
156 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
159 /* Account for reference dev->ip_ptr */
161 rcu_assign_pointer(dev->ip_ptr, in_dev);
164 devinet_sysctl_register(in_dev, &in_dev->cnf);
166 ip_mc_init_dev(in_dev);
167 if (dev->flags & IFF_UP)
/*
 * RCU callback queued by inetdev_destroy(). Recovers the in_device from
 * its embedded rcu_head.
 * NOTE(review): the statement that acts on idev (presumably dropping a
 * reference) is not visible in this listing.
 */
177 static void in_dev_rcu_put(struct rcu_head *head)
179 struct in_device *idev = container_of(head, struct in_device, rcu_head);
/*
 * Tear down the IPv4 state of a device.
 *
 * Visible steps: destroy multicast state, delete every entry of
 * in_dev->ifa_list through inet_del_ifa() (always removing the list head),
 * unregister the devinet and neighbour sysctl entries, release the ARP
 * neighbour parameters, and queue in_dev_rcu_put() with call_rcu().
 *
 * NOTE(review): the loopback_dev test at the top has an action that is not
 * visible here (presumably an early return), and the loop body may contain
 * further statements that were dropped from this listing.
 */
183 static void inetdev_destroy(struct in_device *in_dev)
185 struct in_ifaddr *ifa;
186 struct net_device *dev;
191 if (dev == &loopback_dev)
196 ip_mc_destroy_dev(in_dev);
198 while ((ifa = in_dev->ifa_list) != NULL) {
199 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
204 devinet_sysctl_unregister(&in_dev->cnf);
210 neigh_sysctl_unregister(in_dev->arp_parms);
212 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
215 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
/*
 * Scan the primary addresses of in_dev for one whose subnet contains a
 * and, unless b is zero, also contains b.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably non-zero is returned on a match and zero otherwise.
 */
218 int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
221 for_primary_ifa(in_dev) {
222 if (inet_ifa_match(a, ifa)) {
223 if (!b || inet_ifa_match(b, ifa)) {
228 } endfor_ifa(in_dev);
/*
 * Unlink the address that *ifap points at from in_dev and announce it.
 *
 * in_dev: device state owning the list.
 * ifap:   the list link (head pointer or a previous ifa_next) that points
 *         at the address to delete.
 * The third parameter is cut off in this listing; callers pass 0 or 1.
 *
 * Visible behaviour:
 *  - If the address is a primary, the entries after it are scanned.
 *    Secondaries with the same mask and subnet are unlinked and announced
 *    (RTM_DELADDR, then NETDEV_DOWN) when IN_DEV_PROMOTE_SECONDARIES is
 *    off. When it is on, a candidate is kept in promote by lines that are
 *    not visible here.
 *  - The address itself is unlinked and announced the same way.
 *  - inetdev_destroy() is called when the list has become empty.
 *    NOTE(review): this is presumably guarded by the cut-off third
 *    parameter; confirm against the full source.
 *  - A remembered secondary is promoted: IFA_F_SECONDARY is cleared and
 *    RTM_NEWADDR / NETDEV_UP are sent for it.
 */
233 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
236 struct in_ifaddr *promote = NULL;
237 struct in_ifaddr *ifa1 = *ifap;
241 /* 1. Deleting primary ifaddr forces deletion of all secondaries
242 * unless alias promotion is set
245 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
246 struct in_ifaddr *ifa;
247 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
249 while ((ifa = *ifap1) != NULL) {
250 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
251 ifa1->ifa_mask != ifa->ifa_mask ||
252 !inet_ifa_match(ifa1->ifa_address, ifa)) {
253 ifap1 = &ifa->ifa_next;
257 if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
258 *ifap1 = ifa->ifa_next;
260 rtmsg_ifa(RTM_DELADDR, ifa);
261 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa);
272 *ifap = ifa1->ifa_next;
274 /* 3. Announce address deletion */
276 /* Send message first, then call notifier.
277 At first sight, FIB update triggered by notifier
278 will refer to already deleted ifaddr, that could confuse
279 netlink listeners. It is not true: look, gated sees
280 that route deleted and if it still thinks that ifaddr
281 is valid, it will try to restore deleted routes... Grr.
282 So that, this order is correct.
284 rtmsg_ifa(RTM_DELADDR, ifa1);
285 notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
289 if (!in_dev->ifa_list)
290 inetdev_destroy(in_dev);
293 if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
294 /* not sure if we should send a delete notify first? */
295 promote->ifa_flags &= ~IFA_F_SECONDARY;
296 rtmsg_ifa(RTM_NEWADDR, promote);
297 notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
/*
 * Link ifa into the address list of ifa->ifa_dev and announce it.
 *
 * Visible behaviour:
 *  - IFA_F_SECONDARY is cleared first, then the existing list is walked.
 *  - last_primary tracks the link behind the last primary whose scope is
 *    not smaller than the scope of the new address.
 *  - If an existing entry has the same mask and covers the same subnet,
 *    the new address is flagged IFA_F_SECONDARY.
 *  - For a primary address net_srandom() is reseeded with the local
 *    address.
 *  - After linking, RTM_NEWADDR is sent and then the notifier chain is run
 *    with NETDEV_UP.
 *
 * NOTE(review): the handling of a zero ifa_local, of an identical local
 * address, of a scope mismatch, the choice of insertion point and the
 * return values are on lines that are not visible in this listing.
 */
301 static int inet_insert_ifa(struct in_ifaddr *ifa)
303 struct in_device *in_dev = ifa->ifa_dev;
304 struct in_ifaddr *ifa1, **ifap, **last_primary;
308 if (!ifa->ifa_local) {
313 ifa->ifa_flags &= ~IFA_F_SECONDARY;
314 last_primary = &in_dev->ifa_list;
316 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
317 ifap = &ifa1->ifa_next) {
318 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
319 ifa->ifa_scope <= ifa1->ifa_scope)
320 last_primary = &ifa1->ifa_next;
321 if (ifa1->ifa_mask == ifa->ifa_mask &&
322 inet_ifa_match(ifa1->ifa_address, ifa)) {
323 if (ifa1->ifa_local == ifa->ifa_local) {
327 if (ifa1->ifa_scope != ifa->ifa_scope) {
331 ifa->ifa_flags |= IFA_F_SECONDARY;
335 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
336 net_srandom(ifa->ifa_local);
340 ifa->ifa_next = *ifap;
343 /* Send message first, then call notifier.
344 Notifier will trigger FIB update, so that
345 listeners of netlink will know about new ifaddr */
346 rtmsg_ifa(RTM_NEWADDR, ifa);
347 notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
/*
 * Bind ifa to dev and insert it into the device address list.
 *
 * The in_device is looked up with __in_dev_get_rtnl() and created with
 * inetdev_init() on a path whose condition is not visible here (presumably
 * when the lookup returned NULL). ifa->ifa_dev is pointed at the
 * in_device, a loopback local address is forced to RT_SCOPE_HOST, and the
 * result of inet_insert_ifa() is returned.
 */
352 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
354 struct in_device *in_dev = __in_dev_get_rtnl(dev);
359 in_dev = inetdev_init(dev);
365 if (ifa->ifa_dev != in_dev) {
366 BUG_TRAP(!ifa->ifa_dev);
368 ifa->ifa_dev = in_dev;
370 if (LOOPBACK(ifa->ifa_local))
371 ifa->ifa_scope = RT_SCOPE_HOST;
372 return inet_insert_ifa(ifa);
/*
 * Look up the in_device of the interface with index ifindex (exported
 * symbol). The device list is searched under read_lock(&dev_base_lock)
 * and the in_device is obtained with in_dev_get().
 * NOTE(review): the check on dev and the return statement are not visible
 * in this listing; in_dev starts out as NULL, so NULL is presumably the
 * result when no device matches.
 */
375 struct in_device *inetdev_by_index(int ifindex)
377 struct net_device *dev;
378 struct in_device *in_dev = NULL;
379 read_lock(&dev_base_lock);
380 dev = __dev_get_by_index(ifindex);
382 in_dev = in_dev_get(dev);
383 read_unlock(&dev_base_lock);
/*
 * Search the primary addresses of in_dev for one whose mask equals the
 * requested mask and whose subnet contains prefix.
 * NOTE(review): the tail of the parameter list and the return statements
 * are not visible in this listing.
 */
387 /* Called only from RTNL semaphored context. No locks. */
389 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
394 for_primary_ifa(in_dev) {
395 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
397 } endfor_ifa(in_dev);
/*
 * rtnetlink handler for RTM_DELADDR (see inet_rtnetlink_table).
 *
 * arg is the parsed attribute array. The in_device is found from
 * ifm->ifa_index and the reference taken by the lookup is dropped again
 * with __in_dev_put(). The address list is then walked; an entry is
 * rejected when a supplied IFA_LOCAL, IFA_LABEL or IFA_ADDRESS (plus
 * prefix length) attribute does not match it. inet_del_ifa() is called
 * with 1 as its last argument for the entry that is accepted.
 *
 * Returns -EADDRNOTAVAIL on the visible final path.
 * NOTE(review): the skip and success-return statements are not visible.
 */
401 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
403 struct rtattr **rta = arg;
404 struct in_device *in_dev;
405 struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
406 struct in_ifaddr *ifa, **ifap;
410 if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL)
412 __in_dev_put(in_dev);
414 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
415 ifap = &ifa->ifa_next) {
416 if ((rta[IFA_LOCAL - 1] &&
417 memcmp(RTA_DATA(rta[IFA_LOCAL - 1]),
418 &ifa->ifa_local, 4)) ||
419 (rta[IFA_LABEL - 1] &&
420 rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) ||
421 (rta[IFA_ADDRESS - 1] &&
422 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
423 !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]),
426 inet_del_ifa(in_dev, ifap, 1);
430 return -EADDRNOTAVAIL;
/*
 * rtnetlink handler for RTM_NEWADDR (see inet_rtnetlink_table).
 *
 * Visible behaviour:
 *  - The request is checked: prefix length at most 32 and IFA_LOCAL
 *    present.
 *  - The device is looked up by ifm->ifa_index; its in_device is fetched
 *    or created with inetdev_init().
 *  - A new in_ifaddr is allocated and filled from the attributes:
 *    IFA_ADDRESS defaults to IFA_LOCAL, broadcast and anycast are copied
 *    when supplied, and prefix length, mask, flags and scope come from
 *    the ifaddrmsg header.
 *  - The label comes from IFA_LABEL and dev->name.
 *    NOTE(review): the line between those two copies is not visible;
 *    presumably dev->name is only the fallback.
 *  - The address is inserted with inet_insert_ifa().
 *
 * NOTE(review): the declaration of rc, the error returns and the final
 * return are not visible in this listing.
 */
433 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 struct rtattr **rta = arg;
436 struct net_device *dev;
437 struct in_device *in_dev;
438 struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
439 struct in_ifaddr *ifa;
444 if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1])
448 if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
452 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
453 in_dev = inetdev_init(dev);
458 if ((ifa = inet_alloc_ifa()) == NULL)
461 if (!rta[IFA_ADDRESS - 1])
462 rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1];
463 memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4);
464 memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4);
465 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
466 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
467 if (rta[IFA_BROADCAST - 1])
468 memcpy(&ifa->ifa_broadcast,
469 RTA_DATA(rta[IFA_BROADCAST - 1]), 4);
470 if (rta[IFA_ANYCAST - 1])
471 memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4);
472 ifa->ifa_flags = ifm->ifa_flags;
473 ifa->ifa_scope = ifm->ifa_scope;
475 ifa->ifa_dev = in_dev;
476 if (rta[IFA_LABEL - 1])
477 rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ);
479 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
481 rc = inet_insert_ifa(ifa);
487 * Determine a default network mask, based on the IP address.
/*
 * Classful prefix length for addr. rc starts at -1, which stands for an
 * address that is not covered by the visible class tests; devinet_ioctl()
 * treats a negative result as an invalid address.
 * NOTE(review): the first branch of the chain and the value assigned in
 * each branch are not visible in this listing.
 */
490 static __inline__ int inet_abc_len(u32 addr)
492 int rc = -1; /* Something else, probably a multicast. */
501 else if (IN_CLASSB(addr))
503 else if (IN_CLASSC(addr))
/*
 * Handler for the IPv4 interface address ioctls (exported symbol).
 *
 * cmd: one of the SIOCGIF... / SIOCSIF... requests handled below.
 * arg: user pointer to a struct ifreq, read on entry and written back at
 *      the end.
 *
 * Visible flow:
 *  1. Copy the ifreq from user space, force NUL termination of the name,
 *     keep a copy of the caller-supplied sockaddr in sin_orig, locate a
 *     colon in the name, and call dev_load() on the name.
 *  2. First switch: the get requests record whether the caller passed an
 *     AF_INET address (tryaddrmatch) and reset sin to an empty AF_INET
 *     sockaddr. The set requests require CAP_NET_ADMIN and an AF_INET
 *     address.
 *  3. Resolve the device by name and its in_device. Match an address by
 *     label plus caller address first, then by label alone.
 *  4. Without a matching address every request except SIOCSIFADDR and
 *     SIOCSIFFLAGS ends with -EADDRNOTAVAIL.
 *  5. Second switch: get requests copy ifa_local, ifa_broadcast,
 *     ifa_address or ifa_mask into sin. Set requests unlink the address
 *     with inet_del_ifa(..., 0), modify it and put it back with
 *     inet_insert_ifa() or inet_set_ifa(). SIOCSIFADDR derives a classful
 *     prefix unless the device is IFF_POINTOPOINT, in which case /32 is
 *     used.
 *  6. The ifreq is copied back to user space; -EFAULT if that fails.
 *
 * NOTE(review): many control-flow lines (case labels, break, goto, error
 * returns, locking) are not visible in this listing, so the exact exit
 * paths cannot be confirmed from here.
 */
511 int devinet_ioctl(unsigned int cmd, void __user *arg)
514 struct sockaddr_in sin_orig;
515 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
516 struct in_device *in_dev;
517 struct in_ifaddr **ifap = NULL;
518 struct in_ifaddr *ifa = NULL;
519 struct net_device *dev;
522 int tryaddrmatch = 0;
525 * Fetch the caller's info block into kernel space
528 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
530 ifr.ifr_name[IFNAMSIZ - 1] = 0;
532 /* save original address for comparison */
533 memcpy(&sin_orig, sin, sizeof(*sin));
535 colon = strchr(ifr.ifr_name, ':');
540 dev_load(ifr.ifr_name);
544 case SIOCGIFADDR: /* Get interface address */
545 case SIOCGIFBRDADDR: /* Get the broadcast address */
546 case SIOCGIFDSTADDR: /* Get the destination address */
547 case SIOCGIFNETMASK: /* Get the netmask for the interface */
548 /* Note that these ioctls will not sleep,
549 so that we do not impose a lock.
550 One day we will be forced to put shlock here (I mean SMP)
552 tryaddrmatch = (sin_orig.sin_family == AF_INET);
553 memset(sin, 0, sizeof(*sin));
554 sin->sin_family = AF_INET;
559 if (!capable(CAP_NET_ADMIN))
562 case SIOCSIFADDR: /* Set interface address (and family) */
563 case SIOCSIFBRDADDR: /* Set the broadcast address */
564 case SIOCSIFDSTADDR: /* Set the destination address */
565 case SIOCSIFNETMASK: /* Set the netmask for the interface */
567 if (!capable(CAP_NET_ADMIN))
570 if (sin->sin_family != AF_INET)
581 if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
587 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
589 /* Matthias Andree */
590 /* compare label and address (4.4BSD style) */
591 /* note: we only do this for a limited set of ioctls
592 and only if the original address family was AF_INET.
593 This is checked above. */
594 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
595 ifap = &ifa->ifa_next) {
596 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
597 sin_orig.sin_addr.s_addr ==
603 /* we didn't get a match, maybe the application is
604 4.3BSD-style and passed in junk so we fall back to
605 comparing just the label */
607 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
608 ifap = &ifa->ifa_next)
609 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
614 ret = -EADDRNOTAVAIL;
615 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
619 case SIOCGIFADDR: /* Get interface address */
620 sin->sin_addr.s_addr = ifa->ifa_local;
623 case SIOCGIFBRDADDR: /* Get the broadcast address */
624 sin->sin_addr.s_addr = ifa->ifa_broadcast;
627 case SIOCGIFDSTADDR: /* Get the destination address */
628 sin->sin_addr.s_addr = ifa->ifa_address;
631 case SIOCGIFNETMASK: /* Get the netmask for the interface */
632 sin->sin_addr.s_addr = ifa->ifa_mask;
637 ret = -EADDRNOTAVAIL;
641 if (!(ifr.ifr_flags & IFF_UP))
642 inet_del_ifa(in_dev, ifap, 1);
645 ret = dev_change_flags(dev, ifr.ifr_flags);
648 case SIOCSIFADDR: /* Set interface address (and family) */
650 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
655 if ((ifa = inet_alloc_ifa()) == NULL)
658 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
660 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
663 if (ifa->ifa_local == sin->sin_addr.s_addr)
665 inet_del_ifa(in_dev, ifap, 0);
666 ifa->ifa_broadcast = 0;
667 ifa->ifa_anycast = 0;
670 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
672 if (!(dev->flags & IFF_POINTOPOINT)) {
673 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
674 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
675 if ((dev->flags & IFF_BROADCAST) &&
676 ifa->ifa_prefixlen < 31)
677 ifa->ifa_broadcast = ifa->ifa_address |
680 ifa->ifa_prefixlen = 32;
681 ifa->ifa_mask = inet_make_mask(32);
683 ret = inet_set_ifa(dev, ifa);
686 case SIOCSIFBRDADDR: /* Set the broadcast address */
688 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
689 inet_del_ifa(in_dev, ifap, 0);
690 ifa->ifa_broadcast = sin->sin_addr.s_addr;
691 inet_insert_ifa(ifa);
695 case SIOCSIFDSTADDR: /* Set the destination address */
697 if (ifa->ifa_address == sin->sin_addr.s_addr)
700 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
703 inet_del_ifa(in_dev, ifap, 0);
704 ifa->ifa_address = sin->sin_addr.s_addr;
705 inet_insert_ifa(ifa);
708 case SIOCSIFNETMASK: /* Set the netmask for the interface */
711 * The mask we set must be legal.
714 if (bad_mask(sin->sin_addr.s_addr, 0))
717 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
718 u32 old_mask = ifa->ifa_mask;
719 inet_del_ifa(in_dev, ifap, 0);
720 ifa->ifa_mask = sin->sin_addr.s_addr;
721 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
723 /* See if current broadcast address matches
724 * with current netmask, then recalculate
725 * the broadcast address. Otherwise it's a
726 * funny address, so don't touch it since
727 * the user seems to know what (s)he's doing...
729 if ((dev->flags & IFF_BROADCAST) &&
730 (ifa->ifa_prefixlen < 31) &&
731 (ifa->ifa_broadcast ==
732 (ifa->ifa_local|~old_mask))) {
733 ifa->ifa_broadcast = (ifa->ifa_local |
734 ~sin->sin_addr.s_addr);
736 inet_insert_ifa(ifa);
746 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
// gifconf callback, registered for PF_INET by devinet_init().
//
// For every address on dev an ifreq is zeroed, given a name (the address
// label or dev->name) and an AF_INET sockaddr, and copied to buf with
// copy_to_user(). buf, len and done each move by sizeof(struct ifreq) per
// entry. Devices without an in_device or without addresses take the early
// exit at the top.
//
// NOTE(review): the condition choosing between label and dev->name, the
// address value stored, the handling of len < sizeof(ifr), the error path
// and the return value are on lines not visible in this listing.
750 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
752 struct in_device *in_dev = __in_dev_get_rtnl(dev);
753 struct in_ifaddr *ifa;
757 if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
760 for (; ifa; ifa = ifa->ifa_next) {
765 if (len < (int) sizeof(ifr))
767 memset(&ifr, 0, sizeof(struct ifreq));
769 strcpy(ifr.ifr_name, ifa->ifa_label);
771 strcpy(ifr.ifr_name, dev->name);
773 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
774 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
777 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
781 buf += sizeof(struct ifreq);
782 len -= sizeof(struct ifreq);
783 done += sizeof(struct ifreq);
// Choose a local source address (exported symbol).
//
// dev:   preferred device.
// dst:   destination used for subnet matching; 0 matches any subnet.
// scope: largest ifa_scope value that is acceptable.
//
// First pass: the primary addresses of dev are examined. Entries with
// ifa_scope above scope are tested first, and ifa_local of an entry whose
// subnet contains dst (or any entry when dst is 0) is taken.
// Second pass: all devices are walked under dev_base_lock and the first
// primary address whose scope is not RT_SCOPE_LINK and does not exceed
// scope is taken.
//
// NOTE(review): the continue, break and goto lines, the RCU locking and
// the return statement are not visible in this listing.
789 u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
792 struct in_device *in_dev;
795 in_dev = __in_dev_get_rcu(dev);
799 for_primary_ifa(in_dev) {
800 if (ifa->ifa_scope > scope)
802 if (!dst || inet_ifa_match(dst, ifa)) {
803 addr = ifa->ifa_local;
807 addr = ifa->ifa_local;
808 } endfor_ifa(in_dev);
815 /* Not loopback addresses on loopback should be preferred
816 in this case. It is important that lo is the first interface
819 read_lock(&dev_base_lock);
821 for (dev = dev_base; dev; dev = dev->next) {
822 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
825 for_primary_ifa(in_dev) {
826 if (ifa->ifa_scope != RT_SCOPE_LINK &&
827 ifa->ifa_scope <= scope) {
828 addr = ifa->ifa_local;
829 goto out_unlock_both;
831 } endfor_ifa(in_dev);
834 read_unlock(&dev_base_lock);
// Per-device helper used by inet_confirm_addr().
//
// Scans the addresses of in_dev. An entry whose ifa_local equals local (or
// any entry when local is 0) and whose scope does not exceed scope
// supplies the candidate addr. The flag same records whether local and dst
// (each ignored when 0) fall inside the subnet of the entry at hand.
// Returns addr when same is set, otherwise 0.
//
// NOTE(review): the loop head, the first half of the leading condition and
// the break and continue statements are not visible in this listing.
840 static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
841 u32 local, int scope)
848 (local == ifa->ifa_local || !local) &&
849 ifa->ifa_scope <= scope) {
850 addr = ifa->ifa_local;
855 same = (!local || inet_ifa_match(local, ifa)) &&
856 (!dst || inet_ifa_match(dst, ifa));
860 /* Is the selected addr into dst subnet? */
861 if (inet_ifa_match(addr, ifa))
863 /* No, then can we use new local src? */
864 if (ifa->ifa_scope <= scope) {
865 addr = ifa->ifa_local;
868 /* search for large dst subnet for addr */
872 } endfor_ifa(in_dev);
874 return same? addr : 0;
878 * Confirm that local IP address exists using wildcards:
879 * - dev: only on this interface, 0=any interface
880 * - dst: only in the same subnet as dst, 0=any dst
881 * - local: address, 0=autoselect the local address
882 * - scope: maximum allowed scope value for the local address
/*
 * Entry point for the wildcard address check.
 *
 * When an in_device can be obtained for dev, the answer comes from
 * confirm_addr_indev() on that device alone. Otherwise all devices are
 * walked under dev_base_lock and confirm_addr_indev() is tried on each
 * one that has an in_device.
 *
 * NOTE(review): the test that selects between the two modes, the loop
 * exit on the first hit, the RCU locking and the return statement are not
 * visible in this listing.
 */
884 u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope)
887 struct in_device *in_dev;
891 if ((in_dev = __in_dev_get_rcu(dev)))
892 addr = confirm_addr_indev(in_dev, dst, local, scope);
898 read_lock(&dev_base_lock);
900 for (dev = dev_base; dev; dev = dev->next) {
901 if ((in_dev = __in_dev_get_rcu(dev))) {
902 addr = confirm_addr_indev(in_dev, dst, local, scope);
908 read_unlock(&dev_base_lock);
/*
 * Add nb to inetaddr_chain, the chain that is run when addresses are
 * inserted or deleted (exported symbol). Returns whatever
 * notifier_chain_register() returns.
 */
917 int register_inetaddr_notifier(struct notifier_block *nb)
919 return notifier_chain_register(&inetaddr_chain, nb);
/*
 * Remove nb from inetaddr_chain (exported symbol). Returns whatever
 * notifier_chain_unregister() returns.
 */
922 int unregister_inetaddr_notifier(struct notifier_block *nb)
924 return notifier_chain_unregister(&inetaddr_chain, nb);
927 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
928 * alias numbering and to create unique labels if possible.
// Rewrite the label of every address on in_dev after dev was renamed.
//
// For each address the previous label is saved in old and the label is
// overwritten with dev->name. A suffix (pointed at by dot) is then put
// back: strcat() is used when name plus suffix stay below IFNAMSIZ,
// otherwise the suffix is written at the tail of the label buffer so that
// it still fits.
//
// NOTE(review): strchr() searches ifa->ifa_label after it has already been
// overwritten with dev->name, not the saved copy in old. Confirm whether
// the old alias suffix was meant to be taken from old.
// NOTE(review): the counter named, the skip of the first entry and the
// branch around the sprintf() are on lines not visible in this listing.
930 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
932 struct in_ifaddr *ifa;
935 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
936 char old[IFNAMSIZ], *dot;
938 memcpy(old, ifa->ifa_label, IFNAMSIZ);
939 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
942 dot = strchr(ifa->ifa_label, ':');
944 sprintf(old, ":%d", named);
947 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
948 strcat(ifa->ifa_label, dot);
950 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
955 /* Called only under RTNL semaphore */
/*
 * Netdevice notifier callback (installed through ip_netdev_notifier).
 *
 * ptr is the net_device the event refers to.
 *
 * Visible behaviour:
 *  - NETDEV_REGISTER for loopback_dev creates the in_device, panics if
 *    that fails, and sets no_xfrm and no_policy on it.
 *  - A NETDEV_REGISTER that reaches the switch only logs a debug message.
 *  - For loopback_dev an address with INADDR_LOOPBACK, prefix length 8 and
 *    RT_SCOPE_HOST, labelled with the device name, is inserted (the case
 *    label for this branch is not visible).
 *  - NETDEV_UNREGISTER destroys the in_device. According to the inline
 *    comment, NETDEV_CHANGEMTU does the same once the MTU is too small.
 *  - NETDEV_CHANGENAME renames the address labels and, with CONFIG_SYSCTL,
 *    re-registers the devinet and neighbour sysctl entries.
 *
 * NOTE(review): the remaining case labels, break statements and the
 * return value are not visible in this listing.
 */
957 static int inetdev_event(struct notifier_block *this, unsigned long event,
960 struct net_device *dev = ptr;
961 struct in_device *in_dev = __in_dev_get_rtnl(dev);
966 if (event == NETDEV_REGISTER && dev == &loopback_dev) {
967 in_dev = inetdev_init(dev);
969 panic("devinet: Failed to create loopback\n");
970 in_dev->cnf.no_xfrm = 1;
971 in_dev->cnf.no_policy = 1;
977 case NETDEV_REGISTER:
978 printk(KERN_DEBUG "inetdev_event: bug\n");
984 if (dev == &loopback_dev) {
985 struct in_ifaddr *ifa;
986 if ((ifa = inet_alloc_ifa()) != NULL) {
988 ifa->ifa_address = htonl(INADDR_LOOPBACK);
989 ifa->ifa_prefixlen = 8;
990 ifa->ifa_mask = inet_make_mask(8);
992 ifa->ifa_dev = in_dev;
993 ifa->ifa_scope = RT_SCOPE_HOST;
994 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
995 inet_insert_ifa(ifa);
1003 case NETDEV_CHANGEMTU:
1006 /* MTU fell below 68, disable IP */
1007 case NETDEV_UNREGISTER:
1008 inetdev_destroy(in_dev);
1010 case NETDEV_CHANGENAME:
1011 /* Do not notify about label change, this event is
1012 * not interesting to applications using netlink.
1014 inetdev_changename(dev, in_dev);
1016 #ifdef CONFIG_SYSCTL
1017 devinet_sysctl_unregister(&in_dev->cnf);
1018 neigh_sysctl_unregister(in_dev->arp_parms);
1019 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1020 NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1021 devinet_sysctl_register(in_dev, &in_dev->cnf);
// Notifier block that routes netdevice events to inetdev_event(). It is
// registered by devinet_init() through register_netdevice_notifier().
1029 static struct notifier_block ip_netdev_notifier = {
1030 .notifier_call =inetdev_event,
// Append one rtnetlink address message describing ifa to skb.
//
// pid, seq, event and flags are passed on to NLMSG_NEW for the message
// header. The ifaddrmsg part carries AF_INET, the prefix length, the
// address flags with IFA_F_PERMANENT always or-ed in, the scope and the
// interface index. IFA_ADDRESS, IFA_BROADCAST, IFA_ANYCAST and IFA_LABEL
// attributes are added when the matching field is non-zero, and IFA_LOCAL
// is added as well. nlmsg_len is set to the number of bytes appended since
// entry.
//
// NOTE(review): the guard in front of IFA_LOCAL, the failure labels that
// lead to skb_trim() and both return statements are not visible in this
// listing. Callers test the result with <= 0 and < 0.
1033 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1034 u32 pid, u32 seq, int event, unsigned int flags)
1036 struct ifaddrmsg *ifm;
1037 struct nlmsghdr *nlh;
1038 unsigned char *b = skb->tail;
1040 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
1041 ifm = NLMSG_DATA(nlh);
1042 ifm->ifa_family = AF_INET;
1043 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1044 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1045 ifm->ifa_scope = ifa->ifa_scope;
1046 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1047 if (ifa->ifa_address)
1048 RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address);
1050 RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local);
1051 if (ifa->ifa_broadcast)
1052 RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast);
1053 if (ifa->ifa_anycast)
1054 RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast);
1055 if (ifa->ifa_label[0])
1056 RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label);
1057 nlh->nlmsg_len = skb->tail - b;
1062 skb_trim(skb, b - skb->data);
// Dump callback for RTM_GETADDR (see inet_rtnetlink_table).
//
// Walks all devices under dev_base_lock and, for each device that has an
// in_device, its address list. cb->args[0] and cb->args[1] hold the device
// index and address index at which a previous call stopped. Addresses
// below the saved address index are passed over. Each remaining address is
// written with inet_fill_ifaddr() as an RTM_NEWADDR message flagged
// NLM_F_MULTI, and a result <= 0 ends the walk. The address index is
// stored back into cb->args[1].
//
// NOTE(review): the device-index skip logic, the store to cb->args[0], the
// RCU locking and the return value are not visible in this listing.
1066 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1069 struct net_device *dev;
1070 struct in_device *in_dev;
1071 struct in_ifaddr *ifa;
1072 int s_ip_idx, s_idx = cb->args[0];
1074 s_ip_idx = ip_idx = cb->args[1];
1075 read_lock(&dev_base_lock);
1076 for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1082 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1087 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1088 ifa = ifa->ifa_next, ip_idx++) {
1089 if (ip_idx < s_ip_idx)
1091 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1093 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1102 read_unlock(&dev_base_lock);
1104 cb->args[1] = ip_idx;
// Broadcast an address event (RTM_NEWADDR or RTM_DELADDR, as passed in
// event) for ifa to the RTNLGRP_IPV4_IFADDR netlink group.
//
// An skb sized for an ifaddrmsg plus 128 bytes of attributes is allocated
// with GFP_KERNEL. ENOBUFS is reported to the group on one path
// (presumably allocation failure; the test itself is not visible) and
// EINVAL when inet_fill_ifaddr() fails. Otherwise the message is sent with
// netlink_broadcast().
1109 static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
1111 int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128);
1112 struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);
1115 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS);
1116 else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
1118 netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL);
1120 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL);
// rtnetlink dispatch table for PF_INET, installed into
// rtnetlink_links[PF_INET] by devinet_init(). Entries are indexed by
// message type minus RTM_BASE. The address handlers live in this file;
// the route and rule handlers are defined elsewhere. The rule entries
// exist only with CONFIG_IP_MULTIPLE_TABLES.
1124 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1125 [RTM_NEWADDR - RTM_BASE] = { .doit = inet_rtm_newaddr, },
1126 [RTM_DELADDR - RTM_BASE] = { .doit = inet_rtm_deladdr, },
1127 [RTM_GETADDR - RTM_BASE] = { .dumpit = inet_dump_ifaddr, },
1128 [RTM_NEWROUTE - RTM_BASE] = { .doit = inet_rtm_newroute, },
1129 [RTM_DELROUTE - RTM_BASE] = { .doit = inet_rtm_delroute, },
1130 [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute,
1131 .dumpit = inet_dump_fib, },
1132 #ifdef CONFIG_IP_MULTIPLE_TABLES
1133 [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, },
1134 [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, },
1135 [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, },
1139 #ifdef CONFIG_SYSCTL
// Propagate the global forwarding switch.
//
// Reads ipv4_devconf.forwarding, sets the global accept_redirects to its
// logical inverse, copies the value into the default template and, under
// dev_base_lock, into cnf.forwarding of the devices in the list.
//
// NOTE(review): the NULL check on in_dev, the RCU locking and anything
// done after the loop are not visible in this listing.
1141 void inet_forward_change(void)
1143 struct net_device *dev;
1144 int on = ipv4_devconf.forwarding;
1146 ipv4_devconf.accept_redirects = !on;
1147 ipv4_devconf_dflt.forwarding = on;
1149 read_lock(&dev_base_lock);
1150 for (dev = dev_base; dev; dev = dev->next) {
1151 struct in_device *in_dev;
1153 in_dev = __in_dev_get_rcu(dev);
1155 in_dev->cnf.forwarding = on;
1158 read_unlock(&dev_base_lock);
// proc handler for the forwarding sysctl entries.
//
// Delegates parsing to proc_dointvec(). After a write that changed the
// value, the global entry triggers inet_forward_change(). Entries other
// than the default template take a further action that is not visible in
// this listing.
//
// NOTE(review): the declaration of val (the value before the write) and
// the return statement are not visible; presumably ret is returned.
1163 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1164 struct file* filp, void __user *buffer,
1165 size_t *lenp, loff_t *ppos)
1167 int *valp = ctl->data;
1169 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1171 if (write && *valp != val) {
1172 if (valp == &ipv4_devconf.forwarding)
1173 inet_forward_change();
1174 else if (valp != &ipv4_devconf_dflt.forwarding)
// proc handler used by several entries of the devinet sysctl table.
//
// Delegates parsing to proc_dointvec() and takes an extra action when a
// write changed the stored integer.
//
// NOTE(review): the declaration of val, the action taken on change (going
// by the function name, presumably a cache flush) and the return
// statement are not visible in this listing.
1181 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1182 struct file* filp, void __user *buffer,
1183 size_t *lenp, loff_t *ppos)
1185 int *valp = ctl->data;
1187 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1189 if (write && *valp != val)
// sysctl strategy routine paired with ipv4_doint_and_flush() in the
// devinet sysctl table.
//
// Visible checks: a new value must be supplied and must be exactly
// sizeof(int) long, and it is fetched with get_user(). When the caller
// asked for the old value, the requested length is read from oldlenp,
// clamped to table->maxlen, the data is copied out and the length is
// written back.
//
// NOTE(review): the last parameter, the error codes, the store of the new
// value and the return statements are not visible in this listing.
1195 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1196 void __user *oldval, size_t __user *oldlenp,
1197 void __user *newval, size_t newlen,
1200 int *valp = table->data;
1203 if (!newval || !newlen)
1206 if (newlen != sizeof(int))
1209 if (get_user(new, (int __user *)newval))
1215 if (oldval && oldlenp) {
1218 if (get_user(len, oldlenp))
1222 if (len > table->maxlen)
1223 len = table->maxlen;
1224 if (copy_to_user(oldval, valp, len))
1226 if (put_user(len, oldlenp))
// Template sysctl tree for the IPv4 per-device configuration.
//
// devinet_vars holds one entry per tunable. Every .data pointer refers to
// a member of the global ipv4_devconf, which is why
// devinet_sysctl_register() can rebase a copy onto another ipv4_devconf.
// The four small arrays are directory levels identified by
// NET_PROTO_CONF_ALL, NET_IPV4_CONF, NET_IPV4 and CTL_NET, chained to one
// another through .child. The template itself is registered by
// devinet_init().
//
// Handlers: forwarding uses devinet_sysctl_forward. disable_xfrm,
// disable_policy, force_igmp_version and promote_secondaries use
// ipv4_doint_and_flush together with its strategy routine. All other
// entries use proc_dointvec.
//
// NOTE(review): the .mode lines, the directory .procname strings and the
// procname of the NET_IPV4_CONF_TAG entry are not visible in this listing.
1237 static struct devinet_sysctl_table {
1238 struct ctl_table_header *sysctl_header;
1239 ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1240 ctl_table devinet_dev[2];
1241 ctl_table devinet_conf_dir[2];
1242 ctl_table devinet_proto_dir[2];
1243 ctl_table devinet_root_dir[2];
1244 } devinet_sysctl = {
1247 .ctl_name = NET_IPV4_CONF_FORWARDING,
1248 .procname = "forwarding",
1249 .data = &ipv4_devconf.forwarding,
1250 .maxlen = sizeof(int),
1252 .proc_handler = &devinet_sysctl_forward,
1255 .ctl_name = NET_IPV4_CONF_MC_FORWARDING,
1256 .procname = "mc_forwarding",
1257 .data = &ipv4_devconf.mc_forwarding,
1258 .maxlen = sizeof(int),
1260 .proc_handler = &proc_dointvec,
1263 .ctl_name = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1264 .procname = "accept_redirects",
1265 .data = &ipv4_devconf.accept_redirects,
1266 .maxlen = sizeof(int),
1268 .proc_handler = &proc_dointvec,
1271 .ctl_name = NET_IPV4_CONF_SECURE_REDIRECTS,
1272 .procname = "secure_redirects",
1273 .data = &ipv4_devconf.secure_redirects,
1274 .maxlen = sizeof(int),
1276 .proc_handler = &proc_dointvec,
1279 .ctl_name = NET_IPV4_CONF_SHARED_MEDIA,
1280 .procname = "shared_media",
1281 .data = &ipv4_devconf.shared_media,
1282 .maxlen = sizeof(int),
1284 .proc_handler = &proc_dointvec,
1287 .ctl_name = NET_IPV4_CONF_RP_FILTER,
1288 .procname = "rp_filter",
1289 .data = &ipv4_devconf.rp_filter,
1290 .maxlen = sizeof(int),
1292 .proc_handler = &proc_dointvec,
1295 .ctl_name = NET_IPV4_CONF_SEND_REDIRECTS,
1296 .procname = "send_redirects",
1297 .data = &ipv4_devconf.send_redirects,
1298 .maxlen = sizeof(int),
1300 .proc_handler = &proc_dointvec,
1303 .ctl_name = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1304 .procname = "accept_source_route",
1305 .data = &ipv4_devconf.accept_source_route,
1306 .maxlen = sizeof(int),
1308 .proc_handler = &proc_dointvec,
1311 .ctl_name = NET_IPV4_CONF_PROXY_ARP,
1312 .procname = "proxy_arp",
1313 .data = &ipv4_devconf.proxy_arp,
1314 .maxlen = sizeof(int),
1316 .proc_handler = &proc_dointvec,
1319 .ctl_name = NET_IPV4_CONF_MEDIUM_ID,
1320 .procname = "medium_id",
1321 .data = &ipv4_devconf.medium_id,
1322 .maxlen = sizeof(int),
1324 .proc_handler = &proc_dointvec,
1327 .ctl_name = NET_IPV4_CONF_BOOTP_RELAY,
1328 .procname = "bootp_relay",
1329 .data = &ipv4_devconf.bootp_relay,
1330 .maxlen = sizeof(int),
1332 .proc_handler = &proc_dointvec,
1335 .ctl_name = NET_IPV4_CONF_LOG_MARTIANS,
1336 .procname = "log_martians",
1337 .data = &ipv4_devconf.log_martians,
1338 .maxlen = sizeof(int),
1340 .proc_handler = &proc_dointvec,
1343 .ctl_name = NET_IPV4_CONF_TAG,
1345 .data = &ipv4_devconf.tag,
1346 .maxlen = sizeof(int),
1348 .proc_handler = &proc_dointvec,
1351 .ctl_name = NET_IPV4_CONF_ARPFILTER,
1352 .procname = "arp_filter",
1353 .data = &ipv4_devconf.arp_filter,
1354 .maxlen = sizeof(int),
1356 .proc_handler = &proc_dointvec,
1359 .ctl_name = NET_IPV4_CONF_ARP_ANNOUNCE,
1360 .procname = "arp_announce",
1361 .data = &ipv4_devconf.arp_announce,
1362 .maxlen = sizeof(int),
1364 .proc_handler = &proc_dointvec,
1367 .ctl_name = NET_IPV4_CONF_ARP_IGNORE,
1368 .procname = "arp_ignore",
1369 .data = &ipv4_devconf.arp_ignore,
1370 .maxlen = sizeof(int),
1372 .proc_handler = &proc_dointvec,
1375 .ctl_name = NET_IPV4_CONF_NOXFRM,
1376 .procname = "disable_xfrm",
1377 .data = &ipv4_devconf.no_xfrm,
1378 .maxlen = sizeof(int),
1380 .proc_handler = &ipv4_doint_and_flush,
1381 .strategy = &ipv4_doint_and_flush_strategy,
1384 .ctl_name = NET_IPV4_CONF_NOPOLICY,
1385 .procname = "disable_policy",
1386 .data = &ipv4_devconf.no_policy,
1387 .maxlen = sizeof(int),
1389 .proc_handler = &ipv4_doint_and_flush,
1390 .strategy = &ipv4_doint_and_flush_strategy,
1393 .ctl_name = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1394 .procname = "force_igmp_version",
1395 .data = &ipv4_devconf.force_igmp_version,
1396 .maxlen = sizeof(int),
1398 .proc_handler = &ipv4_doint_and_flush,
1399 .strategy = &ipv4_doint_and_flush_strategy,
1402 .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1403 .procname = "promote_secondaries",
1404 .data = &ipv4_devconf.promote_secondaries,
1405 .maxlen = sizeof(int),
1407 .proc_handler = &ipv4_doint_and_flush,
1408 .strategy = &ipv4_doint_and_flush_strategy,
1413 .ctl_name = NET_PROTO_CONF_ALL,
1416 .child = devinet_sysctl.devinet_vars,
1419 .devinet_conf_dir = {
1421 .ctl_name = NET_IPV4_CONF,
1424 .child = devinet_sysctl.devinet_dev,
1427 .devinet_proto_dir = {
1429 .ctl_name = NET_IPV4,
1432 .child = devinet_sysctl.devinet_conf_dir,
1435 .devinet_root_dir = {
1437 .ctl_name = CTL_NET,
1440 .child = devinet_sysctl.devinet_proto_dir,
// Register a private copy of the devinet sysctl tree for one ipv4_devconf.
//
// in_dev: owning device state, or NULL (devinet_init passes NULL for the
//         default template).
// p:      the configuration block the new entries operate on.
//
// The template devinet_sysctl is copied into a fresh allocation. Each
// .data pointer in devinet_vars is shifted by the distance between p and
// the global ipv4_devconf so that it addresses the same member inside p.
// The per-device directory is named after dev->name with ctl_name
// dev->ifindex, or "default" with NET_PROTO_CONF_DEFAULT. The name is
// duplicated with kstrdup() so that later renames of the device cannot
// change it underneath sysctl. All .child links are redirected into the
// copy and the cached .de pointers are cleared before
// register_sysctl_table() is called.
//
// NOTE(review): the allocation-failure checks, the test choosing between
// the device name and "default", the store into p->sysctl and the cleanup
// after a failed registration are not visible in this listing.
1445 static void devinet_sysctl_register(struct in_device *in_dev,
1446 struct ipv4_devconf *p)
1449 struct net_device *dev = in_dev ? in_dev->dev : NULL;
1450 struct devinet_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
1451 char *dev_name = NULL;
1455 memcpy(t, &devinet_sysctl, sizeof(*t));
1456 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1457 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1458 t->devinet_vars[i].de = NULL;
1462 dev_name = dev->name;
1463 t->devinet_dev[0].ctl_name = dev->ifindex;
1465 dev_name = "default";
1466 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1470 * Make a copy of dev_name, because '.procname' is regarded as const
1471 * by sysctl and we wouldn't want anyone to change it under our feet
1472 * (see SIOCSIFNAME).
1474 dev_name = kstrdup(dev_name, GFP_KERNEL);
1478 t->devinet_dev[0].procname = dev_name;
1479 t->devinet_dev[0].child = t->devinet_vars;
1480 t->devinet_dev[0].de = NULL;
1481 t->devinet_conf_dir[0].child = t->devinet_dev;
1482 t->devinet_conf_dir[0].de = NULL;
1483 t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1484 t->devinet_proto_dir[0].de = NULL;
1485 t->devinet_root_dir[0].child = t->devinet_proto_dir;
1486 t->devinet_root_dir[0].de = NULL;
1488 t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1489 if (!t->sysctl_header)
// Undo devinet_sysctl_register() for p: the table stored in p->sysctl is
// unregistered and the duplicated directory name is freed.
//
// NOTE(review): the guard on p->sysctl, the reset of p->sysctl and the
// release of the table itself are not visible in this listing.
1503 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1506 struct devinet_sysctl_table *t = p->sysctl;
1508 unregister_sysctl_table(t->sysctl_header);
1509 kfree(t->devinet_dev[0].procname);
// Init-time setup of this file. Registers inet_gifconf as the PF_INET
// gifconf handler, hooks inetdev_event into the netdevice notifier chain
// and installs inet_rtnetlink_table for PF_INET. With CONFIG_SYSCTL it
// also registers the global sysctl template and a copy for the default
// configuration ipv4_devconf_dflt.
1515 void __init devinet_init(void)
1517 register_gifconf(PF_INET, inet_gifconf);
1518 register_netdevice_notifier(&ip_netdev_notifier);
1519 rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1520 #ifdef CONFIG_SYSCTL
1521 devinet_sysctl.sysctl_header =
1522 register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
1523 devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
// Functions from this file that are exported for use by modules.
1527 EXPORT_SYMBOL(devinet_ioctl);
1528 EXPORT_SYMBOL(in_dev_finish_destroy);
1529 EXPORT_SYMBOL(inet_select_addr);
1530 EXPORT_SYMBOL(inetdev_by_index);
1531 EXPORT_SYMBOL(register_inetaddr_notifier);
1532 EXPORT_SYMBOL(unregister_inetaddr_notifier);