[NET]: Make the device list and device lookups per namespace.
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65
/*
 * Globally visible IPv4 device configuration block.  Each entry of .data
 * is indexed by (NET_IPV4_CONF_* - 1); the four entries named here start
 * out as 1 and every other entry is implicitly zero.
 */
66 struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
/*
 * Template configuration: inetdev_init() copies this block into the cnf
 * member of every in_device it creates.  Compared with ipv4_devconf it
 * additionally enables NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
/* Shorthand: apply IPV4_DEVCONF() to the template block above. */
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
86
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr().  The four address attributes are 32-bit values;
 * the label is a string of at most IFNAMSIZ - 1 bytes.
 */
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_ANYCAST]           = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
/*
 * Forward declaration; in this file it is called with RTM_NEWADDR or
 * RTM_DELADDR, the address concerned, and the originating netlink
 * header and pid (NULL and 0 when there is no netlink request).
 */
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
/*
 * Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted into or removed from a device.
 */
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Defined further down; declared here because inetdev_destroy() uses it. */
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
100 #ifdef CONFIG_SYSCTL
/* Sysctl hooks, called from inetdev_init() and inetdev_destroy(). */
101 static void devinet_sysctl_register(struct in_device *in_dev,
102                                     struct ipv4_devconf *p);
103 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
104 #endif
105
106 /* Locks all the inet devices. */
107
108 static struct in_ifaddr *inet_alloc_ifa(void)
109 {
110         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
111
112         if (ifa) {
113                 INIT_RCU_HEAD(&ifa->rcu_head);
114         }
115
116         return ifa;
117 }
118
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122         if (ifa->ifa_dev)
123                 in_dev_put(ifa->ifa_dev);
124         kfree(ifa);
125 }
126
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134         struct net_device *dev = idev->dev;
135
136         BUG_TRAP(!idev->ifa_list);
137         BUG_TRAP(!idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140                idev, dev ? dev->name : "NIL");
141 #endif
142         dev_put(dev);
143         if (!idev->dead)
144                 printk("Freeing alive in_device %p\n", idev);
145         else {
146                 kfree(idev);
147         }
148 }
149
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152         struct in_device *in_dev;
153
154         ASSERT_RTNL();
155
156         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157         if (!in_dev)
158                 goto out;
159         INIT_RCU_HEAD(&in_dev->rcu_head);
160         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
161         in_dev->cnf.sysctl = NULL;
162         in_dev->dev = dev;
163         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164                 goto out_kfree;
165         /* Reference in_dev->dev */
166         dev_hold(dev);
167 #ifdef CONFIG_SYSCTL
168         neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169                               NET_IPV4_NEIGH, "ipv4", NULL, NULL);
170 #endif
171
172         /* Account for reference dev->ip_ptr (below) */
173         in_dev_hold(in_dev);
174
175 #ifdef CONFIG_SYSCTL
176         devinet_sysctl_register(in_dev, &in_dev->cnf);
177 #endif
178         ip_mc_init_dev(in_dev);
179         if (dev->flags & IFF_UP)
180                 ip_mc_up(in_dev);
181
182         /* we can receive as soon as ip_ptr is set -- do this last */
183         rcu_assign_pointer(dev->ip_ptr, in_dev);
184 out:
185         return in_dev;
186 out_kfree:
187         kfree(in_dev);
188         in_dev = NULL;
189         goto out;
190 }
191
192 static void in_dev_rcu_put(struct rcu_head *head)
193 {
194         struct in_device *idev = container_of(head, struct in_device, rcu_head);
195         in_dev_put(idev);
196 }
197
198 static void inetdev_destroy(struct in_device *in_dev)
199 {
200         struct in_ifaddr *ifa;
201         struct net_device *dev;
202
203         ASSERT_RTNL();
204
205         dev = in_dev->dev;
206         if (dev == &loopback_dev)
207                 return;
208
209         in_dev->dead = 1;
210
211         ip_mc_destroy_dev(in_dev);
212
213         while ((ifa = in_dev->ifa_list) != NULL) {
214                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215                 inet_free_ifa(ifa);
216         }
217
218 #ifdef CONFIG_SYSCTL
219         devinet_sysctl_unregister(&in_dev->cnf);
220 #endif
221
222         dev->ip_ptr = NULL;
223
224 #ifdef CONFIG_SYSCTL
225         neigh_sysctl_unregister(in_dev->arp_parms);
226 #endif
227         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
228         arp_ifdown(dev);
229
230         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
231 }
232
233 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
234 {
235         rcu_read_lock();
236         for_primary_ifa(in_dev) {
237                 if (inet_ifa_match(a, ifa)) {
238                         if (!b || inet_ifa_match(b, ifa)) {
239                                 rcu_read_unlock();
240                                 return 1;
241                         }
242                 }
243         } endfor_ifa(in_dev);
244         rcu_read_unlock();
245         return 0;
246 }
247
/*
 * Remove the address *ifap from in_dev's address list and announce it.
 *
 * in_dev:  device owning the list.
 * ifap:    link (list head or a previous entry's ifa_next) that currently
 *          points at the address to remove.
 * destroy: when non-zero the removed address is released through
 *          inet_free_ifa(); otherwise it is only unlinked and stays with
 *          the caller.
 * nlh/pid: passed through unchanged to rtmsg_ifa().
 *
 * If the removed address is a primary one, its secondaries (same mask and
 * matching its address) are either deleted as well or, when
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set, the first of them is promoted
 * to primary.  Must be called with RTNL held.
 */
248 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
249                          int destroy, struct nlmsghdr *nlh, u32 pid)
250 {
251         struct in_ifaddr *promote = NULL;
252         struct in_ifaddr *ifa, *ifa1 = *ifap;
253         struct in_ifaddr *last_prim = in_dev->ifa_list;
254         struct in_ifaddr *prev_prom = NULL;
255         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
256
257         ASSERT_RTNL();
258
259         /* 1. Deleting primary ifaddr forces deletion all secondaries
260          * unless alias promotion is set
261          **/
262
263         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
264                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
265
266                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not below ifa1's;
                         * a promoted address is re-linked right after it. */
267                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
268                             ifa1->ifa_scope <= ifa->ifa_scope)
269                                 last_prim = ifa;
270
                        /* Skip entries that are not secondaries of ifa1. */
271                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
272                             ifa1->ifa_mask != ifa->ifa_mask ||
273                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
274                                 ifap1 = &ifa->ifa_next;
275                                 prev_prom = ifa;
276                                 continue;
277                         }
278
279                         if (!do_promote) {
                                /* No promotion: unlink, announce and free it. */
280                                 *ifap1 = ifa->ifa_next;
281
282                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
283                                 blocking_notifier_call_chain(&inetaddr_chain,
284                                                 NETDEV_DOWN, ifa);
285                                 inet_free_ifa(ifa);
286                         } else {
                                /* First matching secondary becomes the new primary. */
287                                 promote = ifa;
288                                 break;
289                         }
290                 }
291         }
292
293         /* 2. Unlink it */
294
295         *ifap = ifa1->ifa_next;
296
297         /* 3. Announce address deletion */
298
299         /* Send message first, then call notifier.
300            At first sight, FIB update triggered by notifier
301            will refer to already deleted ifaddr, that could confuse
302            netlink listeners. It is not true: look, gated sees
303            that route deleted and if it still thinks that ifaddr
304            is valid, it will try to restore deleted routes... Grr.
305            So that, this order is correct.
306          */
307         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
308         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
309
310         if (promote) {
311
                /* prev_prom is only set when at least one entry was skipped
                 * before the promoted one; in that case move the promoted
                 * entry so that it directly follows last_prim. */
312                 if (prev_prom) {
313                         prev_prom->ifa_next = promote->ifa_next;
314                         promote->ifa_next = last_prim->ifa_next;
315                         last_prim->ifa_next = promote;
316                 }
317
318                 promote->ifa_flags &= ~IFA_F_SECONDARY;
319                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
320                 blocking_notifier_call_chain(&inetaddr_chain,
321                                 NETDEV_UP, promote);
                /* Call fib_add_ifaddr() for every later entry that shares
                 * ifa1's mask and matches its address. */
322                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
323                         if (ifa1->ifa_mask != ifa->ifa_mask ||
324                             !inet_ifa_match(ifa1->ifa_address, ifa))
325                                         continue;
326                         fib_add_ifaddr(ifa);
327                 }
328
329         }
330         if (destroy)
331                 inet_free_ifa(ifa1);
332 }
333
334 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335                          int destroy)
336 {
337         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 }
339
340 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341                              u32 pid)
342 {
343         struct in_device *in_dev = ifa->ifa_dev;
344         struct in_ifaddr *ifa1, **ifap, **last_primary;
345
346         ASSERT_RTNL();
347
348         if (!ifa->ifa_local) {
349                 inet_free_ifa(ifa);
350                 return 0;
351         }
352
353         ifa->ifa_flags &= ~IFA_F_SECONDARY;
354         last_primary = &in_dev->ifa_list;
355
356         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
357              ifap = &ifa1->ifa_next) {
358                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
359                     ifa->ifa_scope <= ifa1->ifa_scope)
360                         last_primary = &ifa1->ifa_next;
361                 if (ifa1->ifa_mask == ifa->ifa_mask &&
362                     inet_ifa_match(ifa1->ifa_address, ifa)) {
363                         if (ifa1->ifa_local == ifa->ifa_local) {
364                                 inet_free_ifa(ifa);
365                                 return -EEXIST;
366                         }
367                         if (ifa1->ifa_scope != ifa->ifa_scope) {
368                                 inet_free_ifa(ifa);
369                                 return -EINVAL;
370                         }
371                         ifa->ifa_flags |= IFA_F_SECONDARY;
372                 }
373         }
374
375         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
376                 net_srandom(ifa->ifa_local);
377                 ifap = last_primary;
378         }
379
380         ifa->ifa_next = *ifap;
381         *ifap = ifa;
382
383         /* Send message first, then call notifier.
384            Notifier will trigger FIB update, so that
385            listeners of netlink will know about new ifaddr */
386         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
387         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
388
389         return 0;
390 }
391
392 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 {
394         return __inet_insert_ifa(ifa, NULL, 0);
395 }
396
397 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 {
399         struct in_device *in_dev = __in_dev_get_rtnl(dev);
400
401         ASSERT_RTNL();
402
403         if (!in_dev) {
404                 inet_free_ifa(ifa);
405                 return -ENOBUFS;
406         }
407         ipv4_devconf_setall(in_dev);
408         if (ifa->ifa_dev != in_dev) {
409                 BUG_TRAP(!ifa->ifa_dev);
410                 in_dev_hold(in_dev);
411                 ifa->ifa_dev = in_dev;
412         }
413         if (LOOPBACK(ifa->ifa_local))
414                 ifa->ifa_scope = RT_SCOPE_HOST;
415         return inet_insert_ifa(ifa);
416 }
417
418 struct in_device *inetdev_by_index(int ifindex)
419 {
420         struct net_device *dev;
421         struct in_device *in_dev = NULL;
422         read_lock(&dev_base_lock);
423         dev = __dev_get_by_index(&init_net, ifindex);
424         if (dev)
425                 in_dev = in_dev_get(dev);
426         read_unlock(&dev_base_lock);
427         return in_dev;
428 }
429
430 /* Called only from RTNL semaphored context. No locks. */
431
432 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
433                                     __be32 mask)
434 {
435         ASSERT_RTNL();
436
437         for_primary_ifa(in_dev) {
438                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
439                         return ifa;
440         } endfor_ifa(in_dev);
441         return NULL;
442 }
443
444 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
445 {
446         struct nlattr *tb[IFA_MAX+1];
447         struct in_device *in_dev;
448         struct ifaddrmsg *ifm;
449         struct in_ifaddr *ifa, **ifap;
450         int err = -EINVAL;
451
452         ASSERT_RTNL();
453
454         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
455         if (err < 0)
456                 goto errout;
457
458         ifm = nlmsg_data(nlh);
459         in_dev = inetdev_by_index(ifm->ifa_index);
460         if (in_dev == NULL) {
461                 err = -ENODEV;
462                 goto errout;
463         }
464
465         __in_dev_put(in_dev);
466
467         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
468              ifap = &ifa->ifa_next) {
469                 if (tb[IFA_LOCAL] &&
470                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
471                         continue;
472
473                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
474                         continue;
475
476                 if (tb[IFA_ADDRESS] &&
477                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
479                         continue;
480
481                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
482                 return 0;
483         }
484
485         err = -EADDRNOTAVAIL;
486 errout:
487         return err;
488 }
489
490 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
491 {
492         struct nlattr *tb[IFA_MAX+1];
493         struct in_ifaddr *ifa;
494         struct ifaddrmsg *ifm;
495         struct net_device *dev;
496         struct in_device *in_dev;
497         int err = -EINVAL;
498
499         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500         if (err < 0)
501                 goto errout;
502
503         ifm = nlmsg_data(nlh);
504         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
505                 err = -EINVAL;
506                 goto errout;
507         }
508
509         dev = __dev_get_by_index(&init_net, ifm->ifa_index);
510         if (dev == NULL) {
511                 err = -ENODEV;
512                 goto errout;
513         }
514
515         in_dev = __in_dev_get_rtnl(dev);
516         if (in_dev == NULL) {
517                 err = -ENOBUFS;
518                 goto errout;
519         }
520
521         ipv4_devconf_setall(in_dev);
522
523         ifa = inet_alloc_ifa();
524         if (ifa == NULL) {
525                 /*
526                  * A potential indev allocation can be left alive, it stays
527                  * assigned to its device and is destroy with it.
528                  */
529                 err = -ENOBUFS;
530                 goto errout;
531         }
532
533         in_dev_hold(in_dev);
534
535         if (tb[IFA_ADDRESS] == NULL)
536                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
537
538         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
539         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
540         ifa->ifa_flags = ifm->ifa_flags;
541         ifa->ifa_scope = ifm->ifa_scope;
542         ifa->ifa_dev = in_dev;
543
544         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
545         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
546
547         if (tb[IFA_BROADCAST])
548                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
549
550         if (tb[IFA_ANYCAST])
551                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
552
553         if (tb[IFA_LABEL])
554                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
555         else
556                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
557
558         return ifa;
559
560 errout:
561         return ERR_PTR(err);
562 }
563
564 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
565 {
566         struct in_ifaddr *ifa;
567
568         ASSERT_RTNL();
569
570         ifa = rtm_to_ifaddr(nlh);
571         if (IS_ERR(ifa))
572                 return PTR_ERR(ifa);
573
574         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
575 }
576
577 /*
578  *      Determine a default network mask, based on the IP address.
579  */
580
581 static __inline__ int inet_abc_len(__be32 addr)
582 {
583         int rc = -1;    /* Something else, probably a multicast. */
584
585         if (ZERONET(addr))
586                 rc = 0;
587         else {
588                 __u32 haddr = ntohl(addr);
589
590                 if (IN_CLASSA(haddr))
591                         rc = 8;
592                 else if (IN_CLASSB(haddr))
593                         rc = 16;
594                 else if (IN_CLASSC(haddr))
595                         rc = 24;
596         }
597
598         return rc;
599 }
600
601
/*
 * IPv4 interface-address ioctl handler.
 *
 * cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK
 *      (queries) or SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
 *      SIOCSIFNETMASK, SIOCSIFFLAGS (modifications).
 * arg: user-space pointer to a struct ifreq; it is read on entry and, for
 *      the query commands, written back with the result.
 *
 * The interface name may have the form "dev:alias".  The part before the
 * colon selects the device; the full name is then compared against the
 * address labels.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         the ifreq could not be copied from / to user space
 *   -EACCES         a modifying command without CAP_NET_ADMIN
 *   -EINVAL         unknown cmd, non-AF_INET family on a set command,
 *                   or an address / mask that fails validation
 *   -ENODEV         no device with that name
 *   -EADDRNOTAVAIL  no matching address (except for SIOCSIFADDR and for
 *                   SIOCSIFFLAGS on a name without a colon)
 *   -ENOBUFS        a new address could not be allocated
 * SIOCSIFFLAGS without a colon returns the result of dev_change_flags();
 * SIOCSIFADDR returns the result of inet_set_ifa().
 */
602 int devinet_ioctl(unsigned int cmd, void __user *arg)
603 {
604         struct ifreq ifr;
605         struct sockaddr_in sin_orig;
606         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
607         struct in_device *in_dev;
608         struct in_ifaddr **ifap = NULL;
609         struct in_ifaddr *ifa = NULL;
610         struct net_device *dev;
611         char *colon;
612         int ret = -EFAULT;
613         int tryaddrmatch = 0;
614
615         /*
616          *      Fetch the caller's info block into kernel space
617          */
618
619         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
620                 goto out;
            /* Force NUL termination of the user-supplied name. */
621         ifr.ifr_name[IFNAMSIZ - 1] = 0;
622
623         /* save original address for comparison */
624         memcpy(&sin_orig, sin, sizeof(*sin));
625
            /* Cut an alias suffix off temporarily so the device lookup
             * sees the bare device name; it is restored further down. */
626         colon = strchr(ifr.ifr_name, ':');
627         if (colon)
628                 *colon = 0;
629
630 #ifdef CONFIG_KMOD
631         dev_load(&init_net, ifr.ifr_name);
632 #endif
633
            /* First switch: permission and argument checks only. */
634         switch (cmd) {
635         case SIOCGIFADDR:       /* Get interface address */
636         case SIOCGIFBRDADDR:    /* Get the broadcast address */
637         case SIOCGIFDSTADDR:    /* Get the destination address */
638         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
639                 /* Note that these ioctls will not sleep,
640                    so that we do not impose a lock.
641                    One day we will be forced to put shlock here (I mean SMP)
642                  */
643                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
644                 memset(sin, 0, sizeof(*sin));
645                 sin->sin_family = AF_INET;
646                 break;
647
648         case SIOCSIFFLAGS:
649                 ret = -EACCES;
650                 if (!capable(CAP_NET_ADMIN))
651                         goto out;
652                 break;
653         case SIOCSIFADDR:       /* Set interface address (and family) */
654         case SIOCSIFBRDADDR:    /* Set the broadcast address */
655         case SIOCSIFDSTADDR:    /* Set the destination address */
656         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
657                 ret = -EACCES;
658                 if (!capable(CAP_NET_ADMIN))
659                         goto out;
660                 ret = -EINVAL;
661                 if (sin->sin_family != AF_INET)
662                         goto out;
663                 break;
664         default:
665                 ret = -EINVAL;
666                 goto out;
667         }
668
669         rtnl_lock();
670
671         ret = -ENODEV;
672         if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
673                 goto done;
674
            /* Restore the full "dev:alias" name for the label comparisons. */
675         if (colon)
676                 *colon = ':';
677
678         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
679                 if (tryaddrmatch) {
680                         /* Matthias Andree */
681                         /* compare label and address (4.4BSD style) */
682                         /* note: we only do this for a limited set of ioctls
683                            and only if the original address family was AF_INET.
684                            This is checked above. */
685                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
686                              ifap = &ifa->ifa_next) {
687                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
688                                     sin_orig.sin_addr.s_addr ==
689                                                         ifa->ifa_address) {
690                                         break; /* found */
691                                 }
692                         }
693                 }
694                 /* we didn't get a match, maybe the application is
695                    4.3BSD-style and passed in junk so we fall back to
696                    comparing just the label */
697                 if (!ifa) {
698                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
699                              ifap = &ifa->ifa_next)
700                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
701                                         break;
702                 }
703         }
704
            /* Only SIOCSIFADDR and SIOCSIFFLAGS may continue without an
             * existing address; everything below may rely on ifa otherwise. */
705         ret = -EADDRNOTAVAIL;
706         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
707                 goto done;
708
            /* Second switch: perform the operation. */
709         switch (cmd) {
710         case SIOCGIFADDR:       /* Get interface address */
711                 sin->sin_addr.s_addr = ifa->ifa_local;
712                 goto rarok;
713
714         case SIOCGIFBRDADDR:    /* Get the broadcast address */
715                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
716                 goto rarok;
717
718         case SIOCGIFDSTADDR:    /* Get the destination address */
719                 sin->sin_addr.s_addr = ifa->ifa_address;
720                 goto rarok;
721
722         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
723                 sin->sin_addr.s_addr = ifa->ifa_mask;
724                 goto rarok;
725
726         case SIOCSIFFLAGS:
                    /* On an alias name, clearing IFF_UP deletes that alias
                     * address; no device flags are changed in that case. */
727                 if (colon) {
728                         ret = -EADDRNOTAVAIL;
729                         if (!ifa)
730                                 break;
731                         ret = 0;
732                         if (!(ifr.ifr_flags & IFF_UP))
733                                 inet_del_ifa(in_dev, ifap, 1);
734                         break;
735                 }
736                 ret = dev_change_flags(dev, ifr.ifr_flags);
737                 break;
738
739         case SIOCSIFADDR:       /* Set interface address (and family) */
740                 ret = -EINVAL;
741                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
742                         break;
743
744                 if (!ifa) {
                            /* No address under this label yet: create one. */
745                         ret = -ENOBUFS;
746                         if ((ifa = inet_alloc_ifa()) == NULL)
747                                 break;
748                         if (colon)
749                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
750                         else
751                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
752                 } else {
                            /* Reuse the existing entry: unlink it without
                             * freeing, then re-insert it after modification. */
753                         ret = 0;
754                         if (ifa->ifa_local == sin->sin_addr.s_addr)
755                                 break;
756                         inet_del_ifa(in_dev, ifap, 0);
757                         ifa->ifa_broadcast = 0;
758                         ifa->ifa_anycast = 0;
759                 }
760
761                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
762
                    /* Non point-to-point devices get the classful default
                     * mask; point-to-point devices get a /32. */
763                 if (!(dev->flags & IFF_POINTOPOINT)) {
764                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
765                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
766                         if ((dev->flags & IFF_BROADCAST) &&
767                             ifa->ifa_prefixlen < 31)
768                                 ifa->ifa_broadcast = ifa->ifa_address |
769                                                      ~ifa->ifa_mask;
770                 } else {
771                         ifa->ifa_prefixlen = 32;
772                         ifa->ifa_mask = inet_make_mask(32);
773                 }
774                 ret = inet_set_ifa(dev, ifa);
775                 break;
776
777         case SIOCSIFBRDADDR:    /* Set the broadcast address */
778                 ret = 0;
779                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
780                         inet_del_ifa(in_dev, ifap, 0);
781                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
                            /* NOTE(review): the result of inet_insert_ifa()
                             * is not propagated to the caller here. */
782                         inet_insert_ifa(ifa);
783                 }
784                 break;
785
786         case SIOCSIFDSTADDR:    /* Set the destination address */
787                 ret = 0;
788                 if (ifa->ifa_address == sin->sin_addr.s_addr)
789                         break;
790                 ret = -EINVAL;
791                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
792                         break;
793                 ret = 0;
794                 inet_del_ifa(in_dev, ifap, 0);
795                 ifa->ifa_address = sin->sin_addr.s_addr;
796                 inet_insert_ifa(ifa);
797                 break;
798
799         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
800
801                 /*
802                  *      The mask we set must be legal.
803                  */
804                 ret = -EINVAL;
805                 if (bad_mask(sin->sin_addr.s_addr, 0))
806                         break;
807                 ret = 0;
808                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
809                         __be32 old_mask = ifa->ifa_mask;
810                         inet_del_ifa(in_dev, ifap, 0);
811                         ifa->ifa_mask = sin->sin_addr.s_addr;
812                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
813
814                         /* See if current broadcast address matches
815                          * with current netmask, then recalculate
816                          * the broadcast address. Otherwise it's a
817                          * funny address, so don't touch it since
818                          * the user seems to know what (s)he's doing...
819                          */
820                         if ((dev->flags & IFF_BROADCAST) &&
821                             (ifa->ifa_prefixlen < 31) &&
822                             (ifa->ifa_broadcast ==
823                              (ifa->ifa_local|~old_mask))) {
824                                 ifa->ifa_broadcast = (ifa->ifa_local |
825                                                       ~sin->sin_addr.s_addr);
826                         }
827                         inet_insert_ifa(ifa);
828                 }
829                 break;
830         }
    /* done: exit path for everything that ran under rtnl_lock(). */
831 done:
832         rtnl_unlock();
833 out:
834         return ret;
    /* rarok: query succeeded; drop RTNL, then copy the ifreq back out. */
835 rarok:
836         rtnl_unlock();
837         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
838         goto out;
839 }
840
841 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
842 {
843         struct in_device *in_dev = __in_dev_get_rtnl(dev);
844         struct in_ifaddr *ifa;
845         struct ifreq ifr;
846         int done = 0;
847
848         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
849                 goto out;
850
851         for (; ifa; ifa = ifa->ifa_next) {
852                 if (!buf) {
853                         done += sizeof(ifr);
854                         continue;
855                 }
856                 if (len < (int) sizeof(ifr))
857                         break;
858                 memset(&ifr, 0, sizeof(struct ifreq));
859                 if (ifa->ifa_label)
860                         strcpy(ifr.ifr_name, ifa->ifa_label);
861                 else
862                         strcpy(ifr.ifr_name, dev->name);
863
864                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
865                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
866                                                                 ifa->ifa_local;
867
868                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
869                         done = -EFAULT;
870                         break;
871                 }
872                 buf  += sizeof(struct ifreq);
873                 len  -= sizeof(struct ifreq);
874                 done += sizeof(struct ifreq);
875         }
876 out:
877         return done;
878 }
879
880 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
881 {
882         __be32 addr = 0;
883         struct in_device *in_dev;
884
885         rcu_read_lock();
886         in_dev = __in_dev_get_rcu(dev);
887         if (!in_dev)
888                 goto no_in_dev;
889
890         for_primary_ifa(in_dev) {
891                 if (ifa->ifa_scope > scope)
892                         continue;
893                 if (!dst || inet_ifa_match(dst, ifa)) {
894                         addr = ifa->ifa_local;
895                         break;
896                 }
897                 if (!addr)
898                         addr = ifa->ifa_local;
899         } endfor_ifa(in_dev);
900 no_in_dev:
901         rcu_read_unlock();
902
903         if (addr)
904                 goto out;
905
906         /* Not loopback addresses on loopback should be preferred
907            in this case. It is importnat that lo is the first interface
908            in dev_base list.
909          */
910         read_lock(&dev_base_lock);
911         rcu_read_lock();
912         for_each_netdev(&init_net, dev) {
913                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
914                         continue;
915
916                 for_primary_ifa(in_dev) {
917                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
918                             ifa->ifa_scope <= scope) {
919                                 addr = ifa->ifa_local;
920                                 goto out_unlock_both;
921                         }
922                 } endfor_ifa(in_dev);
923         }
924 out_unlock_both:
925         read_unlock(&dev_base_lock);
926         rcu_read_unlock();
927 out:
928         return addr;
929 }
930
/*
 * Scan the addresses of @in_dev for a local address that satisfies the
 * wildcard query (@dst, @local, @scope).
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the first ifa_local that equals @local (or any address when
 *          @local is 0) and whose scope does not exceed @scope;
 *   same - set once an address is found whose subnet matches @local (when
 *          non-zero) and @dst (when non-zero), as judged by inet_ifa_match().
 *
 * Returns addr when a subnet match was established, otherwise 0.
 * The loop variable "ifa" is not declared here, so it is presumably
 * provided by the for_ifa() macro.
 */
931 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
932                               __be32 local, int scope)
933 {
934         int same = 0;
935         __be32 addr = 0;
936
937         for_ifa(in_dev) {
/* Remember the first acceptable candidate; done if a subnet already matched. */
938                 if (!addr &&
939                     (local == ifa->ifa_local || !local) &&
940                     ifa->ifa_scope <= scope) {
941                         addr = ifa->ifa_local;
942                         if (same)
943                                 break;
944                 }
/* Still looking for an address whose subnet covers both local and dst. */
945                 if (!same) {
946                         same = (!local || inet_ifa_match(local, ifa)) &&
947                                 (!dst || inet_ifa_match(dst, ifa));
948                         if (same && addr) {
/* An explicit local, or no dst constraint: the candidate stands. */
949                                 if (local || !dst)
950                                         break;
951                                 /* Is the selected addr into dst subnet? */
952                                 if (inet_ifa_match(addr, ifa))
953                                         break;
954                                 /* No, then can we use new local src? */
955                                 if (ifa->ifa_scope <= scope) {
956                                         addr = ifa->ifa_local;
957                                         break;
958                                 }
959                                 /* search for large dst subnet for addr */
960                                 same = 0;
961                         }
962                 }
963         } endfor_ifa(in_dev);
964
/* A candidate address is only reported when a subnet match was found. */
965         return same? addr : 0;
966 }
967
968 /*
969  * Confirm that local IP address exists using wildcards:
970  * - dev: only on this interface, 0=any interface
971  * - dst: only in the same subnet as dst, 0=any dst
972  * - local: address, 0=autoselect the local address
973  * - scope: maximum allowed scope value for the local address
974  */
975 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
976 {
977         __be32 addr = 0;
978         struct in_device *in_dev;
979
980         if (dev) {
981                 rcu_read_lock();
982                 if ((in_dev = __in_dev_get_rcu(dev)))
983                         addr = confirm_addr_indev(in_dev, dst, local, scope);
984                 rcu_read_unlock();
985
986                 return addr;
987         }
988
989         read_lock(&dev_base_lock);
990         rcu_read_lock();
991         for_each_netdev(&init_net, dev) {
992                 if ((in_dev = __in_dev_get_rcu(dev))) {
993                         addr = confirm_addr_indev(in_dev, dst, local, scope);
994                         if (addr)
995                                 break;
996                 }
997         }
998         rcu_read_unlock();
999         read_unlock(&dev_base_lock);
1000
1001         return addr;
1002 }
1003
1004 /*
1005  *      Device notifier
1006  */
1007
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1008 int register_inetaddr_notifier(struct notifier_block *nb)
1009 {
1010         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1011 }
1012
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1013 int unregister_inetaddr_notifier(struct notifier_block *nb)
1014 {
1015         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1016 }
1017
1018 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1019  * alias numbering and to create unique labels if possible.
1020 */
1021 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1022 {
1023         struct in_ifaddr *ifa;
1024         int named = 0;
1025
1026         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1027                 char old[IFNAMSIZ], *dot;
1028
1029                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1030                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031                 if (named++ == 0)
1032                         continue;
1033                 dot = strchr(ifa->ifa_label, ':');
1034                 if (dot == NULL) {
1035                         sprintf(old, ":%d", named);
1036                         dot = old;
1037                 }
1038                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1039                         strcat(ifa->ifa_label, dot);
1040                 } else {
1041                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1042                 }
1043         }
1044 }
1045
1046 /* Called only under RTNL semaphore */
1047
1048 static int inetdev_event(struct notifier_block *this, unsigned long event,
1049                          void *ptr)
1050 {
1051         struct net_device *dev = ptr;
1052         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1053
1054         if (dev->nd_net != &init_net)
1055                 return NOTIFY_DONE;
1056
1057         ASSERT_RTNL();
1058
1059         if (!in_dev) {
1060                 if (event == NETDEV_REGISTER) {
1061                         in_dev = inetdev_init(dev);
1062                         if (!in_dev)
1063                                 return notifier_from_errno(-ENOMEM);
1064                         if (dev == &loopback_dev) {
1065                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1066                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1067                         }
1068                 }
1069                 goto out;
1070         }
1071
1072         switch (event) {
1073         case NETDEV_REGISTER:
1074                 printk(KERN_DEBUG "inetdev_event: bug\n");
1075                 dev->ip_ptr = NULL;
1076                 break;
1077         case NETDEV_UP:
1078                 if (dev->mtu < 68)
1079                         break;
1080                 if (dev == &loopback_dev) {
1081                         struct in_ifaddr *ifa;
1082                         if ((ifa = inet_alloc_ifa()) != NULL) {
1083                                 ifa->ifa_local =
1084                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1085                                 ifa->ifa_prefixlen = 8;
1086                                 ifa->ifa_mask = inet_make_mask(8);
1087                                 in_dev_hold(in_dev);
1088                                 ifa->ifa_dev = in_dev;
1089                                 ifa->ifa_scope = RT_SCOPE_HOST;
1090                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1091                                 inet_insert_ifa(ifa);
1092                         }
1093                 }
1094                 ip_mc_up(in_dev);
1095                 break;
1096         case NETDEV_DOWN:
1097                 ip_mc_down(in_dev);
1098                 break;
1099         case NETDEV_CHANGEMTU:
1100                 if (dev->mtu >= 68)
1101                         break;
1102                 /* MTU falled under 68, disable IP */
1103         case NETDEV_UNREGISTER:
1104                 inetdev_destroy(in_dev);
1105                 break;
1106         case NETDEV_CHANGENAME:
1107                 /* Do not notify about label change, this event is
1108                  * not interesting to applications using netlink.
1109                  */
1110                 inetdev_changename(dev, in_dev);
1111
1112 #ifdef CONFIG_SYSCTL
1113                 devinet_sysctl_unregister(&in_dev->cnf);
1114                 neigh_sysctl_unregister(in_dev->arp_parms);
1115                 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1116                                       NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1117                 devinet_sysctl_register(in_dev, &in_dev->cnf);
1118 #endif
1119                 break;
1120         }
1121 out:
1122         return NOTIFY_DONE;
1123 }
1124
1125 static struct notifier_block ip_netdev_notifier = {
1126         .notifier_call =inetdev_event,
1127 };
1128
1129 static inline size_t inet_nlmsg_size(void)
1130 {
1131         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1132                + nla_total_size(4) /* IFA_ADDRESS */
1133                + nla_total_size(4) /* IFA_LOCAL */
1134                + nla_total_size(4) /* IFA_BROADCAST */
1135                + nla_total_size(4) /* IFA_ANYCAST */
1136                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1137 }
1138
1139 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1140                             u32 pid, u32 seq, int event, unsigned int flags)
1141 {
1142         struct ifaddrmsg *ifm;
1143         struct nlmsghdr  *nlh;
1144
1145         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1146         if (nlh == NULL)
1147                 return -EMSGSIZE;
1148
1149         ifm = nlmsg_data(nlh);
1150         ifm->ifa_family = AF_INET;
1151         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1152         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1153         ifm->ifa_scope = ifa->ifa_scope;
1154         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1155
1156         if (ifa->ifa_address)
1157                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1158
1159         if (ifa->ifa_local)
1160                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1161
1162         if (ifa->ifa_broadcast)
1163                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1164
1165         if (ifa->ifa_anycast)
1166                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1167
1168         if (ifa->ifa_label[0])
1169                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1170
1171         return nlmsg_end(skb, nlh);
1172
1173 nla_put_failure:
1174         nlmsg_cancel(skb, nlh);
1175         return -EMSGSIZE;
1176 }
1177
1178 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1179 {
1180         int idx, ip_idx;
1181         struct net_device *dev;
1182         struct in_device *in_dev;
1183         struct in_ifaddr *ifa;
1184         int s_ip_idx, s_idx = cb->args[0];
1185
1186         s_ip_idx = ip_idx = cb->args[1];
1187         idx = 0;
1188         for_each_netdev(&init_net, dev) {
1189                 if (idx < s_idx)
1190                         goto cont;
1191                 if (idx > s_idx)
1192                         s_ip_idx = 0;
1193                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1194                         goto cont;
1195
1196                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1197                      ifa = ifa->ifa_next, ip_idx++) {
1198                         if (ip_idx < s_ip_idx)
1199                                 continue;
1200                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1201                                              cb->nlh->nlmsg_seq,
1202                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1203                                 goto done;
1204                 }
1205 cont:
1206                 idx++;
1207         }
1208
1209 done:
1210         cb->args[0] = idx;
1211         cb->args[1] = ip_idx;
1212
1213         return skb->len;
1214 }
1215
1216 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217                       u32 pid)
1218 {
1219         struct sk_buff *skb;
1220         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1221         int err = -ENOBUFS;
1222
1223         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1224         if (skb == NULL)
1225                 goto errout;
1226
1227         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1228         if (err < 0) {
1229                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1230                 WARN_ON(err == -EMSGSIZE);
1231                 kfree_skb(skb);
1232                 goto errout;
1233         }
1234         err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1235 errout:
1236         if (err < 0)
1237                 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1238 }
1239
1240 #ifdef CONFIG_SYSCTL
1241
1242 static void devinet_copy_dflt_conf(int i)
1243 {
1244         struct net_device *dev;
1245
1246         read_lock(&dev_base_lock);
1247         for_each_netdev(&init_net, dev) {
1248                 struct in_device *in_dev;
1249                 rcu_read_lock();
1250                 in_dev = __in_dev_get_rcu(dev);
1251                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1252                         in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1253                 rcu_read_unlock();
1254         }
1255         read_unlock(&dev_base_lock);
1256 }
1257
1258 static int devinet_conf_proc(ctl_table *ctl, int write,
1259                              struct file* filp, void __user *buffer,
1260                              size_t *lenp, loff_t *ppos)
1261 {
1262         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1263
1264         if (write) {
1265                 struct ipv4_devconf *cnf = ctl->extra1;
1266                 int i = (int *)ctl->data - cnf->data;
1267
1268                 set_bit(i, cnf->state);
1269
1270                 if (cnf == &ipv4_devconf_dflt)
1271                         devinet_copy_dflt_conf(i);
1272         }
1273
1274         return ret;
1275 }
1276
1277 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1278                                void __user *oldval, size_t __user *oldlenp,
1279                                void __user *newval, size_t newlen)
1280 {
1281         struct ipv4_devconf *cnf;
1282         int *valp = table->data;
1283         int new;
1284         int i;
1285
1286         if (!newval || !newlen)
1287                 return 0;
1288
1289         if (newlen != sizeof(int))
1290                 return -EINVAL;
1291
1292         if (get_user(new, (int __user *)newval))
1293                 return -EFAULT;
1294
1295         if (new == *valp)
1296                 return 0;
1297
1298         if (oldval && oldlenp) {
1299                 size_t len;
1300
1301                 if (get_user(len, oldlenp))
1302                         return -EFAULT;
1303
1304                 if (len) {
1305                         if (len > table->maxlen)
1306                                 len = table->maxlen;
1307                         if (copy_to_user(oldval, valp, len))
1308                                 return -EFAULT;
1309                         if (put_user(len, oldlenp))
1310                                 return -EFAULT;
1311                 }
1312         }
1313
1314         *valp = new;
1315
1316         cnf = table->extra1;
1317         i = (int *)table->data - cnf->data;
1318
1319         set_bit(i, cnf->state);
1320
1321         if (cnf == &ipv4_devconf_dflt)
1322                 devinet_copy_dflt_conf(i);
1323
1324         return 1;
1325 }
1326
1327 void inet_forward_change(void)
1328 {
1329         struct net_device *dev;
1330         int on = IPV4_DEVCONF_ALL(FORWARDING);
1331
1332         IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1333         IPV4_DEVCONF_DFLT(FORWARDING) = on;
1334
1335         read_lock(&dev_base_lock);
1336         for_each_netdev(&init_net, dev) {
1337                 struct in_device *in_dev;
1338                 rcu_read_lock();
1339                 in_dev = __in_dev_get_rcu(dev);
1340                 if (in_dev)
1341                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1342                 rcu_read_unlock();
1343         }
1344         read_unlock(&dev_base_lock);
1345
1346         rt_cache_flush(0);
1347 }
1348
1349 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1350                                   struct file* filp, void __user *buffer,
1351                                   size_t *lenp, loff_t *ppos)
1352 {
1353         int *valp = ctl->data;
1354         int val = *valp;
1355         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1356
1357         if (write && *valp != val) {
1358                 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1359                         inet_forward_change();
1360                 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1361                         rt_cache_flush(0);
1362         }
1363
1364         return ret;
1365 }
1366
1367 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1368                          struct file* filp, void __user *buffer,
1369                          size_t *lenp, loff_t *ppos)
1370 {
1371         int *valp = ctl->data;
1372         int val = *valp;
1373         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1374
1375         if (write && *valp != val)
1376                 rt_cache_flush(0);
1377
1378         return ret;
1379 }
1380
1381 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1382                                   void __user *oldval, size_t __user *oldlenp,
1383                                   void __user *newval, size_t newlen)
1384 {
1385         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1386                                       newval, newlen);
1387
1388         if (ret == 1)
1389                 rt_cache_flush(0);
1390
1391         return ret;
1392 }
1393
1394
/*
 * Build one ctl_table initializer for the setting NET_IPV4_CONF_<attr>.
 * .data points at slot (NET_IPV4_CONF_<attr> - 1) of ipv4_devconf.data and
 * .extra1 at ipv4_devconf itself; mval is the file mode, proc the proc
 * handler and sysctl the strategy routine.
 */
1395 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1396         { \
1397                 .ctl_name       = NET_IPV4_CONF_ ## attr, \
1398                 .procname       = name, \
1399                 .data           = ipv4_devconf.data + \
1400                                   NET_IPV4_CONF_ ## attr - 1, \
1401                 .maxlen         = sizeof(int), \
1402                 .mode           = mval, \
1403                 .proc_handler   = proc, \
1404                 .strategy       = sysctl, \
1405                 .extra1         = &ipv4_devconf, \
1406         }
1407
/* Read-write (0644) entry using the generic devinet handlers. */
1408 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1409         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1410                              devinet_conf_sysctl)
1411
/* Read-only (0444) entry using the generic devinet handlers. */
1412 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1413         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1414                              devinet_conf_sysctl)
1415
/* Read-write (0644) entry with caller-supplied handlers. */
1416 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1417         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1418
/* Read-write entry whose handlers also flush the routing cache on change. */
1419 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1420         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1421                                      ipv4_doint_and_flush_strategy)
1422
/*
 * Template sysctl tree net/ipv4/conf/all/<setting>.
 *
 * Each directory level is a two-element ctl_table array with only the
 * first element initialized, chained through .child down to devinet_vars,
 * which holds one entry per setting.
 * devinet_sysctl_register() duplicates this structure for each device (and
 * for "default") and repoints the copies at the relevant ipv4_devconf.
 */
1423 static struct devinet_sysctl_table {
1424         struct ctl_table_header *sysctl_header;
1425         ctl_table               devinet_vars[__NET_IPV4_CONF_MAX];
1426         ctl_table               devinet_dev[2];
1427         ctl_table               devinet_conf_dir[2];
1428         ctl_table               devinet_proto_dir[2];
1429         ctl_table               devinet_root_dir[2];
1430 } devinet_sysctl = {
1431         .devinet_vars = {
/* forwarding has its own proc handler to propagate changes. */
1432                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1433                                              devinet_sysctl_forward,
1434                                              devinet_conf_sysctl),
1435                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1436
1437                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1439                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1440                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1441                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1442                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1443                                         "accept_source_route"),
1444                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1445                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1446                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1447                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1448                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1449                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1450                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1451                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1452                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1453
/* Changing any of the following also flushes the routing cache. */
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1455                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1456                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1457                                               "force_igmp_version"),
1458                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1459                                               "promote_secondaries"),
1460         },
/* Directory "all", holding the settings above. */
1461         .devinet_dev = {
1462                 {
1463                         .ctl_name       = NET_PROTO_CONF_ALL,
1464                         .procname       = "all",
1465                         .mode           = 0555,
1466                         .child          = devinet_sysctl.devinet_vars,
1467                 },
1468         },
/* Directory "conf". */
1469         .devinet_conf_dir = {
1470                 {
1471                         .ctl_name       = NET_IPV4_CONF,
1472                         .procname       = "conf",
1473                         .mode           = 0555,
1474                         .child          = devinet_sysctl.devinet_dev,
1475                 },
1476         },
/* Directory "ipv4". */
1477         .devinet_proto_dir = {
1478                 {
1479                         .ctl_name       = NET_IPV4,
1480                         .procname       = "ipv4",
1481                         .mode           = 0555,
1482                         .child          = devinet_sysctl.devinet_conf_dir,
1483                 },
1484         },
/* Root directory "net"; this is the table handed to register_sysctl_table(). */
1485         .devinet_root_dir = {
1486                 {
1487                         .ctl_name       = CTL_NET,
1488                         .procname       = "net",
1489                         .mode           = 0555,
1490                         .child          = devinet_sysctl.devinet_proto_dir,
1491                 },
1492         },
1493 };
1494
1495 static void devinet_sysctl_register(struct in_device *in_dev,
1496                                     struct ipv4_devconf *p)
1497 {
1498         int i;
1499         struct net_device *dev = in_dev ? in_dev->dev : NULL;
1500         struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1501                                                  GFP_KERNEL);
1502         char *dev_name = NULL;
1503
1504         if (!t)
1505                 return;
1506         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1507                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1508                 t->devinet_vars[i].extra1 = p;
1509         }
1510
1511         if (dev) {
1512                 dev_name = dev->name;
1513                 t->devinet_dev[0].ctl_name = dev->ifindex;
1514         } else {
1515                 dev_name = "default";
1516                 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1517         }
1518
1519         /*
1520          * Make a copy of dev_name, because '.procname' is regarded as const
1521          * by sysctl and we wouldn't want anyone to change it under our feet
1522          * (see SIOCSIFNAME).
1523          */
1524         dev_name = kstrdup(dev_name, GFP_KERNEL);
1525         if (!dev_name)
1526             goto free;
1527
1528         t->devinet_dev[0].procname    = dev_name;
1529         t->devinet_dev[0].child       = t->devinet_vars;
1530         t->devinet_conf_dir[0].child  = t->devinet_dev;
1531         t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1532         t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1533
1534         t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1535         if (!t->sysctl_header)
1536             goto free_procname;
1537
1538         p->sysctl = t;
1539         return;
1540
1541         /* error path */
1542  free_procname:
1543         kfree(dev_name);
1544  free:
1545         kfree(t);
1546         return;
1547 }
1548
1549 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1550 {
1551         if (p->sysctl) {
1552                 struct devinet_sysctl_table *t = p->sysctl;
1553                 p->sysctl = NULL;
1554                 unregister_sysctl_table(t->sysctl_header);
1555                 kfree(t->devinet_dev[0].procname);
1556                 kfree(t);
1557         }
1558 }
1559 #endif
1560
/*
 * Boot-time setup of the IPv4 device layer: registers inet_gifconf as the
 * PF_INET gifconf handler, hooks ip_netdev_notifier into the netdevice
 * notifier chain, and installs the rtnetlink handlers for PF_INET address
 * messages.  With CONFIG_SYSCTL it also registers the template sysctl tree
 * and the directory for ipv4_devconf_dflt.
 * Return values of the registration calls are not checked here.
 */
1561 void __init devinet_init(void)
1562 {
1563         register_gifconf(PF_INET, inet_gifconf);
1564         register_netdevice_notifier(&ip_netdev_notifier);
1565
/* NEWADDR/DELADDR get a first handler only; GETADDR only the second (inet_dump_ifaddr). */
1566         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1567         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1568         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1569 #ifdef CONFIG_SYSCTL
/* The template itself provides the "all" directory. */
1570         devinet_sysctl.sysctl_header =
1571                 register_sysctl_table(devinet_sysctl.devinet_root_dir);
/* A NULL in_dev makes devinet_sysctl_register() create the "default" directory. */
1572         devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1573 #endif
1574 }
1575
/* Symbols exported from this file via EXPORT_SYMBOL(). */
1576 EXPORT_SYMBOL(in_dev_finish_destroy);
1577 EXPORT_SYMBOL(inet_select_addr);
1578 EXPORT_SYMBOL(inetdev_by_index);
1579 EXPORT_SYMBOL(register_inetaddr_notifier);
1580 EXPORT_SYMBOL(unregister_inetaddr_notifier);