net: remove CVS keywords
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesised so that any pointer-valued expression may be
 * passed, not only a plain identifier or call.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116
117         if (ifa) {
118                 INIT_RCU_HEAD(&ifa->rcu_head);
119         }
120
121         return ifa;
122 }
123
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127         if (ifa->ifa_dev)
128                 in_dev_put(ifa->ifa_dev);
129         kfree(ifa);
130 }
131
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139         struct net_device *dev = idev->dev;
140
141         BUG_TRAP(!idev->ifa_list);
142         BUG_TRAP(!idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145                idev, dev ? dev->name : "NIL");
146 #endif
147         dev_put(dev);
148         if (!idev->dead)
149                 printk("Freeing alive in_device %p\n", idev);
150         else {
151                 kfree(idev);
152         }
153 }
154
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157         struct in_device *in_dev;
158
159         ASSERT_RTNL();
160
161         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162         if (!in_dev)
163                 goto out;
164         INIT_RCU_HEAD(&in_dev->rcu_head);
165         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166                         sizeof(in_dev->cnf));
167         in_dev->cnf.sysctl = NULL;
168         in_dev->dev = dev;
169         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170                 goto out_kfree;
171         /* Reference in_dev->dev */
172         dev_hold(dev);
173         /* Account for reference dev->ip_ptr (below) */
174         in_dev_hold(in_dev);
175
176         devinet_sysctl_register(in_dev);
177         ip_mc_init_dev(in_dev);
178         if (dev->flags & IFF_UP)
179                 ip_mc_up(in_dev);
180
181         /* we can receive as soon as ip_ptr is set -- do this last */
182         rcu_assign_pointer(dev->ip_ptr, in_dev);
183 out:
184         return in_dev;
185 out_kfree:
186         kfree(in_dev);
187         in_dev = NULL;
188         goto out;
189 }
190
191 static void in_dev_rcu_put(struct rcu_head *head)
192 {
193         struct in_device *idev = container_of(head, struct in_device, rcu_head);
194         in_dev_put(idev);
195 }
196
197 static void inetdev_destroy(struct in_device *in_dev)
198 {
199         struct in_ifaddr *ifa;
200         struct net_device *dev;
201
202         ASSERT_RTNL();
203
204         dev = in_dev->dev;
205
206         in_dev->dead = 1;
207
208         ip_mc_destroy_dev(in_dev);
209
210         while ((ifa = in_dev->ifa_list) != NULL) {
211                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
212                 inet_free_ifa(ifa);
213         }
214
215         dev->ip_ptr = NULL;
216
217         devinet_sysctl_unregister(in_dev);
218         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
219         arp_ifdown(dev);
220
221         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
222 }
223
224 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
225 {
226         rcu_read_lock();
227         for_primary_ifa(in_dev) {
228                 if (inet_ifa_match(a, ifa)) {
229                         if (!b || inet_ifa_match(b, ifa)) {
230                                 rcu_read_unlock();
231                                 return 1;
232                         }
233                 }
234         } endfor_ifa(in_dev);
235         rcu_read_unlock();
236         return 0;
237 }
238
239 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
240                          int destroy, struct nlmsghdr *nlh, u32 pid)
241 {
242         struct in_ifaddr *promote = NULL;
243         struct in_ifaddr *ifa, *ifa1 = *ifap;
244         struct in_ifaddr *last_prim = in_dev->ifa_list;
245         struct in_ifaddr *prev_prom = NULL;
246         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
247
248         ASSERT_RTNL();
249
250         /* 1. Deleting primary ifaddr forces deletion all secondaries
251          * unless alias promotion is set
252          **/
253
254         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
255                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
256
257                 while ((ifa = *ifap1) != NULL) {
258                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
259                             ifa1->ifa_scope <= ifa->ifa_scope)
260                                 last_prim = ifa;
261
262                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
263                             ifa1->ifa_mask != ifa->ifa_mask ||
264                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
265                                 ifap1 = &ifa->ifa_next;
266                                 prev_prom = ifa;
267                                 continue;
268                         }
269
270                         if (!do_promote) {
271                                 *ifap1 = ifa->ifa_next;
272
273                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
274                                 blocking_notifier_call_chain(&inetaddr_chain,
275                                                 NETDEV_DOWN, ifa);
276                                 inet_free_ifa(ifa);
277                         } else {
278                                 promote = ifa;
279                                 break;
280                         }
281                 }
282         }
283
284         /* 2. Unlink it */
285
286         *ifap = ifa1->ifa_next;
287
288         /* 3. Announce address deletion */
289
290         /* Send message first, then call notifier.
291            At first sight, FIB update triggered by notifier
292            will refer to already deleted ifaddr, that could confuse
293            netlink listeners. It is not true: look, gated sees
294            that route deleted and if it still thinks that ifaddr
295            is valid, it will try to restore deleted routes... Grr.
296            So that, this order is correct.
297          */
298         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
299         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
300
301         if (promote) {
302
303                 if (prev_prom) {
304                         prev_prom->ifa_next = promote->ifa_next;
305                         promote->ifa_next = last_prim->ifa_next;
306                         last_prim->ifa_next = promote;
307                 }
308
309                 promote->ifa_flags &= ~IFA_F_SECONDARY;
310                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
311                 blocking_notifier_call_chain(&inetaddr_chain,
312                                 NETDEV_UP, promote);
313                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
314                         if (ifa1->ifa_mask != ifa->ifa_mask ||
315                             !inet_ifa_match(ifa1->ifa_address, ifa))
316                                         continue;
317                         fib_add_ifaddr(ifa);
318                 }
319
320         }
321         if (destroy)
322                 inet_free_ifa(ifa1);
323 }
324
325 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326                          int destroy)
327 {
328         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
329 }
330
331 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
332                              u32 pid)
333 {
334         struct in_device *in_dev = ifa->ifa_dev;
335         struct in_ifaddr *ifa1, **ifap, **last_primary;
336
337         ASSERT_RTNL();
338
339         if (!ifa->ifa_local) {
340                 inet_free_ifa(ifa);
341                 return 0;
342         }
343
344         ifa->ifa_flags &= ~IFA_F_SECONDARY;
345         last_primary = &in_dev->ifa_list;
346
347         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
348              ifap = &ifa1->ifa_next) {
349                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
350                     ifa->ifa_scope <= ifa1->ifa_scope)
351                         last_primary = &ifa1->ifa_next;
352                 if (ifa1->ifa_mask == ifa->ifa_mask &&
353                     inet_ifa_match(ifa1->ifa_address, ifa)) {
354                         if (ifa1->ifa_local == ifa->ifa_local) {
355                                 inet_free_ifa(ifa);
356                                 return -EEXIST;
357                         }
358                         if (ifa1->ifa_scope != ifa->ifa_scope) {
359                                 inet_free_ifa(ifa);
360                                 return -EINVAL;
361                         }
362                         ifa->ifa_flags |= IFA_F_SECONDARY;
363                 }
364         }
365
366         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
367                 net_srandom(ifa->ifa_local);
368                 ifap = last_primary;
369         }
370
371         ifa->ifa_next = *ifap;
372         *ifap = ifa;
373
374         /* Send message first, then call notifier.
375            Notifier will trigger FIB update, so that
376            listeners of netlink will know about new ifaddr */
377         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
378         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
379
380         return 0;
381 }
382
383 static int inet_insert_ifa(struct in_ifaddr *ifa)
384 {
385         return __inet_insert_ifa(ifa, NULL, 0);
386 }
387
388 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
389 {
390         struct in_device *in_dev = __in_dev_get_rtnl(dev);
391
392         ASSERT_RTNL();
393
394         if (!in_dev) {
395                 inet_free_ifa(ifa);
396                 return -ENOBUFS;
397         }
398         ipv4_devconf_setall(in_dev);
399         if (ifa->ifa_dev != in_dev) {
400                 BUG_TRAP(!ifa->ifa_dev);
401                 in_dev_hold(in_dev);
402                 ifa->ifa_dev = in_dev;
403         }
404         if (ipv4_is_loopback(ifa->ifa_local))
405                 ifa->ifa_scope = RT_SCOPE_HOST;
406         return inet_insert_ifa(ifa);
407 }
408
409 struct in_device *inetdev_by_index(struct net *net, int ifindex)
410 {
411         struct net_device *dev;
412         struct in_device *in_dev = NULL;
413         read_lock(&dev_base_lock);
414         dev = __dev_get_by_index(net, ifindex);
415         if (dev)
416                 in_dev = in_dev_get(dev);
417         read_unlock(&dev_base_lock);
418         return in_dev;
419 }
420
421 /* Called only from RTNL semaphored context. No locks. */
422
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
424                                     __be32 mask)
425 {
426         ASSERT_RTNL();
427
428         for_primary_ifa(in_dev) {
429                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
430                         return ifa;
431         } endfor_ifa(in_dev);
432         return NULL;
433 }
434
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
436 {
437         struct net *net = sock_net(skb->sk);
438         struct nlattr *tb[IFA_MAX+1];
439         struct in_device *in_dev;
440         struct ifaddrmsg *ifm;
441         struct in_ifaddr *ifa, **ifap;
442         int err = -EINVAL;
443
444         ASSERT_RTNL();
445
446         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
447         if (err < 0)
448                 goto errout;
449
450         ifm = nlmsg_data(nlh);
451         in_dev = inetdev_by_index(net, ifm->ifa_index);
452         if (in_dev == NULL) {
453                 err = -ENODEV;
454                 goto errout;
455         }
456
457         __in_dev_put(in_dev);
458
459         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
460              ifap = &ifa->ifa_next) {
461                 if (tb[IFA_LOCAL] &&
462                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
463                         continue;
464
465                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
466                         continue;
467
468                 if (tb[IFA_ADDRESS] &&
469                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
470                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
471                         continue;
472
473                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
474                 return 0;
475         }
476
477         err = -EADDRNOTAVAIL;
478 errout:
479         return err;
480 }
481
482 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
483 {
484         struct nlattr *tb[IFA_MAX+1];
485         struct in_ifaddr *ifa;
486         struct ifaddrmsg *ifm;
487         struct net_device *dev;
488         struct in_device *in_dev;
489         int err;
490
491         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
492         if (err < 0)
493                 goto errout;
494
495         ifm = nlmsg_data(nlh);
496         err = -EINVAL;
497         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
498                 goto errout;
499
500         dev = __dev_get_by_index(net, ifm->ifa_index);
501         err = -ENODEV;
502         if (dev == NULL)
503                 goto errout;
504
505         in_dev = __in_dev_get_rtnl(dev);
506         err = -ENOBUFS;
507         if (in_dev == NULL)
508                 goto errout;
509
510         ifa = inet_alloc_ifa();
511         if (ifa == NULL)
512                 /*
513                  * A potential indev allocation can be left alive, it stays
514                  * assigned to its device and is destroy with it.
515                  */
516                 goto errout;
517
518         ipv4_devconf_setall(in_dev);
519         in_dev_hold(in_dev);
520
521         if (tb[IFA_ADDRESS] == NULL)
522                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
523
524         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
525         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
526         ifa->ifa_flags = ifm->ifa_flags;
527         ifa->ifa_scope = ifm->ifa_scope;
528         ifa->ifa_dev = in_dev;
529
530         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
531         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
532
533         if (tb[IFA_BROADCAST])
534                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
535
536         if (tb[IFA_LABEL])
537                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
538         else
539                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
540
541         return ifa;
542
543 errout:
544         return ERR_PTR(err);
545 }
546
547 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
548 {
549         struct net *net = sock_net(skb->sk);
550         struct in_ifaddr *ifa;
551
552         ASSERT_RTNL();
553
554         ifa = rtm_to_ifaddr(net, nlh);
555         if (IS_ERR(ifa))
556                 return PTR_ERR(ifa);
557
558         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
559 }
560
561 /*
562  *      Determine a default network mask, based on the IP address.
563  */
564
565 static __inline__ int inet_abc_len(__be32 addr)
566 {
567         int rc = -1;    /* Something else, probably a multicast. */
568
569         if (ipv4_is_zeronet(addr))
570                 rc = 0;
571         else {
572                 __u32 haddr = ntohl(addr);
573
574                 if (IN_CLASSA(haddr))
575                         rc = 8;
576                 else if (IN_CLASSB(haddr))
577                         rc = 16;
578                 else if (IN_CLASSC(haddr))
579                         rc = 24;
580         }
581
582         return rc;
583 }
584
585
586 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
587 {
588         struct ifreq ifr;
589         struct sockaddr_in sin_orig;
590         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
591         struct in_device *in_dev;
592         struct in_ifaddr **ifap = NULL;
593         struct in_ifaddr *ifa = NULL;
594         struct net_device *dev;
595         char *colon;
596         int ret = -EFAULT;
597         int tryaddrmatch = 0;
598
599         /*
600          *      Fetch the caller's info block into kernel space
601          */
602
603         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
604                 goto out;
605         ifr.ifr_name[IFNAMSIZ - 1] = 0;
606
607         /* save original address for comparison */
608         memcpy(&sin_orig, sin, sizeof(*sin));
609
610         colon = strchr(ifr.ifr_name, ':');
611         if (colon)
612                 *colon = 0;
613
614 #ifdef CONFIG_KMOD
615         dev_load(net, ifr.ifr_name);
616 #endif
617
618         switch (cmd) {
619         case SIOCGIFADDR:       /* Get interface address */
620         case SIOCGIFBRDADDR:    /* Get the broadcast address */
621         case SIOCGIFDSTADDR:    /* Get the destination address */
622         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
623                 /* Note that these ioctls will not sleep,
624                    so that we do not impose a lock.
625                    One day we will be forced to put shlock here (I mean SMP)
626                  */
627                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
628                 memset(sin, 0, sizeof(*sin));
629                 sin->sin_family = AF_INET;
630                 break;
631
632         case SIOCSIFFLAGS:
633                 ret = -EACCES;
634                 if (!capable(CAP_NET_ADMIN))
635                         goto out;
636                 break;
637         case SIOCSIFADDR:       /* Set interface address (and family) */
638         case SIOCSIFBRDADDR:    /* Set the broadcast address */
639         case SIOCSIFDSTADDR:    /* Set the destination address */
640         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
641                 ret = -EACCES;
642                 if (!capable(CAP_NET_ADMIN))
643                         goto out;
644                 ret = -EINVAL;
645                 if (sin->sin_family != AF_INET)
646                         goto out;
647                 break;
648         default:
649                 ret = -EINVAL;
650                 goto out;
651         }
652
653         rtnl_lock();
654
655         ret = -ENODEV;
656         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
657                 goto done;
658
659         if (colon)
660                 *colon = ':';
661
662         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
663                 if (tryaddrmatch) {
664                         /* Matthias Andree */
665                         /* compare label and address (4.4BSD style) */
666                         /* note: we only do this for a limited set of ioctls
667                            and only if the original address family was AF_INET.
668                            This is checked above. */
669                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670                              ifap = &ifa->ifa_next) {
671                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672                                     sin_orig.sin_addr.s_addr ==
673                                                         ifa->ifa_address) {
674                                         break; /* found */
675                                 }
676                         }
677                 }
678                 /* we didn't get a match, maybe the application is
679                    4.3BSD-style and passed in junk so we fall back to
680                    comparing just the label */
681                 if (!ifa) {
682                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683                              ifap = &ifa->ifa_next)
684                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685                                         break;
686                 }
687         }
688
689         ret = -EADDRNOTAVAIL;
690         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691                 goto done;
692
693         switch (cmd) {
694         case SIOCGIFADDR:       /* Get interface address */
695                 sin->sin_addr.s_addr = ifa->ifa_local;
696                 goto rarok;
697
698         case SIOCGIFBRDADDR:    /* Get the broadcast address */
699                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700                 goto rarok;
701
702         case SIOCGIFDSTADDR:    /* Get the destination address */
703                 sin->sin_addr.s_addr = ifa->ifa_address;
704                 goto rarok;
705
706         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
707                 sin->sin_addr.s_addr = ifa->ifa_mask;
708                 goto rarok;
709
710         case SIOCSIFFLAGS:
711                 if (colon) {
712                         ret = -EADDRNOTAVAIL;
713                         if (!ifa)
714                                 break;
715                         ret = 0;
716                         if (!(ifr.ifr_flags & IFF_UP))
717                                 inet_del_ifa(in_dev, ifap, 1);
718                         break;
719                 }
720                 ret = dev_change_flags(dev, ifr.ifr_flags);
721                 break;
722
723         case SIOCSIFADDR:       /* Set interface address (and family) */
724                 ret = -EINVAL;
725                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726                         break;
727
728                 if (!ifa) {
729                         ret = -ENOBUFS;
730                         if ((ifa = inet_alloc_ifa()) == NULL)
731                                 break;
732                         if (colon)
733                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734                         else
735                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736                 } else {
737                         ret = 0;
738                         if (ifa->ifa_local == sin->sin_addr.s_addr)
739                                 break;
740                         inet_del_ifa(in_dev, ifap, 0);
741                         ifa->ifa_broadcast = 0;
742                         ifa->ifa_scope = 0;
743                 }
744
745                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746
747                 if (!(dev->flags & IFF_POINTOPOINT)) {
748                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750                         if ((dev->flags & IFF_BROADCAST) &&
751                             ifa->ifa_prefixlen < 31)
752                                 ifa->ifa_broadcast = ifa->ifa_address |
753                                                      ~ifa->ifa_mask;
754                 } else {
755                         ifa->ifa_prefixlen = 32;
756                         ifa->ifa_mask = inet_make_mask(32);
757                 }
758                 ret = inet_set_ifa(dev, ifa);
759                 break;
760
761         case SIOCSIFBRDADDR:    /* Set the broadcast address */
762                 ret = 0;
763                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764                         inet_del_ifa(in_dev, ifap, 0);
765                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
766                         inet_insert_ifa(ifa);
767                 }
768                 break;
769
770         case SIOCSIFDSTADDR:    /* Set the destination address */
771                 ret = 0;
772                 if (ifa->ifa_address == sin->sin_addr.s_addr)
773                         break;
774                 ret = -EINVAL;
775                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776                         break;
777                 ret = 0;
778                 inet_del_ifa(in_dev, ifap, 0);
779                 ifa->ifa_address = sin->sin_addr.s_addr;
780                 inet_insert_ifa(ifa);
781                 break;
782
783         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
784
785                 /*
786                  *      The mask we set must be legal.
787                  */
788                 ret = -EINVAL;
789                 if (bad_mask(sin->sin_addr.s_addr, 0))
790                         break;
791                 ret = 0;
792                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793                         __be32 old_mask = ifa->ifa_mask;
794                         inet_del_ifa(in_dev, ifap, 0);
795                         ifa->ifa_mask = sin->sin_addr.s_addr;
796                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797
798                         /* See if current broadcast address matches
799                          * with current netmask, then recalculate
800                          * the broadcast address. Otherwise it's a
801                          * funny address, so don't touch it since
802                          * the user seems to know what (s)he's doing...
803                          */
804                         if ((dev->flags & IFF_BROADCAST) &&
805                             (ifa->ifa_prefixlen < 31) &&
806                             (ifa->ifa_broadcast ==
807                              (ifa->ifa_local|~old_mask))) {
808                                 ifa->ifa_broadcast = (ifa->ifa_local |
809                                                       ~sin->sin_addr.s_addr);
810                         }
811                         inet_insert_ifa(ifa);
812                 }
813                 break;
814         }
815 done:
816         rtnl_unlock();
817 out:
818         return ret;
819 rarok:
820         rtnl_unlock();
821         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822         goto out;
823 }
824
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827         struct in_device *in_dev = __in_dev_get_rtnl(dev);
828         struct in_ifaddr *ifa;
829         struct ifreq ifr;
830         int done = 0;
831
832         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
833                 goto out;
834
835         for (; ifa; ifa = ifa->ifa_next) {
836                 if (!buf) {
837                         done += sizeof(ifr);
838                         continue;
839                 }
840                 if (len < (int) sizeof(ifr))
841                         break;
842                 memset(&ifr, 0, sizeof(struct ifreq));
843                 if (ifa->ifa_label)
844                         strcpy(ifr.ifr_name, ifa->ifa_label);
845                 else
846                         strcpy(ifr.ifr_name, dev->name);
847
848                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850                                                                 ifa->ifa_local;
851
852                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853                         done = -EFAULT;
854                         break;
855                 }
856                 buf  += sizeof(struct ifreq);
857                 len  -= sizeof(struct ifreq);
858                 done += sizeof(struct ifreq);
859         }
860 out:
861         return done;
862 }
863
/*
 * Select a local IPv4 address to use on @dev.
 *
 * @dev:   device to look at first
 * @dst:   destination address; 0 means "no preference"
 * @scope: largest ifa_scope value that is acceptable
 *
 * First the addresses walked by for_primary_ifa() on @dev are examined:
 * an address whose scope is acceptable and which matches @dst (or any such
 * address when @dst is 0) wins immediately; otherwise the first address
 * with an acceptable scope is remembered as a fallback.  If @dev yields
 * nothing, every device in the same network namespace is scanned for the
 * first address that is not RT_SCOPE_LINK and has an acceptable scope.
 *
 * Returns the chosen address, or 0 when none was found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
        __be32 addr = 0;
        struct in_device *in_dev;
        struct net *net = dev_net(dev);

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                goto no_in_dev;

        for_primary_ifa(in_dev) {
                /* Scope too wide for the caller: not usable. */
                if (ifa->ifa_scope > scope)
                        continue;
                /* Best case: no dst constraint, or ifa matches dst. */
                if (!dst || inet_ifa_match(dst, ifa)) {
                        addr = ifa->ifa_local;
                        break;
                }
                /* Keep the first acceptable address as a fallback. */
                if (!addr)
                        addr = ifa->ifa_local;
        } endfor_ifa(in_dev);
no_in_dev:
        rcu_read_unlock();

        if (addr)
                goto out;

        /* Addresses on loopback that are not loopback addresses should be
           preferred in this case. It is important that lo is the first
           interface in the dev_base list.
         */
        read_lock(&dev_base_lock);
        rcu_read_lock();
        for_each_netdev(net, dev) {
                if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
                        continue;

                for_primary_ifa(in_dev) {
                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
                            ifa->ifa_scope <= scope) {
                                addr = ifa->ifa_local;
                                goto out_unlock_both;
                        }
                } endfor_ifa(in_dev);
        }
out_unlock_both:
        /* NOTE(review): released in acquisition order rather than reverse
         * order; inet_confirm_addr() drops the RCU lock first.
         */
        read_unlock(&dev_base_lock);
        rcu_read_unlock();
out:
        return addr;
}
915
/*
 * Look for a usable local address on a single in_device.
 *
 * @in_dev: device whose addresses are walked with for_ifa()
 * @dst:    destination address, 0 = no subnet constraint from dst
 * @local:  required local address, 0 = pick one automatically
 * @scope:  largest acceptable ifa_scope for the returned address
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the candidate source address (first address that equals @local,
 *          or any address if @local is 0, with an acceptable scope);
 *   same - set once some address matches both @local and @dst in the
 *          inet_ifa_match() sense (a zero argument matches everything).
 *
 * Returns the candidate address only if 'same' is set when the walk ends,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* First acceptable candidate for the source address. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet condition already met earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* Explicit local, or no dst to honour:
                                 * the candidate we have is final.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same? addr : 0;
}
952
953 /*
954  * Confirm that local IP address exists using wildcards:
955  * - in_dev: only on this interface, 0=any interface
956  * - dst: only in the same subnet as dst, 0=any dst
957  * - local: address, 0=autoselect the local address
958  * - scope: maximum allowed scope value for the local address
959  */
960 __be32 inet_confirm_addr(struct in_device *in_dev,
961                          __be32 dst, __be32 local, int scope)
962 {
963         __be32 addr = 0;
964         struct net_device *dev;
965         struct net *net;
966
967         if (scope != RT_SCOPE_LINK)
968                 return confirm_addr_indev(in_dev, dst, local, scope);
969
970         net = dev_net(in_dev->dev);
971         read_lock(&dev_base_lock);
972         rcu_read_lock();
973         for_each_netdev(net, dev) {
974                 if ((in_dev = __in_dev_get_rcu(dev))) {
975                         addr = confirm_addr_indev(in_dev, dst, local, scope);
976                         if (addr)
977                                 break;
978                 }
979         }
980         rcu_read_unlock();
981         read_unlock(&dev_base_lock);
982
983         return addr;
984 }
985
986 /*
987  *      Device notifier
988  */
989
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
994
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
999
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 *
 * The first address on the list gets the bare new device name.  Every
 * later address gets the new name followed by a ":suffix": the suffix of
 * its old label if it had one, otherwise ":<n>" where n is the address's
 * 1-based position in the list.  An RTM_NEWADDR message is sent for every
 * address via rtmsg_ifa().
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
        struct in_ifaddr *ifa;
        int named = 0;

        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
                char old[IFNAMSIZ], *dot;

                /* Save the old label, then start from the new device name. */
                memcpy(old, ifa->ifa_label, IFNAMSIZ);
                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                /* First address: plain device name, no alias suffix. */
                if (named++ == 0)
                        goto skip;
                dot = strchr(old, ':');
                if (dot == NULL) {
                        /* No old suffix: invent one from the position;
                         * 'old' is reused as scratch space for it.
                         */
                        sprintf(old, ":%d", named);
                        dot = old;
                }
                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
                        strcat(ifa->ifa_label, dot);
                } else {
                        /* Name + suffix would not fit: overwrite the tail
                         * of the name so the suffix ends exactly at the
                         * last byte of the label (including the NUL).
                         */
                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
                }
skip:
                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
        }
}
1029
/*
 * Netdevice notifier callback: keeps the per-device IPv4 state (in_device)
 * in step with device lifecycle events.
 *
 * Called only under RTNL semaphore (checked with ASSERT_RTNL()).
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when creating the in_device on
 * NETDEV_REGISTER fails, in which case -ENOMEM is returned through
 * notifier_from_errno().
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /* No IPv4 state yet: the only event that matters is REGISTER,
         * which creates it.  Everything else is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* Loopback devices start with NOXFRM and NOPOLICY
                         * set.
                         */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER for a device that already has an in_device is
                 * unexpected; log it and drop the pointer.
                 */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                dev->ip_ptr = NULL;
                break;
        case NETDEV_UP:
                /* MTU below 68: IP is not brought up on this device. */
                if (dev->mtu < 68)
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        /* Auto-configure INADDR_LOOPBACK/8 with host scope
                         * on loopback devices.  Allocation failure is
                         * silently tolerated.
                         */
                        struct in_ifaddr *ifa;
                        if ((ifa = inet_alloc_ifa()) != NULL) {
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (dev->mtu >= 68)
                        break;
                /* MTU fell below 68, disable IP */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() does call
                 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
                 * statement above looks stale - confirm and update.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register the sysctl entries for the device (the
                 * registration uses idev->dev->name).
                 */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1100
/* Notifier block routing netdevice events to inetdev_event(); registered
 * in devinet_init() with register_netdevice_notifier().
 */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call =inetdev_event,
};
1104
/*
 * Upper bound on the size of one address netlink message: the aligned
 * ifaddrmsg header plus room for every attribute inet_fill_ifaddr() may
 * emit.  rtmsg_ifa() uses it to size the skb, so the two must stay in
 * sync.
 */
static inline size_t inet_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
               + nla_total_size(4) /* IFA_ADDRESS */
               + nla_total_size(4) /* IFA_LOCAL */
               + nla_total_size(4) /* IFA_BROADCAST */
               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
}
1113
/*
 * Append one netlink message describing @ifa to @skb.
 *
 * @skb:   buffer to append to
 * @ifa:   address to describe
 * @pid:   netlink pid placed in the message header
 * @seq:   netlink sequence number placed in the message header
 * @event: message type (callers in this file pass RTM_NEWADDR or the
 *         event given to rtmsg_ifa())
 * @flags: netlink header flags (e.g. NLM_F_MULTI for dumps)
 *
 * Attributes with a zero value (and an empty label) are omitted.
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE if the
 * header or an attribute does not fit; in the latter case the partially
 * built message is cancelled.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
                            u32 pid, u32 seq, int event, unsigned int flags)
{
        struct ifaddrmsg *ifm;
        struct nlmsghdr  *nlh;

        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ifm = nlmsg_data(nlh);
        ifm->ifa_family = AF_INET;
        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
        /* IFA_F_PERMANENT is always reported in addition to the stored
         * flags.
         */
        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
        ifm->ifa_scope = ifa->ifa_scope;
        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

        /* The NLA_PUT_* macros are defined outside this chunk; they are
         * presumably what jumps to nla_put_failure when the attribute
         * does not fit, since nothing else here references that label.
         */
        if (ifa->ifa_address)
                NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

        if (ifa->ifa_local)
                NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

        if (ifa->ifa_broadcast)
                NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

        if (ifa->ifa_label[0])
                NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1149
/*
 * Netlink dump callback for RTM_GETADDR: writes one RTM_NEWADDR message
 * per IPv4 address of every device in the requesting socket's namespace.
 *
 * The dump is resumable.  cb->args[0] holds the index of the device to
 * continue from and cb->args[1] the index of the address within that
 * device; both are updated before returning.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int idx, ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        int s_ip_idx, s_idx = cb->args[0];

        s_ip_idx = ip_idx = cb->args[1];
        idx = 0;
        for_each_netdev(net, dev) {
                /* Skip devices fully handled by a previous call. */
                if (idx < s_idx)
                        goto cont;
                /* Past the resume device: start from its first address. */
                if (idx > s_idx)
                        s_ip_idx = 0;
                if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
                        goto cont;

                for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                     ifa = ifa->ifa_next, ip_idx++) {
                        /* Skip addresses already dumped on this device. */
                        if (ip_idx < s_ip_idx)
                                continue;
                        /* Stop as soon as a message cannot be added; the
                         * saved indices let the next call retry it.
                         */
                        if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0)
                                goto done;
                }
cont:
                idx++;
        }

done:
        cb->args[0] = idx;
        cb->args[1] = ip_idx;

        return skb->len;
}
1188
1189 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1190                       u32 pid)
1191 {
1192         struct sk_buff *skb;
1193         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1194         int err = -ENOBUFS;
1195         struct net *net;
1196
1197         net = dev_net(ifa->ifa_dev->dev);
1198         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1199         if (skb == NULL)
1200                 goto errout;
1201
1202         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1203         if (err < 0) {
1204                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1205                 WARN_ON(err == -EMSGSIZE);
1206                 kfree_skb(skb);
1207                 goto errout;
1208         }
1209         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1210 errout:
1211         if (err < 0)
1212                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1213 }
1214
1215 #ifdef CONFIG_SYSCTL
1216
1217 static void devinet_copy_dflt_conf(struct net *net, int i)
1218 {
1219         struct net_device *dev;
1220
1221         read_lock(&dev_base_lock);
1222         for_each_netdev(net, dev) {
1223                 struct in_device *in_dev;
1224                 rcu_read_lock();
1225                 in_dev = __in_dev_get_rcu(dev);
1226                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1227                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1228                 rcu_read_unlock();
1229         }
1230         read_unlock(&dev_base_lock);
1231 }
1232
1233 static void inet_forward_change(struct net *net)
1234 {
1235         struct net_device *dev;
1236         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1237
1238         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1239         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1240
1241         read_lock(&dev_base_lock);
1242         for_each_netdev(net, dev) {
1243                 struct in_device *in_dev;
1244                 rcu_read_lock();
1245                 in_dev = __in_dev_get_rcu(dev);
1246                 if (in_dev)
1247                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1248                 rcu_read_unlock();
1249         }
1250         read_unlock(&dev_base_lock);
1251
1252         rt_cache_flush(0);
1253 }
1254
1255 static int devinet_conf_proc(ctl_table *ctl, int write,
1256                              struct file* filp, void __user *buffer,
1257                              size_t *lenp, loff_t *ppos)
1258 {
1259         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1260
1261         if (write) {
1262                 struct ipv4_devconf *cnf = ctl->extra1;
1263                 struct net *net = ctl->extra2;
1264                 int i = (int *)ctl->data - cnf->data;
1265
1266                 set_bit(i, cnf->state);
1267
1268                 if (cnf == net->ipv4.devconf_dflt)
1269                         devinet_copy_dflt_conf(net, i);
1270         }
1271
1272         return ret;
1273 }
1274
/*
 * sysctl strategy routine for per-device integer settings.
 *
 * Returns:
 *    0        when no new value was supplied, or the new value equals the
 *             current one (nothing is stored)
 *   -EINVAL   when @newlen is not sizeof(int)
 *   -EFAULT   when a user-space access fails
 *    1        after the new value has been stored; the slot's bit in
 *             cnf->state is set and, for the "default" table, the value
 *             is copied to devices via devinet_copy_dflt_conf()
 *
 * When @oldval and @oldlenp are given and the value changes, the previous
 * value is copied out first (truncated to table->maxlen).
 */
static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
                               void __user *oldval, size_t __user *oldlenp,
                               void __user *newval, size_t newlen)
{
        struct ipv4_devconf *cnf;
        struct net *net;
        int *valp = table->data;
        int new;
        int i;

        if (!newval || !newlen)
                return 0;

        if (newlen != sizeof(int))
                return -EINVAL;

        if (get_user(new, (int __user *)newval))
                return -EFAULT;

        /* Unchanged value: nothing to store.  Note that the old value is
         * not copied out on this path.
         */
        if (new == *valp)
                return 0;

        if (oldval && oldlenp) {
                size_t len;

                if (get_user(len, oldlenp))
                        return -EFAULT;

                if (len) {
                        if (len > table->maxlen)
                                len = table->maxlen;
                        if (copy_to_user(oldval, valp, len))
                                return -EFAULT;
                        if (put_user(len, oldlenp))
                                return -EFAULT;
                }
        }

        *valp = new;

        cnf = table->extra1;
        net = table->extra2;
        /* Slot index = offset of this entry's data within cnf->data[]. */
        i = (int *)table->data - cnf->data;

        set_bit(i, cnf->state);

        if (cnf == net->ipv4.devconf_dflt)
                devinet_copy_dflt_conf(net, i);

        return 1;
}
1326
1327 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1328                                   struct file* filp, void __user *buffer,
1329                                   size_t *lenp, loff_t *ppos)
1330 {
1331         int *valp = ctl->data;
1332         int val = *valp;
1333         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1334
1335         if (write && *valp != val) {
1336                 struct net *net = ctl->extra2;
1337
1338                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1339                         inet_forward_change(net);
1340                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1341                         rt_cache_flush(0);
1342         }
1343
1344         return ret;
1345 }
1346
1347 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1348                          struct file* filp, void __user *buffer,
1349                          size_t *lenp, loff_t *ppos)
1350 {
1351         int *valp = ctl->data;
1352         int val = *valp;
1353         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354
1355         if (write && *valp != val)
1356                 rt_cache_flush(0);
1357
1358         return ret;
1359 }
1360
1361 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1362                                   void __user *oldval, size_t __user *oldlenp,
1363                                   void __user *newval, size_t newlen)
1364 {
1365         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1366                                       newval, newlen);
1367
1368         if (ret == 1)
1369                 rt_cache_flush(0);
1370
1371         return ret;
1372 }
1373
1374
/*
 * Build one ctl_table initializer for the setting NET_IPV4_CONF_<attr>.
 *
 *   attr   - suffix of the NET_IPV4_CONF_* constant
 *   name   - procname of the entry
 *   mval   - file mode
 *   proc   - proc_handler
 *   sysctl - strategy routine
 *
 * .data and .extra1 point into the global ipv4_devconf template;
 * __devinet_sysctl_register() rebases them onto the real target.
 * The "- 1" maps the 1-based NET_IPV4_CONF_* value onto a 0-based data[]
 * index.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
        { \
                .ctl_name       = NET_IPV4_CONF_ ## attr, \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  NET_IPV4_CONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .strategy       = sysctl, \
                .extra1         = &ipv4_devconf, \
        }

/* Plain read-write (0644) integer entry using the generic handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
                             devinet_conf_sysctl)

/* Read-only (0444) integer entry using the generic handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
                             devinet_conf_sysctl)

/* Read-write (0644) entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Read-write entry whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
                                     ipv4_doint_and_flush_strategy)
1402
/*
 * Template for one set of per-device IPv4 sysctl entries.
 *
 * __devinet_sysctl_register() duplicates this template with kmemdup() for
 * every registration and fills in the copy:
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - the entries themselves; elements not initialized
 *                   below stay zeroed
 *   dev_name      - private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                /* "forwarding" needs its own proc handler for the
                 * side effects in devinet_sysctl_forward().
                 */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward,
                                             devinet_conf_sysctl),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),

                /* Changing any of these flushes the routing cache. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1439
/*
 * Register the sysctl directory net/ipv4/conf/<dev_name> for the
 * configuration block @p.
 *
 * @net:      network namespace the entries belong to
 * @dev_name: directory name (a device name, or "all"/"default")
 * @ctl_name: binary sysctl id for the directory
 * @p:        configuration block the entries read and write
 *
 * On success the allocated table is stored in p->sysctl and 0 is
 * returned.  Every failure (either allocation or the registration itself)
 * returns -ENOBUFS with everything allocated here freed again.
 */
static int __devinet_sysctl_register(struct net *net, char *dev_name,
                int ctl_name, struct ipv4_devconf *p)
{
        int i;
        struct devinet_sysctl_table *t;

/* Index of the per-device component in devinet_ctl_path[] below. */
#define DEVINET_CTL_PATH_DEV    3

        struct ctl_path devinet_ctl_path[] = {
                { .procname = "net", .ctl_name = CTL_NET, },
                { .procname = "ipv4", .ctl_name = NET_IPV4, },
                { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
                { /* to be set */ },
                { },
        };

        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto out;

        /* Rebase each entry from the ipv4_devconf template onto @p and
         * attach the namespace.  The last array element is left untouched.
         */
        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
                t->devinet_vars[i].extra1 = p;
                t->devinet_vars[i].extra2 = net;
        }

        /*
         * Make a copy of dev_name, because '.procname' is regarded as const
         * by sysctl and we wouldn't want anyone to change it under our feet
         * (see SIOCSIFNAME).
         */
        t->dev_name = kstrdup(dev_name, GFP_KERNEL);
        if (!t->dev_name)
                goto free;

        devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
        devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;

        t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
                        t->devinet_vars);
        if (!t->sysctl_header)
                goto free_procname;

        p->sysctl = t;
        return 0;

free_procname:
        kfree(t->dev_name);
free:
        kfree(t);
out:
        return -ENOBUFS;
}
1493
1494 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1495 {
1496         struct devinet_sysctl_table *t = cnf->sysctl;
1497
1498         if (t == NULL)
1499                 return;
1500
1501         cnf->sysctl = NULL;
1502         unregister_sysctl_table(t->sysctl_header);
1503         kfree(t->dev_name);
1504         kfree(t);
1505 }
1506
/*
 * Register the sysctl entries for one in_device: first the neighbour
 * parameters (idev->arp_parms), then the per-device IPv4 configuration
 * under the device's name.  The return values of both registrations are
 * ignored.
 */
static void devinet_sysctl_register(struct in_device *idev)
{
        neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
                        NET_IPV4_NEIGH, "ipv4", NULL, NULL);
        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
                        idev->dev->ifindex, &idev->cnf);
}
1514
/*
 * Undo devinet_sysctl_register(), in reverse order: drop the per-device
 * IPv4 configuration entries, then the neighbour parameter entries.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
        __devinet_sysctl_unregister(&idev->cnf);
        neigh_sysctl_unregister(idev->arp_parms);
}
1520
/*
 * Table holding the single "ip_forward" entry, registered under the
 * net/ipv4 path by devinet_init_net().  It is backed by the FORWARDING
 * slot of the global ipv4_devconf and bound to init_net; for other
 * namespaces devinet_init_net() duplicates the table and repoints
 * .data/.extra1/.extra2.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .ctl_name       = NET_IPV4_FORWARD,
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        NET_IPV4_CONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .strategy       = devinet_conf_sysctl,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1536
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", .ctl_name = CTL_NET, },
        { .procname = "ipv4", .ctl_name = NET_IPV4, },
        { },
};
1542 #endif
1543
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly.  Any
 * other namespace gets kmemdup()'ed private copies of all of them.
 *
 * With CONFIG_SYSCTL the "all" and "default" conf directories and the
 * ip_forward entry are registered as well.
 *
 * Returns 0 on success.  On failure everything set up so far is undone
 * and a negative errno is returned: -ENOMEM for allocation failures and a
 * failed ip_forward registration, or the error from
 * __devinet_sysctl_register().
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        if (net != &init_net) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace's
                 * private "all" block.
                 */
                tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all",
                        NET_PROTO_CONF_ALL, all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default",
                        NET_PROTO_CONF_DEFAULT, dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Error unwinding: each label undoes the step before the one that
         * failed.  The static init_net objects are never freed.
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1617
/*
 * Per-namespace teardown: unregister the ip_forward table and the
 * "default" and "all" conf directories (CONFIG_SYSCTL only), then free
 * the namespace's configuration blocks.
 *
 * NOTE(review): unlike the error path of devinet_init_net(), the kfree()
 * calls here are unconditional.  For init_net these pointers refer to
 * static objects, so this presumably relies on the exit hook never
 * running for init_net - confirm.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl;

        /* Fetch the table pointer before the header is unregistered. */
        tbl = net->ipv4.forw_hdr->ctl_table_arg;
        unregister_net_sysctl_table(net->ipv4.forw_hdr);
        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
        __devinet_sysctl_unregister(net->ipv4.devconf_all);
        kfree(tbl);
#endif
        kfree(net->ipv4.devconf_dflt);
        kfree(net->ipv4.devconf_all);
}
1632
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1637
/*
 * Boot-time setup of the IPv4 device layer: registers the per-namespace
 * operations, the PF_INET gifconf handler, the netdevice notifier, and
 * the rtnetlink handlers for adding, deleting and dumping addresses.
 * None of the return values are checked.
 */
void __init devinet_init(void)
{
        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        /* NEWADDR/DELADDR are "doit" handlers; GETADDR only supports
         * dumps.
         */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1649
/* Symbols from this file exported for use by modules. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);