Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute 'attr' from the default device configuration that hangs
 * off the network namespace pointer 'net'.  The 'net' argument is
 * parenthesised so that any pointer-valued expression may be passed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116
117         if (ifa) {
118                 INIT_RCU_HEAD(&ifa->rcu_head);
119         }
120
121         return ifa;
122 }
123
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127         if (ifa->ifa_dev)
128                 in_dev_put(ifa->ifa_dev);
129         kfree(ifa);
130 }
131
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139         struct net_device *dev = idev->dev;
140
141         WARN_ON(idev->ifa_list);
142         WARN_ON(idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145                idev, dev ? dev->name : "NIL");
146 #endif
147         dev_put(dev);
148         if (!idev->dead)
149                 printk("Freeing alive in_device %p\n", idev);
150         else {
151                 kfree(idev);
152         }
153 }
154
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157         struct in_device *in_dev;
158
159         ASSERT_RTNL();
160
161         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162         if (!in_dev)
163                 goto out;
164         INIT_RCU_HEAD(&in_dev->rcu_head);
165         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166                         sizeof(in_dev->cnf));
167         in_dev->cnf.sysctl = NULL;
168         in_dev->dev = dev;
169         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170                 goto out_kfree;
171         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172                 dev_disable_lro(dev);
173         /* Reference in_dev->dev */
174         dev_hold(dev);
175         /* Account for reference dev->ip_ptr (below) */
176         in_dev_hold(in_dev);
177
178         devinet_sysctl_register(in_dev);
179         ip_mc_init_dev(in_dev);
180         if (dev->flags & IFF_UP)
181                 ip_mc_up(in_dev);
182
183         /* we can receive as soon as ip_ptr is set -- do this last */
184         rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186         return in_dev;
187 out_kfree:
188         kfree(in_dev);
189         in_dev = NULL;
190         goto out;
191 }
192
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195         struct in_device *idev = container_of(head, struct in_device, rcu_head);
196         in_dev_put(idev);
197 }
198
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201         struct in_ifaddr *ifa;
202         struct net_device *dev;
203
204         ASSERT_RTNL();
205
206         dev = in_dev->dev;
207
208         in_dev->dead = 1;
209
210         ip_mc_destroy_dev(in_dev);
211
212         while ((ifa = in_dev->ifa_list) != NULL) {
213                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214                 inet_free_ifa(ifa);
215         }
216
217         dev->ip_ptr = NULL;
218
219         devinet_sysctl_unregister(in_dev);
220         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221         arp_ifdown(dev);
222
223         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228         rcu_read_lock();
229         for_primary_ifa(in_dev) {
230                 if (inet_ifa_match(a, ifa)) {
231                         if (!b || inet_ifa_match(b, ifa)) {
232                                 rcu_read_unlock();
233                                 return 1;
234                         }
235                 }
236         } endfor_ifa(in_dev);
237         rcu_read_unlock();
238         return 0;
239 }
240
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242                          int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244         struct in_ifaddr *promote = NULL;
245         struct in_ifaddr *ifa, *ifa1 = *ifap;
246         struct in_ifaddr *last_prim = in_dev->ifa_list;
247         struct in_ifaddr *prev_prom = NULL;
248         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249
250         ASSERT_RTNL();
251
252         /* 1. Deleting primary ifaddr forces deletion all secondaries
253          * unless alias promotion is set
254          **/
255
256         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258
259                 while ((ifa = *ifap1) != NULL) {
260                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261                             ifa1->ifa_scope <= ifa->ifa_scope)
262                                 last_prim = ifa;
263
264                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265                             ifa1->ifa_mask != ifa->ifa_mask ||
266                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
267                                 ifap1 = &ifa->ifa_next;
268                                 prev_prom = ifa;
269                                 continue;
270                         }
271
272                         if (!do_promote) {
273                                 *ifap1 = ifa->ifa_next;
274
275                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276                                 blocking_notifier_call_chain(&inetaddr_chain,
277                                                 NETDEV_DOWN, ifa);
278                                 inet_free_ifa(ifa);
279                         } else {
280                                 promote = ifa;
281                                 break;
282                         }
283                 }
284         }
285
286         /* 2. Unlink it */
287
288         *ifap = ifa1->ifa_next;
289
290         /* 3. Announce address deletion */
291
292         /* Send message first, then call notifier.
293            At first sight, FIB update triggered by notifier
294            will refer to already deleted ifaddr, that could confuse
295            netlink listeners. It is not true: look, gated sees
296            that route deleted and if it still thinks that ifaddr
297            is valid, it will try to restore deleted routes... Grr.
298            So that, this order is correct.
299          */
300         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302
303         if (promote) {
304
305                 if (prev_prom) {
306                         prev_prom->ifa_next = promote->ifa_next;
307                         promote->ifa_next = last_prim->ifa_next;
308                         last_prim->ifa_next = promote;
309                 }
310
311                 promote->ifa_flags &= ~IFA_F_SECONDARY;
312                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313                 blocking_notifier_call_chain(&inetaddr_chain,
314                                 NETDEV_UP, promote);
315                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316                         if (ifa1->ifa_mask != ifa->ifa_mask ||
317                             !inet_ifa_match(ifa1->ifa_address, ifa))
318                                         continue;
319                         fib_add_ifaddr(ifa);
320                 }
321
322         }
323         if (destroy)
324                 inet_free_ifa(ifa1);
325 }
326
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328                          int destroy)
329 {
330         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334                              u32 pid)
335 {
336         struct in_device *in_dev = ifa->ifa_dev;
337         struct in_ifaddr *ifa1, **ifap, **last_primary;
338
339         ASSERT_RTNL();
340
341         if (!ifa->ifa_local) {
342                 inet_free_ifa(ifa);
343                 return 0;
344         }
345
346         ifa->ifa_flags &= ~IFA_F_SECONDARY;
347         last_primary = &in_dev->ifa_list;
348
349         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350              ifap = &ifa1->ifa_next) {
351                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352                     ifa->ifa_scope <= ifa1->ifa_scope)
353                         last_primary = &ifa1->ifa_next;
354                 if (ifa1->ifa_mask == ifa->ifa_mask &&
355                     inet_ifa_match(ifa1->ifa_address, ifa)) {
356                         if (ifa1->ifa_local == ifa->ifa_local) {
357                                 inet_free_ifa(ifa);
358                                 return -EEXIST;
359                         }
360                         if (ifa1->ifa_scope != ifa->ifa_scope) {
361                                 inet_free_ifa(ifa);
362                                 return -EINVAL;
363                         }
364                         ifa->ifa_flags |= IFA_F_SECONDARY;
365                 }
366         }
367
368         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369                 net_srandom(ifa->ifa_local);
370                 ifap = last_primary;
371         }
372
373         ifa->ifa_next = *ifap;
374         *ifap = ifa;
375
376         /* Send message first, then call notifier.
377            Notifier will trigger FIB update, so that
378            listeners of netlink will know about new ifaddr */
379         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381
382         return 0;
383 }
384
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387         return __inet_insert_ifa(ifa, NULL, 0);
388 }
389
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392         struct in_device *in_dev = __in_dev_get_rtnl(dev);
393
394         ASSERT_RTNL();
395
396         if (!in_dev) {
397                 inet_free_ifa(ifa);
398                 return -ENOBUFS;
399         }
400         ipv4_devconf_setall(in_dev);
401         if (ifa->ifa_dev != in_dev) {
402                 WARN_ON(ifa->ifa_dev);
403                 in_dev_hold(in_dev);
404                 ifa->ifa_dev = in_dev;
405         }
406         if (ipv4_is_loopback(ifa->ifa_local))
407                 ifa->ifa_scope = RT_SCOPE_HOST;
408         return inet_insert_ifa(ifa);
409 }
410
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413         struct net_device *dev;
414         struct in_device *in_dev = NULL;
415         read_lock(&dev_base_lock);
416         dev = __dev_get_by_index(net, ifindex);
417         if (dev)
418                 in_dev = in_dev_get(dev);
419         read_unlock(&dev_base_lock);
420         return in_dev;
421 }
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         __in_dev_put(in_dev);
460
461         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462              ifap = &ifa->ifa_next) {
463                 if (tb[IFA_LOCAL] &&
464                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465                         continue;
466
467                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468                         continue;
469
470                 if (tb[IFA_ADDRESS] &&
471                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473                         continue;
474
475                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476                 return 0;
477         }
478
479         err = -EADDRNOTAVAIL;
480 errout:
481         return err;
482 }
483
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486         struct nlattr *tb[IFA_MAX+1];
487         struct in_ifaddr *ifa;
488         struct ifaddrmsg *ifm;
489         struct net_device *dev;
490         struct in_device *in_dev;
491         int err;
492
493         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494         if (err < 0)
495                 goto errout;
496
497         ifm = nlmsg_data(nlh);
498         err = -EINVAL;
499         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500                 goto errout;
501
502         dev = __dev_get_by_index(net, ifm->ifa_index);
503         err = -ENODEV;
504         if (dev == NULL)
505                 goto errout;
506
507         in_dev = __in_dev_get_rtnl(dev);
508         err = -ENOBUFS;
509         if (in_dev == NULL)
510                 goto errout;
511
512         ifa = inet_alloc_ifa();
513         if (ifa == NULL)
514                 /*
515                  * A potential indev allocation can be left alive, it stays
516                  * assigned to its device and is destroy with it.
517                  */
518                 goto errout;
519
520         ipv4_devconf_setall(in_dev);
521         in_dev_hold(in_dev);
522
523         if (tb[IFA_ADDRESS] == NULL)
524                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525
526         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528         ifa->ifa_flags = ifm->ifa_flags;
529         ifa->ifa_scope = ifm->ifa_scope;
530         ifa->ifa_dev = in_dev;
531
532         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534
535         if (tb[IFA_BROADCAST])
536                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537
538         if (tb[IFA_LABEL])
539                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540         else
541                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542
543         return ifa;
544
545 errout:
546         return ERR_PTR(err);
547 }
548
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551         struct net *net = sock_net(skb->sk);
552         struct in_ifaddr *ifa;
553
554         ASSERT_RTNL();
555
556         ifa = rtm_to_ifaddr(net, nlh);
557         if (IS_ERR(ifa))
558                 return PTR_ERR(ifa);
559
560         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562
563 /*
564  *      Determine a default network mask, based on the IP address.
565  */
566
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569         int rc = -1;    /* Something else, probably a multicast. */
570
571         if (ipv4_is_zeronet(addr))
572                 rc = 0;
573         else {
574                 __u32 haddr = ntohl(addr);
575
576                 if (IN_CLASSA(haddr))
577                         rc = 8;
578                 else if (IN_CLASSB(haddr))
579                         rc = 16;
580                 else if (IN_CLASSC(haddr))
581                         rc = 24;
582         }
583
584         return rc;
585 }
586
587
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590         struct ifreq ifr;
591         struct sockaddr_in sin_orig;
592         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593         struct in_device *in_dev;
594         struct in_ifaddr **ifap = NULL;
595         struct in_ifaddr *ifa = NULL;
596         struct net_device *dev;
597         char *colon;
598         int ret = -EFAULT;
599         int tryaddrmatch = 0;
600
601         /*
602          *      Fetch the caller's info block into kernel space
603          */
604
605         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606                 goto out;
607         ifr.ifr_name[IFNAMSIZ - 1] = 0;
608
609         /* save original address for comparison */
610         memcpy(&sin_orig, sin, sizeof(*sin));
611
612         colon = strchr(ifr.ifr_name, ':');
613         if (colon)
614                 *colon = 0;
615
616         dev_load(net, ifr.ifr_name);
617
618         switch (cmd) {
619         case SIOCGIFADDR:       /* Get interface address */
620         case SIOCGIFBRDADDR:    /* Get the broadcast address */
621         case SIOCGIFDSTADDR:    /* Get the destination address */
622         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
623                 /* Note that these ioctls will not sleep,
624                    so that we do not impose a lock.
625                    One day we will be forced to put shlock here (I mean SMP)
626                  */
627                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
628                 memset(sin, 0, sizeof(*sin));
629                 sin->sin_family = AF_INET;
630                 break;
631
632         case SIOCSIFFLAGS:
633                 ret = -EACCES;
634                 if (!capable(CAP_NET_ADMIN))
635                         goto out;
636                 break;
637         case SIOCSIFADDR:       /* Set interface address (and family) */
638         case SIOCSIFBRDADDR:    /* Set the broadcast address */
639         case SIOCSIFDSTADDR:    /* Set the destination address */
640         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
641                 ret = -EACCES;
642                 if (!capable(CAP_NET_ADMIN))
643                         goto out;
644                 ret = -EINVAL;
645                 if (sin->sin_family != AF_INET)
646                         goto out;
647                 break;
648         default:
649                 ret = -EINVAL;
650                 goto out;
651         }
652
653         rtnl_lock();
654
655         ret = -ENODEV;
656         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
657                 goto done;
658
659         if (colon)
660                 *colon = ':';
661
662         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
663                 if (tryaddrmatch) {
664                         /* Matthias Andree */
665                         /* compare label and address (4.4BSD style) */
666                         /* note: we only do this for a limited set of ioctls
667                            and only if the original address family was AF_INET.
668                            This is checked above. */
669                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
670                              ifap = &ifa->ifa_next) {
671                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672                                     sin_orig.sin_addr.s_addr ==
673                                                         ifa->ifa_address) {
674                                         break; /* found */
675                                 }
676                         }
677                 }
678                 /* we didn't get a match, maybe the application is
679                    4.3BSD-style and passed in junk so we fall back to
680                    comparing just the label */
681                 if (!ifa) {
682                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
683                              ifap = &ifa->ifa_next)
684                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
685                                         break;
686                 }
687         }
688
689         ret = -EADDRNOTAVAIL;
690         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
691                 goto done;
692
693         switch (cmd) {
694         case SIOCGIFADDR:       /* Get interface address */
695                 sin->sin_addr.s_addr = ifa->ifa_local;
696                 goto rarok;
697
698         case SIOCGIFBRDADDR:    /* Get the broadcast address */
699                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
700                 goto rarok;
701
702         case SIOCGIFDSTADDR:    /* Get the destination address */
703                 sin->sin_addr.s_addr = ifa->ifa_address;
704                 goto rarok;
705
706         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
707                 sin->sin_addr.s_addr = ifa->ifa_mask;
708                 goto rarok;
709
710         case SIOCSIFFLAGS:
711                 if (colon) {
712                         ret = -EADDRNOTAVAIL;
713                         if (!ifa)
714                                 break;
715                         ret = 0;
716                         if (!(ifr.ifr_flags & IFF_UP))
717                                 inet_del_ifa(in_dev, ifap, 1);
718                         break;
719                 }
720                 ret = dev_change_flags(dev, ifr.ifr_flags);
721                 break;
722
723         case SIOCSIFADDR:       /* Set interface address (and family) */
724                 ret = -EINVAL;
725                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
726                         break;
727
728                 if (!ifa) {
729                         ret = -ENOBUFS;
730                         if ((ifa = inet_alloc_ifa()) == NULL)
731                                 break;
732                         if (colon)
733                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734                         else
735                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736                 } else {
737                         ret = 0;
738                         if (ifa->ifa_local == sin->sin_addr.s_addr)
739                                 break;
740                         inet_del_ifa(in_dev, ifap, 0);
741                         ifa->ifa_broadcast = 0;
742                         ifa->ifa_scope = 0;
743                 }
744
745                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746
747                 if (!(dev->flags & IFF_POINTOPOINT)) {
748                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750                         if ((dev->flags & IFF_BROADCAST) &&
751                             ifa->ifa_prefixlen < 31)
752                                 ifa->ifa_broadcast = ifa->ifa_address |
753                                                      ~ifa->ifa_mask;
754                 } else {
755                         ifa->ifa_prefixlen = 32;
756                         ifa->ifa_mask = inet_make_mask(32);
757                 }
758                 ret = inet_set_ifa(dev, ifa);
759                 break;
760
761         case SIOCSIFBRDADDR:    /* Set the broadcast address */
762                 ret = 0;
763                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764                         inet_del_ifa(in_dev, ifap, 0);
765                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
766                         inet_insert_ifa(ifa);
767                 }
768                 break;
769
770         case SIOCSIFDSTADDR:    /* Set the destination address */
771                 ret = 0;
772                 if (ifa->ifa_address == sin->sin_addr.s_addr)
773                         break;
774                 ret = -EINVAL;
775                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776                         break;
777                 ret = 0;
778                 inet_del_ifa(in_dev, ifap, 0);
779                 ifa->ifa_address = sin->sin_addr.s_addr;
780                 inet_insert_ifa(ifa);
781                 break;
782
783         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
784
785                 /*
786                  *      The mask we set must be legal.
787                  */
788                 ret = -EINVAL;
789                 if (bad_mask(sin->sin_addr.s_addr, 0))
790                         break;
791                 ret = 0;
792                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793                         __be32 old_mask = ifa->ifa_mask;
794                         inet_del_ifa(in_dev, ifap, 0);
795                         ifa->ifa_mask = sin->sin_addr.s_addr;
796                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797
798                         /* See if current broadcast address matches
799                          * with current netmask, then recalculate
800                          * the broadcast address. Otherwise it's a
801                          * funny address, so don't touch it since
802                          * the user seems to know what (s)he's doing...
803                          */
804                         if ((dev->flags & IFF_BROADCAST) &&
805                             (ifa->ifa_prefixlen < 31) &&
806                             (ifa->ifa_broadcast ==
807                              (ifa->ifa_local|~old_mask))) {
808                                 ifa->ifa_broadcast = (ifa->ifa_local |
809                                                       ~sin->sin_addr.s_addr);
810                         }
811                         inet_insert_ifa(ifa);
812                 }
813                 break;
814         }
815 done:
816         rtnl_unlock();
817 out:
818         return ret;
819 rarok:
820         rtnl_unlock();
821         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822         goto out;
823 }
824
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827         struct in_device *in_dev = __in_dev_get_rtnl(dev);
828         struct in_ifaddr *ifa;
829         struct ifreq ifr;
830         int done = 0;
831
832         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
833                 goto out;
834
835         for (; ifa; ifa = ifa->ifa_next) {
836                 if (!buf) {
837                         done += sizeof(ifr);
838                         continue;
839                 }
840                 if (len < (int) sizeof(ifr))
841                         break;
842                 memset(&ifr, 0, sizeof(struct ifreq));
843                 if (ifa->ifa_label)
844                         strcpy(ifr.ifr_name, ifa->ifa_label);
845                 else
846                         strcpy(ifr.ifr_name, dev->name);
847
848                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850                                                                 ifa->ifa_local;
851
852                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853                         done = -EFAULT;
854                         break;
855                 }
856                 buf  += sizeof(struct ifreq);
857                 len  -= sizeof(struct ifreq);
858                 done += sizeof(struct ifreq);
859         }
860 out:
861         return done;
862 }
863
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866         __be32 addr = 0;
867         struct in_device *in_dev;
868         struct net *net = dev_net(dev);
869
870         rcu_read_lock();
871         in_dev = __in_dev_get_rcu(dev);
872         if (!in_dev)
873                 goto no_in_dev;
874
875         for_primary_ifa(in_dev) {
876                 if (ifa->ifa_scope > scope)
877                         continue;
878                 if (!dst || inet_ifa_match(dst, ifa)) {
879                         addr = ifa->ifa_local;
880                         break;
881                 }
882                 if (!addr)
883                         addr = ifa->ifa_local;
884         } endfor_ifa(in_dev);
885 no_in_dev:
886         rcu_read_unlock();
887
888         if (addr)
889                 goto out;
890
891         /* Not loopback addresses on loopback should be preferred
892            in this case. It is importnat that lo is the first interface
893            in dev_base list.
894          */
895         read_lock(&dev_base_lock);
896         rcu_read_lock();
897         for_each_netdev(net, dev) {
898                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
899                         continue;
900
901                 for_primary_ifa(in_dev) {
902                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
903                             ifa->ifa_scope <= scope) {
904                                 addr = ifa->ifa_local;
905                                 goto out_unlock_both;
906                         }
907                 } endfor_ifa(in_dev);
908         }
909 out_unlock_both:
910         read_unlock(&dev_base_lock);
911         rcu_read_unlock();
912 out:
913         return addr;
914 }
915
916 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
917                               __be32 local, int scope)
918 {
919         int same = 0;
920         __be32 addr = 0;
921
922         for_ifa(in_dev) {
923                 if (!addr &&
924                     (local == ifa->ifa_local || !local) &&
925                     ifa->ifa_scope <= scope) {
926                         addr = ifa->ifa_local;
927                         if (same)
928                                 break;
929                 }
930                 if (!same) {
931                         same = (!local || inet_ifa_match(local, ifa)) &&
932                                 (!dst || inet_ifa_match(dst, ifa));
933                         if (same && addr) {
934                                 if (local || !dst)
935                                         break;
936                                 /* Is the selected addr into dst subnet? */
937                                 if (inet_ifa_match(addr, ifa))
938                                         break;
939                                 /* No, then can we use new local src? */
940                                 if (ifa->ifa_scope <= scope) {
941                                         addr = ifa->ifa_local;
942                                         break;
943                                 }
944                                 /* search for large dst subnet for addr */
945                                 same = 0;
946                         }
947                 }
948         } endfor_ifa(in_dev);
949
950         return same? addr : 0;
951 }
952
953 /*
954  * Confirm that local IP address exists using wildcards:
955  * - in_dev: only on this interface, 0=any interface
956  * - dst: only in the same subnet as dst, 0=any dst
957  * - local: address, 0=autoselect the local address
958  * - scope: maximum allowed scope value for the local address
959  */
960 __be32 inet_confirm_addr(struct in_device *in_dev,
961                          __be32 dst, __be32 local, int scope)
962 {
963         __be32 addr = 0;
964         struct net_device *dev;
965         struct net *net;
966
967         if (scope != RT_SCOPE_LINK)
968                 return confirm_addr_indev(in_dev, dst, local, scope);
969
970         net = dev_net(in_dev->dev);
971         read_lock(&dev_base_lock);
972         rcu_read_lock();
973         for_each_netdev(net, dev) {
974                 if ((in_dev = __in_dev_get_rcu(dev))) {
975                         addr = confirm_addr_indev(in_dev, dst, local, scope);
976                         if (addr)
977                                 break;
978                 }
979         }
980         rcu_read_unlock();
981         read_unlock(&dev_base_lock);
982
983         return addr;
984 }
985
986 /*
987  *      Device notifier
988  */
989
990 int register_inetaddr_notifier(struct notifier_block *nb)
991 {
992         return blocking_notifier_chain_register(&inetaddr_chain, nb);
993 }
994
995 int unregister_inetaddr_notifier(struct notifier_block *nb)
996 {
997         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
998 }
999
1000 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1001  * alias numbering and to create unique labels if possible.
1002 */
1003 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1004 {
1005         struct in_ifaddr *ifa;
1006         int named = 0;
1007
1008         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1009                 char old[IFNAMSIZ], *dot;
1010
1011                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1012                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1013                 if (named++ == 0)
1014                         goto skip;
1015                 dot = strchr(old, ':');
1016                 if (dot == NULL) {
1017                         sprintf(old, ":%d", named);
1018                         dot = old;
1019                 }
1020                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1021                         strcat(ifa->ifa_label, dot);
1022                 } else {
1023                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1024                 }
1025 skip:
1026                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1027         }
1028 }
1029
/* IPv4 is only usable on a device whose MTU is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        return !(mtu < 68);
}
1034
1035 /* Called only under RTNL semaphore */
1036
1037 static int inetdev_event(struct notifier_block *this, unsigned long event,
1038                          void *ptr)
1039 {
1040         struct net_device *dev = ptr;
1041         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1042
1043         ASSERT_RTNL();
1044
1045         if (!in_dev) {
1046                 if (event == NETDEV_REGISTER) {
1047                         in_dev = inetdev_init(dev);
1048                         if (!in_dev)
1049                                 return notifier_from_errno(-ENOMEM);
1050                         if (dev->flags & IFF_LOOPBACK) {
1051                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1052                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1053                         }
1054                 } else if (event == NETDEV_CHANGEMTU) {
1055                         /* Re-enabling IP */
1056                         if (inetdev_valid_mtu(dev->mtu))
1057                                 in_dev = inetdev_init(dev);
1058                 }
1059                 goto out;
1060         }
1061
1062         switch (event) {
1063         case NETDEV_REGISTER:
1064                 printk(KERN_DEBUG "inetdev_event: bug\n");
1065                 dev->ip_ptr = NULL;
1066                 break;
1067         case NETDEV_UP:
1068                 if (!inetdev_valid_mtu(dev->mtu))
1069                         break;
1070                 if (dev->flags & IFF_LOOPBACK) {
1071                         struct in_ifaddr *ifa;
1072                         if ((ifa = inet_alloc_ifa()) != NULL) {
1073                                 ifa->ifa_local =
1074                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1075                                 ifa->ifa_prefixlen = 8;
1076                                 ifa->ifa_mask = inet_make_mask(8);
1077                                 in_dev_hold(in_dev);
1078                                 ifa->ifa_dev = in_dev;
1079                                 ifa->ifa_scope = RT_SCOPE_HOST;
1080                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1081                                 inet_insert_ifa(ifa);
1082                         }
1083                 }
1084                 ip_mc_up(in_dev);
1085                 break;
1086         case NETDEV_DOWN:
1087                 ip_mc_down(in_dev);
1088                 break;
1089         case NETDEV_CHANGEMTU:
1090                 if (inetdev_valid_mtu(dev->mtu))
1091                         break;
1092                 /* disable IP when MTU is not enough */
1093         case NETDEV_UNREGISTER:
1094                 inetdev_destroy(in_dev);
1095                 break;
1096         case NETDEV_CHANGENAME:
1097                 /* Do not notify about label change, this event is
1098                  * not interesting to applications using netlink.
1099                  */
1100                 inetdev_changename(dev, in_dev);
1101
1102                 devinet_sysctl_unregister(in_dev);
1103                 devinet_sysctl_register(in_dev);
1104                 break;
1105         }
1106 out:
1107         return NOTIFY_DONE;
1108 }
1109
1110 static struct notifier_block ip_netdev_notifier = {
1111         .notifier_call =inetdev_event,
1112 };
1113
1114 static inline size_t inet_nlmsg_size(void)
1115 {
1116         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1117                + nla_total_size(4) /* IFA_ADDRESS */
1118                + nla_total_size(4) /* IFA_LOCAL */
1119                + nla_total_size(4) /* IFA_BROADCAST */
1120                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1121 }
1122
1123 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1124                             u32 pid, u32 seq, int event, unsigned int flags)
1125 {
1126         struct ifaddrmsg *ifm;
1127         struct nlmsghdr  *nlh;
1128
1129         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1130         if (nlh == NULL)
1131                 return -EMSGSIZE;
1132
1133         ifm = nlmsg_data(nlh);
1134         ifm->ifa_family = AF_INET;
1135         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1136         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1137         ifm->ifa_scope = ifa->ifa_scope;
1138         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1139
1140         if (ifa->ifa_address)
1141                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1142
1143         if (ifa->ifa_local)
1144                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1145
1146         if (ifa->ifa_broadcast)
1147                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1148
1149         if (ifa->ifa_label[0])
1150                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151
1152         return nlmsg_end(skb, nlh);
1153
1154 nla_put_failure:
1155         nlmsg_cancel(skb, nlh);
1156         return -EMSGSIZE;
1157 }
1158
1159 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1160 {
1161         struct net *net = sock_net(skb->sk);
1162         int idx, ip_idx;
1163         struct net_device *dev;
1164         struct in_device *in_dev;
1165         struct in_ifaddr *ifa;
1166         int s_ip_idx, s_idx = cb->args[0];
1167
1168         s_ip_idx = ip_idx = cb->args[1];
1169         idx = 0;
1170         for_each_netdev(net, dev) {
1171                 if (idx < s_idx)
1172                         goto cont;
1173                 if (idx > s_idx)
1174                         s_ip_idx = 0;
1175                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1176                         goto cont;
1177
1178                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1179                      ifa = ifa->ifa_next, ip_idx++) {
1180                         if (ip_idx < s_ip_idx)
1181                                 continue;
1182                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1183                                              cb->nlh->nlmsg_seq,
1184                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1185                                 goto done;
1186                 }
1187 cont:
1188                 idx++;
1189         }
1190
1191 done:
1192         cb->args[0] = idx;
1193         cb->args[1] = ip_idx;
1194
1195         return skb->len;
1196 }
1197
1198 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1199                       u32 pid)
1200 {
1201         struct sk_buff *skb;
1202         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1203         int err = -ENOBUFS;
1204         struct net *net;
1205
1206         net = dev_net(ifa->ifa_dev->dev);
1207         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1208         if (skb == NULL)
1209                 goto errout;
1210
1211         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1212         if (err < 0) {
1213                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1214                 WARN_ON(err == -EMSGSIZE);
1215                 kfree_skb(skb);
1216                 goto errout;
1217         }
1218         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1219 errout:
1220         if (err < 0)
1221                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1222 }
1223
1224 #ifdef CONFIG_SYSCTL
1225
1226 static void devinet_copy_dflt_conf(struct net *net, int i)
1227 {
1228         struct net_device *dev;
1229
1230         read_lock(&dev_base_lock);
1231         for_each_netdev(net, dev) {
1232                 struct in_device *in_dev;
1233                 rcu_read_lock();
1234                 in_dev = __in_dev_get_rcu(dev);
1235                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1236                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1237                 rcu_read_unlock();
1238         }
1239         read_unlock(&dev_base_lock);
1240 }
1241
1242 static void inet_forward_change(struct net *net)
1243 {
1244         struct net_device *dev;
1245         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1246
1247         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1248         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1249
1250         read_lock(&dev_base_lock);
1251         for_each_netdev(net, dev) {
1252                 struct in_device *in_dev;
1253                 if (on)
1254                         dev_disable_lro(dev);
1255                 rcu_read_lock();
1256                 in_dev = __in_dev_get_rcu(dev);
1257                 if (in_dev)
1258                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1259                 rcu_read_unlock();
1260         }
1261         read_unlock(&dev_base_lock);
1262 }
1263
1264 static int devinet_conf_proc(ctl_table *ctl, int write,
1265                              struct file* filp, void __user *buffer,
1266                              size_t *lenp, loff_t *ppos)
1267 {
1268         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1269
1270         if (write) {
1271                 struct ipv4_devconf *cnf = ctl->extra1;
1272                 struct net *net = ctl->extra2;
1273                 int i = (int *)ctl->data - cnf->data;
1274
1275                 set_bit(i, cnf->state);
1276
1277                 if (cnf == net->ipv4.devconf_dflt)
1278                         devinet_copy_dflt_conf(net, i);
1279         }
1280
1281         return ret;
1282 }
1283
1284 static int devinet_conf_sysctl(ctl_table *table,
1285                                void __user *oldval, size_t __user *oldlenp,
1286                                void __user *newval, size_t newlen)
1287 {
1288         struct ipv4_devconf *cnf;
1289         struct net *net;
1290         int *valp = table->data;
1291         int new;
1292         int i;
1293
1294         if (!newval || !newlen)
1295                 return 0;
1296
1297         if (newlen != sizeof(int))
1298                 return -EINVAL;
1299
1300         if (get_user(new, (int __user *)newval))
1301                 return -EFAULT;
1302
1303         if (new == *valp)
1304                 return 0;
1305
1306         if (oldval && oldlenp) {
1307                 size_t len;
1308
1309                 if (get_user(len, oldlenp))
1310                         return -EFAULT;
1311
1312                 if (len) {
1313                         if (len > table->maxlen)
1314                                 len = table->maxlen;
1315                         if (copy_to_user(oldval, valp, len))
1316                                 return -EFAULT;
1317                         if (put_user(len, oldlenp))
1318                                 return -EFAULT;
1319                 }
1320         }
1321
1322         *valp = new;
1323
1324         cnf = table->extra1;
1325         net = table->extra2;
1326         i = (int *)table->data - cnf->data;
1327
1328         set_bit(i, cnf->state);
1329
1330         if (cnf == net->ipv4.devconf_dflt)
1331                 devinet_copy_dflt_conf(net, i);
1332
1333         return 1;
1334 }
1335
1336 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1337                                   struct file* filp, void __user *buffer,
1338                                   size_t *lenp, loff_t *ppos)
1339 {
1340         int *valp = ctl->data;
1341         int val = *valp;
1342         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1343
1344         if (write && *valp != val) {
1345                 struct net *net = ctl->extra2;
1346
1347                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1348                         rtnl_lock();
1349                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1350                                 inet_forward_change(net);
1351                         } else if (*valp) {
1352                                 struct ipv4_devconf *cnf = ctl->extra1;
1353                                 struct in_device *idev =
1354                                         container_of(cnf, struct in_device, cnf);
1355                                 dev_disable_lro(idev->dev);
1356                         }
1357                         rtnl_unlock();
1358                         rt_cache_flush(net, 0);
1359                 }
1360         }
1361
1362         return ret;
1363 }
1364
1365 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1366                          struct file* filp, void __user *buffer,
1367                          size_t *lenp, loff_t *ppos)
1368 {
1369         int *valp = ctl->data;
1370         int val = *valp;
1371         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1372         struct net *net = ctl->extra2;
1373
1374         if (write && *valp != val)
1375                 rt_cache_flush(net, 0);
1376
1377         return ret;
1378 }
1379
1380 int ipv4_doint_and_flush_strategy(ctl_table *table,
1381                                   void __user *oldval, size_t __user *oldlenp,
1382                                   void __user *newval, size_t newlen)
1383 {
1384         int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
1385         struct net *net = table->extra2;
1386
1387         if (ret == 1)
1388                 rt_cache_flush(net, 0);
1389
1390         return ret;
1391 }
1392
1393
/*
 * Build one ctl_table initializer for the setting NET_IPV4_CONF_<attr>.
 * The entry initially points into the global ipv4_devconf;
 * __devinet_sysctl_register() rebases .data and .extra1 for the
 * configuration it is registering.
 *
 *   attr   - suffix of the NET_IPV4_CONF_* constant
 *   name   - file name under /proc
 *   mval   - file mode
 *   proc   - proc handler
 *   sysctl - sysctl(2) strategy routine
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
        { \
                .ctl_name       = NET_IPV4_CONF_ ## attr, \
                .procname       = name, \
                .mode           = mval, \
                .maxlen         = sizeof(int), \
                .data           = ipv4_devconf.data + \
                                  NET_IPV4_CONF_ ## attr - 1, \
                .extra1         = &ipv4_devconf, \
                .proc_handler   = proc, \
                .strategy       = sysctl, \
        }

/* Read-write entry served by the generic devinet handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, \
                             devinet_conf_proc, devinet_conf_sysctl)

/* Read-only (mode 0444) entry served by the generic devinet handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, \
                             devinet_conf_proc, devinet_conf_sysctl)

/* Read-write entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Read-write entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, \
                                     ipv4_doint_and_flush, \
                                     ipv4_doint_and_flush_strategy)
1421
1422 static struct devinet_sysctl_table {
1423         struct ctl_table_header *sysctl_header;
1424         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1425         char *dev_name;
1426 } devinet_sysctl = {
1427         .devinet_vars = {
1428                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1429                                              devinet_sysctl_forward,
1430                                              devinet_conf_sysctl),
1431                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1432
1433                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1434                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1435                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1436                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1437                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1439                                         "accept_source_route"),
1440                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1441                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1442                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1443                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1444                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1445                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1446                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1447                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1448                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1449
1450                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1451                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1452                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1453                                               "force_igmp_version"),
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1455                                               "promote_secondaries"),
1456         },
1457 };
1458
1459 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1460                 int ctl_name, struct ipv4_devconf *p)
1461 {
1462         int i;
1463         struct devinet_sysctl_table *t;
1464
1465 #define DEVINET_CTL_PATH_DEV    3
1466
1467         struct ctl_path devinet_ctl_path[] = {
1468                 { .procname = "net", .ctl_name = CTL_NET, },
1469                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1470                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1471                 { /* to be set */ },
1472                 { },
1473         };
1474
1475         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1476         if (!t)
1477                 goto out;
1478
1479         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1480                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1481                 t->devinet_vars[i].extra1 = p;
1482                 t->devinet_vars[i].extra2 = net;
1483         }
1484
1485         /*
1486          * Make a copy of dev_name, because '.procname' is regarded as const
1487          * by sysctl and we wouldn't want anyone to change it under our feet
1488          * (see SIOCSIFNAME).
1489          */
1490         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1491         if (!t->dev_name)
1492                 goto free;
1493
1494         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1495         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1496
1497         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1498                         t->devinet_vars);
1499         if (!t->sysctl_header)
1500                 goto free_procname;
1501
1502         p->sysctl = t;
1503         return 0;
1504
1505 free_procname:
1506         kfree(t->dev_name);
1507 free:
1508         kfree(t);
1509 out:
1510         return -ENOBUFS;
1511 }
1512
1513 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1514 {
1515         struct devinet_sysctl_table *t = cnf->sysctl;
1516
1517         if (t == NULL)
1518                 return;
1519
1520         cnf->sysctl = NULL;
1521         unregister_sysctl_table(t->sysctl_header);
1522         kfree(t->dev_name);
1523         kfree(t);
1524 }
1525
1526 static void devinet_sysctl_register(struct in_device *idev)
1527 {
1528         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1529                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1530         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1531                         idev->dev->ifindex, &idev->cnf);
1532 }
1533
1534 static void devinet_sysctl_unregister(struct in_device *idev)
1535 {
1536         __devinet_sysctl_unregister(&idev->cnf);
1537         neigh_sysctl_unregister(idev->arp_parms);
1538 }
1539
1540 static struct ctl_table ctl_forward_entry[] = {
1541         {
1542                 .ctl_name       = NET_IPV4_FORWARD,
1543                 .procname       = "ip_forward",
1544                 .data           = &ipv4_devconf.data[
1545                                         NET_IPV4_CONF_FORWARDING - 1],
1546                 .maxlen         = sizeof(int),
1547                 .mode           = 0644,
1548                 .proc_handler   = devinet_sysctl_forward,
1549                 .strategy       = devinet_conf_sysctl,
1550                 .extra1         = &ipv4_devconf,
1551                 .extra2         = &init_net,
1552         },
1553         { },
1554 };
1555
1556 static __net_initdata struct ctl_path net_ipv4_path[] = {
1557         { .procname = "net", .ctl_name = CTL_NET, },
1558         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1559         { },
1560 };
1561 #endif
1562
1563 static __net_init int devinet_init_net(struct net *net)
1564 {
1565         int err;
1566         struct ipv4_devconf *all, *dflt;
1567 #ifdef CONFIG_SYSCTL
1568         struct ctl_table *tbl = ctl_forward_entry;
1569         struct ctl_table_header *forw_hdr;
1570 #endif
1571
1572         err = -ENOMEM;
1573         all = &ipv4_devconf;
1574         dflt = &ipv4_devconf_dflt;
1575
1576         if (net != &init_net) {
1577                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1578                 if (all == NULL)
1579                         goto err_alloc_all;
1580
1581                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1582                 if (dflt == NULL)
1583                         goto err_alloc_dflt;
1584
1585 #ifdef CONFIG_SYSCTL
1586                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1587                 if (tbl == NULL)
1588                         goto err_alloc_ctl;
1589
1590                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1591                 tbl[0].extra1 = all;
1592                 tbl[0].extra2 = net;
1593 #endif
1594         }
1595
1596 #ifdef CONFIG_SYSCTL
1597         err = __devinet_sysctl_register(net, "all",
1598                         NET_PROTO_CONF_ALL, all);
1599         if (err < 0)
1600                 goto err_reg_all;
1601
1602         err = __devinet_sysctl_register(net, "default",
1603                         NET_PROTO_CONF_DEFAULT, dflt);
1604         if (err < 0)
1605                 goto err_reg_dflt;
1606
1607         err = -ENOMEM;
1608         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1609         if (forw_hdr == NULL)
1610                 goto err_reg_ctl;
1611         net->ipv4.forw_hdr = forw_hdr;
1612 #endif
1613
1614         net->ipv4.devconf_all = all;
1615         net->ipv4.devconf_dflt = dflt;
1616         return 0;
1617
1618 #ifdef CONFIG_SYSCTL
1619 err_reg_ctl:
1620         __devinet_sysctl_unregister(dflt);
1621 err_reg_dflt:
1622         __devinet_sysctl_unregister(all);
1623 err_reg_all:
1624         if (tbl != ctl_forward_entry)
1625                 kfree(tbl);
1626 err_alloc_ctl:
1627 #endif
1628         if (dflt != &ipv4_devconf_dflt)
1629                 kfree(dflt);
1630 err_alloc_dflt:
1631         if (all != &ipv4_devconf)
1632                 kfree(all);
1633 err_alloc_all:
1634         return err;
1635 }
1636
1637 static __net_exit void devinet_exit_net(struct net *net)
1638 {
1639 #ifdef CONFIG_SYSCTL
1640         struct ctl_table *tbl;
1641
1642         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1644         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1645         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1646         kfree(tbl);
1647 #endif
1648         kfree(net->ipv4.devconf_dflt);
1649         kfree(net->ipv4.devconf_all);
1650 }
1651
1652 static __net_initdata struct pernet_operations devinet_ops = {
1653         .init = devinet_init_net,
1654         .exit = devinet_exit_net,
1655 };
1656
1657 void __init devinet_init(void)
1658 {
1659         register_pernet_subsys(&devinet_ops);
1660
1661         register_gifconf(PF_INET, inet_gifconf);
1662         register_netdevice_notifier(&ip_netdev_notifier);
1663
1664         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1665         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1666         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1667 }
1668
1669 EXPORT_SYMBOL(in_dev_finish_destroy);
1670 EXPORT_SYMBOL(inet_select_addr);
1671 EXPORT_SYMBOL(inetdev_by_index);
1672 EXPORT_SYMBOL(register_inetaddr_notifier);
1673 EXPORT_SYMBOL(unregister_inetaddr_notifier);