Merge commit 'v2.6.27-rc6' into x86/cleanups
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute @attr from the default device configuration that the
 * namespace @net points to (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that an expression argument (for example
 * "base + 1") is dereferenced as a whole rather than being split by
 * operator precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116
117         if (ifa) {
118                 INIT_RCU_HEAD(&ifa->rcu_head);
119         }
120
121         return ifa;
122 }
123
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127         if (ifa->ifa_dev)
128                 in_dev_put(ifa->ifa_dev);
129         kfree(ifa);
130 }
131
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139         struct net_device *dev = idev->dev;
140
141         WARN_ON(idev->ifa_list);
142         WARN_ON(idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145                idev, dev ? dev->name : "NIL");
146 #endif
147         dev_put(dev);
148         if (!idev->dead)
149                 printk("Freeing alive in_device %p\n", idev);
150         else {
151                 kfree(idev);
152         }
153 }
154
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157         struct in_device *in_dev;
158
159         ASSERT_RTNL();
160
161         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162         if (!in_dev)
163                 goto out;
164         INIT_RCU_HEAD(&in_dev->rcu_head);
165         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166                         sizeof(in_dev->cnf));
167         in_dev->cnf.sysctl = NULL;
168         in_dev->dev = dev;
169         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170                 goto out_kfree;
171         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172                 dev_disable_lro(dev);
173         /* Reference in_dev->dev */
174         dev_hold(dev);
175         /* Account for reference dev->ip_ptr (below) */
176         in_dev_hold(in_dev);
177
178         devinet_sysctl_register(in_dev);
179         ip_mc_init_dev(in_dev);
180         if (dev->flags & IFF_UP)
181                 ip_mc_up(in_dev);
182
183         /* we can receive as soon as ip_ptr is set -- do this last */
184         rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186         return in_dev;
187 out_kfree:
188         kfree(in_dev);
189         in_dev = NULL;
190         goto out;
191 }
192
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195         struct in_device *idev = container_of(head, struct in_device, rcu_head);
196         in_dev_put(idev);
197 }
198
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201         struct in_ifaddr *ifa;
202         struct net_device *dev;
203
204         ASSERT_RTNL();
205
206         dev = in_dev->dev;
207
208         in_dev->dead = 1;
209
210         ip_mc_destroy_dev(in_dev);
211
212         while ((ifa = in_dev->ifa_list) != NULL) {
213                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214                 inet_free_ifa(ifa);
215         }
216
217         dev->ip_ptr = NULL;
218
219         devinet_sysctl_unregister(in_dev);
220         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221         arp_ifdown(dev);
222
223         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228         rcu_read_lock();
229         for_primary_ifa(in_dev) {
230                 if (inet_ifa_match(a, ifa)) {
231                         if (!b || inet_ifa_match(b, ifa)) {
232                                 rcu_read_unlock();
233                                 return 1;
234                         }
235                 }
236         } endfor_ifa(in_dev);
237         rcu_read_unlock();
238         return 0;
239 }
240
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242                          int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244         struct in_ifaddr *promote = NULL;
245         struct in_ifaddr *ifa, *ifa1 = *ifap;
246         struct in_ifaddr *last_prim = in_dev->ifa_list;
247         struct in_ifaddr *prev_prom = NULL;
248         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249
250         ASSERT_RTNL();
251
252         /* 1. Deleting primary ifaddr forces deletion all secondaries
253          * unless alias promotion is set
254          **/
255
256         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258
259                 while ((ifa = *ifap1) != NULL) {
260                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261                             ifa1->ifa_scope <= ifa->ifa_scope)
262                                 last_prim = ifa;
263
264                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265                             ifa1->ifa_mask != ifa->ifa_mask ||
266                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
267                                 ifap1 = &ifa->ifa_next;
268                                 prev_prom = ifa;
269                                 continue;
270                         }
271
272                         if (!do_promote) {
273                                 *ifap1 = ifa->ifa_next;
274
275                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276                                 blocking_notifier_call_chain(&inetaddr_chain,
277                                                 NETDEV_DOWN, ifa);
278                                 inet_free_ifa(ifa);
279                         } else {
280                                 promote = ifa;
281                                 break;
282                         }
283                 }
284         }
285
286         /* 2. Unlink it */
287
288         *ifap = ifa1->ifa_next;
289
290         /* 3. Announce address deletion */
291
292         /* Send message first, then call notifier.
293            At first sight, FIB update triggered by notifier
294            will refer to already deleted ifaddr, that could confuse
295            netlink listeners. It is not true: look, gated sees
296            that route deleted and if it still thinks that ifaddr
297            is valid, it will try to restore deleted routes... Grr.
298            So that, this order is correct.
299          */
300         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302
303         if (promote) {
304
305                 if (prev_prom) {
306                         prev_prom->ifa_next = promote->ifa_next;
307                         promote->ifa_next = last_prim->ifa_next;
308                         last_prim->ifa_next = promote;
309                 }
310
311                 promote->ifa_flags &= ~IFA_F_SECONDARY;
312                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313                 blocking_notifier_call_chain(&inetaddr_chain,
314                                 NETDEV_UP, promote);
315                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316                         if (ifa1->ifa_mask != ifa->ifa_mask ||
317                             !inet_ifa_match(ifa1->ifa_address, ifa))
318                                         continue;
319                         fib_add_ifaddr(ifa);
320                 }
321
322         }
323         if (destroy)
324                 inet_free_ifa(ifa1);
325 }
326
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328                          int destroy)
329 {
330         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334                              u32 pid)
335 {
336         struct in_device *in_dev = ifa->ifa_dev;
337         struct in_ifaddr *ifa1, **ifap, **last_primary;
338
339         ASSERT_RTNL();
340
341         if (!ifa->ifa_local) {
342                 inet_free_ifa(ifa);
343                 return 0;
344         }
345
346         ifa->ifa_flags &= ~IFA_F_SECONDARY;
347         last_primary = &in_dev->ifa_list;
348
349         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350              ifap = &ifa1->ifa_next) {
351                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352                     ifa->ifa_scope <= ifa1->ifa_scope)
353                         last_primary = &ifa1->ifa_next;
354                 if (ifa1->ifa_mask == ifa->ifa_mask &&
355                     inet_ifa_match(ifa1->ifa_address, ifa)) {
356                         if (ifa1->ifa_local == ifa->ifa_local) {
357                                 inet_free_ifa(ifa);
358                                 return -EEXIST;
359                         }
360                         if (ifa1->ifa_scope != ifa->ifa_scope) {
361                                 inet_free_ifa(ifa);
362                                 return -EINVAL;
363                         }
364                         ifa->ifa_flags |= IFA_F_SECONDARY;
365                 }
366         }
367
368         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369                 net_srandom(ifa->ifa_local);
370                 ifap = last_primary;
371         }
372
373         ifa->ifa_next = *ifap;
374         *ifap = ifa;
375
376         /* Send message first, then call notifier.
377            Notifier will trigger FIB update, so that
378            listeners of netlink will know about new ifaddr */
379         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381
382         return 0;
383 }
384
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387         return __inet_insert_ifa(ifa, NULL, 0);
388 }
389
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392         struct in_device *in_dev = __in_dev_get_rtnl(dev);
393
394         ASSERT_RTNL();
395
396         if (!in_dev) {
397                 inet_free_ifa(ifa);
398                 return -ENOBUFS;
399         }
400         ipv4_devconf_setall(in_dev);
401         if (ifa->ifa_dev != in_dev) {
402                 WARN_ON(ifa->ifa_dev);
403                 in_dev_hold(in_dev);
404                 ifa->ifa_dev = in_dev;
405         }
406         if (ipv4_is_loopback(ifa->ifa_local))
407                 ifa->ifa_scope = RT_SCOPE_HOST;
408         return inet_insert_ifa(ifa);
409 }
410
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413         struct net_device *dev;
414         struct in_device *in_dev = NULL;
415         read_lock(&dev_base_lock);
416         dev = __dev_get_by_index(net, ifindex);
417         if (dev)
418                 in_dev = in_dev_get(dev);
419         read_unlock(&dev_base_lock);
420         return in_dev;
421 }
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         __in_dev_put(in_dev);
460
461         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462              ifap = &ifa->ifa_next) {
463                 if (tb[IFA_LOCAL] &&
464                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465                         continue;
466
467                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468                         continue;
469
470                 if (tb[IFA_ADDRESS] &&
471                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473                         continue;
474
475                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476                 return 0;
477         }
478
479         err = -EADDRNOTAVAIL;
480 errout:
481         return err;
482 }
483
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486         struct nlattr *tb[IFA_MAX+1];
487         struct in_ifaddr *ifa;
488         struct ifaddrmsg *ifm;
489         struct net_device *dev;
490         struct in_device *in_dev;
491         int err;
492
493         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494         if (err < 0)
495                 goto errout;
496
497         ifm = nlmsg_data(nlh);
498         err = -EINVAL;
499         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500                 goto errout;
501
502         dev = __dev_get_by_index(net, ifm->ifa_index);
503         err = -ENODEV;
504         if (dev == NULL)
505                 goto errout;
506
507         in_dev = __in_dev_get_rtnl(dev);
508         err = -ENOBUFS;
509         if (in_dev == NULL)
510                 goto errout;
511
512         ifa = inet_alloc_ifa();
513         if (ifa == NULL)
514                 /*
515                  * A potential indev allocation can be left alive, it stays
516                  * assigned to its device and is destroy with it.
517                  */
518                 goto errout;
519
520         ipv4_devconf_setall(in_dev);
521         in_dev_hold(in_dev);
522
523         if (tb[IFA_ADDRESS] == NULL)
524                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525
526         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528         ifa->ifa_flags = ifm->ifa_flags;
529         ifa->ifa_scope = ifm->ifa_scope;
530         ifa->ifa_dev = in_dev;
531
532         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534
535         if (tb[IFA_BROADCAST])
536                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537
538         if (tb[IFA_LABEL])
539                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540         else
541                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542
543         return ifa;
544
545 errout:
546         return ERR_PTR(err);
547 }
548
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551         struct net *net = sock_net(skb->sk);
552         struct in_ifaddr *ifa;
553
554         ASSERT_RTNL();
555
556         ifa = rtm_to_ifaddr(net, nlh);
557         if (IS_ERR(ifa))
558                 return PTR_ERR(ifa);
559
560         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562
563 /*
564  *      Determine a default network mask, based on the IP address.
565  */
566
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569         int rc = -1;    /* Something else, probably a multicast. */
570
571         if (ipv4_is_zeronet(addr))
572                 rc = 0;
573         else {
574                 __u32 haddr = ntohl(addr);
575
576                 if (IN_CLASSA(haddr))
577                         rc = 8;
578                 else if (IN_CLASSB(haddr))
579                         rc = 16;
580                 else if (IN_CLASSC(haddr))
581                         rc = 24;
582         }
583
584         return rc;
585 }
586
587
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590         struct ifreq ifr;
591         struct sockaddr_in sin_orig;
592         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593         struct in_device *in_dev;
594         struct in_ifaddr **ifap = NULL;
595         struct in_ifaddr *ifa = NULL;
596         struct net_device *dev;
597         char *colon;
598         int ret = -EFAULT;
599         int tryaddrmatch = 0;
600
601         /*
602          *      Fetch the caller's info block into kernel space
603          */
604
605         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606                 goto out;
607         ifr.ifr_name[IFNAMSIZ - 1] = 0;
608
609         /* save original address for comparison */
610         memcpy(&sin_orig, sin, sizeof(*sin));
611
612         colon = strchr(ifr.ifr_name, ':');
613         if (colon)
614                 *colon = 0;
615
616 #ifdef CONFIG_KMOD
617         dev_load(net, ifr.ifr_name);
618 #endif
619
620         switch (cmd) {
621         case SIOCGIFADDR:       /* Get interface address */
622         case SIOCGIFBRDADDR:    /* Get the broadcast address */
623         case SIOCGIFDSTADDR:    /* Get the destination address */
624         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
625                 /* Note that these ioctls will not sleep,
626                    so that we do not impose a lock.
627                    One day we will be forced to put shlock here (I mean SMP)
628                  */
629                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
630                 memset(sin, 0, sizeof(*sin));
631                 sin->sin_family = AF_INET;
632                 break;
633
634         case SIOCSIFFLAGS:
635                 ret = -EACCES;
636                 if (!capable(CAP_NET_ADMIN))
637                         goto out;
638                 break;
639         case SIOCSIFADDR:       /* Set interface address (and family) */
640         case SIOCSIFBRDADDR:    /* Set the broadcast address */
641         case SIOCSIFDSTADDR:    /* Set the destination address */
642         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
643                 ret = -EACCES;
644                 if (!capable(CAP_NET_ADMIN))
645                         goto out;
646                 ret = -EINVAL;
647                 if (sin->sin_family != AF_INET)
648                         goto out;
649                 break;
650         default:
651                 ret = -EINVAL;
652                 goto out;
653         }
654
655         rtnl_lock();
656
657         ret = -ENODEV;
658         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
659                 goto done;
660
661         if (colon)
662                 *colon = ':';
663
664         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
665                 if (tryaddrmatch) {
666                         /* Matthias Andree */
667                         /* compare label and address (4.4BSD style) */
668                         /* note: we only do this for a limited set of ioctls
669                            and only if the original address family was AF_INET.
670                            This is checked above. */
671                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
672                              ifap = &ifa->ifa_next) {
673                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
674                                     sin_orig.sin_addr.s_addr ==
675                                                         ifa->ifa_address) {
676                                         break; /* found */
677                                 }
678                         }
679                 }
680                 /* we didn't get a match, maybe the application is
681                    4.3BSD-style and passed in junk so we fall back to
682                    comparing just the label */
683                 if (!ifa) {
684                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685                              ifap = &ifa->ifa_next)
686                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687                                         break;
688                 }
689         }
690
691         ret = -EADDRNOTAVAIL;
692         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693                 goto done;
694
695         switch (cmd) {
696         case SIOCGIFADDR:       /* Get interface address */
697                 sin->sin_addr.s_addr = ifa->ifa_local;
698                 goto rarok;
699
700         case SIOCGIFBRDADDR:    /* Get the broadcast address */
701                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
702                 goto rarok;
703
704         case SIOCGIFDSTADDR:    /* Get the destination address */
705                 sin->sin_addr.s_addr = ifa->ifa_address;
706                 goto rarok;
707
708         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
709                 sin->sin_addr.s_addr = ifa->ifa_mask;
710                 goto rarok;
711
712         case SIOCSIFFLAGS:
713                 if (colon) {
714                         ret = -EADDRNOTAVAIL;
715                         if (!ifa)
716                                 break;
717                         ret = 0;
718                         if (!(ifr.ifr_flags & IFF_UP))
719                                 inet_del_ifa(in_dev, ifap, 1);
720                         break;
721                 }
722                 ret = dev_change_flags(dev, ifr.ifr_flags);
723                 break;
724
725         case SIOCSIFADDR:       /* Set interface address (and family) */
726                 ret = -EINVAL;
727                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728                         break;
729
730                 if (!ifa) {
731                         ret = -ENOBUFS;
732                         if ((ifa = inet_alloc_ifa()) == NULL)
733                                 break;
734                         if (colon)
735                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736                         else
737                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738                 } else {
739                         ret = 0;
740                         if (ifa->ifa_local == sin->sin_addr.s_addr)
741                                 break;
742                         inet_del_ifa(in_dev, ifap, 0);
743                         ifa->ifa_broadcast = 0;
744                         ifa->ifa_scope = 0;
745                 }
746
747                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748
749                 if (!(dev->flags & IFF_POINTOPOINT)) {
750                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
751                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
752                         if ((dev->flags & IFF_BROADCAST) &&
753                             ifa->ifa_prefixlen < 31)
754                                 ifa->ifa_broadcast = ifa->ifa_address |
755                                                      ~ifa->ifa_mask;
756                 } else {
757                         ifa->ifa_prefixlen = 32;
758                         ifa->ifa_mask = inet_make_mask(32);
759                 }
760                 ret = inet_set_ifa(dev, ifa);
761                 break;
762
763         case SIOCSIFBRDADDR:    /* Set the broadcast address */
764                 ret = 0;
765                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
766                         inet_del_ifa(in_dev, ifap, 0);
767                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
768                         inet_insert_ifa(ifa);
769                 }
770                 break;
771
772         case SIOCSIFDSTADDR:    /* Set the destination address */
773                 ret = 0;
774                 if (ifa->ifa_address == sin->sin_addr.s_addr)
775                         break;
776                 ret = -EINVAL;
777                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778                         break;
779                 ret = 0;
780                 inet_del_ifa(in_dev, ifap, 0);
781                 ifa->ifa_address = sin->sin_addr.s_addr;
782                 inet_insert_ifa(ifa);
783                 break;
784
785         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
786
787                 /*
788                  *      The mask we set must be legal.
789                  */
790                 ret = -EINVAL;
791                 if (bad_mask(sin->sin_addr.s_addr, 0))
792                         break;
793                 ret = 0;
794                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
795                         __be32 old_mask = ifa->ifa_mask;
796                         inet_del_ifa(in_dev, ifap, 0);
797                         ifa->ifa_mask = sin->sin_addr.s_addr;
798                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799
800                         /* See if current broadcast address matches
801                          * with current netmask, then recalculate
802                          * the broadcast address. Otherwise it's a
803                          * funny address, so don't touch it since
804                          * the user seems to know what (s)he's doing...
805                          */
806                         if ((dev->flags & IFF_BROADCAST) &&
807                             (ifa->ifa_prefixlen < 31) &&
808                             (ifa->ifa_broadcast ==
809                              (ifa->ifa_local|~old_mask))) {
810                                 ifa->ifa_broadcast = (ifa->ifa_local |
811                                                       ~sin->sin_addr.s_addr);
812                         }
813                         inet_insert_ifa(ifa);
814                 }
815                 break;
816         }
817 done:
818         rtnl_unlock();
819 out:
820         return ret;
821 rarok:
822         rtnl_unlock();
823         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
824         goto out;
825 }
826
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829         struct in_device *in_dev = __in_dev_get_rtnl(dev);
830         struct in_ifaddr *ifa;
831         struct ifreq ifr;
832         int done = 0;
833
834         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835                 goto out;
836
837         for (; ifa; ifa = ifa->ifa_next) {
838                 if (!buf) {
839                         done += sizeof(ifr);
840                         continue;
841                 }
842                 if (len < (int) sizeof(ifr))
843                         break;
844                 memset(&ifr, 0, sizeof(struct ifreq));
845                 if (ifa->ifa_label)
846                         strcpy(ifr.ifr_name, ifa->ifa_label);
847                 else
848                         strcpy(ifr.ifr_name, dev->name);
849
850                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852                                                                 ifa->ifa_local;
853
854                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855                         done = -EFAULT;
856                         break;
857                 }
858                 buf  += sizeof(struct ifreq);
859                 len  -= sizeof(struct ifreq);
860                 done += sizeof(struct ifreq);
861         }
862 out:
863         return done;
864 }
865
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868         __be32 addr = 0;
869         struct in_device *in_dev;
870         struct net *net = dev_net(dev);
871
872         rcu_read_lock();
873         in_dev = __in_dev_get_rcu(dev);
874         if (!in_dev)
875                 goto no_in_dev;
876
877         for_primary_ifa(in_dev) {
878                 if (ifa->ifa_scope > scope)
879                         continue;
880                 if (!dst || inet_ifa_match(dst, ifa)) {
881                         addr = ifa->ifa_local;
882                         break;
883                 }
884                 if (!addr)
885                         addr = ifa->ifa_local;
886         } endfor_ifa(in_dev);
887 no_in_dev:
888         rcu_read_unlock();
889
890         if (addr)
891                 goto out;
892
893         /* Not loopback addresses on loopback should be preferred
894            in this case. It is importnat that lo is the first interface
895            in dev_base list.
896          */
897         read_lock(&dev_base_lock);
898         rcu_read_lock();
899         for_each_netdev(net, dev) {
900                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901                         continue;
902
903                 for_primary_ifa(in_dev) {
904                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
905                             ifa->ifa_scope <= scope) {
906                                 addr = ifa->ifa_local;
907                                 goto out_unlock_both;
908                         }
909                 } endfor_ifa(in_dev);
910         }
911 out_unlock_both:
912         read_unlock(&dev_base_lock);
913         rcu_read_unlock();
914 out:
915         return addr;
916 }
917
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919                               __be32 local, int scope)
920 {
921         int same = 0;
922         __be32 addr = 0;
923
924         for_ifa(in_dev) {
925                 if (!addr &&
926                     (local == ifa->ifa_local || !local) &&
927                     ifa->ifa_scope <= scope) {
928                         addr = ifa->ifa_local;
929                         if (same)
930                                 break;
931                 }
932                 if (!same) {
933                         same = (!local || inet_ifa_match(local, ifa)) &&
934                                 (!dst || inet_ifa_match(dst, ifa));
935                         if (same && addr) {
936                                 if (local || !dst)
937                                         break;
938                                 /* Is the selected addr into dst subnet? */
939                                 if (inet_ifa_match(addr, ifa))
940                                         break;
941                                 /* No, then can we use new local src? */
942                                 if (ifa->ifa_scope <= scope) {
943                                         addr = ifa->ifa_local;
944                                         break;
945                                 }
946                                 /* search for large dst subnet for addr */
947                                 same = 0;
948                         }
949                 }
950         } endfor_ifa(in_dev);
951
952         return same? addr : 0;
953 }
954
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963                          __be32 dst, __be32 local, int scope)
964 {
965         __be32 addr = 0;
966         struct net_device *dev;
967         struct net *net;
968
969         if (scope != RT_SCOPE_LINK)
970                 return confirm_addr_indev(in_dev, dst, local, scope);
971
972         net = dev_net(in_dev->dev);
973         read_lock(&dev_base_lock);
974         rcu_read_lock();
975         for_each_netdev(net, dev) {
976                 if ((in_dev = __in_dev_get_rcu(dev))) {
977                         addr = confirm_addr_indev(in_dev, dst, local, scope);
978                         if (addr)
979                                 break;
980                 }
981         }
982         rcu_read_unlock();
983         read_unlock(&dev_base_lock);
984
985         return addr;
986 }
987
988 /*
989  *      Device notifier
990  */
991
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994         return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007         struct in_ifaddr *ifa;
1008         int named = 0;
1009
1010         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011                 char old[IFNAMSIZ], *dot;
1012
1013                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015                 if (named++ == 0)
1016                         goto skip;
1017                 dot = strchr(old, ':');
1018                 if (dot == NULL) {
1019                         sprintf(old, ":%d", named);
1020                         dot = old;
1021                 }
1022                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023                         strcat(ifa->ifa_label, dot);
1024                 } else {
1025                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026                 }
1027 skip:
1028                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1029         }
1030 }
1031
/*
 * Return true when @mtu is at least 68, the smallest MTU for which this
 * file keeps IPv4 enabled on a device (NOTE(review): 68 presumably comes
 * from the RFC 791 minimum -- confirm).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1036
1037 /* Called only under RTNL semaphore */
1038
1039 static int inetdev_event(struct notifier_block *this, unsigned long event,
1040                          void *ptr)
1041 {
1042         struct net_device *dev = ptr;
1043         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1044
1045         ASSERT_RTNL();
1046
1047         if (!in_dev) {
1048                 if (event == NETDEV_REGISTER) {
1049                         in_dev = inetdev_init(dev);
1050                         if (!in_dev)
1051                                 return notifier_from_errno(-ENOMEM);
1052                         if (dev->flags & IFF_LOOPBACK) {
1053                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1054                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1055                         }
1056                 } else if (event == NETDEV_CHANGEMTU) {
1057                         /* Re-enabling IP */
1058                         if (inetdev_valid_mtu(dev->mtu))
1059                                 in_dev = inetdev_init(dev);
1060                 }
1061                 goto out;
1062         }
1063
1064         switch (event) {
1065         case NETDEV_REGISTER:
1066                 printk(KERN_DEBUG "inetdev_event: bug\n");
1067                 dev->ip_ptr = NULL;
1068                 break;
1069         case NETDEV_UP:
1070                 if (!inetdev_valid_mtu(dev->mtu))
1071                         break;
1072                 if (dev->flags & IFF_LOOPBACK) {
1073                         struct in_ifaddr *ifa;
1074                         if ((ifa = inet_alloc_ifa()) != NULL) {
1075                                 ifa->ifa_local =
1076                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1077                                 ifa->ifa_prefixlen = 8;
1078                                 ifa->ifa_mask = inet_make_mask(8);
1079                                 in_dev_hold(in_dev);
1080                                 ifa->ifa_dev = in_dev;
1081                                 ifa->ifa_scope = RT_SCOPE_HOST;
1082                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1083                                 inet_insert_ifa(ifa);
1084                         }
1085                 }
1086                 ip_mc_up(in_dev);
1087                 break;
1088         case NETDEV_DOWN:
1089                 ip_mc_down(in_dev);
1090                 break;
1091         case NETDEV_CHANGEMTU:
1092                 if (inetdev_valid_mtu(dev->mtu))
1093                         break;
1094                 /* disable IP when MTU is not enough */
1095         case NETDEV_UNREGISTER:
1096                 inetdev_destroy(in_dev);
1097                 break;
1098         case NETDEV_CHANGENAME:
1099                 /* Do not notify about label change, this event is
1100                  * not interesting to applications using netlink.
1101                  */
1102                 inetdev_changename(dev, in_dev);
1103
1104                 devinet_sysctl_unregister(in_dev);
1105                 devinet_sysctl_register(in_dev);
1106                 break;
1107         }
1108 out:
1109         return NOTIFY_DONE;
1110 }
1111
1112 static struct notifier_block ip_netdev_notifier = {
1113         .notifier_call =inetdev_event,
1114 };
1115
1116 static inline size_t inet_nlmsg_size(void)
1117 {
1118         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1119                + nla_total_size(4) /* IFA_ADDRESS */
1120                + nla_total_size(4) /* IFA_LOCAL */
1121                + nla_total_size(4) /* IFA_BROADCAST */
1122                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1123 }
1124
1125 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1126                             u32 pid, u32 seq, int event, unsigned int flags)
1127 {
1128         struct ifaddrmsg *ifm;
1129         struct nlmsghdr  *nlh;
1130
1131         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1132         if (nlh == NULL)
1133                 return -EMSGSIZE;
1134
1135         ifm = nlmsg_data(nlh);
1136         ifm->ifa_family = AF_INET;
1137         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1138         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1139         ifm->ifa_scope = ifa->ifa_scope;
1140         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1141
1142         if (ifa->ifa_address)
1143                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1144
1145         if (ifa->ifa_local)
1146                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1147
1148         if (ifa->ifa_broadcast)
1149                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1150
1151         if (ifa->ifa_label[0])
1152                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1153
1154         return nlmsg_end(skb, nlh);
1155
1156 nla_put_failure:
1157         nlmsg_cancel(skb, nlh);
1158         return -EMSGSIZE;
1159 }
1160
1161 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1162 {
1163         struct net *net = sock_net(skb->sk);
1164         int idx, ip_idx;
1165         struct net_device *dev;
1166         struct in_device *in_dev;
1167         struct in_ifaddr *ifa;
1168         int s_ip_idx, s_idx = cb->args[0];
1169
1170         s_ip_idx = ip_idx = cb->args[1];
1171         idx = 0;
1172         for_each_netdev(net, dev) {
1173                 if (idx < s_idx)
1174                         goto cont;
1175                 if (idx > s_idx)
1176                         s_ip_idx = 0;
1177                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1178                         goto cont;
1179
1180                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1181                      ifa = ifa->ifa_next, ip_idx++) {
1182                         if (ip_idx < s_ip_idx)
1183                                 continue;
1184                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1185                                              cb->nlh->nlmsg_seq,
1186                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1187                                 goto done;
1188                 }
1189 cont:
1190                 idx++;
1191         }
1192
1193 done:
1194         cb->args[0] = idx;
1195         cb->args[1] = ip_idx;
1196
1197         return skb->len;
1198 }
1199
1200 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1201                       u32 pid)
1202 {
1203         struct sk_buff *skb;
1204         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1205         int err = -ENOBUFS;
1206         struct net *net;
1207
1208         net = dev_net(ifa->ifa_dev->dev);
1209         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1210         if (skb == NULL)
1211                 goto errout;
1212
1213         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1214         if (err < 0) {
1215                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1216                 WARN_ON(err == -EMSGSIZE);
1217                 kfree_skb(skb);
1218                 goto errout;
1219         }
1220         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1221 errout:
1222         if (err < 0)
1223                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1224 }
1225
1226 #ifdef CONFIG_SYSCTL
1227
1228 static void devinet_copy_dflt_conf(struct net *net, int i)
1229 {
1230         struct net_device *dev;
1231
1232         read_lock(&dev_base_lock);
1233         for_each_netdev(net, dev) {
1234                 struct in_device *in_dev;
1235                 rcu_read_lock();
1236                 in_dev = __in_dev_get_rcu(dev);
1237                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1238                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1239                 rcu_read_unlock();
1240         }
1241         read_unlock(&dev_base_lock);
1242 }
1243
1244 static void inet_forward_change(struct net *net)
1245 {
1246         struct net_device *dev;
1247         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1248
1249         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1250         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1251
1252         read_lock(&dev_base_lock);
1253         for_each_netdev(net, dev) {
1254                 struct in_device *in_dev;
1255                 if (on)
1256                         dev_disable_lro(dev);
1257                 rcu_read_lock();
1258                 in_dev = __in_dev_get_rcu(dev);
1259                 if (in_dev)
1260                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1261                 rcu_read_unlock();
1262         }
1263         read_unlock(&dev_base_lock);
1264 }
1265
1266 static int devinet_conf_proc(ctl_table *ctl, int write,
1267                              struct file* filp, void __user *buffer,
1268                              size_t *lenp, loff_t *ppos)
1269 {
1270         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1271
1272         if (write) {
1273                 struct ipv4_devconf *cnf = ctl->extra1;
1274                 struct net *net = ctl->extra2;
1275                 int i = (int *)ctl->data - cnf->data;
1276
1277                 set_bit(i, cnf->state);
1278
1279                 if (cnf == net->ipv4.devconf_dflt)
1280                         devinet_copy_dflt_conf(net, i);
1281         }
1282
1283         return ret;
1284 }
1285
1286 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1287                                void __user *oldval, size_t __user *oldlenp,
1288                                void __user *newval, size_t newlen)
1289 {
1290         struct ipv4_devconf *cnf;
1291         struct net *net;
1292         int *valp = table->data;
1293         int new;
1294         int i;
1295
1296         if (!newval || !newlen)
1297                 return 0;
1298
1299         if (newlen != sizeof(int))
1300                 return -EINVAL;
1301
1302         if (get_user(new, (int __user *)newval))
1303                 return -EFAULT;
1304
1305         if (new == *valp)
1306                 return 0;
1307
1308         if (oldval && oldlenp) {
1309                 size_t len;
1310
1311                 if (get_user(len, oldlenp))
1312                         return -EFAULT;
1313
1314                 if (len) {
1315                         if (len > table->maxlen)
1316                                 len = table->maxlen;
1317                         if (copy_to_user(oldval, valp, len))
1318                                 return -EFAULT;
1319                         if (put_user(len, oldlenp))
1320                                 return -EFAULT;
1321                 }
1322         }
1323
1324         *valp = new;
1325
1326         cnf = table->extra1;
1327         net = table->extra2;
1328         i = (int *)table->data - cnf->data;
1329
1330         set_bit(i, cnf->state);
1331
1332         if (cnf == net->ipv4.devconf_dflt)
1333                 devinet_copy_dflt_conf(net, i);
1334
1335         return 1;
1336 }
1337
1338 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1339                                   struct file* filp, void __user *buffer,
1340                                   size_t *lenp, loff_t *ppos)
1341 {
1342         int *valp = ctl->data;
1343         int val = *valp;
1344         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1345
1346         if (write && *valp != val) {
1347                 struct net *net = ctl->extra2;
1348
1349                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1350                         rtnl_lock();
1351                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1352                                 inet_forward_change(net);
1353                         } else if (*valp) {
1354                                 struct ipv4_devconf *cnf = ctl->extra1;
1355                                 struct in_device *idev =
1356                                         container_of(cnf, struct in_device, cnf);
1357                                 dev_disable_lro(idev->dev);
1358                         }
1359                         rtnl_unlock();
1360                         rt_cache_flush(net, 0);
1361                 }
1362         }
1363
1364         return ret;
1365 }
1366
1367 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1368                          struct file* filp, void __user *buffer,
1369                          size_t *lenp, loff_t *ppos)
1370 {
1371         int *valp = ctl->data;
1372         int val = *valp;
1373         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1374         struct net *net = ctl->extra2;
1375
1376         if (write && *valp != val)
1377                 rt_cache_flush(net, 0);
1378
1379         return ret;
1380 }
1381
1382 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1383                                   void __user *oldval, size_t __user *oldlenp,
1384                                   void __user *newval, size_t newlen)
1385 {
1386         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1387                                       newval, newlen);
1388         struct net *net = table->extra2;
1389
1390         if (ret == 1)
1391                 rt_cache_flush(net, 0);
1392
1393         return ret;
1394 }
1395
1396
/*
 * Builders for the entries of the devconf sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY emits one ctl_table initializer.  Its .data points
 * at slot (NET_IPV4_CONF_<attr> - 1) of the global ipv4_devconf and .extra1
 * at ipv4_devconf itself; __devinet_sysctl_register() rebases both onto the
 * devconf that is actually being registered.
 *
 *   RW_ENTRY        mode 0644, generic proc/strategy handlers
 *   RO_ENTRY        mode 0444, same handlers
 *   COMPLEX_ENTRY   mode 0644, caller-supplied handlers
 *   FLUSHING_ENTRY  mode 0644, handlers that also flush the routing cache
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler, strat)	\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= pname,				\
		.data		= ipv4_devconf.data +			\
				  NET_IPV4_CONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= perm,					\
		.proc_handler	= handler,				\
		.strategy	= strat,				\
		.extra1		= &ipv4_devconf,			\
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, pname)				\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

#define DEVINET_SYSCTL_RO_ENTRY(attr, pname)				\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler, strat)	\
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler, strat)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1424
1425 static struct devinet_sysctl_table {
1426         struct ctl_table_header *sysctl_header;
1427         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1428         char *dev_name;
1429 } devinet_sysctl = {
1430         .devinet_vars = {
1431                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1432                                              devinet_sysctl_forward,
1433                                              devinet_conf_sysctl),
1434                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1435
1436                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1437                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1439                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1440                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1441                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1442                                         "accept_source_route"),
1443                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1444                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1445                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1446                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1447                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1448                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1449                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1450                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1451                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1452
1453                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1455                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1456                                               "force_igmp_version"),
1457                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1458                                               "promote_secondaries"),
1459         },
1460 };
1461
1462 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1463                 int ctl_name, struct ipv4_devconf *p)
1464 {
1465         int i;
1466         struct devinet_sysctl_table *t;
1467
1468 #define DEVINET_CTL_PATH_DEV    3
1469
1470         struct ctl_path devinet_ctl_path[] = {
1471                 { .procname = "net", .ctl_name = CTL_NET, },
1472                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1473                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1474                 { /* to be set */ },
1475                 { },
1476         };
1477
1478         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1479         if (!t)
1480                 goto out;
1481
1482         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1483                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1484                 t->devinet_vars[i].extra1 = p;
1485                 t->devinet_vars[i].extra2 = net;
1486         }
1487
1488         /*
1489          * Make a copy of dev_name, because '.procname' is regarded as const
1490          * by sysctl and we wouldn't want anyone to change it under our feet
1491          * (see SIOCSIFNAME).
1492          */
1493         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1494         if (!t->dev_name)
1495                 goto free;
1496
1497         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1498         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1499
1500         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1501                         t->devinet_vars);
1502         if (!t->sysctl_header)
1503                 goto free_procname;
1504
1505         p->sysctl = t;
1506         return 0;
1507
1508 free_procname:
1509         kfree(t->dev_name);
1510 free:
1511         kfree(t);
1512 out:
1513         return -ENOBUFS;
1514 }
1515
1516 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1517 {
1518         struct devinet_sysctl_table *t = cnf->sysctl;
1519
1520         if (t == NULL)
1521                 return;
1522
1523         cnf->sysctl = NULL;
1524         unregister_sysctl_table(t->sysctl_header);
1525         kfree(t->dev_name);
1526         kfree(t);
1527 }
1528
1529 static void devinet_sysctl_register(struct in_device *idev)
1530 {
1531         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1532                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1533         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1534                         idev->dev->ifindex, &idev->cnf);
1535 }
1536
1537 static void devinet_sysctl_unregister(struct in_device *idev)
1538 {
1539         __devinet_sysctl_unregister(&idev->cnf);
1540         neigh_sysctl_unregister(idev->arp_parms);
1541 }
1542
1543 static struct ctl_table ctl_forward_entry[] = {
1544         {
1545                 .ctl_name       = NET_IPV4_FORWARD,
1546                 .procname       = "ip_forward",
1547                 .data           = &ipv4_devconf.data[
1548                                         NET_IPV4_CONF_FORWARDING - 1],
1549                 .maxlen         = sizeof(int),
1550                 .mode           = 0644,
1551                 .proc_handler   = devinet_sysctl_forward,
1552                 .strategy       = devinet_conf_sysctl,
1553                 .extra1         = &ipv4_devconf,
1554                 .extra2         = &init_net,
1555         },
1556         { },
1557 };
1558
1559 static __net_initdata struct ctl_path net_ipv4_path[] = {
1560         { .procname = "net", .ctl_name = CTL_NET, },
1561         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1562         { },
1563 };
1564 #endif
1565
1566 static __net_init int devinet_init_net(struct net *net)
1567 {
1568         int err;
1569         struct ipv4_devconf *all, *dflt;
1570 #ifdef CONFIG_SYSCTL
1571         struct ctl_table *tbl = ctl_forward_entry;
1572         struct ctl_table_header *forw_hdr;
1573 #endif
1574
1575         err = -ENOMEM;
1576         all = &ipv4_devconf;
1577         dflt = &ipv4_devconf_dflt;
1578
1579         if (net != &init_net) {
1580                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1581                 if (all == NULL)
1582                         goto err_alloc_all;
1583
1584                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1585                 if (dflt == NULL)
1586                         goto err_alloc_dflt;
1587
1588 #ifdef CONFIG_SYSCTL
1589                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1590                 if (tbl == NULL)
1591                         goto err_alloc_ctl;
1592
1593                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1594                 tbl[0].extra1 = all;
1595                 tbl[0].extra2 = net;
1596 #endif
1597         }
1598
1599 #ifdef CONFIG_SYSCTL
1600         err = __devinet_sysctl_register(net, "all",
1601                         NET_PROTO_CONF_ALL, all);
1602         if (err < 0)
1603                 goto err_reg_all;
1604
1605         err = __devinet_sysctl_register(net, "default",
1606                         NET_PROTO_CONF_DEFAULT, dflt);
1607         if (err < 0)
1608                 goto err_reg_dflt;
1609
1610         err = -ENOMEM;
1611         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1612         if (forw_hdr == NULL)
1613                 goto err_reg_ctl;
1614         net->ipv4.forw_hdr = forw_hdr;
1615 #endif
1616
1617         net->ipv4.devconf_all = all;
1618         net->ipv4.devconf_dflt = dflt;
1619         return 0;
1620
1621 #ifdef CONFIG_SYSCTL
1622 err_reg_ctl:
1623         __devinet_sysctl_unregister(dflt);
1624 err_reg_dflt:
1625         __devinet_sysctl_unregister(all);
1626 err_reg_all:
1627         if (tbl != ctl_forward_entry)
1628                 kfree(tbl);
1629 err_alloc_ctl:
1630 #endif
1631         if (dflt != &ipv4_devconf_dflt)
1632                 kfree(dflt);
1633 err_alloc_dflt:
1634         if (all != &ipv4_devconf)
1635                 kfree(all);
1636 err_alloc_all:
1637         return err;
1638 }
1639
1640 static __net_exit void devinet_exit_net(struct net *net)
1641 {
1642 #ifdef CONFIG_SYSCTL
1643         struct ctl_table *tbl;
1644
1645         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1646         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1647         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1648         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1649         kfree(tbl);
1650 #endif
1651         kfree(net->ipv4.devconf_dflt);
1652         kfree(net->ipv4.devconf_all);
1653 }
1654
1655 static __net_initdata struct pernet_operations devinet_ops = {
1656         .init = devinet_init_net,
1657         .exit = devinet_exit_net,
1658 };
1659
1660 void __init devinet_init(void)
1661 {
1662         register_pernet_subsys(&devinet_ops);
1663
1664         register_gifconf(PF_INET, inet_gifconf);
1665         register_netdevice_notifier(&ip_netdev_notifier);
1666
1667         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1668         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1669         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1670 }
1671
/* Symbols made available to modules. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);

/* inetaddr notifier chain API */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);