[IPSEC]: Make callers of xfrm_lookup to use XFRM_LOOKUP_WAIT
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65
66 struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_ANYCAST]           = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111
112 /* Locks all the inet devices. */
113
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
117
118         if (ifa) {
119                 INIT_RCU_HEAD(&ifa->rcu_head);
120         }
121
122         return ifa;
123 }
124
125 static void inet_rcu_free_ifa(struct rcu_head *head)
126 {
127         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
128         if (ifa->ifa_dev)
129                 in_dev_put(ifa->ifa_dev);
130         kfree(ifa);
131 }
132
133 static inline void inet_free_ifa(struct in_ifaddr *ifa)
134 {
135         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
136 }
137
138 void in_dev_finish_destroy(struct in_device *idev)
139 {
140         struct net_device *dev = idev->dev;
141
142         BUG_TRAP(!idev->ifa_list);
143         BUG_TRAP(!idev->mc_list);
144 #ifdef NET_REFCNT_DEBUG
145         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
146                idev, dev ? dev->name : "NIL");
147 #endif
148         dev_put(dev);
149         if (!idev->dead)
150                 printk("Freeing alive in_device %p\n", idev);
151         else {
152                 kfree(idev);
153         }
154 }
155
156 static struct in_device *inetdev_init(struct net_device *dev)
157 {
158         struct in_device *in_dev;
159
160         ASSERT_RTNL();
161
162         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
163         if (!in_dev)
164                 goto out;
165         INIT_RCU_HEAD(&in_dev->rcu_head);
166         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
167         in_dev->cnf.sysctl = NULL;
168         in_dev->dev = dev;
169         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170                 goto out_kfree;
171         /* Reference in_dev->dev */
172         dev_hold(dev);
173         /* Account for reference dev->ip_ptr (below) */
174         in_dev_hold(in_dev);
175
176         devinet_sysctl_register(in_dev);
177         ip_mc_init_dev(in_dev);
178         if (dev->flags & IFF_UP)
179                 ip_mc_up(in_dev);
180
181         /* we can receive as soon as ip_ptr is set -- do this last */
182         rcu_assign_pointer(dev->ip_ptr, in_dev);
183 out:
184         return in_dev;
185 out_kfree:
186         kfree(in_dev);
187         in_dev = NULL;
188         goto out;
189 }
190
191 static void in_dev_rcu_put(struct rcu_head *head)
192 {
193         struct in_device *idev = container_of(head, struct in_device, rcu_head);
194         in_dev_put(idev);
195 }
196
197 static void inetdev_destroy(struct in_device *in_dev)
198 {
199         struct in_ifaddr *ifa;
200         struct net_device *dev;
201
202         ASSERT_RTNL();
203
204         dev = in_dev->dev;
205
206         in_dev->dead = 1;
207
208         ip_mc_destroy_dev(in_dev);
209
210         while ((ifa = in_dev->ifa_list) != NULL) {
211                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
212                 inet_free_ifa(ifa);
213         }
214
215         dev->ip_ptr = NULL;
216
217         devinet_sysctl_unregister(in_dev);
218         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
219         arp_ifdown(dev);
220
221         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
222 }
223
224 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
225 {
226         rcu_read_lock();
227         for_primary_ifa(in_dev) {
228                 if (inet_ifa_match(a, ifa)) {
229                         if (!b || inet_ifa_match(b, ifa)) {
230                                 rcu_read_unlock();
231                                 return 1;
232                         }
233                 }
234         } endfor_ifa(in_dev);
235         rcu_read_unlock();
236         return 0;
237 }
238
/*
 * Unlink the address that *ifap points at from in_dev's address list and
 * announce the removal (netlink message first, then the inetaddr notifier).
 *
 * in_dev:   device whose address list is edited.
 * ifap:     the link (&in_dev->ifa_list or some ifa_next) that currently
 *           points at the address to remove.
 * destroy:  non-zero to pass the removed address to inet_free_ifa() at the
 *           end; zero leaves it to the caller.
 * nlh, pid: passed through unchanged to rtmsg_ifa().
 *
 * Caller must hold RTNL.
 *
 * When a primary address is removed, the secondaries that follow it with
 * the same mask and a matching subnet are handled in one of two ways:
 *  - IN_DEV_PROMOTE_SECONDARIES(in_dev) clear: each of them is unlinked,
 *    announced and freed;
 *  - set: the first one is promoted to primary (moved behind the last
 *    suitable primary if needed) and fib_add_ifaddr() is called for the
 *    remaining matching addresses behind it.
 */
239 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
240                          int destroy, struct nlmsghdr *nlh, u32 pid)
241 {
242         struct in_ifaddr *promote = NULL; /* secondary chosen for promotion, if any */
243         struct in_ifaddr *ifa, *ifa1 = *ifap; /* ifa1: the address being removed */
244         struct in_ifaddr *last_prim = in_dev->ifa_list; /* where a promoted address gets re-linked */
245         struct in_ifaddr *prev_prom = NULL; /* last entry skipped before reaching "promote" */
246         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
247
248         ASSERT_RTNL();
249
250         /* 1. Deleting primary ifaddr forces deletion all secondaries
251          * unless alias promotion is set
252          **/
253
254         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
255                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
256
257                 while ((ifa = *ifap1) != NULL) {
258                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
259                             ifa1->ifa_scope <= ifa->ifa_scope)
260                                 last_prim = ifa;
261
262                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
263                             ifa1->ifa_mask != ifa->ifa_mask ||
264                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
265                                 ifap1 = &ifa->ifa_next; /* not a secondary of ifa1: step over it */
266                                 prev_prom = ifa;
267                                 continue;
268                         }
269
270                         if (!do_promote) {
271                                 *ifap1 = ifa->ifa_next; /* unlink; ifap1 itself stays put */
272
273                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
274                                 blocking_notifier_call_chain(&inetaddr_chain,
275                                                 NETDEV_DOWN, ifa);
276                                 inet_free_ifa(ifa);
277                         } else {
278                                 promote = ifa;
279                                 break;
280                         }
281                 }
282         }
283
284         /* 2. Unlink it */
285
286         *ifap = ifa1->ifa_next;
287
288         /* 3. Announce address deletion */
289
290         /* Send message first, then call notifier.
291            At first sight, FIB update triggered by notifier
292            will refer to already deleted ifaddr, that could confuse
293            netlink listeners. It is not true: look, gated sees
294            that route deleted and if it still thinks that ifaddr
295            is valid, it will try to restore deleted routes... Grr.
296            So that, this order is correct.
297          */
298         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
299         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
300
301         if (promote) {
302
303                 if (prev_prom) { /* NULL means "promote" directly followed ifa1 and stays in place */
304                         prev_prom->ifa_next = promote->ifa_next;
305                         promote->ifa_next = last_prim->ifa_next;
306                         last_prim->ifa_next = promote;
307                 }
308
309                 promote->ifa_flags &= ~IFA_F_SECONDARY;
310                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
311                 blocking_notifier_call_chain(&inetaddr_chain,
312                                 NETDEV_UP, promote);
313                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
314                         if (ifa1->ifa_mask != ifa->ifa_mask ||
315                             !inet_ifa_match(ifa1->ifa_address, ifa))
316                                         continue;
317                         fib_add_ifaddr(ifa);
318                 }
319
320         }
321         if (destroy)
322                 inet_free_ifa(ifa1);
323 }
324
325 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326                          int destroy)
327 {
328         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
329 }
330
331 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
332                              u32 pid)
333 {
334         struct in_device *in_dev = ifa->ifa_dev;
335         struct in_ifaddr *ifa1, **ifap, **last_primary;
336
337         ASSERT_RTNL();
338
339         if (!ifa->ifa_local) {
340                 inet_free_ifa(ifa);
341                 return 0;
342         }
343
344         ifa->ifa_flags &= ~IFA_F_SECONDARY;
345         last_primary = &in_dev->ifa_list;
346
347         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
348              ifap = &ifa1->ifa_next) {
349                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
350                     ifa->ifa_scope <= ifa1->ifa_scope)
351                         last_primary = &ifa1->ifa_next;
352                 if (ifa1->ifa_mask == ifa->ifa_mask &&
353                     inet_ifa_match(ifa1->ifa_address, ifa)) {
354                         if (ifa1->ifa_local == ifa->ifa_local) {
355                                 inet_free_ifa(ifa);
356                                 return -EEXIST;
357                         }
358                         if (ifa1->ifa_scope != ifa->ifa_scope) {
359                                 inet_free_ifa(ifa);
360                                 return -EINVAL;
361                         }
362                         ifa->ifa_flags |= IFA_F_SECONDARY;
363                 }
364         }
365
366         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
367                 net_srandom(ifa->ifa_local);
368                 ifap = last_primary;
369         }
370
371         ifa->ifa_next = *ifap;
372         *ifap = ifa;
373
374         /* Send message first, then call notifier.
375            Notifier will trigger FIB update, so that
376            listeners of netlink will know about new ifaddr */
377         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
378         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
379
380         return 0;
381 }
382
383 static int inet_insert_ifa(struct in_ifaddr *ifa)
384 {
385         return __inet_insert_ifa(ifa, NULL, 0);
386 }
387
388 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
389 {
390         struct in_device *in_dev = __in_dev_get_rtnl(dev);
391
392         ASSERT_RTNL();
393
394         if (!in_dev) {
395                 inet_free_ifa(ifa);
396                 return -ENOBUFS;
397         }
398         ipv4_devconf_setall(in_dev);
399         if (ifa->ifa_dev != in_dev) {
400                 BUG_TRAP(!ifa->ifa_dev);
401                 in_dev_hold(in_dev);
402                 ifa->ifa_dev = in_dev;
403         }
404         if (LOOPBACK(ifa->ifa_local))
405                 ifa->ifa_scope = RT_SCOPE_HOST;
406         return inet_insert_ifa(ifa);
407 }
408
409 struct in_device *inetdev_by_index(int ifindex)
410 {
411         struct net_device *dev;
412         struct in_device *in_dev = NULL;
413         read_lock(&dev_base_lock);
414         dev = __dev_get_by_index(&init_net, ifindex);
415         if (dev)
416                 in_dev = in_dev_get(dev);
417         read_unlock(&dev_base_lock);
418         return in_dev;
419 }
420
421 /* Called only from RTNL semaphored context. No locks. */
422
423 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
424                                     __be32 mask)
425 {
426         ASSERT_RTNL();
427
428         for_primary_ifa(in_dev) {
429                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
430                         return ifa;
431         } endfor_ifa(in_dev);
432         return NULL;
433 }
434
435 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
436 {
437         struct net *net = skb->sk->sk_net;
438         struct nlattr *tb[IFA_MAX+1];
439         struct in_device *in_dev;
440         struct ifaddrmsg *ifm;
441         struct in_ifaddr *ifa, **ifap;
442         int err = -EINVAL;
443
444         ASSERT_RTNL();
445
446         if (net != &init_net)
447                 return -EINVAL;
448
449         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450         if (err < 0)
451                 goto errout;
452
453         ifm = nlmsg_data(nlh);
454         in_dev = inetdev_by_index(ifm->ifa_index);
455         if (in_dev == NULL) {
456                 err = -ENODEV;
457                 goto errout;
458         }
459
460         __in_dev_put(in_dev);
461
462         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463              ifap = &ifa->ifa_next) {
464                 if (tb[IFA_LOCAL] &&
465                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
466                         continue;
467
468                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
469                         continue;
470
471                 if (tb[IFA_ADDRESS] &&
472                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
474                         continue;
475
476                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477                 return 0;
478         }
479
480         err = -EADDRNOTAVAIL;
481 errout:
482         return err;
483 }
484
485 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
486 {
487         struct nlattr *tb[IFA_MAX+1];
488         struct in_ifaddr *ifa;
489         struct ifaddrmsg *ifm;
490         struct net_device *dev;
491         struct in_device *in_dev;
492         int err = -EINVAL;
493
494         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495         if (err < 0)
496                 goto errout;
497
498         ifm = nlmsg_data(nlh);
499         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
500                 err = -EINVAL;
501                 goto errout;
502         }
503
504         dev = __dev_get_by_index(&init_net, ifm->ifa_index);
505         if (dev == NULL) {
506                 err = -ENODEV;
507                 goto errout;
508         }
509
510         in_dev = __in_dev_get_rtnl(dev);
511         if (in_dev == NULL) {
512                 err = -ENOBUFS;
513                 goto errout;
514         }
515
516         ifa = inet_alloc_ifa();
517         if (ifa == NULL) {
518                 /*
519                  * A potential indev allocation can be left alive, it stays
520                  * assigned to its device and is destroy with it.
521                  */
522                 err = -ENOBUFS;
523                 goto errout;
524         }
525
526         ipv4_devconf_setall(in_dev);
527         in_dev_hold(in_dev);
528
529         if (tb[IFA_ADDRESS] == NULL)
530                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
531
532         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
533         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
534         ifa->ifa_flags = ifm->ifa_flags;
535         ifa->ifa_scope = ifm->ifa_scope;
536         ifa->ifa_dev = in_dev;
537
538         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
539         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
540
541         if (tb[IFA_BROADCAST])
542                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
543
544         if (tb[IFA_ANYCAST])
545                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
546
547         if (tb[IFA_LABEL])
548                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
549         else
550                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
551
552         return ifa;
553
554 errout:
555         return ERR_PTR(err);
556 }
557
558 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
559 {
560         struct net *net = skb->sk->sk_net;
561         struct in_ifaddr *ifa;
562
563         ASSERT_RTNL();
564
565         if (net != &init_net)
566                 return -EINVAL;
567
568         ifa = rtm_to_ifaddr(nlh);
569         if (IS_ERR(ifa))
570                 return PTR_ERR(ifa);
571
572         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
573 }
574
575 /*
576  *      Determine a default network mask, based on the IP address.
577  */
578
579 static __inline__ int inet_abc_len(__be32 addr)
580 {
581         int rc = -1;    /* Something else, probably a multicast. */
582
583         if (ZERONET(addr))
584                 rc = 0;
585         else {
586                 __u32 haddr = ntohl(addr);
587
588                 if (IN_CLASSA(haddr))
589                         rc = 8;
590                 else if (IN_CLASSB(haddr))
591                         rc = 16;
592                 else if (IN_CLASSC(haddr))
593                         rc = 24;
594         }
595
596         return rc;
597 }
598
599
/*
 * Handle the IPv4 interface-address ioctls.
 *
 * cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *      SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *      SIOCSIFNETMASK; any other value yields -EINVAL.
 * arg: user pointer to a struct ifreq.  It is copied in on entry and, for
 *      the four SIOCG* commands, copied back out with the requested address.
 *
 * The SIOCS* commands require CAP_NET_ADMIN (-EACCES otherwise); all of
 * them except SIOCSIFFLAGS also require an AF_INET sockaddr (-EINVAL).
 * A name of the form "dev:alias" names the device by the part before the
 * colon and selects the address whose label equals the full name.
 * Device lookup and all list changes run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno: -EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or the result of dev_change_flags()
 * or inet_set_ifa().
 */
600 int devinet_ioctl(unsigned int cmd, void __user *arg)
601 {
602         struct ifreq ifr;
603         struct sockaddr_in sin_orig;
604         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
605         struct in_device *in_dev;
606         struct in_ifaddr **ifap = NULL;
607         struct in_ifaddr *ifa = NULL;
608         struct net_device *dev;
609         char *colon;
610         int ret = -EFAULT;
611         int tryaddrmatch = 0;
612
613         /*
614          *      Fetch the caller's info block into kernel space
615          */
616
617         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
618                 goto out;
619         ifr.ifr_name[IFNAMSIZ - 1] = 0; /* force NUL termination of the user-supplied name */
620
621         /* save original address for comparison */
622         memcpy(&sin_orig, sin, sizeof(*sin));
623
624         colon = strchr(ifr.ifr_name, ':');
625         if (colon)
626                 *colon = 0; /* cut "dev:alias" at the colon so the name is just "dev" */
627
628 #ifdef CONFIG_KMOD
629         dev_load(&init_net, ifr.ifr_name);
630 #endif
631
632         switch (cmd) {
633         case SIOCGIFADDR:       /* Get interface address */
634         case SIOCGIFBRDADDR:    /* Get the broadcast address */
635         case SIOCGIFDSTADDR:    /* Get the destination address */
636         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
637                 /* Note that these ioctls will not sleep,
638                    so that we do not impose a lock.
639                    One day we will be forced to put shlock here (I mean SMP)
640                  */
641                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
642                 memset(sin, 0, sizeof(*sin));
643                 sin->sin_family = AF_INET;
644                 break;
645
646         case SIOCSIFFLAGS:
647                 ret = -EACCES;
648                 if (!capable(CAP_NET_ADMIN))
649                         goto out;
650                 break;
651         case SIOCSIFADDR:       /* Set interface address (and family) */
652         case SIOCSIFBRDADDR:    /* Set the broadcast address */
653         case SIOCSIFDSTADDR:    /* Set the destination address */
654         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
655                 ret = -EACCES;
656                 if (!capable(CAP_NET_ADMIN))
657                         goto out;
658                 ret = -EINVAL;
659                 if (sin->sin_family != AF_INET)
660                         goto out;
661                 break;
662         default:
663                 ret = -EINVAL;
664                 goto out;
665         }
666
667         rtnl_lock();
668
669         ret = -ENODEV;
670         if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
671                 goto done;
672
673         if (colon)
674                 *colon = ':'; /* restore the full "dev:alias" name for the label comparisons below */
675
676         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
677                 if (tryaddrmatch) {
678                         /* Matthias Andree */
679                         /* compare label and address (4.4BSD style) */
680                         /* note: we only do this for a limited set of ioctls
681                            and only if the original address family was AF_INET.
682                            This is checked above. */
683                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
684                              ifap = &ifa->ifa_next) {
685                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
686                                     sin_orig.sin_addr.s_addr ==
687                                                         ifa->ifa_address) {
688                                         break; /* found */
689                                 }
690                         }
691                 }
692                 /* we didn't get a match, maybe the application is
693                    4.3BSD-style and passed in junk so we fall back to
694                    comparing just the label */
695                 if (!ifa) {
696                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
697                              ifap = &ifa->ifa_next)
698                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
699                                         break;
700                 }
701         }
702
703         ret = -EADDRNOTAVAIL;
704         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) /* only these two may proceed without an existing address */
705                 goto done;
706
707         switch (cmd) {
708         case SIOCGIFADDR:       /* Get interface address */
709                 sin->sin_addr.s_addr = ifa->ifa_local;
710                 goto rarok;
711
712         case SIOCGIFBRDADDR:    /* Get the broadcast address */
713                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
714                 goto rarok;
715
716         case SIOCGIFDSTADDR:    /* Get the destination address */
717                 sin->sin_addr.s_addr = ifa->ifa_address;
718                 goto rarok;
719
720         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
721                 sin->sin_addr.s_addr = ifa->ifa_mask;
722                 goto rarok;
723
724         case SIOCSIFFLAGS:
725                 if (colon) { /* alias name: flags apply to that one address, not the device */
726                         ret = -EADDRNOTAVAIL;
727                         if (!ifa)
728                                 break;
729                         ret = 0;
730                         if (!(ifr.ifr_flags & IFF_UP))
731                                 inet_del_ifa(in_dev, ifap, 1); /* clearing IFF_UP on an alias deletes it */
732                         break;
733                 }
734                 ret = dev_change_flags(dev, ifr.ifr_flags);
735                 break;
736
737         case SIOCSIFADDR:       /* Set interface address (and family) */
738                 ret = -EINVAL;
739                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
740                         break;
741
742                 if (!ifa) {
743                         ret = -ENOBUFS;
744                         if ((ifa = inet_alloc_ifa()) == NULL)
745                                 break;
746                         if (colon)
747                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
748                         else
749                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
750                 } else {
751                         ret = 0;
752                         if (ifa->ifa_local == sin->sin_addr.s_addr)
753                                 break;
754                         inet_del_ifa(in_dev, ifap, 0); /* unlink but keep ifa: it is edited and re-inserted below */
755                         ifa->ifa_broadcast = 0;
756                         ifa->ifa_anycast = 0;
757                 }
758
759                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
760
761                 if (!(dev->flags & IFF_POINTOPOINT)) {
762                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
763                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
764                         if ((dev->flags & IFF_BROADCAST) &&
765                             ifa->ifa_prefixlen < 31)
766                                 ifa->ifa_broadcast = ifa->ifa_address |
767                                                      ~ifa->ifa_mask;
768                 } else {
769                         ifa->ifa_prefixlen = 32;
770                         ifa->ifa_mask = inet_make_mask(32);
771                 }
772                 ret = inet_set_ifa(dev, ifa);
773                 break;
774
775         case SIOCSIFBRDADDR:    /* Set the broadcast address */
776                 ret = 0;
777                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
778                         inet_del_ifa(in_dev, ifap, 0);
779                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
780                         inet_insert_ifa(ifa); /* NOTE(review): return value is ignored here; ret stays 0 */
781                 }
782                 break;
783
784         case SIOCSIFDSTADDR:    /* Set the destination address */
785                 ret = 0;
786                 if (ifa->ifa_address == sin->sin_addr.s_addr)
787                         break;
788                 ret = -EINVAL;
789                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
790                         break;
791                 ret = 0;
792                 inet_del_ifa(in_dev, ifap, 0);
793                 ifa->ifa_address = sin->sin_addr.s_addr;
794                 inet_insert_ifa(ifa); /* NOTE(review): return value is ignored here; ret stays 0 */
795                 break;
796
797         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
798
799                 /*
800                  *      The mask we set must be legal.
801                  */
802                 ret = -EINVAL;
803                 if (bad_mask(sin->sin_addr.s_addr, 0))
804                         break;
805                 ret = 0;
806                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
807                         __be32 old_mask = ifa->ifa_mask;
808                         inet_del_ifa(in_dev, ifap, 0);
809                         ifa->ifa_mask = sin->sin_addr.s_addr;
810                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
811
812                         /* See if current broadcast address matches
813                          * with current netmask, then recalculate
814                          * the broadcast address. Otherwise it's a
815                          * funny address, so don't touch it since
816                          * the user seems to know what (s)he's doing...
817                          */
818                         if ((dev->flags & IFF_BROADCAST) &&
819                             (ifa->ifa_prefixlen < 31) &&
820                             (ifa->ifa_broadcast ==
821                              (ifa->ifa_local|~old_mask))) {
822                                 ifa->ifa_broadcast = (ifa->ifa_local |
823                                                       ~sin->sin_addr.s_addr);
824                         }
825                         inet_insert_ifa(ifa); /* NOTE(review): return value is ignored here; ret stays 0 */
826                 }
827                 break;
828         }
829 done:
830         rtnl_unlock();
831 out:
832         return ret;
833 rarok: /* SIOCG* exit: drop RTNL first, then copy the filled-in ifreq back to the caller */
834         rtnl_unlock();
835         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
836         goto out;
837 }
838
839 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
840 {
841         struct in_device *in_dev = __in_dev_get_rtnl(dev);
842         struct in_ifaddr *ifa;
843         struct ifreq ifr;
844         int done = 0;
845
846         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
847                 goto out;
848
849         for (; ifa; ifa = ifa->ifa_next) {
850                 if (!buf) {
851                         done += sizeof(ifr);
852                         continue;
853                 }
854                 if (len < (int) sizeof(ifr))
855                         break;
856                 memset(&ifr, 0, sizeof(struct ifreq));
857                 if (ifa->ifa_label)
858                         strcpy(ifr.ifr_name, ifa->ifa_label);
859                 else
860                         strcpy(ifr.ifr_name, dev->name);
861
862                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
863                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
864                                                                 ifa->ifa_local;
865
866                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
867                         done = -EFAULT;
868                         break;
869                 }
870                 buf  += sizeof(struct ifreq);
871                 len  -= sizeof(struct ifreq);
872                 done += sizeof(struct ifreq);
873         }
874 out:
875         return done;
876 }
877
878 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
879 {
880         __be32 addr = 0;
881         struct in_device *in_dev;
882
883         rcu_read_lock();
884         in_dev = __in_dev_get_rcu(dev);
885         if (!in_dev)
886                 goto no_in_dev;
887
888         for_primary_ifa(in_dev) {
889                 if (ifa->ifa_scope > scope)
890                         continue;
891                 if (!dst || inet_ifa_match(dst, ifa)) {
892                         addr = ifa->ifa_local;
893                         break;
894                 }
895                 if (!addr)
896                         addr = ifa->ifa_local;
897         } endfor_ifa(in_dev);
898 no_in_dev:
899         rcu_read_unlock();
900
901         if (addr)
902                 goto out;
903
904         /* Not loopback addresses on loopback should be preferred
905            in this case. It is importnat that lo is the first interface
906            in dev_base list.
907          */
908         read_lock(&dev_base_lock);
909         rcu_read_lock();
910         for_each_netdev(&init_net, dev) {
911                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
912                         continue;
913
914                 for_primary_ifa(in_dev) {
915                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
916                             ifa->ifa_scope <= scope) {
917                                 addr = ifa->ifa_local;
918                                 goto out_unlock_both;
919                         }
920                 } endfor_ifa(in_dev);
921         }
922 out_unlock_both:
923         read_unlock(&dev_base_lock);
924         rcu_read_unlock();
925 out:
926         return addr;
927 }
928
929 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
930                               __be32 local, int scope)
931 {
932         int same = 0;
933         __be32 addr = 0;
934
935         for_ifa(in_dev) {
936                 if (!addr &&
937                     (local == ifa->ifa_local || !local) &&
938                     ifa->ifa_scope <= scope) {
939                         addr = ifa->ifa_local;
940                         if (same)
941                                 break;
942                 }
943                 if (!same) {
944                         same = (!local || inet_ifa_match(local, ifa)) &&
945                                 (!dst || inet_ifa_match(dst, ifa));
946                         if (same && addr) {
947                                 if (local || !dst)
948                                         break;
949                                 /* Is the selected addr into dst subnet? */
950                                 if (inet_ifa_match(addr, ifa))
951                                         break;
952                                 /* No, then can we use new local src? */
953                                 if (ifa->ifa_scope <= scope) {
954                                         addr = ifa->ifa_local;
955                                         break;
956                                 }
957                                 /* search for large dst subnet for addr */
958                                 same = 0;
959                         }
960                 }
961         } endfor_ifa(in_dev);
962
963         return same? addr : 0;
964 }
965
966 /*
967  * Confirm that local IP address exists using wildcards:
968  * - dev: only on this interface, 0=any interface
969  * - dst: only in the same subnet as dst, 0=any dst
970  * - local: address, 0=autoselect the local address
971  * - scope: maximum allowed scope value for the local address
972  */
973 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
974 {
975         __be32 addr = 0;
976         struct in_device *in_dev;
977
978         if (dev) {
979                 rcu_read_lock();
980                 if ((in_dev = __in_dev_get_rcu(dev)))
981                         addr = confirm_addr_indev(in_dev, dst, local, scope);
982                 rcu_read_unlock();
983
984                 return addr;
985         }
986
987         read_lock(&dev_base_lock);
988         rcu_read_lock();
989         for_each_netdev(&init_net, dev) {
990                 if ((in_dev = __in_dev_get_rcu(dev))) {
991                         addr = confirm_addr_indev(in_dev, dst, local, scope);
992                         if (addr)
993                                 break;
994                 }
995         }
996         rcu_read_unlock();
997         read_unlock(&dev_base_lock);
998
999         return addr;
1000 }
1001
1002 /*
1003  *      Device notifier
1004  */
1005
1006 int register_inetaddr_notifier(struct notifier_block *nb)
1007 {
1008         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1009 }
1010
1011 int unregister_inetaddr_notifier(struct notifier_block *nb)
1012 {
1013         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1014 }
1015
1016 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1017  * alias numbering and to create unique labels if possible.
1018 */
1019 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1020 {
1021         struct in_ifaddr *ifa;
1022         int named = 0;
1023
1024         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1025                 char old[IFNAMSIZ], *dot;
1026
1027                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1028                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029                 if (named++ == 0)
1030                         continue;
1031                 dot = strchr(old, ':');
1032                 if (dot == NULL) {
1033                         sprintf(old, ":%d", named);
1034                         dot = old;
1035                 }
1036                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1037                         strcat(ifa->ifa_label, dot);
1038                 } else {
1039                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1040                 }
1041         }
1042 }
1043
1044 /* Called only under RTNL semaphore */
1045
1046 static int inetdev_event(struct notifier_block *this, unsigned long event,
1047                          void *ptr)
1048 {
1049         struct net_device *dev = ptr;
1050         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1051
1052         if (dev->nd_net != &init_net)
1053                 return NOTIFY_DONE;
1054
1055         ASSERT_RTNL();
1056
1057         if (!in_dev) {
1058                 if (event == NETDEV_REGISTER) {
1059                         in_dev = inetdev_init(dev);
1060                         if (!in_dev)
1061                                 return notifier_from_errno(-ENOMEM);
1062                         if (dev->flags & IFF_LOOPBACK) {
1063                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1064                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1065                         }
1066                 }
1067                 goto out;
1068         }
1069
1070         switch (event) {
1071         case NETDEV_REGISTER:
1072                 printk(KERN_DEBUG "inetdev_event: bug\n");
1073                 dev->ip_ptr = NULL;
1074                 break;
1075         case NETDEV_UP:
1076                 if (dev->mtu < 68)
1077                         break;
1078                 if (dev->flags & IFF_LOOPBACK) {
1079                         struct in_ifaddr *ifa;
1080                         if ((ifa = inet_alloc_ifa()) != NULL) {
1081                                 ifa->ifa_local =
1082                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083                                 ifa->ifa_prefixlen = 8;
1084                                 ifa->ifa_mask = inet_make_mask(8);
1085                                 in_dev_hold(in_dev);
1086                                 ifa->ifa_dev = in_dev;
1087                                 ifa->ifa_scope = RT_SCOPE_HOST;
1088                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089                                 inet_insert_ifa(ifa);
1090                         }
1091                 }
1092                 ip_mc_up(in_dev);
1093                 break;
1094         case NETDEV_DOWN:
1095                 ip_mc_down(in_dev);
1096                 break;
1097         case NETDEV_CHANGEMTU:
1098                 if (dev->mtu >= 68)
1099                         break;
1100                 /* MTU falled under 68, disable IP */
1101         case NETDEV_UNREGISTER:
1102                 inetdev_destroy(in_dev);
1103                 break;
1104         case NETDEV_CHANGENAME:
1105                 /* Do not notify about label change, this event is
1106                  * not interesting to applications using netlink.
1107                  */
1108                 inetdev_changename(dev, in_dev);
1109
1110                 devinet_sysctl_unregister(in_dev);
1111                 devinet_sysctl_register(in_dev);
1112                 break;
1113         }
1114 out:
1115         return NOTIFY_DONE;
1116 }
1117
1118 static struct notifier_block ip_netdev_notifier = {
1119         .notifier_call =inetdev_event,
1120 };
1121
1122 static inline size_t inet_nlmsg_size(void)
1123 {
1124         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1125                + nla_total_size(4) /* IFA_ADDRESS */
1126                + nla_total_size(4) /* IFA_LOCAL */
1127                + nla_total_size(4) /* IFA_BROADCAST */
1128                + nla_total_size(4) /* IFA_ANYCAST */
1129                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1130 }
1131
1132 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1133                             u32 pid, u32 seq, int event, unsigned int flags)
1134 {
1135         struct ifaddrmsg *ifm;
1136         struct nlmsghdr  *nlh;
1137
1138         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1139         if (nlh == NULL)
1140                 return -EMSGSIZE;
1141
1142         ifm = nlmsg_data(nlh);
1143         ifm->ifa_family = AF_INET;
1144         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1145         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1146         ifm->ifa_scope = ifa->ifa_scope;
1147         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1148
1149         if (ifa->ifa_address)
1150                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1151
1152         if (ifa->ifa_local)
1153                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1154
1155         if (ifa->ifa_broadcast)
1156                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1157
1158         if (ifa->ifa_anycast)
1159                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1160
1161         if (ifa->ifa_label[0])
1162                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1163
1164         return nlmsg_end(skb, nlh);
1165
1166 nla_put_failure:
1167         nlmsg_cancel(skb, nlh);
1168         return -EMSGSIZE;
1169 }
1170
1171 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1172 {
1173         struct net *net = skb->sk->sk_net;
1174         int idx, ip_idx;
1175         struct net_device *dev;
1176         struct in_device *in_dev;
1177         struct in_ifaddr *ifa;
1178         int s_ip_idx, s_idx = cb->args[0];
1179
1180         if (net != &init_net)
1181                 return 0;
1182
1183         s_ip_idx = ip_idx = cb->args[1];
1184         idx = 0;
1185         for_each_netdev(&init_net, dev) {
1186                 if (idx < s_idx)
1187                         goto cont;
1188                 if (idx > s_idx)
1189                         s_ip_idx = 0;
1190                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1191                         goto cont;
1192
1193                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1194                      ifa = ifa->ifa_next, ip_idx++) {
1195                         if (ip_idx < s_ip_idx)
1196                                 continue;
1197                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1198                                              cb->nlh->nlmsg_seq,
1199                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1200                                 goto done;
1201                 }
1202 cont:
1203                 idx++;
1204         }
1205
1206 done:
1207         cb->args[0] = idx;
1208         cb->args[1] = ip_idx;
1209
1210         return skb->len;
1211 }
1212
1213 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1214                       u32 pid)
1215 {
1216         struct sk_buff *skb;
1217         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1218         int err = -ENOBUFS;
1219
1220         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1221         if (skb == NULL)
1222                 goto errout;
1223
1224         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1225         if (err < 0) {
1226                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1227                 WARN_ON(err == -EMSGSIZE);
1228                 kfree_skb(skb);
1229                 goto errout;
1230         }
1231         err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1232 errout:
1233         if (err < 0)
1234                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1235 }
1236
1237 #ifdef CONFIG_SYSCTL
1238
1239 static void devinet_copy_dflt_conf(int i)
1240 {
1241         struct net_device *dev;
1242
1243         read_lock(&dev_base_lock);
1244         for_each_netdev(&init_net, dev) {
1245                 struct in_device *in_dev;
1246                 rcu_read_lock();
1247                 in_dev = __in_dev_get_rcu(dev);
1248                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1249                         in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1250                 rcu_read_unlock();
1251         }
1252         read_unlock(&dev_base_lock);
1253 }
1254
1255 static void inet_forward_change(void)
1256 {
1257         struct net_device *dev;
1258         int on = IPV4_DEVCONF_ALL(FORWARDING);
1259
1260         IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1261         IPV4_DEVCONF_DFLT(FORWARDING) = on;
1262
1263         read_lock(&dev_base_lock);
1264         for_each_netdev(&init_net, dev) {
1265                 struct in_device *in_dev;
1266                 rcu_read_lock();
1267                 in_dev = __in_dev_get_rcu(dev);
1268                 if (in_dev)
1269                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1270                 rcu_read_unlock();
1271         }
1272         read_unlock(&dev_base_lock);
1273
1274         rt_cache_flush(0);
1275 }
1276
1277 static int devinet_conf_proc(ctl_table *ctl, int write,
1278                              struct file* filp, void __user *buffer,
1279                              size_t *lenp, loff_t *ppos)
1280 {
1281         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1282
1283         if (write) {
1284                 struct ipv4_devconf *cnf = ctl->extra1;
1285                 int i = (int *)ctl->data - cnf->data;
1286
1287                 set_bit(i, cnf->state);
1288
1289                 if (cnf == &ipv4_devconf_dflt)
1290                         devinet_copy_dflt_conf(i);
1291         }
1292
1293         return ret;
1294 }
1295
1296 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1297                                void __user *oldval, size_t __user *oldlenp,
1298                                void __user *newval, size_t newlen)
1299 {
1300         struct ipv4_devconf *cnf;
1301         int *valp = table->data;
1302         int new;
1303         int i;
1304
1305         if (!newval || !newlen)
1306                 return 0;
1307
1308         if (newlen != sizeof(int))
1309                 return -EINVAL;
1310
1311         if (get_user(new, (int __user *)newval))
1312                 return -EFAULT;
1313
1314         if (new == *valp)
1315                 return 0;
1316
1317         if (oldval && oldlenp) {
1318                 size_t len;
1319
1320                 if (get_user(len, oldlenp))
1321                         return -EFAULT;
1322
1323                 if (len) {
1324                         if (len > table->maxlen)
1325                                 len = table->maxlen;
1326                         if (copy_to_user(oldval, valp, len))
1327                                 return -EFAULT;
1328                         if (put_user(len, oldlenp))
1329                                 return -EFAULT;
1330                 }
1331         }
1332
1333         *valp = new;
1334
1335         cnf = table->extra1;
1336         i = (int *)table->data - cnf->data;
1337
1338         set_bit(i, cnf->state);
1339
1340         if (cnf == &ipv4_devconf_dflt)
1341                 devinet_copy_dflt_conf(i);
1342
1343         return 1;
1344 }
1345
1346 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1347                                   struct file* filp, void __user *buffer,
1348                                   size_t *lenp, loff_t *ppos)
1349 {
1350         int *valp = ctl->data;
1351         int val = *valp;
1352         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1353
1354         if (write && *valp != val) {
1355                 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1356                         inet_forward_change();
1357                 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1358                         rt_cache_flush(0);
1359         }
1360
1361         return ret;
1362 }
1363
1364 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1365                          struct file* filp, void __user *buffer,
1366                          size_t *lenp, loff_t *ppos)
1367 {
1368         int *valp = ctl->data;
1369         int val = *valp;
1370         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1371
1372         if (write && *valp != val)
1373                 rt_cache_flush(0);
1374
1375         return ret;
1376 }
1377
1378 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1379                                   void __user *oldval, size_t __user *oldlenp,
1380                                   void __user *newval, size_t newlen)
1381 {
1382         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1383                                       newval, newlen);
1384
1385         if (ret == 1)
1386                 rt_cache_flush(0);
1387
1388         return ret;
1389 }
1390
1391
/*
 * Build one ctl_table entry for setting NET_IPV4_CONF_<attr>.  The
 * entry initially points into ipv4_devconf; per-table copies are
 * retargeted in __devinet_sysctl_register().
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= pname, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Plain integer setting, mode 0644. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Plain integer setting, mode 0444. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 setting with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler, strat)

/* Mode 0644 setting whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1419
1420 static struct devinet_sysctl_table {
1421         struct ctl_table_header *sysctl_header;
1422         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1423         char *dev_name;
1424 } devinet_sysctl = {
1425         .devinet_vars = {
1426                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1427                                              devinet_sysctl_forward,
1428                                              devinet_conf_sysctl),
1429                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1430
1431                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1432                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1433                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1434                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1435                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1436                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1437                                         "accept_source_route"),
1438                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1439                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1440                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1441                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1442                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1443                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1444                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1445                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1446                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1447
1448                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1449                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1450                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1451                                               "force_igmp_version"),
1452                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1453                                               "promote_secondaries"),
1454         },
1455 };
1456
1457 static void __devinet_sysctl_register(char *dev_name, int ctl_name,
1458                 struct ipv4_devconf *p)
1459 {
1460         int i;
1461         struct devinet_sysctl_table *t;
1462
1463 #define DEVINET_CTL_PATH_DEV    3
1464
1465         struct ctl_path devinet_ctl_path[] = {
1466                 { .procname = "net", .ctl_name = CTL_NET, },
1467                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1468                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1469                 { /* to be set */ },
1470                 { },
1471         };
1472
1473         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1474         if (!t)
1475                 goto out;
1476
1477         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1478                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1479                 t->devinet_vars[i].extra1 = p;
1480         }
1481
1482         /*
1483          * Make a copy of dev_name, because '.procname' is regarded as const
1484          * by sysctl and we wouldn't want anyone to change it under our feet
1485          * (see SIOCSIFNAME).
1486          */
1487         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1488         if (!t->dev_name)
1489                 goto free;
1490
1491         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1492         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1493
1494         t->sysctl_header = register_sysctl_paths(devinet_ctl_path,
1495                         t->devinet_vars);
1496         if (!t->sysctl_header)
1497                 goto free_procname;
1498
1499         p->sysctl = t;
1500         return;
1501
1502 free_procname:
1503         kfree(t->dev_name);
1504 free:
1505         kfree(t);
1506 out:
1507         return;
1508 }
1509
1510 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1511 {
1512         struct devinet_sysctl_table *t = cnf->sysctl;
1513
1514         if (t == NULL)
1515                 return;
1516
1517         cnf->sysctl = NULL;
1518         unregister_sysctl_table(t->sysctl_header);
1519         kfree(t->dev_name);
1520         kfree(t);
1521 }
1522
1523 static void devinet_sysctl_register(struct in_device *idev)
1524 {
1525         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1526                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1527         __devinet_sysctl_register(idev->dev->name, idev->dev->ifindex,
1528                         &idev->cnf);
1529 }
1530
1531 static void devinet_sysctl_unregister(struct in_device *idev)
1532 {
1533         __devinet_sysctl_unregister(&idev->cnf);
1534         neigh_sysctl_unregister(idev->arp_parms);
1535 }
1536 #endif
1537
1538 static struct ctl_table ctl_forward_entry[] = {
1539         {
1540                 .ctl_name       = NET_IPV4_FORWARD,
1541                 .procname       = "ip_forward",
1542                 .data           = &ipv4_devconf.data[
1543                                         NET_IPV4_CONF_FORWARDING - 1],
1544                 .maxlen         = sizeof(int),
1545                 .mode           = 0644,
1546                 .proc_handler   = devinet_sysctl_forward,
1547                 .strategy       = devinet_conf_sysctl,
1548                 .extra1         = &ipv4_devconf,
1549         },
1550         { },
1551 };
1552
1553 static __initdata struct ctl_path net_ipv4_path[] = {
1554         { .procname = "net", .ctl_name = CTL_NET, },
1555         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1556         { },
1557 };
1558
1559 void __init devinet_init(void)
1560 {
1561         register_gifconf(PF_INET, inet_gifconf);
1562         register_netdevice_notifier(&ip_netdev_notifier);
1563
1564         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1565         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1566         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1567 #ifdef CONFIG_SYSCTL
1568         __devinet_sysctl_register("all", NET_PROTO_CONF_ALL,
1569                         &ipv4_devconf);
1570         __devinet_sysctl_register("default", NET_PROTO_CONF_DEFAULT,
1571                         &ipv4_devconf_dflt);
1572         register_sysctl_paths(net_ipv4_path, ctl_forward_entry);
1573 #endif
1574 }
1575
/* inetaddr notifier chain */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);

/* in_device helpers and address selection */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(inet_select_addr);