Merge git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-2.6
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/*
 * Access configuration attribute @attr in the default devconf block that
 * the network namespace pointer @net refers to (net->ipv4.devconf_dflt).
 *
 * @net is parenthesised so that any pointer expression (e.g. "p + 1") can
 * be passed safely.  @attr is handed to IPV4_DEVCONF() untouched.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_ANYCAST]           = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101                          int destroy);
102 #ifdef CONFIG_SYSCTL
103 static void devinet_sysctl_register(struct in_device *idev);
104 static void devinet_sysctl_unregister(struct in_device *idev);
105 #else
106 static inline void devinet_sysctl_register(struct in_device *idev)
107 {
108 }
109 static inline void devinet_sysctl_unregister(struct in_device *idev)
110 {
111 }
112 #endif
113
114 /* Locks all the inet devices. */
115
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120         if (ifa) {
121                 INIT_RCU_HEAD(&ifa->rcu_head);
122         }
123
124         return ifa;
125 }
126
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130         if (ifa->ifa_dev)
131                 in_dev_put(ifa->ifa_dev);
132         kfree(ifa);
133 }
134
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142         struct net_device *dev = idev->dev;
143
144         BUG_TRAP(!idev->ifa_list);
145         BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148                idev, dev ? dev->name : "NIL");
149 #endif
150         dev_put(dev);
151         if (!idev->dead)
152                 printk("Freeing alive in_device %p\n", idev);
153         else {
154                 kfree(idev);
155         }
156 }
157
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160         struct in_device *in_dev;
161
162         ASSERT_RTNL();
163
164         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165         if (!in_dev)
166                 goto out;
167         INIT_RCU_HEAD(&in_dev->rcu_head);
168         memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169                         sizeof(in_dev->cnf));
170         in_dev->cnf.sysctl = NULL;
171         in_dev->dev = dev;
172         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173                 goto out_kfree;
174         /* Reference in_dev->dev */
175         dev_hold(dev);
176         /* Account for reference dev->ip_ptr (below) */
177         in_dev_hold(in_dev);
178
179         devinet_sysctl_register(in_dev);
180         ip_mc_init_dev(in_dev);
181         if (dev->flags & IFF_UP)
182                 ip_mc_up(in_dev);
183
184         /* we can receive as soon as ip_ptr is set -- do this last */
185         rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187         return in_dev;
188 out_kfree:
189         kfree(in_dev);
190         in_dev = NULL;
191         goto out;
192 }
193
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196         struct in_device *idev = container_of(head, struct in_device, rcu_head);
197         in_dev_put(idev);
198 }
199
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202         struct in_ifaddr *ifa;
203         struct net_device *dev;
204
205         ASSERT_RTNL();
206
207         dev = in_dev->dev;
208
209         in_dev->dead = 1;
210
211         ip_mc_destroy_dev(in_dev);
212
213         while ((ifa = in_dev->ifa_list) != NULL) {
214                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215                 inet_free_ifa(ifa);
216         }
217
218         dev->ip_ptr = NULL;
219
220         devinet_sysctl_unregister(in_dev);
221         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222         arp_ifdown(dev);
223
224         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229         rcu_read_lock();
230         for_primary_ifa(in_dev) {
231                 if (inet_ifa_match(a, ifa)) {
232                         if (!b || inet_ifa_match(b, ifa)) {
233                                 rcu_read_unlock();
234                                 return 1;
235                         }
236                 }
237         } endfor_ifa(in_dev);
238         rcu_read_unlock();
239         return 0;
240 }
241
/*
 * Unlink the address that *ifap points at from in_dev's address list and
 * announce the removal: first an RTM_DELADDR netlink message, then
 * NETDEV_DOWN on the inetaddr notifier chain.
 *
 * in_dev:  device whose ifa_list contains the address.
 * ifap:    the link (list head or a predecessor's ifa_next) that currently
 *          references the address to delete.
 * destroy: when non-zero the deleted address is passed to inet_free_ifa().
 * nlh/pid: handed through unchanged to rtmsg_ifa().
 *
 * If the deleted address is a primary one, its secondaries (same mask and
 * matching prefix) are deleted too, unless IN_DEV_PROMOTE_SECONDARIES() is
 * set, in which case the first such secondary is promoted to primary.
 * Asserts that RTNL is held.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243                          int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245         struct in_ifaddr *promote = NULL;       /* secondary picked for promotion, if any */
246         struct in_ifaddr *ifa, *ifa1 = *ifap;   /* ifa1 is the address being deleted */
247         struct in_ifaddr *last_prim = in_dev->ifa_list; /* entry the promoted address is re-linked after */
248         struct in_ifaddr *prev_prom = NULL;     /* last entry skipped while scanning after ifa1 */
249         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250
251         ASSERT_RTNL();
252
253         /* 1. Deleting primary ifaddr forces deletion all secondaries
254          * unless alias promotion is set
255          **/
256
257         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259
260                 while ((ifa = *ifap1) != NULL) {
                            /* track the last primary whose scope is >= ifa1's scope */
261                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262                             ifa1->ifa_scope <= ifa->ifa_scope)
263                                 last_prim = ifa;
264
                            /* not a secondary of ifa1's subnet: step over it */
265                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266                             ifa1->ifa_mask != ifa->ifa_mask ||
267                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
268                                 ifap1 = &ifa->ifa_next;
269                                 prev_prom = ifa;
270                                 continue;
271                         }
272
273                         if (!do_promote) {
                                    /* unlink, announce and free this secondary */
274                                 *ifap1 = ifa->ifa_next;
275
276                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277                                 blocking_notifier_call_chain(&inetaddr_chain,
278                                                 NETDEV_DOWN, ifa);
279                                 inet_free_ifa(ifa);
280                         } else {
                                    /* first matching secondary becomes the new primary */
281                                 promote = ifa;
282                                 break;
283                         }
284                 }
285         }
286
287         /* 2. Unlink it */
288
289         *ifap = ifa1->ifa_next;
290
291         /* 3. Announce address deletion */
292
293         /* Send message first, then call notifier.
294            At first sight, FIB update triggered by notifier
295            will refer to already deleted ifaddr, that could confuse
296            netlink listeners. It is not true: look, gated sees
297            that route deleted and if it still thinks that ifaddr
298            is valid, it will try to restore deleted routes... Grr.
299            So that, this order is correct.
300          */
301         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303
304         if (promote) {
305
                    /* If entries were skipped before reaching promote, move it
                     * from its current position to directly after last_prim.
                     */
306                 if (prev_prom) {
307                         prev_prom->ifa_next = promote->ifa_next;
308                         promote->ifa_next = last_prim->ifa_next;
309                         last_prim->ifa_next = promote;
310                 }
311
312                 promote->ifa_flags &= ~IFA_F_SECONDARY;
313                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314                 blocking_notifier_call_chain(&inetaddr_chain,
315                                 NETDEV_UP, promote);
                    /* call fib_add_ifaddr() for every later entry that lies in
                     * the deleted address's subnet
                     */
316                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317                         if (ifa1->ifa_mask != ifa->ifa_mask ||
318                             !inet_ifa_match(ifa1->ifa_address, ifa))
319                                         continue;
320                         fib_add_ifaddr(ifa);
321                 }
322
323         }
324         if (destroy)
325                 inet_free_ifa(ifa1);
326 }
327
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329                          int destroy)
330 {
331         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335                              u32 pid)
336 {
337         struct in_device *in_dev = ifa->ifa_dev;
338         struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340         ASSERT_RTNL();
341
342         if (!ifa->ifa_local) {
343                 inet_free_ifa(ifa);
344                 return 0;
345         }
346
347         ifa->ifa_flags &= ~IFA_F_SECONDARY;
348         last_primary = &in_dev->ifa_list;
349
350         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351              ifap = &ifa1->ifa_next) {
352                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353                     ifa->ifa_scope <= ifa1->ifa_scope)
354                         last_primary = &ifa1->ifa_next;
355                 if (ifa1->ifa_mask == ifa->ifa_mask &&
356                     inet_ifa_match(ifa1->ifa_address, ifa)) {
357                         if (ifa1->ifa_local == ifa->ifa_local) {
358                                 inet_free_ifa(ifa);
359                                 return -EEXIST;
360                         }
361                         if (ifa1->ifa_scope != ifa->ifa_scope) {
362                                 inet_free_ifa(ifa);
363                                 return -EINVAL;
364                         }
365                         ifa->ifa_flags |= IFA_F_SECONDARY;
366                 }
367         }
368
369         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370                 net_srandom(ifa->ifa_local);
371                 ifap = last_primary;
372         }
373
374         ifa->ifa_next = *ifap;
375         *ifap = ifa;
376
377         /* Send message first, then call notifier.
378            Notifier will trigger FIB update, so that
379            listeners of netlink will know about new ifaddr */
380         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383         return 0;
384 }
385
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388         return __inet_insert_ifa(ifa, NULL, 0);
389 }
390
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393         struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395         ASSERT_RTNL();
396
397         if (!in_dev) {
398                 inet_free_ifa(ifa);
399                 return -ENOBUFS;
400         }
401         ipv4_devconf_setall(in_dev);
402         if (ifa->ifa_dev != in_dev) {
403                 BUG_TRAP(!ifa->ifa_dev);
404                 in_dev_hold(in_dev);
405                 ifa->ifa_dev = in_dev;
406         }
407         if (ipv4_is_loopback(ifa->ifa_local))
408                 ifa->ifa_scope = RT_SCOPE_HOST;
409         return inet_insert_ifa(ifa);
410 }
411
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414         struct net_device *dev;
415         struct in_device *in_dev = NULL;
416         read_lock(&dev_base_lock);
417         dev = __dev_get_by_index(net, ifindex);
418         if (dev)
419                 in_dev = in_dev_get(dev);
420         read_unlock(&dev_base_lock);
421         return in_dev;
422 }
423
424 /* Called only from RTNL semaphored context. No locks. */
425
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427                                     __be32 mask)
428 {
429         ASSERT_RTNL();
430
431         for_primary_ifa(in_dev) {
432                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433                         return ifa;
434         } endfor_ifa(in_dev);
435         return NULL;
436 }
437
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440         struct net *net = skb->sk->sk_net;
441         struct nlattr *tb[IFA_MAX+1];
442         struct in_device *in_dev;
443         struct ifaddrmsg *ifm;
444         struct in_ifaddr *ifa, **ifap;
445         int err = -EINVAL;
446
447         ASSERT_RTNL();
448
449         if (net != &init_net)
450                 return -EINVAL;
451
452         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453         if (err < 0)
454                 goto errout;
455
456         ifm = nlmsg_data(nlh);
457         in_dev = inetdev_by_index(net, ifm->ifa_index);
458         if (in_dev == NULL) {
459                 err = -ENODEV;
460                 goto errout;
461         }
462
463         __in_dev_put(in_dev);
464
465         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466              ifap = &ifa->ifa_next) {
467                 if (tb[IFA_LOCAL] &&
468                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469                         continue;
470
471                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472                         continue;
473
474                 if (tb[IFA_ADDRESS] &&
475                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477                         continue;
478
479                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480                 return 0;
481         }
482
483         err = -EADDRNOTAVAIL;
484 errout:
485         return err;
486 }
487
488 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
489 {
490         struct nlattr *tb[IFA_MAX+1];
491         struct in_ifaddr *ifa;
492         struct ifaddrmsg *ifm;
493         struct net_device *dev;
494         struct in_device *in_dev;
495         int err = -EINVAL;
496
497         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498         if (err < 0)
499                 goto errout;
500
501         ifm = nlmsg_data(nlh);
502         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
503                 err = -EINVAL;
504                 goto errout;
505         }
506
507         dev = __dev_get_by_index(&init_net, ifm->ifa_index);
508         if (dev == NULL) {
509                 err = -ENODEV;
510                 goto errout;
511         }
512
513         in_dev = __in_dev_get_rtnl(dev);
514         if (in_dev == NULL) {
515                 err = -ENOBUFS;
516                 goto errout;
517         }
518
519         ifa = inet_alloc_ifa();
520         if (ifa == NULL) {
521                 /*
522                  * A potential indev allocation can be left alive, it stays
523                  * assigned to its device and is destroy with it.
524                  */
525                 err = -ENOBUFS;
526                 goto errout;
527         }
528
529         ipv4_devconf_setall(in_dev);
530         in_dev_hold(in_dev);
531
532         if (tb[IFA_ADDRESS] == NULL)
533                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
534
535         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
536         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
537         ifa->ifa_flags = ifm->ifa_flags;
538         ifa->ifa_scope = ifm->ifa_scope;
539         ifa->ifa_dev = in_dev;
540
541         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
542         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
543
544         if (tb[IFA_BROADCAST])
545                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
546
547         if (tb[IFA_ANYCAST])
548                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
549
550         if (tb[IFA_LABEL])
551                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
552         else
553                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
554
555         return ifa;
556
557 errout:
558         return ERR_PTR(err);
559 }
560
561 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
562 {
563         struct net *net = skb->sk->sk_net;
564         struct in_ifaddr *ifa;
565
566         ASSERT_RTNL();
567
568         if (net != &init_net)
569                 return -EINVAL;
570
571         ifa = rtm_to_ifaddr(nlh);
572         if (IS_ERR(ifa))
573                 return PTR_ERR(ifa);
574
575         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576 }
577
578 /*
579  *      Determine a default network mask, based on the IP address.
580  */
581
582 static __inline__ int inet_abc_len(__be32 addr)
583 {
584         int rc = -1;    /* Something else, probably a multicast. */
585
586         if (ipv4_is_zeronet(addr))
587                 rc = 0;
588         else {
589                 __u32 haddr = ntohl(addr);
590
591                 if (IN_CLASSA(haddr))
592                         rc = 8;
593                 else if (IN_CLASSB(haddr))
594                         rc = 16;
595                 else if (IN_CLASSC(haddr))
596                         rc = 24;
597         }
598
599         return rc;
600 }
601
602
/*
 * Handle the IPv4 address ioctls issued on an interface.
 *
 * cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *      SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *      SIOCSIFNETMASK; any other value yields -EINVAL.
 * arg: user pointer to a struct ifreq.  It is copied in, and for the four
 *      "get" commands copied back out with the requested address filled in.
 *
 * The interface name may carry a ":alias" suffix: the device is looked up
 * by the part before the colon, while the full name is compared against
 * the address labels.  SIOCSIFFLAGS and the "set" commands require
 * CAP_NET_ADMIN (-EACCES otherwise).  The device lookup and all list
 * manipulation run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno (-EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever the called helper returns).
 */
603 int devinet_ioctl(unsigned int cmd, void __user *arg)
604 {
605         struct ifreq ifr;
606         struct sockaddr_in sin_orig;
607         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
608         struct in_device *in_dev;
609         struct in_ifaddr **ifap = NULL;
610         struct in_ifaddr *ifa = NULL;
611         struct net_device *dev;
612         char *colon;
613         int ret = -EFAULT;
614         int tryaddrmatch = 0;
615
616         /*
617          *      Fetch the caller's info block into kernel space
618          */
619
620         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
621                 goto out;
622         ifr.ifr_name[IFNAMSIZ - 1] = 0;
623
624         /* save original address for comparison */
625         memcpy(&sin_orig, sin, sizeof(*sin));
626
            /* cut the name at the alias separator so the device lookup sees
             * only the base name; the ':' is put back after the lookup
             */
627         colon = strchr(ifr.ifr_name, ':');
628         if (colon)
629                 *colon = 0;
630
631 #ifdef CONFIG_KMOD
632         dev_load(&init_net, ifr.ifr_name);
633 #endif
634
            /* first pass over cmd: permission and argument checks only */
635         switch (cmd) {
636         case SIOCGIFADDR:       /* Get interface address */
637         case SIOCGIFBRDADDR:    /* Get the broadcast address */
638         case SIOCGIFDSTADDR:    /* Get the destination address */
639         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
640                 /* Note that these ioctls will not sleep,
641                    so that we do not impose a lock.
642                    One day we will be forced to put shlock here (I mean SMP)
643                  */
644                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
645                 memset(sin, 0, sizeof(*sin));
646                 sin->sin_family = AF_INET;
647                 break;
648
649         case SIOCSIFFLAGS:
650                 ret = -EACCES;
651                 if (!capable(CAP_NET_ADMIN))
652                         goto out;
653                 break;
654         case SIOCSIFADDR:       /* Set interface address (and family) */
655         case SIOCSIFBRDADDR:    /* Set the broadcast address */
656         case SIOCSIFDSTADDR:    /* Set the destination address */
657         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
658                 ret = -EACCES;
659                 if (!capable(CAP_NET_ADMIN))
660                         goto out;
661                 ret = -EINVAL;
662                 if (sin->sin_family != AF_INET)
663                         goto out;
664                 break;
665         default:
666                 ret = -EINVAL;
667                 goto out;
668         }
669
670         rtnl_lock();
671
672         ret = -ENODEV;
673         if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
674                 goto done;
675
            /* restore the full "name:alias" string for the label comparisons */
676         if (colon)
677                 *colon = ':';
678
679         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
680                 if (tryaddrmatch) {
681                         /* Matthias Andree */
682                         /* compare label and address (4.4BSD style) */
683                         /* note: we only do this for a limited set of ioctls
684                            and only if the original address family was AF_INET.
685                            This is checked above. */
686                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
687                              ifap = &ifa->ifa_next) {
688                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
689                                     sin_orig.sin_addr.s_addr ==
690                                                         ifa->ifa_address) {
691                                         break; /* found */
692                                 }
693                         }
694                 }
695                 /* we didn't get a match, maybe the application is
696                    4.3BSD-style and passed in junk so we fall back to
697                    comparing just the label */
698                 if (!ifa) {
699                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
700                              ifap = &ifa->ifa_next)
701                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
702                                         break;
703                 }
704         }
705
            /* only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
             * existing address
             */
706         ret = -EADDRNOTAVAIL;
707         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
708                 goto done;
709
            /* second pass over cmd: perform the operation */
710         switch (cmd) {
711         case SIOCGIFADDR:       /* Get interface address */
712                 sin->sin_addr.s_addr = ifa->ifa_local;
713                 goto rarok;
714
715         case SIOCGIFBRDADDR:    /* Get the broadcast address */
716                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
717                 goto rarok;
718
719         case SIOCGIFDSTADDR:    /* Get the destination address */
720                 sin->sin_addr.s_addr = ifa->ifa_address;
721                 goto rarok;
722
723         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
724                 sin->sin_addr.s_addr = ifa->ifa_mask;
725                 goto rarok;
726
727         case SIOCSIFFLAGS:
                    /* on an alias name, clearing IFF_UP deletes that address;
                     * otherwise the flags are applied to the device itself
                     */
728                 if (colon) {
729                         ret = -EADDRNOTAVAIL;
730                         if (!ifa)
731                                 break;
732                         ret = 0;
733                         if (!(ifr.ifr_flags & IFF_UP))
734                                 inet_del_ifa(in_dev, ifap, 1);
735                         break;
736                 }
737                 ret = dev_change_flags(dev, ifr.ifr_flags);
738                 break;
739
740         case SIOCSIFADDR:       /* Set interface address (and family) */
741                 ret = -EINVAL;
742                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
743                         break;
744
745                 if (!ifa) {
                            /* no address under this label yet: allocate one */
746                         ret = -ENOBUFS;
747                         if ((ifa = inet_alloc_ifa()) == NULL)
748                                 break;
749                         if (colon)
750                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
751                         else
752                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
753                 } else {
                            /* existing address: unlink it (without freeing) so it
                             * can be modified and re-inserted below
                             */
754                         ret = 0;
755                         if (ifa->ifa_local == sin->sin_addr.s_addr)
756                                 break;
757                         inet_del_ifa(in_dev, ifap, 0);
758                         ifa->ifa_broadcast = 0;
759                         ifa->ifa_anycast = 0;
760                 }
761
762                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
763
                    /* non point-to-point: classful prefix, and a derived
                     * broadcast address on broadcast devices; point-to-point: /32
                     */
764                 if (!(dev->flags & IFF_POINTOPOINT)) {
765                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
766                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
767                         if ((dev->flags & IFF_BROADCAST) &&
768                             ifa->ifa_prefixlen < 31)
769                                 ifa->ifa_broadcast = ifa->ifa_address |
770                                                      ~ifa->ifa_mask;
771                 } else {
772                         ifa->ifa_prefixlen = 32;
773                         ifa->ifa_mask = inet_make_mask(32);
774                 }
775                 ret = inet_set_ifa(dev, ifa);
776                 break;
777
778         case SIOCSIFBRDADDR:    /* Set the broadcast address */
779                 ret = 0;
780                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
781                         inet_del_ifa(in_dev, ifap, 0);
782                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
                            /* NOTE(review): the return value of inet_insert_ifa() is not checked here */
783                         inet_insert_ifa(ifa);
784                 }
785                 break;
786
787         case SIOCSIFDSTADDR:    /* Set the destination address */
788                 ret = 0;
789                 if (ifa->ifa_address == sin->sin_addr.s_addr)
790                         break;
791                 ret = -EINVAL;
792                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
793                         break;
794                 ret = 0;
795                 inet_del_ifa(in_dev, ifap, 0);
796                 ifa->ifa_address = sin->sin_addr.s_addr;
797                 inet_insert_ifa(ifa);
798                 break;
799
800         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
801
802                 /*
803                  *      The mask we set must be legal.
804                  */
805                 ret = -EINVAL;
806                 if (bad_mask(sin->sin_addr.s_addr, 0))
807                         break;
808                 ret = 0;
809                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
810                         __be32 old_mask = ifa->ifa_mask;
811                         inet_del_ifa(in_dev, ifap, 0);
812                         ifa->ifa_mask = sin->sin_addr.s_addr;
813                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
814
815                         /* See if current broadcast address matches
816                          * with current netmask, then recalculate
817                          * the broadcast address. Otherwise it's a
818                          * funny address, so don't touch it since
819                          * the user seems to know what (s)he's doing...
820                          */
821                         if ((dev->flags & IFF_BROADCAST) &&
822                             (ifa->ifa_prefixlen < 31) &&
823                             (ifa->ifa_broadcast ==
824                              (ifa->ifa_local|~old_mask))) {
825                                 ifa->ifa_broadcast = (ifa->ifa_local |
826                                                       ~sin->sin_addr.s_addr);
827                         }
828                         inet_insert_ifa(ifa);
829                 }
830                 break;
831         }
832 done:
833         rtnl_unlock();
834 out:
835         return ret;
    /* the "get" commands land here: drop RTNL first, then copy the filled-in
     * ifreq back to user space
     */
836 rarok:
837         rtnl_unlock();
838         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
839         goto out;
840 }
841
842 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843 {
844         struct in_device *in_dev = __in_dev_get_rtnl(dev);
845         struct in_ifaddr *ifa;
846         struct ifreq ifr;
847         int done = 0;
848
849         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850                 goto out;
851
852         for (; ifa; ifa = ifa->ifa_next) {
853                 if (!buf) {
854                         done += sizeof(ifr);
855                         continue;
856                 }
857                 if (len < (int) sizeof(ifr))
858                         break;
859                 memset(&ifr, 0, sizeof(struct ifreq));
860                 if (ifa->ifa_label)
861                         strcpy(ifr.ifr_name, ifa->ifa_label);
862                 else
863                         strcpy(ifr.ifr_name, dev->name);
864
865                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
866                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867                                                                 ifa->ifa_local;
868
869                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870                         done = -EFAULT;
871                         break;
872                 }
873                 buf  += sizeof(struct ifreq);
874                 len  -= sizeof(struct ifreq);
875                 done += sizeof(struct ifreq);
876         }
877 out:
878         return done;
879 }
880
881 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882 {
883         __be32 addr = 0;
884         struct in_device *in_dev;
885
886         rcu_read_lock();
887         in_dev = __in_dev_get_rcu(dev);
888         if (!in_dev)
889                 goto no_in_dev;
890
891         for_primary_ifa(in_dev) {
892                 if (ifa->ifa_scope > scope)
893                         continue;
894                 if (!dst || inet_ifa_match(dst, ifa)) {
895                         addr = ifa->ifa_local;
896                         break;
897                 }
898                 if (!addr)
899                         addr = ifa->ifa_local;
900         } endfor_ifa(in_dev);
901 no_in_dev:
902         rcu_read_unlock();
903
904         if (addr)
905                 goto out;
906
907         /* Not loopback addresses on loopback should be preferred
908            in this case. It is importnat that lo is the first interface
909            in dev_base list.
910          */
911         read_lock(&dev_base_lock);
912         rcu_read_lock();
913         for_each_netdev(&init_net, dev) {
914                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915                         continue;
916
917                 for_primary_ifa(in_dev) {
918                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
919                             ifa->ifa_scope <= scope) {
920                                 addr = ifa->ifa_local;
921                                 goto out_unlock_both;
922                         }
923                 } endfor_ifa(in_dev);
924         }
925 out_unlock_both:
926         read_unlock(&dev_base_lock);
927         rcu_read_unlock();
928 out:
929         return addr;
930 }
931
932 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
933                               __be32 local, int scope)
934 {
935         int same = 0;
936         __be32 addr = 0;
937
938         for_ifa(in_dev) {
939                 if (!addr &&
940                     (local == ifa->ifa_local || !local) &&
941                     ifa->ifa_scope <= scope) {
942                         addr = ifa->ifa_local;
943                         if (same)
944                                 break;
945                 }
946                 if (!same) {
947                         same = (!local || inet_ifa_match(local, ifa)) &&
948                                 (!dst || inet_ifa_match(dst, ifa));
949                         if (same && addr) {
950                                 if (local || !dst)
951                                         break;
952                                 /* Is the selected addr into dst subnet? */
953                                 if (inet_ifa_match(addr, ifa))
954                                         break;
955                                 /* No, then can we use new local src? */
956                                 if (ifa->ifa_scope <= scope) {
957                                         addr = ifa->ifa_local;
958                                         break;
959                                 }
960                                 /* search for large dst subnet for addr */
961                                 same = 0;
962                         }
963                 }
964         } endfor_ifa(in_dev);
965
966         return same? addr : 0;
967 }
968
969 /*
970  * Confirm that local IP address exists using wildcards:
971  * - in_dev: only on this interface, 0=any interface
972  * - dst: only in the same subnet as dst, 0=any dst
973  * - local: address, 0=autoselect the local address
974  * - scope: maximum allowed scope value for the local address
975  */
976 __be32 inet_confirm_addr(struct in_device *in_dev,
977                          __be32 dst, __be32 local, int scope)
978 {
979         __be32 addr = 0;
980         struct net_device *dev;
981         struct net *net;
982
983         if (scope != RT_SCOPE_LINK)
984                 return confirm_addr_indev(in_dev, dst, local, scope);
985
986         net = in_dev->dev->nd_net;
987         read_lock(&dev_base_lock);
988         rcu_read_lock();
989         for_each_netdev(net, dev) {
990                 if ((in_dev = __in_dev_get_rcu(dev))) {
991                         addr = confirm_addr_indev(in_dev, dst, local, scope);
992                         if (addr)
993                                 break;
994                 }
995         }
996         rcu_read_unlock();
997         read_unlock(&dev_base_lock);
998
999         return addr;
1000 }
1001
1002 /*
1003  *      Device notifier
1004  */
1005
1006 int register_inetaddr_notifier(struct notifier_block *nb)
1007 {
1008         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1009 }
1010
1011 int unregister_inetaddr_notifier(struct notifier_block *nb)
1012 {
1013         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1014 }
1015
1016 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1017  * alias numbering and to create unique labels if possible.
1018 */
1019 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1020 {
1021         struct in_ifaddr *ifa;
1022         int named = 0;
1023
1024         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1025                 char old[IFNAMSIZ], *dot;
1026
1027                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1028                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029                 if (named++ == 0)
1030                         continue;
1031                 dot = strchr(old, ':');
1032                 if (dot == NULL) {
1033                         sprintf(old, ":%d", named);
1034                         dot = old;
1035                 }
1036                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1037                         strcat(ifa->ifa_label, dot);
1038                 } else {
1039                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1040                 }
1041         }
1042 }
1043
1044 /* Called only under RTNL semaphore */
1045
1046 static int inetdev_event(struct notifier_block *this, unsigned long event,
1047                          void *ptr)
1048 {
1049         struct net_device *dev = ptr;
1050         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1051
1052         if (dev->nd_net != &init_net)
1053                 return NOTIFY_DONE;
1054
1055         ASSERT_RTNL();
1056
1057         if (!in_dev) {
1058                 if (event == NETDEV_REGISTER) {
1059                         in_dev = inetdev_init(dev);
1060                         if (!in_dev)
1061                                 return notifier_from_errno(-ENOMEM);
1062                         if (dev->flags & IFF_LOOPBACK) {
1063                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1064                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1065                         }
1066                 }
1067                 goto out;
1068         }
1069
1070         switch (event) {
1071         case NETDEV_REGISTER:
1072                 printk(KERN_DEBUG "inetdev_event: bug\n");
1073                 dev->ip_ptr = NULL;
1074                 break;
1075         case NETDEV_UP:
1076                 if (dev->mtu < 68)
1077                         break;
1078                 if (dev->flags & IFF_LOOPBACK) {
1079                         struct in_ifaddr *ifa;
1080                         if ((ifa = inet_alloc_ifa()) != NULL) {
1081                                 ifa->ifa_local =
1082                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083                                 ifa->ifa_prefixlen = 8;
1084                                 ifa->ifa_mask = inet_make_mask(8);
1085                                 in_dev_hold(in_dev);
1086                                 ifa->ifa_dev = in_dev;
1087                                 ifa->ifa_scope = RT_SCOPE_HOST;
1088                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089                                 inet_insert_ifa(ifa);
1090                         }
1091                 }
1092                 ip_mc_up(in_dev);
1093                 break;
1094         case NETDEV_DOWN:
1095                 ip_mc_down(in_dev);
1096                 break;
1097         case NETDEV_CHANGEMTU:
1098                 if (dev->mtu >= 68)
1099                         break;
1100                 /* MTU falled under 68, disable IP */
1101         case NETDEV_UNREGISTER:
1102                 inetdev_destroy(in_dev);
1103                 break;
1104         case NETDEV_CHANGENAME:
1105                 /* Do not notify about label change, this event is
1106                  * not interesting to applications using netlink.
1107                  */
1108                 inetdev_changename(dev, in_dev);
1109
1110                 devinet_sysctl_unregister(in_dev);
1111                 devinet_sysctl_register(in_dev);
1112                 break;
1113         }
1114 out:
1115         return NOTIFY_DONE;
1116 }
1117
1118 static struct notifier_block ip_netdev_notifier = {
1119         .notifier_call =inetdev_event,
1120 };
1121
1122 static inline size_t inet_nlmsg_size(void)
1123 {
1124         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1125                + nla_total_size(4) /* IFA_ADDRESS */
1126                + nla_total_size(4) /* IFA_LOCAL */
1127                + nla_total_size(4) /* IFA_BROADCAST */
1128                + nla_total_size(4) /* IFA_ANYCAST */
1129                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1130 }
1131
1132 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1133                             u32 pid, u32 seq, int event, unsigned int flags)
1134 {
1135         struct ifaddrmsg *ifm;
1136         struct nlmsghdr  *nlh;
1137
1138         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1139         if (nlh == NULL)
1140                 return -EMSGSIZE;
1141
1142         ifm = nlmsg_data(nlh);
1143         ifm->ifa_family = AF_INET;
1144         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1145         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1146         ifm->ifa_scope = ifa->ifa_scope;
1147         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1148
1149         if (ifa->ifa_address)
1150                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1151
1152         if (ifa->ifa_local)
1153                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1154
1155         if (ifa->ifa_broadcast)
1156                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1157
1158         if (ifa->ifa_anycast)
1159                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1160
1161         if (ifa->ifa_label[0])
1162                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1163
1164         return nlmsg_end(skb, nlh);
1165
1166 nla_put_failure:
1167         nlmsg_cancel(skb, nlh);
1168         return -EMSGSIZE;
1169 }
1170
1171 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1172 {
1173         struct net *net = skb->sk->sk_net;
1174         int idx, ip_idx;
1175         struct net_device *dev;
1176         struct in_device *in_dev;
1177         struct in_ifaddr *ifa;
1178         int s_ip_idx, s_idx = cb->args[0];
1179
1180         if (net != &init_net)
1181                 return 0;
1182
1183         s_ip_idx = ip_idx = cb->args[1];
1184         idx = 0;
1185         for_each_netdev(&init_net, dev) {
1186                 if (idx < s_idx)
1187                         goto cont;
1188                 if (idx > s_idx)
1189                         s_ip_idx = 0;
1190                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1191                         goto cont;
1192
1193                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1194                      ifa = ifa->ifa_next, ip_idx++) {
1195                         if (ip_idx < s_ip_idx)
1196                                 continue;
1197                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1198                                              cb->nlh->nlmsg_seq,
1199                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1200                                 goto done;
1201                 }
1202 cont:
1203                 idx++;
1204         }
1205
1206 done:
1207         cb->args[0] = idx;
1208         cb->args[1] = ip_idx;
1209
1210         return skb->len;
1211 }
1212
1213 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1214                       u32 pid)
1215 {
1216         struct sk_buff *skb;
1217         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1218         int err = -ENOBUFS;
1219
1220         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1221         if (skb == NULL)
1222                 goto errout;
1223
1224         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1225         if (err < 0) {
1226                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1227                 WARN_ON(err == -EMSGSIZE);
1228                 kfree_skb(skb);
1229                 goto errout;
1230         }
1231         err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1232 errout:
1233         if (err < 0)
1234                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1235 }
1236
1237 #ifdef CONFIG_SYSCTL
1238
1239 static void devinet_copy_dflt_conf(struct net *net, int i)
1240 {
1241         struct net_device *dev;
1242
1243         read_lock(&dev_base_lock);
1244         for_each_netdev(net, dev) {
1245                 struct in_device *in_dev;
1246                 rcu_read_lock();
1247                 in_dev = __in_dev_get_rcu(dev);
1248                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1249                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1250                 rcu_read_unlock();
1251         }
1252         read_unlock(&dev_base_lock);
1253 }
1254
1255 static void inet_forward_change(struct net *net)
1256 {
1257         struct net_device *dev;
1258         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1259
1260         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1261         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1262
1263         read_lock(&dev_base_lock);
1264         for_each_netdev(net, dev) {
1265                 struct in_device *in_dev;
1266                 rcu_read_lock();
1267                 in_dev = __in_dev_get_rcu(dev);
1268                 if (in_dev)
1269                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1270                 rcu_read_unlock();
1271         }
1272         read_unlock(&dev_base_lock);
1273
1274         rt_cache_flush(0);
1275 }
1276
1277 static int devinet_conf_proc(ctl_table *ctl, int write,
1278                              struct file* filp, void __user *buffer,
1279                              size_t *lenp, loff_t *ppos)
1280 {
1281         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1282
1283         if (write) {
1284                 struct ipv4_devconf *cnf = ctl->extra1;
1285                 struct net *net = ctl->extra2;
1286                 int i = (int *)ctl->data - cnf->data;
1287
1288                 set_bit(i, cnf->state);
1289
1290                 if (cnf == net->ipv4.devconf_dflt)
1291                         devinet_copy_dflt_conf(net, i);
1292         }
1293
1294         return ret;
1295 }
1296
1297 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1298                                void __user *oldval, size_t __user *oldlenp,
1299                                void __user *newval, size_t newlen)
1300 {
1301         struct ipv4_devconf *cnf;
1302         struct net *net;
1303         int *valp = table->data;
1304         int new;
1305         int i;
1306
1307         if (!newval || !newlen)
1308                 return 0;
1309
1310         if (newlen != sizeof(int))
1311                 return -EINVAL;
1312
1313         if (get_user(new, (int __user *)newval))
1314                 return -EFAULT;
1315
1316         if (new == *valp)
1317                 return 0;
1318
1319         if (oldval && oldlenp) {
1320                 size_t len;
1321
1322                 if (get_user(len, oldlenp))
1323                         return -EFAULT;
1324
1325                 if (len) {
1326                         if (len > table->maxlen)
1327                                 len = table->maxlen;
1328                         if (copy_to_user(oldval, valp, len))
1329                                 return -EFAULT;
1330                         if (put_user(len, oldlenp))
1331                                 return -EFAULT;
1332                 }
1333         }
1334
1335         *valp = new;
1336
1337         cnf = table->extra1;
1338         net = table->extra2;
1339         i = (int *)table->data - cnf->data;
1340
1341         set_bit(i, cnf->state);
1342
1343         if (cnf == net->ipv4.devconf_dflt)
1344                 devinet_copy_dflt_conf(net, i);
1345
1346         return 1;
1347 }
1348
1349 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1350                                   struct file* filp, void __user *buffer,
1351                                   size_t *lenp, loff_t *ppos)
1352 {
1353         int *valp = ctl->data;
1354         int val = *valp;
1355         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1356
1357         if (write && *valp != val) {
1358                 struct net *net = ctl->extra2;
1359
1360                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1361                         inet_forward_change(net);
1362                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1363                         rt_cache_flush(0);
1364         }
1365
1366         return ret;
1367 }
1368
1369 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1370                          struct file* filp, void __user *buffer,
1371                          size_t *lenp, loff_t *ppos)
1372 {
1373         int *valp = ctl->data;
1374         int val = *valp;
1375         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1376
1377         if (write && *valp != val)
1378                 rt_cache_flush(0);
1379
1380         return ret;
1381 }
1382
1383 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1384                                   void __user *oldval, size_t __user *oldlenp,
1385                                   void __user *newval, size_t newlen)
1386 {
1387         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1388                                       newval, newlen);
1389
1390         if (ret == 1)
1391                 rt_cache_flush(0);
1392
1393         return ret;
1394 }
1395
1396
/*
 * Build one ctl_table initializer for devconf attribute 'attr'.
 * .data points into the global ipv4_devconf; __devinet_sysctl_register()
 * rebases it for the per-instance copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, label, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= label, \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.strategy	= strat, \
		.extra1		= &ipv4_devconf, \
	}

/* Plain integer entry, mode 0644. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Plain integer entry, mode 0444. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, handler, strat)

/* Mode 0644 entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1424
1425 static struct devinet_sysctl_table {
1426         struct ctl_table_header *sysctl_header;
1427         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1428         char *dev_name;
1429 } devinet_sysctl = {
1430         .devinet_vars = {
1431                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1432                                              devinet_sysctl_forward,
1433                                              devinet_conf_sysctl),
1434                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1435
1436                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1437                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1439                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1440                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1441                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1442                                         "accept_source_route"),
1443                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1444                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1445                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1446                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1447                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1448                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1449                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1450                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1451                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1452
1453                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1455                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1456                                               "force_igmp_version"),
1457                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1458                                               "promote_secondaries"),
1459         },
1460 };
1461
1462 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1463                 int ctl_name, struct ipv4_devconf *p)
1464 {
1465         int i;
1466         struct devinet_sysctl_table *t;
1467
1468 #define DEVINET_CTL_PATH_DEV    3
1469
1470         struct ctl_path devinet_ctl_path[] = {
1471                 { .procname = "net", .ctl_name = CTL_NET, },
1472                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1473                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1474                 { /* to be set */ },
1475                 { },
1476         };
1477
1478         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1479         if (!t)
1480                 goto out;
1481
1482         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1483                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1484                 t->devinet_vars[i].extra1 = p;
1485                 t->devinet_vars[i].extra2 = net;
1486         }
1487
1488         /*
1489          * Make a copy of dev_name, because '.procname' is regarded as const
1490          * by sysctl and we wouldn't want anyone to change it under our feet
1491          * (see SIOCSIFNAME).
1492          */
1493         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1494         if (!t->dev_name)
1495                 goto free;
1496
1497         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1498         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1499
1500         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1501                         t->devinet_vars);
1502         if (!t->sysctl_header)
1503                 goto free_procname;
1504
1505         p->sysctl = t;
1506         return 0;
1507
1508 free_procname:
1509         kfree(t->dev_name);
1510 free:
1511         kfree(t);
1512 out:
1513         return -ENOBUFS;
1514 }
1515
1516 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1517 {
1518         struct devinet_sysctl_table *t = cnf->sysctl;
1519
1520         if (t == NULL)
1521                 return;
1522
1523         cnf->sysctl = NULL;
1524         unregister_sysctl_table(t->sysctl_header);
1525         kfree(t->dev_name);
1526         kfree(t);
1527 }
1528
1529 static void devinet_sysctl_register(struct in_device *idev)
1530 {
1531         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1532                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1533         __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1534                         idev->dev->ifindex, &idev->cnf);
1535 }
1536
1537 static void devinet_sysctl_unregister(struct in_device *idev)
1538 {
1539         __devinet_sysctl_unregister(&idev->cnf);
1540         neigh_sysctl_unregister(idev->arp_parms);
1541 }
1542
1543 static struct ctl_table ctl_forward_entry[] = {
1544         {
1545                 .ctl_name       = NET_IPV4_FORWARD,
1546                 .procname       = "ip_forward",
1547                 .data           = &ipv4_devconf.data[
1548                                         NET_IPV4_CONF_FORWARDING - 1],
1549                 .maxlen         = sizeof(int),
1550                 .mode           = 0644,
1551                 .proc_handler   = devinet_sysctl_forward,
1552                 .strategy       = devinet_conf_sysctl,
1553                 .extra1         = &ipv4_devconf,
1554                 .extra2         = &init_net,
1555         },
1556         { },
1557 };
1558
1559 static __net_initdata struct ctl_path net_ipv4_path[] = {
1560         { .procname = "net", .ctl_name = CTL_NET, },
1561         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1562         { },
1563 };
1564 #endif
1565
1566 static __net_init int devinet_init_net(struct net *net)
1567 {
1568         int err;
1569         struct ipv4_devconf *all, *dflt;
1570 #ifdef CONFIG_SYSCTL
1571         struct ctl_table *tbl = ctl_forward_entry;
1572         struct ctl_table_header *forw_hdr;
1573 #endif
1574
1575         err = -ENOMEM;
1576         all = &ipv4_devconf;
1577         dflt = &ipv4_devconf_dflt;
1578
1579         if (net != &init_net) {
1580                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1581                 if (all == NULL)
1582                         goto err_alloc_all;
1583
1584                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1585                 if (dflt == NULL)
1586                         goto err_alloc_dflt;
1587
1588 #ifdef CONFIG_SYSCTL
1589                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1590                 if (tbl == NULL)
1591                         goto err_alloc_ctl;
1592
1593                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1594                 tbl[0].extra1 = all;
1595                 tbl[0].extra2 = net;
1596 #endif
1597         }
1598
1599 #ifdef CONFIG_SYSCTL
1600         err = __devinet_sysctl_register(net, "all",
1601                         NET_PROTO_CONF_ALL, all);
1602         if (err < 0)
1603                 goto err_reg_all;
1604
1605         err = __devinet_sysctl_register(net, "default",
1606                         NET_PROTO_CONF_DEFAULT, dflt);
1607         if (err < 0)
1608                 goto err_reg_dflt;
1609
1610         err = -ENOMEM;
1611         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1612         if (forw_hdr == NULL)
1613                 goto err_reg_ctl;
1614         net->ipv4.forw_hdr = forw_hdr;
1615 #endif
1616
1617         net->ipv4.devconf_all = all;
1618         net->ipv4.devconf_dflt = dflt;
1619         return 0;
1620
1621 #ifdef CONFIG_SYSCTL
1622 err_reg_ctl:
1623         __devinet_sysctl_unregister(dflt);
1624 err_reg_dflt:
1625         __devinet_sysctl_unregister(all);
1626 err_reg_all:
1627         if (tbl != ctl_forward_entry)
1628                 kfree(tbl);
1629 err_alloc_ctl:
1630 #endif
1631         if (dflt != &ipv4_devconf_dflt)
1632                 kfree(dflt);
1633 err_alloc_dflt:
1634         if (all != &ipv4_devconf)
1635                 kfree(all);
1636 err_alloc_all:
1637         return err;
1638 }
1639
1640 static __net_exit void devinet_exit_net(struct net *net)
1641 {
1642 #ifdef CONFIG_SYSCTL
1643         struct ctl_table *tbl;
1644
1645         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1646         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1647         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1648         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1649         kfree(tbl);
1650 #endif
1651         kfree(net->ipv4.devconf_dflt);
1652         kfree(net->ipv4.devconf_all);
1653 }
1654
1655 static __net_initdata struct pernet_operations devinet_ops = {
1656         .init = devinet_init_net,
1657         .exit = devinet_exit_net,
1658 };
1659
1660 void __init devinet_init(void)
1661 {
1662         register_pernet_subsys(&devinet_ops);
1663
1664         register_gifconf(PF_INET, inet_gifconf);
1665         register_netdevice_notifier(&ip_netdev_notifier);
1666
1667         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1668         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1669         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1670 }
1671
1672 EXPORT_SYMBOL(in_dev_finish_destroy);
1673 EXPORT_SYMBOL(inet_select_addr);
1674 EXPORT_SYMBOL(inetdev_by_index);
1675 EXPORT_SYMBOL(register_inetaddr_notifier);
1676 EXPORT_SYMBOL(unregister_inetaddr_notifier);