[SK_BUFF]: Introduce skb_reset_network_header(skb)
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/rtnetlink.h>
52 #include <linux/init.h>
53 #include <linux/notifier.h>
54 #include <linux/inetdevice.h>
55 #include <linux/igmp.h>
56 #ifdef CONFIG_SYSCTL
57 #include <linux/sysctl.h>
58 #endif
59 #include <linux/kmod.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/netlink.h>
66
67 struct ipv4_devconf ipv4_devconf = {
68         .accept_redirects = 1,
69         .send_redirects =  1,
70         .secure_redirects = 1,
71         .shared_media =   1,
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .accept_redirects =  1,
76         .send_redirects =    1,
77         .secure_redirects =  1,
78         .shared_media =      1,
79         .accept_source_route = 1,
80 };
81
82 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
83         [IFA_LOCAL]             = { .type = NLA_U32 },
84         [IFA_ADDRESS]           = { .type = NLA_U32 },
85         [IFA_BROADCAST]         = { .type = NLA_U32 },
86         [IFA_ANYCAST]           = { .type = NLA_U32 },
87         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
88 };
89
90 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
91
92 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
93 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
94                          int destroy);
95 #ifdef CONFIG_SYSCTL
96 static void devinet_sysctl_register(struct in_device *in_dev,
97                                     struct ipv4_devconf *p);
98 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
99 #endif
100
101 /* Locks all the inet devices. */
102
103 static struct in_ifaddr *inet_alloc_ifa(void)
104 {
105         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
106
107         if (ifa) {
108                 INIT_RCU_HEAD(&ifa->rcu_head);
109         }
110
111         return ifa;
112 }
113
114 static void inet_rcu_free_ifa(struct rcu_head *head)
115 {
116         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
117         if (ifa->ifa_dev)
118                 in_dev_put(ifa->ifa_dev);
119         kfree(ifa);
120 }
121
122 static inline void inet_free_ifa(struct in_ifaddr *ifa)
123 {
124         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
125 }
126
127 void in_dev_finish_destroy(struct in_device *idev)
128 {
129         struct net_device *dev = idev->dev;
130
131         BUG_TRAP(!idev->ifa_list);
132         BUG_TRAP(!idev->mc_list);
133 #ifdef NET_REFCNT_DEBUG
134         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
135                idev, dev ? dev->name : "NIL");
136 #endif
137         dev_put(dev);
138         if (!idev->dead)
139                 printk("Freeing alive in_device %p\n", idev);
140         else {
141                 kfree(idev);
142         }
143 }
144
145 struct in_device *inetdev_init(struct net_device *dev)
146 {
147         struct in_device *in_dev;
148
149         ASSERT_RTNL();
150
151         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
152         if (!in_dev)
153                 goto out;
154         INIT_RCU_HEAD(&in_dev->rcu_head);
155         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
156         in_dev->cnf.sysctl = NULL;
157         in_dev->dev = dev;
158         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
159                 goto out_kfree;
160         /* Reference in_dev->dev */
161         dev_hold(dev);
162 #ifdef CONFIG_SYSCTL
163         neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
164                               NET_IPV4_NEIGH, "ipv4", NULL, NULL);
165 #endif
166
167         /* Account for reference dev->ip_ptr (below) */
168         in_dev_hold(in_dev);
169
170 #ifdef CONFIG_SYSCTL
171         devinet_sysctl_register(in_dev, &in_dev->cnf);
172 #endif
173         ip_mc_init_dev(in_dev);
174         if (dev->flags & IFF_UP)
175                 ip_mc_up(in_dev);
176
177         /* we can receive as soon as ip_ptr is set -- do this last */
178         rcu_assign_pointer(dev->ip_ptr, in_dev);
179 out:
180         return in_dev;
181 out_kfree:
182         kfree(in_dev);
183         in_dev = NULL;
184         goto out;
185 }
186
187 static void in_dev_rcu_put(struct rcu_head *head)
188 {
189         struct in_device *idev = container_of(head, struct in_device, rcu_head);
190         in_dev_put(idev);
191 }
192
193 static void inetdev_destroy(struct in_device *in_dev)
194 {
195         struct in_ifaddr *ifa;
196         struct net_device *dev;
197
198         ASSERT_RTNL();
199
200         dev = in_dev->dev;
201         if (dev == &loopback_dev)
202                 return;
203
204         in_dev->dead = 1;
205
206         ip_mc_destroy_dev(in_dev);
207
208         while ((ifa = in_dev->ifa_list) != NULL) {
209                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
210                 inet_free_ifa(ifa);
211         }
212
213 #ifdef CONFIG_SYSCTL
214         devinet_sysctl_unregister(&in_dev->cnf);
215 #endif
216
217         dev->ip_ptr = NULL;
218
219 #ifdef CONFIG_SYSCTL
220         neigh_sysctl_unregister(in_dev->arp_parms);
221 #endif
222         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
223         arp_ifdown(dev);
224
225         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
226 }
227
228 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
229 {
230         rcu_read_lock();
231         for_primary_ifa(in_dev) {
232                 if (inet_ifa_match(a, ifa)) {
233                         if (!b || inet_ifa_match(b, ifa)) {
234                                 rcu_read_unlock();
235                                 return 1;
236                         }
237                 }
238         } endfor_ifa(in_dev);
239         rcu_read_unlock();
240         return 0;
241 }
242
243 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
244                          int destroy, struct nlmsghdr *nlh, u32 pid)
245 {
246         struct in_ifaddr *promote = NULL;
247         struct in_ifaddr *ifa, *ifa1 = *ifap;
248         struct in_ifaddr *last_prim = in_dev->ifa_list;
249         struct in_ifaddr *prev_prom = NULL;
250         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
251
252         ASSERT_RTNL();
253
254         /* 1. Deleting primary ifaddr forces deletion all secondaries
255          * unless alias promotion is set
256          **/
257
258         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
259                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
260
261                 while ((ifa = *ifap1) != NULL) {
262                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
263                             ifa1->ifa_scope <= ifa->ifa_scope)
264                                 last_prim = ifa;
265
266                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
267                             ifa1->ifa_mask != ifa->ifa_mask ||
268                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
269                                 ifap1 = &ifa->ifa_next;
270                                 prev_prom = ifa;
271                                 continue;
272                         }
273
274                         if (!do_promote) {
275                                 *ifap1 = ifa->ifa_next;
276
277                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
278                                 blocking_notifier_call_chain(&inetaddr_chain,
279                                                 NETDEV_DOWN, ifa);
280                                 inet_free_ifa(ifa);
281                         } else {
282                                 promote = ifa;
283                                 break;
284                         }
285                 }
286         }
287
288         /* 2. Unlink it */
289
290         *ifap = ifa1->ifa_next;
291
292         /* 3. Announce address deletion */
293
294         /* Send message first, then call notifier.
295            At first sight, FIB update triggered by notifier
296            will refer to already deleted ifaddr, that could confuse
297            netlink listeners. It is not true: look, gated sees
298            that route deleted and if it still thinks that ifaddr
299            is valid, it will try to restore deleted routes... Grr.
300            So that, this order is correct.
301          */
302         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
303         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
304
305         if (promote) {
306
307                 if (prev_prom) {
308                         prev_prom->ifa_next = promote->ifa_next;
309                         promote->ifa_next = last_prim->ifa_next;
310                         last_prim->ifa_next = promote;
311                 }
312
313                 promote->ifa_flags &= ~IFA_F_SECONDARY;
314                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
315                 blocking_notifier_call_chain(&inetaddr_chain,
316                                 NETDEV_UP, promote);
317                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
318                         if (ifa1->ifa_mask != ifa->ifa_mask ||
319                             !inet_ifa_match(ifa1->ifa_address, ifa))
320                                         continue;
321                         fib_add_ifaddr(ifa);
322                 }
323
324         }
325         if (destroy) {
326                 inet_free_ifa(ifa1);
327
328                 if (!in_dev->ifa_list)
329                         inetdev_destroy(in_dev);
330         }
331 }
332
333 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334                          int destroy)
335 {
336         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
337 }
338
339 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
340                              u32 pid)
341 {
342         struct in_device *in_dev = ifa->ifa_dev;
343         struct in_ifaddr *ifa1, **ifap, **last_primary;
344
345         ASSERT_RTNL();
346
347         if (!ifa->ifa_local) {
348                 inet_free_ifa(ifa);
349                 return 0;
350         }
351
352         ifa->ifa_flags &= ~IFA_F_SECONDARY;
353         last_primary = &in_dev->ifa_list;
354
355         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
356              ifap = &ifa1->ifa_next) {
357                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
358                     ifa->ifa_scope <= ifa1->ifa_scope)
359                         last_primary = &ifa1->ifa_next;
360                 if (ifa1->ifa_mask == ifa->ifa_mask &&
361                     inet_ifa_match(ifa1->ifa_address, ifa)) {
362                         if (ifa1->ifa_local == ifa->ifa_local) {
363                                 inet_free_ifa(ifa);
364                                 return -EEXIST;
365                         }
366                         if (ifa1->ifa_scope != ifa->ifa_scope) {
367                                 inet_free_ifa(ifa);
368                                 return -EINVAL;
369                         }
370                         ifa->ifa_flags |= IFA_F_SECONDARY;
371                 }
372         }
373
374         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
375                 net_srandom(ifa->ifa_local);
376                 ifap = last_primary;
377         }
378
379         ifa->ifa_next = *ifap;
380         *ifap = ifa;
381
382         /* Send message first, then call notifier.
383            Notifier will trigger FIB update, so that
384            listeners of netlink will know about new ifaddr */
385         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
386         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
387
388         return 0;
389 }
390
391 static int inet_insert_ifa(struct in_ifaddr *ifa)
392 {
393         return __inet_insert_ifa(ifa, NULL, 0);
394 }
395
396 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
397 {
398         struct in_device *in_dev = __in_dev_get_rtnl(dev);
399
400         ASSERT_RTNL();
401
402         if (!in_dev) {
403                 in_dev = inetdev_init(dev);
404                 if (!in_dev) {
405                         inet_free_ifa(ifa);
406                         return -ENOBUFS;
407                 }
408         }
409         if (ifa->ifa_dev != in_dev) {
410                 BUG_TRAP(!ifa->ifa_dev);
411                 in_dev_hold(in_dev);
412                 ifa->ifa_dev = in_dev;
413         }
414         if (LOOPBACK(ifa->ifa_local))
415                 ifa->ifa_scope = RT_SCOPE_HOST;
416         return inet_insert_ifa(ifa);
417 }
418
419 struct in_device *inetdev_by_index(int ifindex)
420 {
421         struct net_device *dev;
422         struct in_device *in_dev = NULL;
423         read_lock(&dev_base_lock);
424         dev = __dev_get_by_index(ifindex);
425         if (dev)
426                 in_dev = in_dev_get(dev);
427         read_unlock(&dev_base_lock);
428         return in_dev;
429 }
430
431 /* Called only from RTNL semaphored context. No locks. */
432
433 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
434                                     __be32 mask)
435 {
436         ASSERT_RTNL();
437
438         for_primary_ifa(in_dev) {
439                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
440                         return ifa;
441         } endfor_ifa(in_dev);
442         return NULL;
443 }
444
445 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
446 {
447         struct nlattr *tb[IFA_MAX+1];
448         struct in_device *in_dev;
449         struct ifaddrmsg *ifm;
450         struct in_ifaddr *ifa, **ifap;
451         int err = -EINVAL;
452
453         ASSERT_RTNL();
454
455         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
456         if (err < 0)
457                 goto errout;
458
459         ifm = nlmsg_data(nlh);
460         in_dev = inetdev_by_index(ifm->ifa_index);
461         if (in_dev == NULL) {
462                 err = -ENODEV;
463                 goto errout;
464         }
465
466         __in_dev_put(in_dev);
467
468         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469              ifap = &ifa->ifa_next) {
470                 if (tb[IFA_LOCAL] &&
471                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
472                         continue;
473
474                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
475                         continue;
476
477                 if (tb[IFA_ADDRESS] &&
478                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
479                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
480                         continue;
481
482                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
483                 return 0;
484         }
485
486         err = -EADDRNOTAVAIL;
487 errout:
488         return err;
489 }
490
491 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
492 {
493         struct nlattr *tb[IFA_MAX+1];
494         struct in_ifaddr *ifa;
495         struct ifaddrmsg *ifm;
496         struct net_device *dev;
497         struct in_device *in_dev;
498         int err = -EINVAL;
499
500         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
501         if (err < 0)
502                 goto errout;
503
504         ifm = nlmsg_data(nlh);
505         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
506                 err = -EINVAL;
507                 goto errout;
508         }
509
510         dev = __dev_get_by_index(ifm->ifa_index);
511         if (dev == NULL) {
512                 err = -ENODEV;
513                 goto errout;
514         }
515
516         in_dev = __in_dev_get_rtnl(dev);
517         if (in_dev == NULL) {
518                 in_dev = inetdev_init(dev);
519                 if (in_dev == NULL) {
520                         err = -ENOBUFS;
521                         goto errout;
522                 }
523         }
524
525         ifa = inet_alloc_ifa();
526         if (ifa == NULL) {
527                 /*
528                  * A potential indev allocation can be left alive, it stays
529                  * assigned to its device and is destroy with it.
530                  */
531                 err = -ENOBUFS;
532                 goto errout;
533         }
534
535         in_dev_hold(in_dev);
536
537         if (tb[IFA_ADDRESS] == NULL)
538                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
539
540         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
541         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
542         ifa->ifa_flags = ifm->ifa_flags;
543         ifa->ifa_scope = ifm->ifa_scope;
544         ifa->ifa_dev = in_dev;
545
546         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
547         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
548
549         if (tb[IFA_BROADCAST])
550                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
551
552         if (tb[IFA_ANYCAST])
553                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
554
555         if (tb[IFA_LABEL])
556                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
557         else
558                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
559
560         return ifa;
561
562 errout:
563         return ERR_PTR(err);
564 }
565
566 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
567 {
568         struct in_ifaddr *ifa;
569
570         ASSERT_RTNL();
571
572         ifa = rtm_to_ifaddr(nlh);
573         if (IS_ERR(ifa))
574                 return PTR_ERR(ifa);
575
576         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
577 }
578
579 /*
580  *      Determine a default network mask, based on the IP address.
581  */
582
583 static __inline__ int inet_abc_len(__be32 addr)
584 {
585         int rc = -1;    /* Something else, probably a multicast. */
586
587         if (ZERONET(addr))
588                 rc = 0;
589         else {
590                 __u32 haddr = ntohl(addr);
591
592                 if (IN_CLASSA(haddr))
593                         rc = 8;
594                 else if (IN_CLASSB(haddr))
595                         rc = 16;
596                 else if (IN_CLASSC(haddr))
597                         rc = 24;
598         }
599
600         return rc;
601 }
602
603
604 int devinet_ioctl(unsigned int cmd, void __user *arg)
605 {
606         struct ifreq ifr;
607         struct sockaddr_in sin_orig;
608         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
609         struct in_device *in_dev;
610         struct in_ifaddr **ifap = NULL;
611         struct in_ifaddr *ifa = NULL;
612         struct net_device *dev;
613         char *colon;
614         int ret = -EFAULT;
615         int tryaddrmatch = 0;
616
617         /*
618          *      Fetch the caller's info block into kernel space
619          */
620
621         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
622                 goto out;
623         ifr.ifr_name[IFNAMSIZ - 1] = 0;
624
625         /* save original address for comparison */
626         memcpy(&sin_orig, sin, sizeof(*sin));
627
628         colon = strchr(ifr.ifr_name, ':');
629         if (colon)
630                 *colon = 0;
631
632 #ifdef CONFIG_KMOD
633         dev_load(ifr.ifr_name);
634 #endif
635
636         switch (cmd) {
637         case SIOCGIFADDR:       /* Get interface address */
638         case SIOCGIFBRDADDR:    /* Get the broadcast address */
639         case SIOCGIFDSTADDR:    /* Get the destination address */
640         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
641                 /* Note that these ioctls will not sleep,
642                    so that we do not impose a lock.
643                    One day we will be forced to put shlock here (I mean SMP)
644                  */
645                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
646                 memset(sin, 0, sizeof(*sin));
647                 sin->sin_family = AF_INET;
648                 break;
649
650         case SIOCSIFFLAGS:
651                 ret = -EACCES;
652                 if (!capable(CAP_NET_ADMIN))
653                         goto out;
654                 break;
655         case SIOCSIFADDR:       /* Set interface address (and family) */
656         case SIOCSIFBRDADDR:    /* Set the broadcast address */
657         case SIOCSIFDSTADDR:    /* Set the destination address */
658         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
659                 ret = -EACCES;
660                 if (!capable(CAP_NET_ADMIN))
661                         goto out;
662                 ret = -EINVAL;
663                 if (sin->sin_family != AF_INET)
664                         goto out;
665                 break;
666         default:
667                 ret = -EINVAL;
668                 goto out;
669         }
670
671         rtnl_lock();
672
673         ret = -ENODEV;
674         if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
675                 goto done;
676
677         if (colon)
678                 *colon = ':';
679
680         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
681                 if (tryaddrmatch) {
682                         /* Matthias Andree */
683                         /* compare label and address (4.4BSD style) */
684                         /* note: we only do this for a limited set of ioctls
685                            and only if the original address family was AF_INET.
686                            This is checked above. */
687                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
688                              ifap = &ifa->ifa_next) {
689                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
690                                     sin_orig.sin_addr.s_addr ==
691                                                         ifa->ifa_address) {
692                                         break; /* found */
693                                 }
694                         }
695                 }
696                 /* we didn't get a match, maybe the application is
697                    4.3BSD-style and passed in junk so we fall back to
698                    comparing just the label */
699                 if (!ifa) {
700                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
701                              ifap = &ifa->ifa_next)
702                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
703                                         break;
704                 }
705         }
706
707         ret = -EADDRNOTAVAIL;
708         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
709                 goto done;
710
711         switch (cmd) {
712         case SIOCGIFADDR:       /* Get interface address */
713                 sin->sin_addr.s_addr = ifa->ifa_local;
714                 goto rarok;
715
716         case SIOCGIFBRDADDR:    /* Get the broadcast address */
717                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
718                 goto rarok;
719
720         case SIOCGIFDSTADDR:    /* Get the destination address */
721                 sin->sin_addr.s_addr = ifa->ifa_address;
722                 goto rarok;
723
724         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
725                 sin->sin_addr.s_addr = ifa->ifa_mask;
726                 goto rarok;
727
728         case SIOCSIFFLAGS:
729                 if (colon) {
730                         ret = -EADDRNOTAVAIL;
731                         if (!ifa)
732                                 break;
733                         ret = 0;
734                         if (!(ifr.ifr_flags & IFF_UP))
735                                 inet_del_ifa(in_dev, ifap, 1);
736                         break;
737                 }
738                 ret = dev_change_flags(dev, ifr.ifr_flags);
739                 break;
740
741         case SIOCSIFADDR:       /* Set interface address (and family) */
742                 ret = -EINVAL;
743                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
744                         break;
745
746                 if (!ifa) {
747                         ret = -ENOBUFS;
748                         if ((ifa = inet_alloc_ifa()) == NULL)
749                                 break;
750                         if (colon)
751                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
752                         else
753                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
754                 } else {
755                         ret = 0;
756                         if (ifa->ifa_local == sin->sin_addr.s_addr)
757                                 break;
758                         inet_del_ifa(in_dev, ifap, 0);
759                         ifa->ifa_broadcast = 0;
760                         ifa->ifa_anycast = 0;
761                 }
762
763                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
764
765                 if (!(dev->flags & IFF_POINTOPOINT)) {
766                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
767                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
768                         if ((dev->flags & IFF_BROADCAST) &&
769                             ifa->ifa_prefixlen < 31)
770                                 ifa->ifa_broadcast = ifa->ifa_address |
771                                                      ~ifa->ifa_mask;
772                 } else {
773                         ifa->ifa_prefixlen = 32;
774                         ifa->ifa_mask = inet_make_mask(32);
775                 }
776                 ret = inet_set_ifa(dev, ifa);
777                 break;
778
779         case SIOCSIFBRDADDR:    /* Set the broadcast address */
780                 ret = 0;
781                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
782                         inet_del_ifa(in_dev, ifap, 0);
783                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
784                         inet_insert_ifa(ifa);
785                 }
786                 break;
787
788         case SIOCSIFDSTADDR:    /* Set the destination address */
789                 ret = 0;
790                 if (ifa->ifa_address == sin->sin_addr.s_addr)
791                         break;
792                 ret = -EINVAL;
793                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
794                         break;
795                 ret = 0;
796                 inet_del_ifa(in_dev, ifap, 0);
797                 ifa->ifa_address = sin->sin_addr.s_addr;
798                 inet_insert_ifa(ifa);
799                 break;
800
801         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
802
803                 /*
804                  *      The mask we set must be legal.
805                  */
806                 ret = -EINVAL;
807                 if (bad_mask(sin->sin_addr.s_addr, 0))
808                         break;
809                 ret = 0;
810                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
811                         __be32 old_mask = ifa->ifa_mask;
812                         inet_del_ifa(in_dev, ifap, 0);
813                         ifa->ifa_mask = sin->sin_addr.s_addr;
814                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
815
816                         /* See if current broadcast address matches
817                          * with current netmask, then recalculate
818                          * the broadcast address. Otherwise it's a
819                          * funny address, so don't touch it since
820                          * the user seems to know what (s)he's doing...
821                          */
822                         if ((dev->flags & IFF_BROADCAST) &&
823                             (ifa->ifa_prefixlen < 31) &&
824                             (ifa->ifa_broadcast ==
825                              (ifa->ifa_local|~old_mask))) {
826                                 ifa->ifa_broadcast = (ifa->ifa_local |
827                                                       ~sin->sin_addr.s_addr);
828                         }
829                         inet_insert_ifa(ifa);
830                 }
831                 break;
832         }
833 done:
834         rtnl_unlock();
835 out:
836         return ret;
837 rarok:
838         rtnl_unlock();
839         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
840         goto out;
841 }
842
843 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
844 {
845         struct in_device *in_dev = __in_dev_get_rtnl(dev);
846         struct in_ifaddr *ifa;
847         struct ifreq ifr;
848         int done = 0;
849
850         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
851                 goto out;
852
853         for (; ifa; ifa = ifa->ifa_next) {
854                 if (!buf) {
855                         done += sizeof(ifr);
856                         continue;
857                 }
858                 if (len < (int) sizeof(ifr))
859                         break;
860                 memset(&ifr, 0, sizeof(struct ifreq));
861                 if (ifa->ifa_label)
862                         strcpy(ifr.ifr_name, ifa->ifa_label);
863                 else
864                         strcpy(ifr.ifr_name, dev->name);
865
866                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
867                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
868                                                                 ifa->ifa_local;
869
870                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
871                         done = -EFAULT;
872                         break;
873                 }
874                 buf  += sizeof(struct ifreq);
875                 len  -= sizeof(struct ifreq);
876                 done += sizeof(struct ifreq);
877         }
878 out:
879         return done;
880 }
881
882 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
883 {
884         __be32 addr = 0;
885         struct in_device *in_dev;
886
887         rcu_read_lock();
888         in_dev = __in_dev_get_rcu(dev);
889         if (!in_dev)
890                 goto no_in_dev;
891
892         for_primary_ifa(in_dev) {
893                 if (ifa->ifa_scope > scope)
894                         continue;
895                 if (!dst || inet_ifa_match(dst, ifa)) {
896                         addr = ifa->ifa_local;
897                         break;
898                 }
899                 if (!addr)
900                         addr = ifa->ifa_local;
901         } endfor_ifa(in_dev);
902 no_in_dev:
903         rcu_read_unlock();
904
905         if (addr)
906                 goto out;
907
908         /* Not loopback addresses on loopback should be preferred
909            in this case. It is importnat that lo is the first interface
910            in dev_base list.
911          */
912         read_lock(&dev_base_lock);
913         rcu_read_lock();
914         for (dev = dev_base; dev; dev = dev->next) {
915                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
916                         continue;
917
918                 for_primary_ifa(in_dev) {
919                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
920                             ifa->ifa_scope <= scope) {
921                                 addr = ifa->ifa_local;
922                                 goto out_unlock_both;
923                         }
924                 } endfor_ifa(in_dev);
925         }
926 out_unlock_both:
927         read_unlock(&dev_base_lock);
928         rcu_read_unlock();
929 out:
930         return addr;
931 }
932
933 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
934                               __be32 local, int scope)
935 {
936         int same = 0;
937         __be32 addr = 0;
938
939         for_ifa(in_dev) {
940                 if (!addr &&
941                     (local == ifa->ifa_local || !local) &&
942                     ifa->ifa_scope <= scope) {
943                         addr = ifa->ifa_local;
944                         if (same)
945                                 break;
946                 }
947                 if (!same) {
948                         same = (!local || inet_ifa_match(local, ifa)) &&
949                                 (!dst || inet_ifa_match(dst, ifa));
950                         if (same && addr) {
951                                 if (local || !dst)
952                                         break;
953                                 /* Is the selected addr into dst subnet? */
954                                 if (inet_ifa_match(addr, ifa))
955                                         break;
956                                 /* No, then can we use new local src? */
957                                 if (ifa->ifa_scope <= scope) {
958                                         addr = ifa->ifa_local;
959                                         break;
960                                 }
961                                 /* search for large dst subnet for addr */
962                                 same = 0;
963                         }
964                 }
965         } endfor_ifa(in_dev);
966
967         return same? addr : 0;
968 }
969
970 /*
971  * Confirm that local IP address exists using wildcards:
972  * - dev: only on this interface, 0=any interface
973  * - dst: only in the same subnet as dst, 0=any dst
974  * - local: address, 0=autoselect the local address
975  * - scope: maximum allowed scope value for the local address
976  */
977 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
978 {
979         __be32 addr = 0;
980         struct in_device *in_dev;
981
982         if (dev) {
983                 rcu_read_lock();
984                 if ((in_dev = __in_dev_get_rcu(dev)))
985                         addr = confirm_addr_indev(in_dev, dst, local, scope);
986                 rcu_read_unlock();
987
988                 return addr;
989         }
990
991         read_lock(&dev_base_lock);
992         rcu_read_lock();
993         for (dev = dev_base; dev; dev = dev->next) {
994                 if ((in_dev = __in_dev_get_rcu(dev))) {
995                         addr = confirm_addr_indev(in_dev, dst, local, scope);
996                         if (addr)
997                                 break;
998                 }
999         }
1000         rcu_read_unlock();
1001         read_unlock(&dev_base_lock);
1002
1003         return addr;
1004 }
1005
1006 /*
1007  *      Device notifier
1008  */
1009
1010 int register_inetaddr_notifier(struct notifier_block *nb)
1011 {
1012         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1013 }
1014
1015 int unregister_inetaddr_notifier(struct notifier_block *nb)
1016 {
1017         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1018 }
1019
1020 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1021  * alias numbering and to create unique labels if possible.
1022 */
1023 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1024 {
1025         struct in_ifaddr *ifa;
1026         int named = 0;
1027
1028         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1029                 char old[IFNAMSIZ], *dot;
1030
1031                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1032                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1033                 if (named++ == 0)
1034                         continue;
1035                 dot = strchr(ifa->ifa_label, ':');
1036                 if (dot == NULL) {
1037                         sprintf(old, ":%d", named);
1038                         dot = old;
1039                 }
1040                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1041                         strcat(ifa->ifa_label, dot);
1042                 } else {
1043                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1044                 }
1045         }
1046 }
1047
1048 /* Called only under RTNL semaphore */
1049
1050 static int inetdev_event(struct notifier_block *this, unsigned long event,
1051                          void *ptr)
1052 {
1053         struct net_device *dev = ptr;
1054         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1055
1056         ASSERT_RTNL();
1057
1058         if (!in_dev) {
1059                 if (event == NETDEV_REGISTER) {
1060                         in_dev = inetdev_init(dev);
1061                         if (!in_dev)
1062                                 panic("devinet: Failed to create loopback\n");
1063                         if (dev == &loopback_dev) {
1064                                 in_dev->cnf.no_xfrm = 1;
1065                                 in_dev->cnf.no_policy = 1;
1066                         }
1067                 }
1068                 goto out;
1069         }
1070
1071         switch (event) {
1072         case NETDEV_REGISTER:
1073                 printk(KERN_DEBUG "inetdev_event: bug\n");
1074                 dev->ip_ptr = NULL;
1075                 break;
1076         case NETDEV_UP:
1077                 if (dev->mtu < 68)
1078                         break;
1079                 if (dev == &loopback_dev) {
1080                         struct in_ifaddr *ifa;
1081                         if ((ifa = inet_alloc_ifa()) != NULL) {
1082                                 ifa->ifa_local =
1083                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084                                 ifa->ifa_prefixlen = 8;
1085                                 ifa->ifa_mask = inet_make_mask(8);
1086                                 in_dev_hold(in_dev);
1087                                 ifa->ifa_dev = in_dev;
1088                                 ifa->ifa_scope = RT_SCOPE_HOST;
1089                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090                                 inet_insert_ifa(ifa);
1091                         }
1092                 }
1093                 ip_mc_up(in_dev);
1094                 break;
1095         case NETDEV_DOWN:
1096                 ip_mc_down(in_dev);
1097                 break;
1098         case NETDEV_CHANGEMTU:
1099                 if (dev->mtu >= 68)
1100                         break;
1101                 /* MTU falled under 68, disable IP */
1102         case NETDEV_UNREGISTER:
1103                 inetdev_destroy(in_dev);
1104                 break;
1105         case NETDEV_CHANGENAME:
1106                 /* Do not notify about label change, this event is
1107                  * not interesting to applications using netlink.
1108                  */
1109                 inetdev_changename(dev, in_dev);
1110
1111 #ifdef CONFIG_SYSCTL
1112                 devinet_sysctl_unregister(&in_dev->cnf);
1113                 neigh_sysctl_unregister(in_dev->arp_parms);
1114                 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1115                                       NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1116                 devinet_sysctl_register(in_dev, &in_dev->cnf);
1117 #endif
1118                 break;
1119         }
1120 out:
1121         return NOTIFY_DONE;
1122 }
1123
1124 static struct notifier_block ip_netdev_notifier = {
1125         .notifier_call =inetdev_event,
1126 };
1127
1128 static inline size_t inet_nlmsg_size(void)
1129 {
1130         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1131                + nla_total_size(4) /* IFA_ADDRESS */
1132                + nla_total_size(4) /* IFA_LOCAL */
1133                + nla_total_size(4) /* IFA_BROADCAST */
1134                + nla_total_size(4) /* IFA_ANYCAST */
1135                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1136 }
1137
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139                             u32 pid, u32 seq, int event, unsigned int flags)
1140 {
1141         struct ifaddrmsg *ifm;
1142         struct nlmsghdr  *nlh;
1143
1144         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1145         if (nlh == NULL)
1146                 return -EMSGSIZE;
1147
1148         ifm = nlmsg_data(nlh);
1149         ifm->ifa_family = AF_INET;
1150         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152         ifm->ifa_scope = ifa->ifa_scope;
1153         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1154
1155         if (ifa->ifa_address)
1156                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1157
1158         if (ifa->ifa_local)
1159                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1160
1161         if (ifa->ifa_broadcast)
1162                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1163
1164         if (ifa->ifa_anycast)
1165                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1166
1167         if (ifa->ifa_label[0])
1168                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169
1170         return nlmsg_end(skb, nlh);
1171
1172 nla_put_failure:
1173         nlmsg_cancel(skb, nlh);
1174         return -EMSGSIZE;
1175 }
1176
1177 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178 {
1179         int idx, ip_idx;
1180         struct net_device *dev;
1181         struct in_device *in_dev;
1182         struct in_ifaddr *ifa;
1183         int s_ip_idx, s_idx = cb->args[0];
1184
1185         s_ip_idx = ip_idx = cb->args[1];
1186         read_lock(&dev_base_lock);
1187         for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1188                 if (idx < s_idx)
1189                         continue;
1190                 if (idx > s_idx)
1191                         s_ip_idx = 0;
1192                 rcu_read_lock();
1193                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1194                         rcu_read_unlock();
1195                         continue;
1196                 }
1197
1198                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1199                      ifa = ifa->ifa_next, ip_idx++) {
1200                         if (ip_idx < s_ip_idx)
1201                                 continue;
1202                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1203                                              cb->nlh->nlmsg_seq,
1204                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1205                                 rcu_read_unlock();
1206                                 goto done;
1207                         }
1208                 }
1209                 rcu_read_unlock();
1210         }
1211
1212 done:
1213         read_unlock(&dev_base_lock);
1214         cb->args[0] = idx;
1215         cb->args[1] = ip_idx;
1216
1217         return skb->len;
1218 }
1219
1220 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1221                       u32 pid)
1222 {
1223         struct sk_buff *skb;
1224         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1225         int err = -ENOBUFS;
1226
1227         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1228         if (skb == NULL)
1229                 goto errout;
1230
1231         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1232         if (err < 0) {
1233                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1234                 WARN_ON(err == -EMSGSIZE);
1235                 kfree_skb(skb);
1236                 goto errout;
1237         }
1238         err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1239 errout:
1240         if (err < 0)
1241                 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1242 }
1243
1244 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1245         [RTM_NEWADDR  - RTM_BASE] = { .doit     = inet_rtm_newaddr,     },
1246         [RTM_DELADDR  - RTM_BASE] = { .doit     = inet_rtm_deladdr,     },
1247         [RTM_GETADDR  - RTM_BASE] = { .dumpit   = inet_dump_ifaddr,     },
1248         [RTM_NEWROUTE - RTM_BASE] = { .doit     = inet_rtm_newroute,    },
1249         [RTM_DELROUTE - RTM_BASE] = { .doit     = inet_rtm_delroute,    },
1250         [RTM_GETROUTE - RTM_BASE] = { .doit     = inet_rtm_getroute,
1251                                       .dumpit   = inet_dump_fib,        },
1252 #ifdef CONFIG_IP_MULTIPLE_TABLES
1253         [RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib4_rules_dump,      },
1254 #endif
1255 };
1256
1257 #ifdef CONFIG_SYSCTL
1258
1259 void inet_forward_change(void)
1260 {
1261         struct net_device *dev;
1262         int on = ipv4_devconf.forwarding;
1263
1264         ipv4_devconf.accept_redirects = !on;
1265         ipv4_devconf_dflt.forwarding = on;
1266
1267         read_lock(&dev_base_lock);
1268         for (dev = dev_base; dev; dev = dev->next) {
1269                 struct in_device *in_dev;
1270                 rcu_read_lock();
1271                 in_dev = __in_dev_get_rcu(dev);
1272                 if (in_dev)
1273                         in_dev->cnf.forwarding = on;
1274                 rcu_read_unlock();
1275         }
1276         read_unlock(&dev_base_lock);
1277
1278         rt_cache_flush(0);
1279 }
1280
1281 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1282                                   struct file* filp, void __user *buffer,
1283                                   size_t *lenp, loff_t *ppos)
1284 {
1285         int *valp = ctl->data;
1286         int val = *valp;
1287         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1288
1289         if (write && *valp != val) {
1290                 if (valp == &ipv4_devconf.forwarding)
1291                         inet_forward_change();
1292                 else if (valp != &ipv4_devconf_dflt.forwarding)
1293                         rt_cache_flush(0);
1294         }
1295
1296         return ret;
1297 }
1298
1299 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1300                          struct file* filp, void __user *buffer,
1301                          size_t *lenp, loff_t *ppos)
1302 {
1303         int *valp = ctl->data;
1304         int val = *valp;
1305         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1306
1307         if (write && *valp != val)
1308                 rt_cache_flush(0);
1309
1310         return ret;
1311 }
1312
1313 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1314                                   void __user *oldval, size_t __user *oldlenp,
1315                                   void __user *newval, size_t newlen)
1316 {
1317         int *valp = table->data;
1318         int new;
1319
1320         if (!newval || !newlen)
1321                 return 0;
1322
1323         if (newlen != sizeof(int))
1324                 return -EINVAL;
1325
1326         if (get_user(new, (int __user *)newval))
1327                 return -EFAULT;
1328
1329         if (new == *valp)
1330                 return 0;
1331
1332         if (oldval && oldlenp) {
1333                 size_t len;
1334
1335                 if (get_user(len, oldlenp))
1336                         return -EFAULT;
1337
1338                 if (len) {
1339                         if (len > table->maxlen)
1340                                 len = table->maxlen;
1341                         if (copy_to_user(oldval, valp, len))
1342                                 return -EFAULT;
1343                         if (put_user(len, oldlenp))
1344                                 return -EFAULT;
1345                 }
1346         }
1347
1348         *valp = new;
1349         rt_cache_flush(0);
1350         return 1;
1351 }
1352
1353
1354 static struct devinet_sysctl_table {
1355         struct ctl_table_header *sysctl_header;
1356         ctl_table               devinet_vars[__NET_IPV4_CONF_MAX];
1357         ctl_table               devinet_dev[2];
1358         ctl_table               devinet_conf_dir[2];
1359         ctl_table               devinet_proto_dir[2];
1360         ctl_table               devinet_root_dir[2];
1361 } devinet_sysctl = {
1362         .devinet_vars = {
1363                 {
1364                         .ctl_name       = NET_IPV4_CONF_FORWARDING,
1365                         .procname       = "forwarding",
1366                         .data           = &ipv4_devconf.forwarding,
1367                         .maxlen         = sizeof(int),
1368                         .mode           = 0644,
1369                         .proc_handler   = &devinet_sysctl_forward,
1370                 },
1371                 {
1372                         .ctl_name       = NET_IPV4_CONF_MC_FORWARDING,
1373                         .procname       = "mc_forwarding",
1374                         .data           = &ipv4_devconf.mc_forwarding,
1375                         .maxlen         = sizeof(int),
1376                         .mode           = 0444,
1377                         .proc_handler   = &proc_dointvec,
1378                 },
1379                 {
1380                         .ctl_name       = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1381                         .procname       = "accept_redirects",
1382                         .data           = &ipv4_devconf.accept_redirects,
1383                         .maxlen         = sizeof(int),
1384                         .mode           = 0644,
1385                         .proc_handler   = &proc_dointvec,
1386                 },
1387                 {
1388                         .ctl_name       = NET_IPV4_CONF_SECURE_REDIRECTS,
1389                         .procname       = "secure_redirects",
1390                         .data           = &ipv4_devconf.secure_redirects,
1391                         .maxlen         = sizeof(int),
1392                         .mode           = 0644,
1393                         .proc_handler   = &proc_dointvec,
1394                 },
1395                 {
1396                         .ctl_name       = NET_IPV4_CONF_SHARED_MEDIA,
1397                         .procname       = "shared_media",
1398                         .data           = &ipv4_devconf.shared_media,
1399                         .maxlen         = sizeof(int),
1400                         .mode           = 0644,
1401                         .proc_handler   = &proc_dointvec,
1402                 },
1403                 {
1404                         .ctl_name       = NET_IPV4_CONF_RP_FILTER,
1405                         .procname       = "rp_filter",
1406                         .data           = &ipv4_devconf.rp_filter,
1407                         .maxlen         = sizeof(int),
1408                         .mode           = 0644,
1409                         .proc_handler   = &proc_dointvec,
1410                 },
1411                 {
1412                         .ctl_name       = NET_IPV4_CONF_SEND_REDIRECTS,
1413                         .procname       = "send_redirects",
1414                         .data           = &ipv4_devconf.send_redirects,
1415                         .maxlen         = sizeof(int),
1416                         .mode           = 0644,
1417                         .proc_handler   = &proc_dointvec,
1418                 },
1419                 {
1420                         .ctl_name       = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1421                         .procname       = "accept_source_route",
1422                         .data           = &ipv4_devconf.accept_source_route,
1423                         .maxlen         = sizeof(int),
1424                         .mode           = 0644,
1425                         .proc_handler   = &proc_dointvec,
1426                 },
1427                 {
1428                         .ctl_name       = NET_IPV4_CONF_PROXY_ARP,
1429                         .procname       = "proxy_arp",
1430                         .data           = &ipv4_devconf.proxy_arp,
1431                         .maxlen         = sizeof(int),
1432                         .mode           = 0644,
1433                         .proc_handler   = &proc_dointvec,
1434                 },
1435                 {
1436                         .ctl_name       = NET_IPV4_CONF_MEDIUM_ID,
1437                         .procname       = "medium_id",
1438                         .data           = &ipv4_devconf.medium_id,
1439                         .maxlen         = sizeof(int),
1440                         .mode           = 0644,
1441                         .proc_handler   = &proc_dointvec,
1442                 },
1443                 {
1444                         .ctl_name       = NET_IPV4_CONF_BOOTP_RELAY,
1445                         .procname       = "bootp_relay",
1446                         .data           = &ipv4_devconf.bootp_relay,
1447                         .maxlen         = sizeof(int),
1448                         .mode           = 0644,
1449                         .proc_handler   = &proc_dointvec,
1450                 },
1451                 {
1452                         .ctl_name       = NET_IPV4_CONF_LOG_MARTIANS,
1453                         .procname       = "log_martians",
1454                         .data           = &ipv4_devconf.log_martians,
1455                         .maxlen         = sizeof(int),
1456                         .mode           = 0644,
1457                         .proc_handler   = &proc_dointvec,
1458                 },
1459                 {
1460                         .ctl_name       = NET_IPV4_CONF_TAG,
1461                         .procname       = "tag",
1462                         .data           = &ipv4_devconf.tag,
1463                         .maxlen         = sizeof(int),
1464                         .mode           = 0644,
1465                         .proc_handler   = &proc_dointvec,
1466                 },
1467                 {
1468                         .ctl_name       = NET_IPV4_CONF_ARPFILTER,
1469                         .procname       = "arp_filter",
1470                         .data           = &ipv4_devconf.arp_filter,
1471                         .maxlen         = sizeof(int),
1472                         .mode           = 0644,
1473                         .proc_handler   = &proc_dointvec,
1474                 },
1475                 {
1476                         .ctl_name       = NET_IPV4_CONF_ARP_ANNOUNCE,
1477                         .procname       = "arp_announce",
1478                         .data           = &ipv4_devconf.arp_announce,
1479                         .maxlen         = sizeof(int),
1480                         .mode           = 0644,
1481                         .proc_handler   = &proc_dointvec,
1482                 },
1483                 {
1484                         .ctl_name       = NET_IPV4_CONF_ARP_IGNORE,
1485                         .procname       = "arp_ignore",
1486                         .data           = &ipv4_devconf.arp_ignore,
1487                         .maxlen         = sizeof(int),
1488                         .mode           = 0644,
1489                         .proc_handler   = &proc_dointvec,
1490                 },
1491                 {
1492                         .ctl_name       = NET_IPV4_CONF_ARP_ACCEPT,
1493                         .procname       = "arp_accept",
1494                         .data           = &ipv4_devconf.arp_accept,
1495                         .maxlen         = sizeof(int),
1496                         .mode           = 0644,
1497                         .proc_handler   = &proc_dointvec,
1498                 },
1499                 {
1500                         .ctl_name       = NET_IPV4_CONF_NOXFRM,
1501                         .procname       = "disable_xfrm",
1502                         .data           = &ipv4_devconf.no_xfrm,
1503                         .maxlen         = sizeof(int),
1504                         .mode           = 0644,
1505                         .proc_handler   = &ipv4_doint_and_flush,
1506                         .strategy       = &ipv4_doint_and_flush_strategy,
1507                 },
1508                 {
1509                         .ctl_name       = NET_IPV4_CONF_NOPOLICY,
1510                         .procname       = "disable_policy",
1511                         .data           = &ipv4_devconf.no_policy,
1512                         .maxlen         = sizeof(int),
1513                         .mode           = 0644,
1514                         .proc_handler   = &ipv4_doint_and_flush,
1515                         .strategy       = &ipv4_doint_and_flush_strategy,
1516                 },
1517                 {
1518                         .ctl_name       = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1519                         .procname       = "force_igmp_version",
1520                         .data           = &ipv4_devconf.force_igmp_version,
1521                         .maxlen         = sizeof(int),
1522                         .mode           = 0644,
1523                         .proc_handler   = &ipv4_doint_and_flush,
1524                         .strategy       = &ipv4_doint_and_flush_strategy,
1525                 },
1526                 {
1527                         .ctl_name       = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1528                         .procname       = "promote_secondaries",
1529                         .data           = &ipv4_devconf.promote_secondaries,
1530                         .maxlen         = sizeof(int),
1531                         .mode           = 0644,
1532                         .proc_handler   = &ipv4_doint_and_flush,
1533                         .strategy       = &ipv4_doint_and_flush_strategy,
1534                 },
1535         },
1536         .devinet_dev = {
1537                 {
1538                         .ctl_name       = NET_PROTO_CONF_ALL,
1539                         .procname       = "all",
1540                         .mode           = 0555,
1541                         .child          = devinet_sysctl.devinet_vars,
1542                 },
1543         },
1544         .devinet_conf_dir = {
1545                 {
1546                         .ctl_name       = NET_IPV4_CONF,
1547                         .procname       = "conf",
1548                         .mode           = 0555,
1549                         .child          = devinet_sysctl.devinet_dev,
1550                 },
1551         },
1552         .devinet_proto_dir = {
1553                 {
1554                         .ctl_name       = NET_IPV4,
1555                         .procname       = "ipv4",
1556                         .mode           = 0555,
1557                         .child          = devinet_sysctl.devinet_conf_dir,
1558                 },
1559         },
1560         .devinet_root_dir = {
1561                 {
1562                         .ctl_name       = CTL_NET,
1563                         .procname       = "net",
1564                         .mode           = 0555,
1565                         .child          = devinet_sysctl.devinet_proto_dir,
1566                 },
1567         },
1568 };
1569
1570 static void devinet_sysctl_register(struct in_device *in_dev,
1571                                     struct ipv4_devconf *p)
1572 {
1573         int i;
1574         struct net_device *dev = in_dev ? in_dev->dev : NULL;
1575         struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1576                                                  GFP_KERNEL);
1577         char *dev_name = NULL;
1578
1579         if (!t)
1580                 return;
1581         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1582                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1583         }
1584
1585         if (dev) {
1586                 dev_name = dev->name;
1587                 t->devinet_dev[0].ctl_name = dev->ifindex;
1588         } else {
1589                 dev_name = "default";
1590                 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1591         }
1592
1593         /*
1594          * Make a copy of dev_name, because '.procname' is regarded as const
1595          * by sysctl and we wouldn't want anyone to change it under our feet
1596          * (see SIOCSIFNAME).
1597          */
1598         dev_name = kstrdup(dev_name, GFP_KERNEL);
1599         if (!dev_name)
1600             goto free;
1601
1602         t->devinet_dev[0].procname    = dev_name;
1603         t->devinet_dev[0].child       = t->devinet_vars;
1604         t->devinet_conf_dir[0].child  = t->devinet_dev;
1605         t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1606         t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1607
1608         t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1609         if (!t->sysctl_header)
1610             goto free_procname;
1611
1612         p->sysctl = t;
1613         return;
1614
1615         /* error path */
1616  free_procname:
1617         kfree(dev_name);
1618  free:
1619         kfree(t);
1620         return;
1621 }
1622
1623 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1624 {
1625         if (p->sysctl) {
1626                 struct devinet_sysctl_table *t = p->sysctl;
1627                 p->sysctl = NULL;
1628                 unregister_sysctl_table(t->sysctl_header);
1629                 kfree(t->devinet_dev[0].procname);
1630                 kfree(t);
1631         }
1632 }
1633 #endif
1634
1635 void __init devinet_init(void)
1636 {
1637         register_gifconf(PF_INET, inet_gifconf);
1638         register_netdevice_notifier(&ip_netdev_notifier);
1639         rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1640 #ifdef CONFIG_SYSCTL
1641         devinet_sysctl.sysctl_header =
1642                 register_sysctl_table(devinet_sysctl.devinet_root_dir);
1643         devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1644 #endif
1645 }
1646
/* Symbols made available to modules. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);