Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/sched.h>
39 #include <linux/string.h>
40 #include <linux/mm.h>
41 #include <linux/socket.h>
42 #include <linux/sockios.h>
43 #include <linux/in.h>
44 #include <linux/errno.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_addr.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/notifier.h>
55 #include <linux/inetdevice.h>
56 #include <linux/igmp.h>
57 #ifdef CONFIG_SYSCTL
58 #include <linux/sysctl.h>
59 #endif
60 #include <linux/kmod.h>
61
62 #include <net/arp.h>
63 #include <net/ip.h>
64 #include <net/route.h>
65 #include <net/ip_fib.h>
66 #include <net/netlink.h>
67
/*
 * IPv4 device configuration instance with external linkage.  Only the four
 * fields listed here are set to 1; every other field is zero-initialised.
 */
68 struct ipv4_devconf ipv4_devconf = {
69         .accept_redirects = 1,
70         .send_redirects =  1,
71         .secure_redirects = 1,
72         .shared_media =   1,
73 };
74
/*
 * File-local configuration template: inetdev_init() copies it into the cnf
 * member of every in_device it creates.  Compared with ipv4_devconf it
 * additionally sets accept_source_route.
 */
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .accept_redirects =  1,
77         .send_redirects =    1,
78         .secure_redirects =  1,
79         .shared_media =      1,
80         .accept_source_route = 1,
81 };
82
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr().  The four address attributes are 32-bit values; the label
 * is a string whose length is limited to IFNAMSIZ - 1.
 */
83 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
84         [IFA_LOCAL]             = { .type = NLA_U32 },
85         [IFA_ADDRESS]           = { .type = NLA_U32 },
86         [IFA_BROADCAST]         = { .type = NLA_U32 },
87         [IFA_ANYCAST]           = { .type = NLA_U32 },
88         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
89 };
90
/*
 * Forward declaration; the definition is not in this part of the file.
 * It is called below with RTM_NEWADDR / RTM_DELADDR whenever an address is
 * inserted or removed.
 */
91 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
92
/*
 * Notifier chain called with NETDEV_UP after an address is linked in and
 * with NETDEV_DOWN after one is unlinked.
 */
93 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() uses it before its definition. */
94 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
95                          int destroy);
/* The sysctl hooks exist only when CONFIG_SYSCTL is enabled. */
96 #ifdef CONFIG_SYSCTL
97 static void devinet_sysctl_register(struct in_device *in_dev,
98                                     struct ipv4_devconf *p);
99 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
100 #endif
101
102 /* Locks all the inet devices. */
103
104 static struct in_ifaddr *inet_alloc_ifa(void)
105 {
106         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
107
108         if (ifa) {
109                 INIT_RCU_HEAD(&ifa->rcu_head);
110         }
111
112         return ifa;
113 }
114
115 static void inet_rcu_free_ifa(struct rcu_head *head)
116 {
117         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
118         if (ifa->ifa_dev)
119                 in_dev_put(ifa->ifa_dev);
120         kfree(ifa);
121 }
122
123 static inline void inet_free_ifa(struct in_ifaddr *ifa)
124 {
125         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
126 }
127
128 void in_dev_finish_destroy(struct in_device *idev)
129 {
130         struct net_device *dev = idev->dev;
131
132         BUG_TRAP(!idev->ifa_list);
133         BUG_TRAP(!idev->mc_list);
134 #ifdef NET_REFCNT_DEBUG
135         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
136                idev, dev ? dev->name : "NIL");
137 #endif
138         dev_put(dev);
139         if (!idev->dead)
140                 printk("Freeing alive in_device %p\n", idev);
141         else {
142                 kfree(idev);
143         }
144 }
145
146 struct in_device *inetdev_init(struct net_device *dev)
147 {
148         struct in_device *in_dev;
149
150         ASSERT_RTNL();
151
152         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
153         if (!in_dev)
154                 goto out;
155         INIT_RCU_HEAD(&in_dev->rcu_head);
156         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
157         in_dev->cnf.sysctl = NULL;
158         in_dev->dev = dev;
159         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
160                 goto out_kfree;
161         /* Reference in_dev->dev */
162         dev_hold(dev);
163 #ifdef CONFIG_SYSCTL
164         neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
165                               NET_IPV4_NEIGH, "ipv4", NULL, NULL);
166 #endif
167
168         /* Account for reference dev->ip_ptr (below) */
169         in_dev_hold(in_dev);
170
171 #ifdef CONFIG_SYSCTL
172         devinet_sysctl_register(in_dev, &in_dev->cnf);
173 #endif
174         ip_mc_init_dev(in_dev);
175         if (dev->flags & IFF_UP)
176                 ip_mc_up(in_dev);
177
178         /* we can receive as soon as ip_ptr is set -- do this last */
179         rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181         return in_dev;
182 out_kfree:
183         kfree(in_dev);
184         in_dev = NULL;
185         goto out;
186 }
187
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190         struct in_device *idev = container_of(head, struct in_device, rcu_head);
191         in_dev_put(idev);
192 }
193
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196         struct in_ifaddr *ifa;
197         struct net_device *dev;
198
199         ASSERT_RTNL();
200
201         dev = in_dev->dev;
202         if (dev == &loopback_dev)
203                 return;
204
205         in_dev->dead = 1;
206
207         ip_mc_destroy_dev(in_dev);
208
209         while ((ifa = in_dev->ifa_list) != NULL) {
210                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
211                 inet_free_ifa(ifa);
212         }
213
214 #ifdef CONFIG_SYSCTL
215         devinet_sysctl_unregister(&in_dev->cnf);
216 #endif
217
218         dev->ip_ptr = NULL;
219
220 #ifdef CONFIG_SYSCTL
221         neigh_sysctl_unregister(in_dev->arp_parms);
222 #endif
223         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
224         arp_ifdown(dev);
225
226         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
227 }
228
229 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
230 {
231         rcu_read_lock();
232         for_primary_ifa(in_dev) {
233                 if (inet_ifa_match(a, ifa)) {
234                         if (!b || inet_ifa_match(b, ifa)) {
235                                 rcu_read_unlock();
236                                 return 1;
237                         }
238                 }
239         } endfor_ifa(in_dev);
240         rcu_read_unlock();
241         return 0;
242 }
243
/*
 * Unlink the address *ifap from in_dev's address list and announce the
 * removal.  Must be called with the RTNL held.
 *
 * in_dev:  device owning the address list
 * ifap:    the link (list head or a predecessor's ifa_next) that currently
 *          points at the address to remove
 * destroy: when non-zero the removed address is freed and, if the list
 *          is then empty, the whole in_device is destroyed
 * nlh/pid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When the removed address is a primary one, its secondaries (same mask,
 * same subnet) are handled first: either each is deleted and announced, or,
 * if IN_DEV_PROMOTE_SECONDARIES() is set, the first one is remembered and
 * promoted to primary after the deletion has been announced.
 */
244 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
245                          int destroy, struct nlmsghdr *nlh, u32 pid)
246 {
247         struct in_ifaddr *promote = NULL;
248         struct in_ifaddr *ifa, *ifa1 = *ifap;
249         struct in_ifaddr *last_prim = in_dev->ifa_list;
250         struct in_ifaddr *prev_prom = NULL;
251         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
252
253         ASSERT_RTNL();
254
255         /* 1. Deleting a primary ifaddr forces deletion of all its
256          * secondaries unless alias promotion is set
257          **/
258
259         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
260                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
261
262                 while ((ifa = *ifap1) != NULL) {
                            /* Remember the last primary seen whose scope
                             * value is not below ifa1's. */
263                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) && 
264                             ifa1->ifa_scope <= ifa->ifa_scope)
265                                 last_prim = ifa;
266
                            /* Skip entries that are not secondaries of
                             * ifa1's subnet. */
267                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
268                             ifa1->ifa_mask != ifa->ifa_mask ||
269                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
270                                 ifap1 = &ifa->ifa_next;
271                                 prev_prom = ifa;
272                                 continue;
273                         }
274
275                         if (!do_promote) {
276                                 *ifap1 = ifa->ifa_next;
277
278                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
279                                 blocking_notifier_call_chain(&inetaddr_chain,
280                                                 NETDEV_DOWN, ifa);
281                                 inet_free_ifa(ifa);
282                         } else {
283                                 promote = ifa;
284                                 break;
285                         }
286                 }
287         }
288
289         /* 2. Unlink it */
290
291         *ifap = ifa1->ifa_next;
292
293         /* 3. Announce address deletion */
294
295         /* Send the netlink message first, then call the notifier.
296            At first sight, the FIB update triggered by the notifier
297            will refer to an already deleted ifaddr, which could confuse
298            netlink listeners. That is not so: gated sees that a route
299            was deleted and, if it still thinks the ifaddr is valid, it
300            will try to restore the deleted routes.
301            Therefore this order is correct.
302          */
303         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
304         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
305
306         if (promote) {
307
                    /* Move the promoted address from its current place to
                     * just behind last_prim.
                     * NOTE(review): last_prim starts out as the list head,
                     * which can be ifa1 itself; if no later primary replaced
                     * it, this relinking writes through the entry that was
                     * just unlinked - confirm that case is handled. */
308                 if (prev_prom) {
309                         prev_prom->ifa_next = promote->ifa_next;
310                         promote->ifa_next = last_prim->ifa_next;
311                         last_prim->ifa_next = promote;
312                 }
313
314                 promote->ifa_flags &= ~IFA_F_SECONDARY;
315                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
316                 blocking_notifier_call_chain(&inetaddr_chain,
317                                 NETDEV_UP, promote);
                    /* Re-add the entries that follow the promoted address
                     * and belong to ifa1's subnet. */
318                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
319                         if (ifa1->ifa_mask != ifa->ifa_mask ||
320                             !inet_ifa_match(ifa1->ifa_address, ifa))
321                                         continue;
322                         fib_add_ifaddr(ifa);
323                 }
324
325         }
326         if (destroy) {
327                 inet_free_ifa(ifa1);
328
329                 if (!in_dev->ifa_list)
330                         inetdev_destroy(in_dev);
331         }
332 }
333
334 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335                          int destroy)
336 {
337         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 }
339
340 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341                              u32 pid)
342 {
343         struct in_device *in_dev = ifa->ifa_dev;
344         struct in_ifaddr *ifa1, **ifap, **last_primary;
345
346         ASSERT_RTNL();
347
348         if (!ifa->ifa_local) {
349                 inet_free_ifa(ifa);
350                 return 0;
351         }
352
353         ifa->ifa_flags &= ~IFA_F_SECONDARY;
354         last_primary = &in_dev->ifa_list;
355
356         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
357              ifap = &ifa1->ifa_next) {
358                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
359                     ifa->ifa_scope <= ifa1->ifa_scope)
360                         last_primary = &ifa1->ifa_next;
361                 if (ifa1->ifa_mask == ifa->ifa_mask &&
362                     inet_ifa_match(ifa1->ifa_address, ifa)) {
363                         if (ifa1->ifa_local == ifa->ifa_local) {
364                                 inet_free_ifa(ifa);
365                                 return -EEXIST;
366                         }
367                         if (ifa1->ifa_scope != ifa->ifa_scope) {
368                                 inet_free_ifa(ifa);
369                                 return -EINVAL;
370                         }
371                         ifa->ifa_flags |= IFA_F_SECONDARY;
372                 }
373         }
374
375         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
376                 net_srandom(ifa->ifa_local);
377                 ifap = last_primary;
378         }
379
380         ifa->ifa_next = *ifap;
381         *ifap = ifa;
382
383         /* Send message first, then call notifier.
384            Notifier will trigger FIB update, so that
385            listeners of netlink will know about new ifaddr */
386         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
387         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
388
389         return 0;
390 }
391
392 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 {
394         return __inet_insert_ifa(ifa, NULL, 0);
395 }
396
397 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 {
399         struct in_device *in_dev = __in_dev_get_rtnl(dev);
400
401         ASSERT_RTNL();
402
403         if (!in_dev) {
404                 in_dev = inetdev_init(dev);
405                 if (!in_dev) {
406                         inet_free_ifa(ifa);
407                         return -ENOBUFS;
408                 }
409         }
410         if (ifa->ifa_dev != in_dev) {
411                 BUG_TRAP(!ifa->ifa_dev);
412                 in_dev_hold(in_dev);
413                 ifa->ifa_dev = in_dev;
414         }
415         if (LOOPBACK(ifa->ifa_local))
416                 ifa->ifa_scope = RT_SCOPE_HOST;
417         return inet_insert_ifa(ifa);
418 }
419
420 struct in_device *inetdev_by_index(int ifindex)
421 {
422         struct net_device *dev;
423         struct in_device *in_dev = NULL;
424         read_lock(&dev_base_lock);
425         dev = __dev_get_by_index(ifindex);
426         if (dev)
427                 in_dev = in_dev_get(dev);
428         read_unlock(&dev_base_lock);
429         return in_dev;
430 }
431
432 /* Called only from RTNL semaphored context. No locks. */
433
434 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
435                                     __be32 mask)
436 {
437         ASSERT_RTNL();
438
439         for_primary_ifa(in_dev) {
440                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
441                         return ifa;
442         } endfor_ifa(in_dev);
443         return NULL;
444 }
445
446 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
447 {
448         struct nlattr *tb[IFA_MAX+1];
449         struct in_device *in_dev;
450         struct ifaddrmsg *ifm;
451         struct in_ifaddr *ifa, **ifap;
452         int err = -EINVAL;
453
454         ASSERT_RTNL();
455
456         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
457         if (err < 0)
458                 goto errout;
459
460         ifm = nlmsg_data(nlh);
461         in_dev = inetdev_by_index(ifm->ifa_index);
462         if (in_dev == NULL) {
463                 err = -ENODEV;
464                 goto errout;
465         }
466
467         __in_dev_put(in_dev);
468
469         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
470              ifap = &ifa->ifa_next) {
471                 if (tb[IFA_LOCAL] &&
472                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
473                         continue;
474
475                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
476                         continue;
477
478                 if (tb[IFA_ADDRESS] &&
479                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
480                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
481                         continue;
482
483                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
484                 return 0;
485         }
486
487         err = -EADDRNOTAVAIL;
488 errout:
489         return err;
490 }
491
492 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
493 {
494         struct nlattr *tb[IFA_MAX+1];
495         struct in_ifaddr *ifa;
496         struct ifaddrmsg *ifm;
497         struct net_device *dev;
498         struct in_device *in_dev;
499         int err = -EINVAL;
500
501         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
502         if (err < 0)
503                 goto errout;
504
505         ifm = nlmsg_data(nlh);
506         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
507                 goto errout;
508
509         dev = __dev_get_by_index(ifm->ifa_index);
510         if (dev == NULL) {
511                 err = -ENODEV;
512                 goto errout;
513         }
514
515         in_dev = __in_dev_get_rtnl(dev);
516         if (in_dev == NULL) {
517                 in_dev = inetdev_init(dev);
518                 if (in_dev == NULL) {
519                         err = -ENOBUFS;
520                         goto errout;
521                 }
522         }
523
524         ifa = inet_alloc_ifa();
525         if (ifa == NULL) {
526                 /*
527                  * A potential indev allocation can be left alive, it stays
528                  * assigned to its device and is destroy with it.
529                  */
530                 err = -ENOBUFS;
531                 goto errout;
532         }
533
534         in_dev_hold(in_dev);
535
536         if (tb[IFA_ADDRESS] == NULL)
537                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
538
539         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
540         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
541         ifa->ifa_flags = ifm->ifa_flags;
542         ifa->ifa_scope = ifm->ifa_scope;
543         ifa->ifa_dev = in_dev;
544
545         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
546         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
547
548         if (tb[IFA_BROADCAST])
549                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
550
551         if (tb[IFA_ANYCAST])
552                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
553
554         if (tb[IFA_LABEL])
555                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
556         else
557                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
558
559         return ifa;
560
561 errout:
562         return ERR_PTR(err);
563 }
564
565 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
566 {
567         struct in_ifaddr *ifa;
568
569         ASSERT_RTNL();
570
571         ifa = rtm_to_ifaddr(nlh);
572         if (IS_ERR(ifa))
573                 return PTR_ERR(ifa);
574
575         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
576 }
577
578 /*
579  *      Determine a default network mask, based on the IP address.
580  */
581
582 static __inline__ int inet_abc_len(__be32 addr)
583 {
584         int rc = -1;    /* Something else, probably a multicast. */
585
586         if (ZERONET(addr))
587                 rc = 0;
588         else {
589                 __u32 haddr = ntohl(addr);
590
591                 if (IN_CLASSA(haddr))
592                         rc = 8;
593                 else if (IN_CLASSB(haddr))
594                         rc = 16;
595                 else if (IN_CLASSC(haddr))
596                         rc = 24;
597         }
598
599         return rc;
600 }
601
602
/*
 * Handle the IPv4 address ioctls: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 * SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} and SIOCSIFFLAGS.
 *
 * cmd: ioctl number; any other value yields -EINVAL
 * arg: user pointer to a struct ifreq; it is copied in, and copied back
 *      for the "get" commands
 *
 * A name of the form "dev:alias" selects the device "dev" and the address
 * labelled with the full name.  The "set" commands and SIOCSIFFLAGS
 * require CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno: -EFAULT, -EACCES, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() /
 * inet_set_ifa() report.
 */
603 int devinet_ioctl(unsigned int cmd, void __user *arg)
604 {
605         struct ifreq ifr;
606         struct sockaddr_in sin_orig;
607         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
608         struct in_device *in_dev;
609         struct in_ifaddr **ifap = NULL;
610         struct in_ifaddr *ifa = NULL;
611         struct net_device *dev;
612         char *colon;
613         int ret = -EFAULT;
614         int tryaddrmatch = 0;
615
616         /*
617          *      Fetch the caller's info block into kernel space
618          */
619
620         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
621                 goto out;
622         ifr.ifr_name[IFNAMSIZ - 1] = 0;
623
624         /* save original address for comparison */
625         memcpy(&sin_orig, sin, sizeof(*sin));
626
            /* Cut the name at the alias separator so that only the device
               part is used for the lookups below; it is restored later. */
627         colon = strchr(ifr.ifr_name, ':');
628         if (colon)
629                 *colon = 0;
630
631 #ifdef CONFIG_KMOD
632         dev_load(ifr.ifr_name);
633 #endif
634
            /* First pass: permission and argument checks, before the RTNL
               is taken. */
635         switch(cmd) {
636         case SIOCGIFADDR:       /* Get interface address */
637         case SIOCGIFBRDADDR:    /* Get the broadcast address */
638         case SIOCGIFDSTADDR:    /* Get the destination address */
639         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
640                 /* Note that these ioctls will not sleep,
641                    so that we do not impose a lock.
642                    One day we will be forced to put shlock here (I mean SMP)
643                  */
644                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
645                 memset(sin, 0, sizeof(*sin));
646                 sin->sin_family = AF_INET;
647                 break;
648
649         case SIOCSIFFLAGS:
650                 ret = -EACCES;
651                 if (!capable(CAP_NET_ADMIN))
652                         goto out;
653                 break;
654         case SIOCSIFADDR:       /* Set interface address (and family) */
655         case SIOCSIFBRDADDR:    /* Set the broadcast address */
656         case SIOCSIFDSTADDR:    /* Set the destination address */
657         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
658                 ret = -EACCES;
659                 if (!capable(CAP_NET_ADMIN))
660                         goto out;
661                 ret = -EINVAL;
662                 if (sin->sin_family != AF_INET)
663                         goto out;
664                 break;
665         default:
666                 ret = -EINVAL;
667                 goto out;
668         }
669
670         rtnl_lock();
671
672         ret = -ENODEV;
673         if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
674                 goto done;
675
            /* Restore the full "dev:alias" name for the label comparisons. */
676         if (colon)
677                 *colon = ':';
678
679         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
680                 if (tryaddrmatch) {
681                         /* Matthias Andree */
682                         /* compare label and address (4.4BSD style) */
683                         /* note: we only do this for a limited set of ioctls
684                            and only if the original address family was AF_INET.
685                            This is checked above. */
686                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
687                              ifap = &ifa->ifa_next) {
688                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
689                                     sin_orig.sin_addr.s_addr ==
690                                                         ifa->ifa_address) {
691                                         break; /* found */
692                                 }
693                         }
694                 }
695                 /* we didn't get a match, maybe the application is
696                    4.3BSD-style and passed in junk so we fall back to
697                    comparing just the label */
698                 if (!ifa) {
699                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
700                              ifap = &ifa->ifa_next)
701                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
702                                         break;
703                 }
704         }
705
            /* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
               existing address. */
706         ret = -EADDRNOTAVAIL;
707         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
708                 goto done;
709
710         switch(cmd) {
711         case SIOCGIFADDR:       /* Get interface address */
712                 sin->sin_addr.s_addr = ifa->ifa_local;
713                 goto rarok;
714
715         case SIOCGIFBRDADDR:    /* Get the broadcast address */
716                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
717                 goto rarok;
718
719         case SIOCGIFDSTADDR:    /* Get the destination address */
720                 sin->sin_addr.s_addr = ifa->ifa_address;
721                 goto rarok;
722
723         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
724                 sin->sin_addr.s_addr = ifa->ifa_mask;
725                 goto rarok;
726
727         case SIOCSIFFLAGS:
                    /* With an alias name, clearing IFF_UP deletes that
                       address; without one the flags go to the device. */
728                 if (colon) {
729                         ret = -EADDRNOTAVAIL;
730                         if (!ifa)
731                                 break;
732                         ret = 0;
733                         if (!(ifr.ifr_flags & IFF_UP))
734                                 inet_del_ifa(in_dev, ifap, 1);
735                         break;
736                 }
737                 ret = dev_change_flags(dev, ifr.ifr_flags);
738                 break;
739
740         case SIOCSIFADDR:       /* Set interface address (and family) */
741                 ret = -EINVAL;
742                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
743                         break;
744
745                 if (!ifa) {
746                         ret = -ENOBUFS;
747                         if ((ifa = inet_alloc_ifa()) == NULL)
748                                 break;
749                         if (colon)
750                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
751                         else
752                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
753                 } else {
754                         ret = 0;
755                         if (ifa->ifa_local == sin->sin_addr.s_addr)
756                                 break;
                            /* Unlink without freeing; the same structure
                               is updated and re-inserted below. */
757                         inet_del_ifa(in_dev, ifap, 0);
758                         ifa->ifa_broadcast = 0;
759                         ifa->ifa_anycast = 0;
760                 }
761
762                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
763
                    /* Classful default mask, or /32 on point-to-point
                       devices. */
764                 if (!(dev->flags & IFF_POINTOPOINT)) {
765                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
766                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
767                         if ((dev->flags & IFF_BROADCAST) &&
768                             ifa->ifa_prefixlen < 31)
769                                 ifa->ifa_broadcast = ifa->ifa_address |
770                                                      ~ifa->ifa_mask;
771                 } else {
772                         ifa->ifa_prefixlen = 32;
773                         ifa->ifa_mask = inet_make_mask(32);
774                 }
775                 ret = inet_set_ifa(dev, ifa);
776                 break;
777
778         case SIOCSIFBRDADDR:    /* Set the broadcast address */
779                 ret = 0;
780                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
781                         inet_del_ifa(in_dev, ifap, 0);
782                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
783                         inet_insert_ifa(ifa);
784                 }
785                 break;
786
787         case SIOCSIFDSTADDR:    /* Set the destination address */
788                 ret = 0;
789                 if (ifa->ifa_address == sin->sin_addr.s_addr)
790                         break;
791                 ret = -EINVAL;
792                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
793                         break;
794                 ret = 0;
795                 inet_del_ifa(in_dev, ifap, 0);
796                 ifa->ifa_address = sin->sin_addr.s_addr;
797                 inet_insert_ifa(ifa);
798                 break;
799
800         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
801
802                 /*
803                  *      The mask we set must be legal.
804                  */
805                 ret = -EINVAL;
806                 if (bad_mask(sin->sin_addr.s_addr, 0))
807                         break;
808                 ret = 0;
809                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
810                         __be32 old_mask = ifa->ifa_mask;
811                         inet_del_ifa(in_dev, ifap, 0);
812                         ifa->ifa_mask = sin->sin_addr.s_addr;
813                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
814
815                         /* If the current broadcast address is the one
816                          * derived from the old netmask, recalculate
817                          * it for the new mask. Otherwise it's a
818                          * funny address, so don't touch it since
819                          * the user seems to know what (s)he's doing...
820                          */
821                         if ((dev->flags & IFF_BROADCAST) &&
822                             (ifa->ifa_prefixlen < 31) &&
823                             (ifa->ifa_broadcast ==
824                              (ifa->ifa_local|~old_mask))) {
825                                 ifa->ifa_broadcast = (ifa->ifa_local |
826                                                       ~sin->sin_addr.s_addr);
827                         }
828                         inet_insert_ifa(ifa);
829                 }
830                 break;
831         }
832 done:
833         rtnl_unlock();
834 out:
835         return ret;
    /* "Get" commands end here: release the RTNL, then copy the result out. */
836 rarok:
837         rtnl_unlock();
838         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
839         goto out;
840 }
841
842 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
843 {
844         struct in_device *in_dev = __in_dev_get_rtnl(dev);
845         struct in_ifaddr *ifa;
846         struct ifreq ifr;
847         int done = 0;
848
849         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
850                 goto out;
851
852         for (; ifa; ifa = ifa->ifa_next) {
853                 if (!buf) {
854                         done += sizeof(ifr);
855                         continue;
856                 }
857                 if (len < (int) sizeof(ifr))
858                         break;
859                 memset(&ifr, 0, sizeof(struct ifreq));
860                 if (ifa->ifa_label)
861                         strcpy(ifr.ifr_name, ifa->ifa_label);
862                 else
863                         strcpy(ifr.ifr_name, dev->name);
864
865                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
866                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
867                                                                 ifa->ifa_local;
868
869                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
870                         done = -EFAULT;
871                         break;
872                 }
873                 buf  += sizeof(struct ifreq);
874                 len  -= sizeof(struct ifreq);
875                 done += sizeof(struct ifreq);
876         }
877 out:
878         return done;
879 }
880
881 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
882 {
883         __be32 addr = 0;
884         struct in_device *in_dev;
885
886         rcu_read_lock();
887         in_dev = __in_dev_get_rcu(dev);
888         if (!in_dev)
889                 goto no_in_dev;
890
891         for_primary_ifa(in_dev) {
892                 if (ifa->ifa_scope > scope)
893                         continue;
894                 if (!dst || inet_ifa_match(dst, ifa)) {
895                         addr = ifa->ifa_local;
896                         break;
897                 }
898                 if (!addr)
899                         addr = ifa->ifa_local;
900         } endfor_ifa(in_dev);
901 no_in_dev:
902         rcu_read_unlock();
903
904         if (addr)
905                 goto out;
906
907         /* Not loopback addresses on loopback should be preferred
908            in this case. It is importnat that lo is the first interface
909            in dev_base list.
910          */
911         read_lock(&dev_base_lock);
912         rcu_read_lock();
913         for (dev = dev_base; dev; dev = dev->next) {
914                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
915                         continue;
916
917                 for_primary_ifa(in_dev) {
918                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
919                             ifa->ifa_scope <= scope) {
920                                 addr = ifa->ifa_local;
921                                 goto out_unlock_both;
922                         }
923                 } endfor_ifa(in_dev);
924         }
925 out_unlock_both:
926         read_unlock(&dev_base_lock);
927         rcu_read_unlock();
928 out:
929         return addr;
930 }
931
932 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
933                               __be32 local, int scope)
934 {
935         int same = 0;
936         __be32 addr = 0;
937
938         for_ifa(in_dev) {
939                 if (!addr &&
940                     (local == ifa->ifa_local || !local) &&
941                     ifa->ifa_scope <= scope) {
942                         addr = ifa->ifa_local;
943                         if (same)
944                                 break;
945                 }
946                 if (!same) {
947                         same = (!local || inet_ifa_match(local, ifa)) &&
948                                 (!dst || inet_ifa_match(dst, ifa));
949                         if (same && addr) {
950                                 if (local || !dst)
951                                         break;
952                                 /* Is the selected addr into dst subnet? */
953                                 if (inet_ifa_match(addr, ifa))
954                                         break;
955                                 /* No, then can we use new local src? */
956                                 if (ifa->ifa_scope <= scope) {
957                                         addr = ifa->ifa_local;
958                                         break;
959                                 }
960                                 /* search for large dst subnet for addr */
961                                 same = 0;
962                         }
963                 }
964         } endfor_ifa(in_dev);
965
966         return same? addr : 0;
967 }
968
969 /*
970  * Confirm that local IP address exists using wildcards:
971  * - dev: only on this interface, 0=any interface
972  * - dst: only in the same subnet as dst, 0=any dst
973  * - local: address, 0=autoselect the local address
974  * - scope: maximum allowed scope value for the local address
975  */
976 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
977 {
978         __be32 addr = 0;
979         struct in_device *in_dev;
980
981         if (dev) {
982                 rcu_read_lock();
983                 if ((in_dev = __in_dev_get_rcu(dev)))
984                         addr = confirm_addr_indev(in_dev, dst, local, scope);
985                 rcu_read_unlock();
986
987                 return addr;
988         }
989
990         read_lock(&dev_base_lock);
991         rcu_read_lock();
992         for (dev = dev_base; dev; dev = dev->next) {
993                 if ((in_dev = __in_dev_get_rcu(dev))) {
994                         addr = confirm_addr_indev(in_dev, dst, local, scope);
995                         if (addr)
996                                 break;
997                 }
998         }
999         rcu_read_unlock();
1000         read_unlock(&dev_base_lock);
1001
1002         return addr;
1003 }
1004
1005 /*
1006  *      Device notifier
1007  */
1008
1009 int register_inetaddr_notifier(struct notifier_block *nb)
1010 {
1011         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1012 }
1013
1014 int unregister_inetaddr_notifier(struct notifier_block *nb)
1015 {
1016         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1017 }
1018
1019 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1020  * alias numbering and to create unique labels if possible.
1021 */
1022 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1023
1024         struct in_ifaddr *ifa;
1025         int named = 0;
1026
1027         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 
1028                 char old[IFNAMSIZ], *dot; 
1029
1030                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1031                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); 
1032                 if (named++ == 0)
1033                         continue;
1034                 dot = strchr(ifa->ifa_label, ':');
1035                 if (dot == NULL) { 
1036                         sprintf(old, ":%d", named); 
1037                         dot = old;
1038                 }
1039                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { 
1040                         strcat(ifa->ifa_label, dot); 
1041                 } else { 
1042                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); 
1043                 } 
1044         }       
1045
1046
1047 /* Called only under RTNL semaphore */
1048
1049 static int inetdev_event(struct notifier_block *this, unsigned long event,
1050                          void *ptr)
1051 {
1052         struct net_device *dev = ptr;
1053         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1054
1055         ASSERT_RTNL();
1056
1057         if (!in_dev) {
1058                 if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1059                         in_dev = inetdev_init(dev);
1060                         if (!in_dev)
1061                                 panic("devinet: Failed to create loopback\n");
1062                         in_dev->cnf.no_xfrm = 1;
1063                         in_dev->cnf.no_policy = 1;
1064                 }
1065                 goto out;
1066         }
1067
1068         switch (event) {
1069         case NETDEV_REGISTER:
1070                 printk(KERN_DEBUG "inetdev_event: bug\n");
1071                 dev->ip_ptr = NULL;
1072                 break;
1073         case NETDEV_UP:
1074                 if (dev->mtu < 68)
1075                         break;
1076                 if (dev == &loopback_dev) {
1077                         struct in_ifaddr *ifa;
1078                         if ((ifa = inet_alloc_ifa()) != NULL) {
1079                                 ifa->ifa_local =
1080                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1081                                 ifa->ifa_prefixlen = 8;
1082                                 ifa->ifa_mask = inet_make_mask(8);
1083                                 in_dev_hold(in_dev);
1084                                 ifa->ifa_dev = in_dev;
1085                                 ifa->ifa_scope = RT_SCOPE_HOST;
1086                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1087                                 inet_insert_ifa(ifa);
1088                         }
1089                 }
1090                 ip_mc_up(in_dev);
1091                 break;
1092         case NETDEV_DOWN:
1093                 ip_mc_down(in_dev);
1094                 break;
1095         case NETDEV_CHANGEMTU:
1096                 if (dev->mtu >= 68)
1097                         break;
1098                 /* MTU falled under 68, disable IP */
1099         case NETDEV_UNREGISTER:
1100                 inetdev_destroy(in_dev);
1101                 break;
1102         case NETDEV_CHANGENAME:
1103                 /* Do not notify about label change, this event is
1104                  * not interesting to applications using netlink.
1105                  */
1106                 inetdev_changename(dev, in_dev);
1107
1108 #ifdef CONFIG_SYSCTL
1109                 devinet_sysctl_unregister(&in_dev->cnf);
1110                 neigh_sysctl_unregister(in_dev->arp_parms);
1111                 neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1112                                       NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1113                 devinet_sysctl_register(in_dev, &in_dev->cnf);
1114 #endif
1115                 break;
1116         }
1117 out:
1118         return NOTIFY_DONE;
1119 }
1120
1121 static struct notifier_block ip_netdev_notifier = {
1122         .notifier_call =inetdev_event,
1123 };
1124
1125 static inline size_t inet_nlmsg_size(void)
1126 {
1127         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1128                + nla_total_size(4) /* IFA_ADDRESS */
1129                + nla_total_size(4) /* IFA_LOCAL */
1130                + nla_total_size(4) /* IFA_BROADCAST */
1131                + nla_total_size(4) /* IFA_ANYCAST */
1132                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1133 }
1134
1135 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1136                             u32 pid, u32 seq, int event, unsigned int flags)
1137 {
1138         struct ifaddrmsg *ifm;
1139         struct nlmsghdr  *nlh;
1140
1141         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1142         if (nlh == NULL)
1143                 return -EMSGSIZE;
1144
1145         ifm = nlmsg_data(nlh);
1146         ifm->ifa_family = AF_INET;
1147         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1148         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1149         ifm->ifa_scope = ifa->ifa_scope;
1150         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1151
1152         if (ifa->ifa_address)
1153                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1154
1155         if (ifa->ifa_local)
1156                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1157
1158         if (ifa->ifa_broadcast)
1159                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1160
1161         if (ifa->ifa_anycast)
1162                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1163
1164         if (ifa->ifa_label[0])
1165                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1166
1167         return nlmsg_end(skb, nlh);
1168
1169 nla_put_failure:
1170         nlmsg_cancel(skb, nlh);
1171         return -EMSGSIZE;
1172 }
1173
1174 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1175 {
1176         int idx, ip_idx;
1177         struct net_device *dev;
1178         struct in_device *in_dev;
1179         struct in_ifaddr *ifa;
1180         int s_ip_idx, s_idx = cb->args[0];
1181
1182         s_ip_idx = ip_idx = cb->args[1];
1183         read_lock(&dev_base_lock);
1184         for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1185                 if (idx < s_idx)
1186                         continue;
1187                 if (idx > s_idx)
1188                         s_ip_idx = 0;
1189                 rcu_read_lock();
1190                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1191                         rcu_read_unlock();
1192                         continue;
1193                 }
1194
1195                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1196                      ifa = ifa->ifa_next, ip_idx++) {
1197                         if (ip_idx < s_ip_idx)
1198                                 continue;
1199                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1200                                              cb->nlh->nlmsg_seq,
1201                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1202                                 rcu_read_unlock();
1203                                 goto done;
1204                         }
1205                 }
1206                 rcu_read_unlock();
1207         }
1208
1209 done:
1210         read_unlock(&dev_base_lock);
1211         cb->args[0] = idx;
1212         cb->args[1] = ip_idx;
1213
1214         return skb->len;
1215 }
1216
1217 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1218                       u32 pid)
1219 {
1220         struct sk_buff *skb;
1221         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1222         int err = -ENOBUFS;
1223
1224         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1225         if (skb == NULL)
1226                 goto errout;
1227
1228         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1229         if (err < 0) {
1230                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1231                 WARN_ON(err == -EMSGSIZE);
1232                 kfree_skb(skb);
1233                 goto errout;
1234         }
1235         err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1236 errout:
1237         if (err < 0)
1238                 rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1239 }
1240
1241 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1242         [RTM_NEWADDR  - RTM_BASE] = { .doit     = inet_rtm_newaddr,     },
1243         [RTM_DELADDR  - RTM_BASE] = { .doit     = inet_rtm_deladdr,     },
1244         [RTM_GETADDR  - RTM_BASE] = { .dumpit   = inet_dump_ifaddr,     },
1245         [RTM_NEWROUTE - RTM_BASE] = { .doit     = inet_rtm_newroute,    },
1246         [RTM_DELROUTE - RTM_BASE] = { .doit     = inet_rtm_delroute,    },
1247         [RTM_GETROUTE - RTM_BASE] = { .doit     = inet_rtm_getroute,
1248                                       .dumpit   = inet_dump_fib,        },
1249 #ifdef CONFIG_IP_MULTIPLE_TABLES
1250         [RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib4_rules_dump,      },
1251 #endif
1252 };
1253
1254 #ifdef CONFIG_SYSCTL
1255
1256 void inet_forward_change(void)
1257 {
1258         struct net_device *dev;
1259         int on = ipv4_devconf.forwarding;
1260
1261         ipv4_devconf.accept_redirects = !on;
1262         ipv4_devconf_dflt.forwarding = on;
1263
1264         read_lock(&dev_base_lock);
1265         for (dev = dev_base; dev; dev = dev->next) {
1266                 struct in_device *in_dev;
1267                 rcu_read_lock();
1268                 in_dev = __in_dev_get_rcu(dev);
1269                 if (in_dev)
1270                         in_dev->cnf.forwarding = on;
1271                 rcu_read_unlock();
1272         }
1273         read_unlock(&dev_base_lock);
1274
1275         rt_cache_flush(0);
1276 }
1277
1278 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1279                                   struct file* filp, void __user *buffer,
1280                                   size_t *lenp, loff_t *ppos)
1281 {
1282         int *valp = ctl->data;
1283         int val = *valp;
1284         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1285
1286         if (write && *valp != val) {
1287                 if (valp == &ipv4_devconf.forwarding)
1288                         inet_forward_change();
1289                 else if (valp != &ipv4_devconf_dflt.forwarding)
1290                         rt_cache_flush(0);
1291         }
1292
1293         return ret;
1294 }
1295
1296 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1297                          struct file* filp, void __user *buffer,
1298                          size_t *lenp, loff_t *ppos)
1299 {
1300         int *valp = ctl->data;
1301         int val = *valp;
1302         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1303
1304         if (write && *valp != val)
1305                 rt_cache_flush(0);
1306
1307         return ret;
1308 }
1309
1310 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1311                                   void __user *oldval, size_t __user *oldlenp,
1312                                   void __user *newval, size_t newlen)
1313 {
1314         int *valp = table->data;
1315         int new;
1316
1317         if (!newval || !newlen)
1318                 return 0;
1319
1320         if (newlen != sizeof(int))
1321                 return -EINVAL;
1322
1323         if (get_user(new, (int __user *)newval))
1324                 return -EFAULT;
1325
1326         if (new == *valp)
1327                 return 0;
1328
1329         if (oldval && oldlenp) {
1330                 size_t len;
1331
1332                 if (get_user(len, oldlenp))
1333                         return -EFAULT;
1334
1335                 if (len) {
1336                         if (len > table->maxlen)
1337                                 len = table->maxlen;
1338                         if (copy_to_user(oldval, valp, len))
1339                                 return -EFAULT;
1340                         if (put_user(len, oldlenp))
1341                                 return -EFAULT;
1342                 }
1343         }
1344
1345         *valp = new;
1346         rt_cache_flush(0);
1347         return 1;
1348 }
1349
1350
1351 static struct devinet_sysctl_table {
1352         struct ctl_table_header *sysctl_header;
1353         ctl_table               devinet_vars[__NET_IPV4_CONF_MAX];
1354         ctl_table               devinet_dev[2];
1355         ctl_table               devinet_conf_dir[2];
1356         ctl_table               devinet_proto_dir[2];
1357         ctl_table               devinet_root_dir[2];
1358 } devinet_sysctl = {
1359         .devinet_vars = {
1360                 {
1361                         .ctl_name       = NET_IPV4_CONF_FORWARDING,
1362                         .procname       = "forwarding",
1363                         .data           = &ipv4_devconf.forwarding,
1364                         .maxlen         = sizeof(int),
1365                         .mode           = 0644,
1366                         .proc_handler   = &devinet_sysctl_forward,
1367                 },
1368                 {
1369                         .ctl_name       = NET_IPV4_CONF_MC_FORWARDING,
1370                         .procname       = "mc_forwarding",
1371                         .data           = &ipv4_devconf.mc_forwarding,
1372                         .maxlen         = sizeof(int),
1373                         .mode           = 0444,
1374                         .proc_handler   = &proc_dointvec,
1375                 },
1376                 {
1377                         .ctl_name       = NET_IPV4_CONF_ACCEPT_REDIRECTS,
1378                         .procname       = "accept_redirects",
1379                         .data           = &ipv4_devconf.accept_redirects,
1380                         .maxlen         = sizeof(int),
1381                         .mode           = 0644,
1382                         .proc_handler   = &proc_dointvec,
1383                 },
1384                 {
1385                         .ctl_name       = NET_IPV4_CONF_SECURE_REDIRECTS,
1386                         .procname       = "secure_redirects",
1387                         .data           = &ipv4_devconf.secure_redirects,
1388                         .maxlen         = sizeof(int),
1389                         .mode           = 0644,
1390                         .proc_handler   = &proc_dointvec,
1391                 },
1392                 {
1393                         .ctl_name       = NET_IPV4_CONF_SHARED_MEDIA,
1394                         .procname       = "shared_media",
1395                         .data           = &ipv4_devconf.shared_media,
1396                         .maxlen         = sizeof(int),
1397                         .mode           = 0644,
1398                         .proc_handler   = &proc_dointvec,
1399                 },
1400                 {
1401                         .ctl_name       = NET_IPV4_CONF_RP_FILTER,
1402                         .procname       = "rp_filter",
1403                         .data           = &ipv4_devconf.rp_filter,
1404                         .maxlen         = sizeof(int),
1405                         .mode           = 0644,
1406                         .proc_handler   = &proc_dointvec,
1407                 },
1408                 {
1409                         .ctl_name       = NET_IPV4_CONF_SEND_REDIRECTS,
1410                         .procname       = "send_redirects",
1411                         .data           = &ipv4_devconf.send_redirects,
1412                         .maxlen         = sizeof(int),
1413                         .mode           = 0644,
1414                         .proc_handler   = &proc_dointvec,
1415                 },
1416                 {
1417                         .ctl_name       = NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1418                         .procname       = "accept_source_route",
1419                         .data           = &ipv4_devconf.accept_source_route,
1420                         .maxlen         = sizeof(int),
1421                         .mode           = 0644,
1422                         .proc_handler   = &proc_dointvec,
1423                 },
1424                 {
1425                         .ctl_name       = NET_IPV4_CONF_PROXY_ARP,
1426                         .procname       = "proxy_arp",
1427                         .data           = &ipv4_devconf.proxy_arp,
1428                         .maxlen         = sizeof(int),
1429                         .mode           = 0644,
1430                         .proc_handler   = &proc_dointvec,
1431                 },
1432                 {
1433                         .ctl_name       = NET_IPV4_CONF_MEDIUM_ID,
1434                         .procname       = "medium_id",
1435                         .data           = &ipv4_devconf.medium_id,
1436                         .maxlen         = sizeof(int),
1437                         .mode           = 0644,
1438                         .proc_handler   = &proc_dointvec,
1439                 },
1440                 {
1441                         .ctl_name       = NET_IPV4_CONF_BOOTP_RELAY,
1442                         .procname       = "bootp_relay",
1443                         .data           = &ipv4_devconf.bootp_relay,
1444                         .maxlen         = sizeof(int),
1445                         .mode           = 0644,
1446                         .proc_handler   = &proc_dointvec,
1447                 },
1448                 {
1449                         .ctl_name       = NET_IPV4_CONF_LOG_MARTIANS,
1450                         .procname       = "log_martians",
1451                         .data           = &ipv4_devconf.log_martians,
1452                         .maxlen         = sizeof(int),
1453                         .mode           = 0644,
1454                         .proc_handler   = &proc_dointvec,
1455                 },
1456                 {
1457                         .ctl_name       = NET_IPV4_CONF_TAG,
1458                         .procname       = "tag",
1459                         .data           = &ipv4_devconf.tag,
1460                         .maxlen         = sizeof(int),
1461                         .mode           = 0644,
1462                         .proc_handler   = &proc_dointvec,
1463                 },
1464                 {
1465                         .ctl_name       = NET_IPV4_CONF_ARPFILTER,
1466                         .procname       = "arp_filter",
1467                         .data           = &ipv4_devconf.arp_filter,
1468                         .maxlen         = sizeof(int),
1469                         .mode           = 0644,
1470                         .proc_handler   = &proc_dointvec,
1471                 },
1472                 {
1473                         .ctl_name       = NET_IPV4_CONF_ARP_ANNOUNCE,
1474                         .procname       = "arp_announce",
1475                         .data           = &ipv4_devconf.arp_announce,
1476                         .maxlen         = sizeof(int),
1477                         .mode           = 0644,
1478                         .proc_handler   = &proc_dointvec,
1479                 },
1480                 {
1481                         .ctl_name       = NET_IPV4_CONF_ARP_IGNORE,
1482                         .procname       = "arp_ignore",
1483                         .data           = &ipv4_devconf.arp_ignore,
1484                         .maxlen         = sizeof(int),
1485                         .mode           = 0644,
1486                         .proc_handler   = &proc_dointvec,
1487                 },
1488                 {
1489                         .ctl_name       = NET_IPV4_CONF_ARP_ACCEPT,
1490                         .procname       = "arp_accept",
1491                         .data           = &ipv4_devconf.arp_accept,
1492                         .maxlen         = sizeof(int),
1493                         .mode           = 0644,
1494                         .proc_handler   = &proc_dointvec,
1495                 },
1496                 {
1497                         .ctl_name       = NET_IPV4_CONF_NOXFRM,
1498                         .procname       = "disable_xfrm",
1499                         .data           = &ipv4_devconf.no_xfrm,
1500                         .maxlen         = sizeof(int),
1501                         .mode           = 0644,
1502                         .proc_handler   = &ipv4_doint_and_flush,
1503                         .strategy       = &ipv4_doint_and_flush_strategy,
1504                 },
1505                 {
1506                         .ctl_name       = NET_IPV4_CONF_NOPOLICY,
1507                         .procname       = "disable_policy",
1508                         .data           = &ipv4_devconf.no_policy,
1509                         .maxlen         = sizeof(int),
1510                         .mode           = 0644,
1511                         .proc_handler   = &ipv4_doint_and_flush,
1512                         .strategy       = &ipv4_doint_and_flush_strategy,
1513                 },
1514                 {
1515                         .ctl_name       = NET_IPV4_CONF_FORCE_IGMP_VERSION,
1516                         .procname       = "force_igmp_version",
1517                         .data           = &ipv4_devconf.force_igmp_version,
1518                         .maxlen         = sizeof(int),
1519                         .mode           = 0644,
1520                         .proc_handler   = &ipv4_doint_and_flush,
1521                         .strategy       = &ipv4_doint_and_flush_strategy,
1522                 },
1523                 {
1524                         .ctl_name       = NET_IPV4_CONF_PROMOTE_SECONDARIES,
1525                         .procname       = "promote_secondaries",
1526                         .data           = &ipv4_devconf.promote_secondaries,
1527                         .maxlen         = sizeof(int),
1528                         .mode           = 0644,
1529                         .proc_handler   = &ipv4_doint_and_flush,
1530                         .strategy       = &ipv4_doint_and_flush_strategy,
1531                 },
1532         },
1533         .devinet_dev = {
1534                 {
1535                         .ctl_name       = NET_PROTO_CONF_ALL,
1536                         .procname       = "all",
1537                         .mode           = 0555,
1538                         .child          = devinet_sysctl.devinet_vars,
1539                 },
1540         },
1541         .devinet_conf_dir = {
1542                 {
1543                         .ctl_name       = NET_IPV4_CONF,
1544                         .procname       = "conf",
1545                         .mode           = 0555,
1546                         .child          = devinet_sysctl.devinet_dev,
1547                 },
1548         },
1549         .devinet_proto_dir = {
1550                 {
1551                         .ctl_name       = NET_IPV4,
1552                         .procname       = "ipv4",
1553                         .mode           = 0555,
1554                         .child          = devinet_sysctl.devinet_conf_dir,
1555                 },
1556         },
1557         .devinet_root_dir = {
1558                 {
1559                         .ctl_name       = CTL_NET,
1560                         .procname       = "net",
1561                         .mode           = 0555,
1562                         .child          = devinet_sysctl.devinet_proto_dir,
1563                 },
1564         },
1565 };
1566
1567 static void devinet_sysctl_register(struct in_device *in_dev,
1568                                     struct ipv4_devconf *p)
1569 {
1570         int i;
1571         struct net_device *dev = in_dev ? in_dev->dev : NULL;
1572         struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1573                                                  GFP_KERNEL);
1574         char *dev_name = NULL;
1575
1576         if (!t)
1577                 return;
1578         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1579                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1580                 t->devinet_vars[i].de = NULL;
1581         }
1582
1583         if (dev) {
1584                 dev_name = dev->name; 
1585                 t->devinet_dev[0].ctl_name = dev->ifindex;
1586         } else {
1587                 dev_name = "default";
1588                 t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1589         }
1590
1591         /* 
1592          * Make a copy of dev_name, because '.procname' is regarded as const 
1593          * by sysctl and we wouldn't want anyone to change it under our feet
1594          * (see SIOCSIFNAME).
1595          */     
1596         dev_name = kstrdup(dev_name, GFP_KERNEL);
1597         if (!dev_name)
1598             goto free;
1599
1600         t->devinet_dev[0].procname    = dev_name;
1601         t->devinet_dev[0].child       = t->devinet_vars;
1602         t->devinet_dev[0].de          = NULL;
1603         t->devinet_conf_dir[0].child  = t->devinet_dev;
1604         t->devinet_conf_dir[0].de     = NULL;
1605         t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1606         t->devinet_proto_dir[0].de    = NULL;
1607         t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1608         t->devinet_root_dir[0].de     = NULL;
1609
1610         t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1611         if (!t->sysctl_header)
1612             goto free_procname;
1613
1614         p->sysctl = t;
1615         return;
1616
1617         /* error path */
1618  free_procname:
1619         kfree(dev_name);
1620  free:
1621         kfree(t);
1622         return;
1623 }
1624
1625 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1626 {
1627         if (p->sysctl) {
1628                 struct devinet_sysctl_table *t = p->sysctl;
1629                 p->sysctl = NULL;
1630                 unregister_sysctl_table(t->sysctl_header);
1631                 kfree(t->devinet_dev[0].procname);
1632                 kfree(t);
1633         }
1634 }
1635 #endif
1636
1637 void __init devinet_init(void)
1638 {
1639         register_gifconf(PF_INET, inet_gifconf);
1640         register_netdevice_notifier(&ip_netdev_notifier);
1641         rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1642 #ifdef CONFIG_SYSCTL
1643         devinet_sysctl.sysctl_header =
1644                 register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
1645         devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1646 #endif
1647 }
1648
1649 EXPORT_SYMBOL(in_dev_finish_destroy);
1650 EXPORT_SYMBOL(inet_select_addr);
1651 EXPORT_SYMBOL(inetdev_by_index);
1652 EXPORT_SYMBOL(register_inetaddr_notifier);
1653 EXPORT_SYMBOL(unregister_inetaddr_notifier);