Merge branch 'stealer/ipvs/sync-daemon-cleanup-for-next' of git://git.stealer.net...
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #ifdef CONFIG_SYSCTL
54 #include <linux/sysctl.h>
55 #endif
56 #include <linux/kmod.h>
57
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64
65 static struct ipv4_devconf ipv4_devconf = {
66         .data = {
67                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
68                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
69                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
71         },
72 };
73
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75         .data = {
76                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
78                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
80                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
81         },
82 };
83
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer expression (e.g. "p + 1")
 * can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
86
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88         [IFA_LOCAL]             = { .type = NLA_U32 },
89         [IFA_ADDRESS]           = { .type = NLA_U32 },
90         [IFA_BROADCAST]         = { .type = NLA_U32 },
91         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
92 };
93
94 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
95
96 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
97 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
98                          int destroy);
99 #ifdef CONFIG_SYSCTL
100 static void devinet_sysctl_register(struct in_device *idev);
101 static void devinet_sysctl_unregister(struct in_device *idev);
102 #else
103 static inline void devinet_sysctl_register(struct in_device *idev)
104 {
105 }
106 static inline void devinet_sysctl_unregister(struct in_device *idev)
107 {
108 }
109 #endif
110
111 /* Locks all the inet devices. */
112
113 static struct in_ifaddr *inet_alloc_ifa(void)
114 {
115         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
116
117         if (ifa) {
118                 INIT_RCU_HEAD(&ifa->rcu_head);
119         }
120
121         return ifa;
122 }
123
124 static void inet_rcu_free_ifa(struct rcu_head *head)
125 {
126         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
127         if (ifa->ifa_dev)
128                 in_dev_put(ifa->ifa_dev);
129         kfree(ifa);
130 }
131
132 static inline void inet_free_ifa(struct in_ifaddr *ifa)
133 {
134         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
135 }
136
137 void in_dev_finish_destroy(struct in_device *idev)
138 {
139         struct net_device *dev = idev->dev;
140
141         BUG_TRAP(!idev->ifa_list);
142         BUG_TRAP(!idev->mc_list);
143 #ifdef NET_REFCNT_DEBUG
144         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
145                idev, dev ? dev->name : "NIL");
146 #endif
147         dev_put(dev);
148         if (!idev->dead)
149                 printk("Freeing alive in_device %p\n", idev);
150         else {
151                 kfree(idev);
152         }
153 }
154
155 static struct in_device *inetdev_init(struct net_device *dev)
156 {
157         struct in_device *in_dev;
158
159         ASSERT_RTNL();
160
161         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
162         if (!in_dev)
163                 goto out;
164         INIT_RCU_HEAD(&in_dev->rcu_head);
165         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
166                         sizeof(in_dev->cnf));
167         in_dev->cnf.sysctl = NULL;
168         in_dev->dev = dev;
169         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
170                 goto out_kfree;
171         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
172                 dev_disable_lro(dev);
173         /* Reference in_dev->dev */
174         dev_hold(dev);
175         /* Account for reference dev->ip_ptr (below) */
176         in_dev_hold(in_dev);
177
178         devinet_sysctl_register(in_dev);
179         ip_mc_init_dev(in_dev);
180         if (dev->flags & IFF_UP)
181                 ip_mc_up(in_dev);
182
183         /* we can receive as soon as ip_ptr is set -- do this last */
184         rcu_assign_pointer(dev->ip_ptr, in_dev);
185 out:
186         return in_dev;
187 out_kfree:
188         kfree(in_dev);
189         in_dev = NULL;
190         goto out;
191 }
192
193 static void in_dev_rcu_put(struct rcu_head *head)
194 {
195         struct in_device *idev = container_of(head, struct in_device, rcu_head);
196         in_dev_put(idev);
197 }
198
199 static void inetdev_destroy(struct in_device *in_dev)
200 {
201         struct in_ifaddr *ifa;
202         struct net_device *dev;
203
204         ASSERT_RTNL();
205
206         dev = in_dev->dev;
207
208         in_dev->dead = 1;
209
210         ip_mc_destroy_dev(in_dev);
211
212         while ((ifa = in_dev->ifa_list) != NULL) {
213                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
214                 inet_free_ifa(ifa);
215         }
216
217         dev->ip_ptr = NULL;
218
219         devinet_sysctl_unregister(in_dev);
220         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
221         arp_ifdown(dev);
222
223         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
224 }
225
226 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
227 {
228         rcu_read_lock();
229         for_primary_ifa(in_dev) {
230                 if (inet_ifa_match(a, ifa)) {
231                         if (!b || inet_ifa_match(b, ifa)) {
232                                 rcu_read_unlock();
233                                 return 1;
234                         }
235                 }
236         } endfor_ifa(in_dev);
237         rcu_read_unlock();
238         return 0;
239 }
240
241 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
242                          int destroy, struct nlmsghdr *nlh, u32 pid)
243 {
244         struct in_ifaddr *promote = NULL;
245         struct in_ifaddr *ifa, *ifa1 = *ifap;
246         struct in_ifaddr *last_prim = in_dev->ifa_list;
247         struct in_ifaddr *prev_prom = NULL;
248         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
249
250         ASSERT_RTNL();
251
252         /* 1. Deleting primary ifaddr forces deletion all secondaries
253          * unless alias promotion is set
254          **/
255
256         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
257                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
258
259                 while ((ifa = *ifap1) != NULL) {
260                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
261                             ifa1->ifa_scope <= ifa->ifa_scope)
262                                 last_prim = ifa;
263
264                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
265                             ifa1->ifa_mask != ifa->ifa_mask ||
266                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
267                                 ifap1 = &ifa->ifa_next;
268                                 prev_prom = ifa;
269                                 continue;
270                         }
271
272                         if (!do_promote) {
273                                 *ifap1 = ifa->ifa_next;
274
275                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
276                                 blocking_notifier_call_chain(&inetaddr_chain,
277                                                 NETDEV_DOWN, ifa);
278                                 inet_free_ifa(ifa);
279                         } else {
280                                 promote = ifa;
281                                 break;
282                         }
283                 }
284         }
285
286         /* 2. Unlink it */
287
288         *ifap = ifa1->ifa_next;
289
290         /* 3. Announce address deletion */
291
292         /* Send message first, then call notifier.
293            At first sight, FIB update triggered by notifier
294            will refer to already deleted ifaddr, that could confuse
295            netlink listeners. It is not true: look, gated sees
296            that route deleted and if it still thinks that ifaddr
297            is valid, it will try to restore deleted routes... Grr.
298            So that, this order is correct.
299          */
300         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
301         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
302
303         if (promote) {
304
305                 if (prev_prom) {
306                         prev_prom->ifa_next = promote->ifa_next;
307                         promote->ifa_next = last_prim->ifa_next;
308                         last_prim->ifa_next = promote;
309                 }
310
311                 promote->ifa_flags &= ~IFA_F_SECONDARY;
312                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
313                 blocking_notifier_call_chain(&inetaddr_chain,
314                                 NETDEV_UP, promote);
315                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
316                         if (ifa1->ifa_mask != ifa->ifa_mask ||
317                             !inet_ifa_match(ifa1->ifa_address, ifa))
318                                         continue;
319                         fib_add_ifaddr(ifa);
320                 }
321
322         }
323         if (destroy)
324                 inet_free_ifa(ifa1);
325 }
326
327 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
328                          int destroy)
329 {
330         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
331 }
332
333 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
334                              u32 pid)
335 {
336         struct in_device *in_dev = ifa->ifa_dev;
337         struct in_ifaddr *ifa1, **ifap, **last_primary;
338
339         ASSERT_RTNL();
340
341         if (!ifa->ifa_local) {
342                 inet_free_ifa(ifa);
343                 return 0;
344         }
345
346         ifa->ifa_flags &= ~IFA_F_SECONDARY;
347         last_primary = &in_dev->ifa_list;
348
349         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
350              ifap = &ifa1->ifa_next) {
351                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
352                     ifa->ifa_scope <= ifa1->ifa_scope)
353                         last_primary = &ifa1->ifa_next;
354                 if (ifa1->ifa_mask == ifa->ifa_mask &&
355                     inet_ifa_match(ifa1->ifa_address, ifa)) {
356                         if (ifa1->ifa_local == ifa->ifa_local) {
357                                 inet_free_ifa(ifa);
358                                 return -EEXIST;
359                         }
360                         if (ifa1->ifa_scope != ifa->ifa_scope) {
361                                 inet_free_ifa(ifa);
362                                 return -EINVAL;
363                         }
364                         ifa->ifa_flags |= IFA_F_SECONDARY;
365                 }
366         }
367
368         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
369                 net_srandom(ifa->ifa_local);
370                 ifap = last_primary;
371         }
372
373         ifa->ifa_next = *ifap;
374         *ifap = ifa;
375
376         /* Send message first, then call notifier.
377            Notifier will trigger FIB update, so that
378            listeners of netlink will know about new ifaddr */
379         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
380         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
381
382         return 0;
383 }
384
385 static int inet_insert_ifa(struct in_ifaddr *ifa)
386 {
387         return __inet_insert_ifa(ifa, NULL, 0);
388 }
389
390 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
391 {
392         struct in_device *in_dev = __in_dev_get_rtnl(dev);
393
394         ASSERT_RTNL();
395
396         if (!in_dev) {
397                 inet_free_ifa(ifa);
398                 return -ENOBUFS;
399         }
400         ipv4_devconf_setall(in_dev);
401         if (ifa->ifa_dev != in_dev) {
402                 BUG_TRAP(!ifa->ifa_dev);
403                 in_dev_hold(in_dev);
404                 ifa->ifa_dev = in_dev;
405         }
406         if (ipv4_is_loopback(ifa->ifa_local))
407                 ifa->ifa_scope = RT_SCOPE_HOST;
408         return inet_insert_ifa(ifa);
409 }
410
411 struct in_device *inetdev_by_index(struct net *net, int ifindex)
412 {
413         struct net_device *dev;
414         struct in_device *in_dev = NULL;
415         read_lock(&dev_base_lock);
416         dev = __dev_get_by_index(net, ifindex);
417         if (dev)
418                 in_dev = in_dev_get(dev);
419         read_unlock(&dev_base_lock);
420         return in_dev;
421 }
422
423 /* Called only from RTNL semaphored context. No locks. */
424
425 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
426                                     __be32 mask)
427 {
428         ASSERT_RTNL();
429
430         for_primary_ifa(in_dev) {
431                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
432                         return ifa;
433         } endfor_ifa(in_dev);
434         return NULL;
435 }
436
437 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
438 {
439         struct net *net = sock_net(skb->sk);
440         struct nlattr *tb[IFA_MAX+1];
441         struct in_device *in_dev;
442         struct ifaddrmsg *ifm;
443         struct in_ifaddr *ifa, **ifap;
444         int err = -EINVAL;
445
446         ASSERT_RTNL();
447
448         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
449         if (err < 0)
450                 goto errout;
451
452         ifm = nlmsg_data(nlh);
453         in_dev = inetdev_by_index(net, ifm->ifa_index);
454         if (in_dev == NULL) {
455                 err = -ENODEV;
456                 goto errout;
457         }
458
459         __in_dev_put(in_dev);
460
461         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
462              ifap = &ifa->ifa_next) {
463                 if (tb[IFA_LOCAL] &&
464                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
465                         continue;
466
467                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
468                         continue;
469
470                 if (tb[IFA_ADDRESS] &&
471                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
472                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
473                         continue;
474
475                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
476                 return 0;
477         }
478
479         err = -EADDRNOTAVAIL;
480 errout:
481         return err;
482 }
483
484 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
485 {
486         struct nlattr *tb[IFA_MAX+1];
487         struct in_ifaddr *ifa;
488         struct ifaddrmsg *ifm;
489         struct net_device *dev;
490         struct in_device *in_dev;
491         int err;
492
493         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
494         if (err < 0)
495                 goto errout;
496
497         ifm = nlmsg_data(nlh);
498         err = -EINVAL;
499         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
500                 goto errout;
501
502         dev = __dev_get_by_index(net, ifm->ifa_index);
503         err = -ENODEV;
504         if (dev == NULL)
505                 goto errout;
506
507         in_dev = __in_dev_get_rtnl(dev);
508         err = -ENOBUFS;
509         if (in_dev == NULL)
510                 goto errout;
511
512         ifa = inet_alloc_ifa();
513         if (ifa == NULL)
514                 /*
515                  * A potential indev allocation can be left alive, it stays
516                  * assigned to its device and is destroy with it.
517                  */
518                 goto errout;
519
520         ipv4_devconf_setall(in_dev);
521         in_dev_hold(in_dev);
522
523         if (tb[IFA_ADDRESS] == NULL)
524                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
525
526         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
527         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
528         ifa->ifa_flags = ifm->ifa_flags;
529         ifa->ifa_scope = ifm->ifa_scope;
530         ifa->ifa_dev = in_dev;
531
532         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
533         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
534
535         if (tb[IFA_BROADCAST])
536                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
537
538         if (tb[IFA_LABEL])
539                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
540         else
541                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
542
543         return ifa;
544
545 errout:
546         return ERR_PTR(err);
547 }
548
549 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
550 {
551         struct net *net = sock_net(skb->sk);
552         struct in_ifaddr *ifa;
553
554         ASSERT_RTNL();
555
556         ifa = rtm_to_ifaddr(net, nlh);
557         if (IS_ERR(ifa))
558                 return PTR_ERR(ifa);
559
560         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
561 }
562
563 /*
564  *      Determine a default network mask, based on the IP address.
565  */
566
567 static __inline__ int inet_abc_len(__be32 addr)
568 {
569         int rc = -1;    /* Something else, probably a multicast. */
570
571         if (ipv4_is_zeronet(addr))
572                 rc = 0;
573         else {
574                 __u32 haddr = ntohl(addr);
575
576                 if (IN_CLASSA(haddr))
577                         rc = 8;
578                 else if (IN_CLASSB(haddr))
579                         rc = 16;
580                 else if (IN_CLASSC(haddr))
581                         rc = 24;
582         }
583
584         return rc;
585 }
586
587
588 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
589 {
590         struct ifreq ifr;
591         struct sockaddr_in sin_orig;
592         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
593         struct in_device *in_dev;
594         struct in_ifaddr **ifap = NULL;
595         struct in_ifaddr *ifa = NULL;
596         struct net_device *dev;
597         char *colon;
598         int ret = -EFAULT;
599         int tryaddrmatch = 0;
600
601         /*
602          *      Fetch the caller's info block into kernel space
603          */
604
605         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
606                 goto out;
607         ifr.ifr_name[IFNAMSIZ - 1] = 0;
608
609         /* save original address for comparison */
610         memcpy(&sin_orig, sin, sizeof(*sin));
611
612         colon = strchr(ifr.ifr_name, ':');
613         if (colon)
614                 *colon = 0;
615
616 #ifdef CONFIG_KMOD
617         dev_load(net, ifr.ifr_name);
618 #endif
619
620         switch (cmd) {
621         case SIOCGIFADDR:       /* Get interface address */
622         case SIOCGIFBRDADDR:    /* Get the broadcast address */
623         case SIOCGIFDSTADDR:    /* Get the destination address */
624         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
625                 /* Note that these ioctls will not sleep,
626                    so that we do not impose a lock.
627                    One day we will be forced to put shlock here (I mean SMP)
628                  */
629                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
630                 memset(sin, 0, sizeof(*sin));
631                 sin->sin_family = AF_INET;
632                 break;
633
634         case SIOCSIFFLAGS:
635                 ret = -EACCES;
636                 if (!capable(CAP_NET_ADMIN))
637                         goto out;
638                 break;
639         case SIOCSIFADDR:       /* Set interface address (and family) */
640         case SIOCSIFBRDADDR:    /* Set the broadcast address */
641         case SIOCSIFDSTADDR:    /* Set the destination address */
642         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
643                 ret = -EACCES;
644                 if (!capable(CAP_NET_ADMIN))
645                         goto out;
646                 ret = -EINVAL;
647                 if (sin->sin_family != AF_INET)
648                         goto out;
649                 break;
650         default:
651                 ret = -EINVAL;
652                 goto out;
653         }
654
655         rtnl_lock();
656
657         ret = -ENODEV;
658         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
659                 goto done;
660
661         if (colon)
662                 *colon = ':';
663
664         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
665                 if (tryaddrmatch) {
666                         /* Matthias Andree */
667                         /* compare label and address (4.4BSD style) */
668                         /* note: we only do this for a limited set of ioctls
669                            and only if the original address family was AF_INET.
670                            This is checked above. */
671                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
672                              ifap = &ifa->ifa_next) {
673                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
674                                     sin_orig.sin_addr.s_addr ==
675                                                         ifa->ifa_address) {
676                                         break; /* found */
677                                 }
678                         }
679                 }
680                 /* we didn't get a match, maybe the application is
681                    4.3BSD-style and passed in junk so we fall back to
682                    comparing just the label */
683                 if (!ifa) {
684                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685                              ifap = &ifa->ifa_next)
686                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
687                                         break;
688                 }
689         }
690
691         ret = -EADDRNOTAVAIL;
692         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
693                 goto done;
694
695         switch (cmd) {
696         case SIOCGIFADDR:       /* Get interface address */
697                 sin->sin_addr.s_addr = ifa->ifa_local;
698                 goto rarok;
699
700         case SIOCGIFBRDADDR:    /* Get the broadcast address */
701                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
702                 goto rarok;
703
704         case SIOCGIFDSTADDR:    /* Get the destination address */
705                 sin->sin_addr.s_addr = ifa->ifa_address;
706                 goto rarok;
707
708         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
709                 sin->sin_addr.s_addr = ifa->ifa_mask;
710                 goto rarok;
711
712         case SIOCSIFFLAGS:
713                 if (colon) {
714                         ret = -EADDRNOTAVAIL;
715                         if (!ifa)
716                                 break;
717                         ret = 0;
718                         if (!(ifr.ifr_flags & IFF_UP))
719                                 inet_del_ifa(in_dev, ifap, 1);
720                         break;
721                 }
722                 ret = dev_change_flags(dev, ifr.ifr_flags);
723                 break;
724
725         case SIOCSIFADDR:       /* Set interface address (and family) */
726                 ret = -EINVAL;
727                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
728                         break;
729
730                 if (!ifa) {
731                         ret = -ENOBUFS;
732                         if ((ifa = inet_alloc_ifa()) == NULL)
733                                 break;
734                         if (colon)
735                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
736                         else
737                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
738                 } else {
739                         ret = 0;
740                         if (ifa->ifa_local == sin->sin_addr.s_addr)
741                                 break;
742                         inet_del_ifa(in_dev, ifap, 0);
743                         ifa->ifa_broadcast = 0;
744                         ifa->ifa_scope = 0;
745                 }
746
747                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
748
749                 if (!(dev->flags & IFF_POINTOPOINT)) {
750                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
751                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
752                         if ((dev->flags & IFF_BROADCAST) &&
753                             ifa->ifa_prefixlen < 31)
754                                 ifa->ifa_broadcast = ifa->ifa_address |
755                                                      ~ifa->ifa_mask;
756                 } else {
757                         ifa->ifa_prefixlen = 32;
758                         ifa->ifa_mask = inet_make_mask(32);
759                 }
760                 ret = inet_set_ifa(dev, ifa);
761                 break;
762
763         case SIOCSIFBRDADDR:    /* Set the broadcast address */
764                 ret = 0;
765                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
766                         inet_del_ifa(in_dev, ifap, 0);
767                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
768                         inet_insert_ifa(ifa);
769                 }
770                 break;
771
772         case SIOCSIFDSTADDR:    /* Set the destination address */
773                 ret = 0;
774                 if (ifa->ifa_address == sin->sin_addr.s_addr)
775                         break;
776                 ret = -EINVAL;
777                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
778                         break;
779                 ret = 0;
780                 inet_del_ifa(in_dev, ifap, 0);
781                 ifa->ifa_address = sin->sin_addr.s_addr;
782                 inet_insert_ifa(ifa);
783                 break;
784
785         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
786
787                 /*
788                  *      The mask we set must be legal.
789                  */
790                 ret = -EINVAL;
791                 if (bad_mask(sin->sin_addr.s_addr, 0))
792                         break;
793                 ret = 0;
794                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
795                         __be32 old_mask = ifa->ifa_mask;
796                         inet_del_ifa(in_dev, ifap, 0);
797                         ifa->ifa_mask = sin->sin_addr.s_addr;
798                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
799
800                         /* See if current broadcast address matches
801                          * with current netmask, then recalculate
802                          * the broadcast address. Otherwise it's a
803                          * funny address, so don't touch it since
804                          * the user seems to know what (s)he's doing...
805                          */
806                         if ((dev->flags & IFF_BROADCAST) &&
807                             (ifa->ifa_prefixlen < 31) &&
808                             (ifa->ifa_broadcast ==
809                              (ifa->ifa_local|~old_mask))) {
810                                 ifa->ifa_broadcast = (ifa->ifa_local |
811                                                       ~sin->sin_addr.s_addr);
812                         }
813                         inet_insert_ifa(ifa);
814                 }
815                 break;
816         }
817 done:
818         rtnl_unlock();
819 out:
820         return ret;
821 rarok:
822         rtnl_unlock();
823         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
824         goto out;
825 }
826
827 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
828 {
829         struct in_device *in_dev = __in_dev_get_rtnl(dev);
830         struct in_ifaddr *ifa;
831         struct ifreq ifr;
832         int done = 0;
833
834         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
835                 goto out;
836
837         for (; ifa; ifa = ifa->ifa_next) {
838                 if (!buf) {
839                         done += sizeof(ifr);
840                         continue;
841                 }
842                 if (len < (int) sizeof(ifr))
843                         break;
844                 memset(&ifr, 0, sizeof(struct ifreq));
845                 if (ifa->ifa_label)
846                         strcpy(ifr.ifr_name, ifa->ifa_label);
847                 else
848                         strcpy(ifr.ifr_name, dev->name);
849
850                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
851                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
852                                                                 ifa->ifa_local;
853
854                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
855                         done = -EFAULT;
856                         break;
857                 }
858                 buf  += sizeof(struct ifreq);
859                 len  -= sizeof(struct ifreq);
860                 done += sizeof(struct ifreq);
861         }
862 out:
863         return done;
864 }
865
866 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
867 {
868         __be32 addr = 0;
869         struct in_device *in_dev;
870         struct net *net = dev_net(dev);
871
872         rcu_read_lock();
873         in_dev = __in_dev_get_rcu(dev);
874         if (!in_dev)
875                 goto no_in_dev;
876
877         for_primary_ifa(in_dev) {
878                 if (ifa->ifa_scope > scope)
879                         continue;
880                 if (!dst || inet_ifa_match(dst, ifa)) {
881                         addr = ifa->ifa_local;
882                         break;
883                 }
884                 if (!addr)
885                         addr = ifa->ifa_local;
886         } endfor_ifa(in_dev);
887 no_in_dev:
888         rcu_read_unlock();
889
890         if (addr)
891                 goto out;
892
893         /* Not loopback addresses on loopback should be preferred
894            in this case. It is importnat that lo is the first interface
895            in dev_base list.
896          */
897         read_lock(&dev_base_lock);
898         rcu_read_lock();
899         for_each_netdev(net, dev) {
900                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
901                         continue;
902
903                 for_primary_ifa(in_dev) {
904                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
905                             ifa->ifa_scope <= scope) {
906                                 addr = ifa->ifa_local;
907                                 goto out_unlock_both;
908                         }
909                 } endfor_ifa(in_dev);
910         }
911 out_unlock_both:
912         read_unlock(&dev_base_lock);
913         rcu_read_unlock();
914 out:
915         return addr;
916 }
917
918 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
919                               __be32 local, int scope)
920 {
921         int same = 0;
922         __be32 addr = 0;
923
924         for_ifa(in_dev) {
925                 if (!addr &&
926                     (local == ifa->ifa_local || !local) &&
927                     ifa->ifa_scope <= scope) {
928                         addr = ifa->ifa_local;
929                         if (same)
930                                 break;
931                 }
932                 if (!same) {
933                         same = (!local || inet_ifa_match(local, ifa)) &&
934                                 (!dst || inet_ifa_match(dst, ifa));
935                         if (same && addr) {
936                                 if (local || !dst)
937                                         break;
938                                 /* Is the selected addr into dst subnet? */
939                                 if (inet_ifa_match(addr, ifa))
940                                         break;
941                                 /* No, then can we use new local src? */
942                                 if (ifa->ifa_scope <= scope) {
943                                         addr = ifa->ifa_local;
944                                         break;
945                                 }
946                                 /* search for large dst subnet for addr */
947                                 same = 0;
948                         }
949                 }
950         } endfor_ifa(in_dev);
951
952         return same? addr : 0;
953 }
954
955 /*
956  * Confirm that local IP address exists using wildcards:
957  * - in_dev: only on this interface, 0=any interface
958  * - dst: only in the same subnet as dst, 0=any dst
959  * - local: address, 0=autoselect the local address
960  * - scope: maximum allowed scope value for the local address
961  */
962 __be32 inet_confirm_addr(struct in_device *in_dev,
963                          __be32 dst, __be32 local, int scope)
964 {
965         __be32 addr = 0;
966         struct net_device *dev;
967         struct net *net;
968
969         if (scope != RT_SCOPE_LINK)
970                 return confirm_addr_indev(in_dev, dst, local, scope);
971
972         net = dev_net(in_dev->dev);
973         read_lock(&dev_base_lock);
974         rcu_read_lock();
975         for_each_netdev(net, dev) {
976                 if ((in_dev = __in_dev_get_rcu(dev))) {
977                         addr = confirm_addr_indev(in_dev, dst, local, scope);
978                         if (addr)
979                                 break;
980                 }
981         }
982         rcu_read_unlock();
983         read_unlock(&dev_base_lock);
984
985         return addr;
986 }
987
988 /*
989  *      Device notifier
990  */
991
992 int register_inetaddr_notifier(struct notifier_block *nb)
993 {
994         return blocking_notifier_chain_register(&inetaddr_chain, nb);
995 }
996
997 int unregister_inetaddr_notifier(struct notifier_block *nb)
998 {
999         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1000 }
1001
1002 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1003  * alias numbering and to create unique labels if possible.
1004 */
1005 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1006 {
1007         struct in_ifaddr *ifa;
1008         int named = 0;
1009
1010         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1011                 char old[IFNAMSIZ], *dot;
1012
1013                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1014                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1015                 if (named++ == 0)
1016                         goto skip;
1017                 dot = strchr(old, ':');
1018                 if (dot == NULL) {
1019                         sprintf(old, ":%d", named);
1020                         dot = old;
1021                 }
1022                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1023                         strcat(ifa->ifa_label, dot);
1024                 } else {
1025                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1026                 }
1027 skip:
1028                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1029         }
1030 }
1031
1032 /* Called only under RTNL semaphore */
1033
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035                          void *ptr)
1036 {
1037         struct net_device *dev = ptr;
1038         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039
1040         ASSERT_RTNL();
1041
1042         if (!in_dev) {
1043                 if (event == NETDEV_REGISTER) {
1044                         in_dev = inetdev_init(dev);
1045                         if (!in_dev)
1046                                 return notifier_from_errno(-ENOMEM);
1047                         if (dev->flags & IFF_LOOPBACK) {
1048                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050                         }
1051                 }
1052                 goto out;
1053         }
1054
1055         switch (event) {
1056         case NETDEV_REGISTER:
1057                 printk(KERN_DEBUG "inetdev_event: bug\n");
1058                 dev->ip_ptr = NULL;
1059                 break;
1060         case NETDEV_UP:
1061                 if (dev->mtu < 68)
1062                         break;
1063                 if (dev->flags & IFF_LOOPBACK) {
1064                         struct in_ifaddr *ifa;
1065                         if ((ifa = inet_alloc_ifa()) != NULL) {
1066                                 ifa->ifa_local =
1067                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1068                                 ifa->ifa_prefixlen = 8;
1069                                 ifa->ifa_mask = inet_make_mask(8);
1070                                 in_dev_hold(in_dev);
1071                                 ifa->ifa_dev = in_dev;
1072                                 ifa->ifa_scope = RT_SCOPE_HOST;
1073                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1074                                 inet_insert_ifa(ifa);
1075                         }
1076                 }
1077                 ip_mc_up(in_dev);
1078                 break;
1079         case NETDEV_DOWN:
1080                 ip_mc_down(in_dev);
1081                 break;
1082         case NETDEV_CHANGEMTU:
1083                 if (dev->mtu >= 68)
1084                         break;
1085                 /* MTU falled under 68, disable IP */
1086         case NETDEV_UNREGISTER:
1087                 inetdev_destroy(in_dev);
1088                 break;
1089         case NETDEV_CHANGENAME:
1090                 /* Do not notify about label change, this event is
1091                  * not interesting to applications using netlink.
1092                  */
1093                 inetdev_changename(dev, in_dev);
1094
1095                 devinet_sysctl_unregister(in_dev);
1096                 devinet_sysctl_register(in_dev);
1097                 break;
1098         }
1099 out:
1100         return NOTIFY_DONE;
1101 }
1102
1103 static struct notifier_block ip_netdev_notifier = {
1104         .notifier_call =inetdev_event,
1105 };
1106
1107 static inline size_t inet_nlmsg_size(void)
1108 {
1109         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1110                + nla_total_size(4) /* IFA_ADDRESS */
1111                + nla_total_size(4) /* IFA_LOCAL */
1112                + nla_total_size(4) /* IFA_BROADCAST */
1113                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1114 }
1115
1116 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1117                             u32 pid, u32 seq, int event, unsigned int flags)
1118 {
1119         struct ifaddrmsg *ifm;
1120         struct nlmsghdr  *nlh;
1121
1122         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1123         if (nlh == NULL)
1124                 return -EMSGSIZE;
1125
1126         ifm = nlmsg_data(nlh);
1127         ifm->ifa_family = AF_INET;
1128         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1129         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1130         ifm->ifa_scope = ifa->ifa_scope;
1131         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1132
1133         if (ifa->ifa_address)
1134                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1135
1136         if (ifa->ifa_local)
1137                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1138
1139         if (ifa->ifa_broadcast)
1140                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1141
1142         if (ifa->ifa_label[0])
1143                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1144
1145         return nlmsg_end(skb, nlh);
1146
1147 nla_put_failure:
1148         nlmsg_cancel(skb, nlh);
1149         return -EMSGSIZE;
1150 }
1151
1152 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1153 {
1154         struct net *net = sock_net(skb->sk);
1155         int idx, ip_idx;
1156         struct net_device *dev;
1157         struct in_device *in_dev;
1158         struct in_ifaddr *ifa;
1159         int s_ip_idx, s_idx = cb->args[0];
1160
1161         s_ip_idx = ip_idx = cb->args[1];
1162         idx = 0;
1163         for_each_netdev(net, dev) {
1164                 if (idx < s_idx)
1165                         goto cont;
1166                 if (idx > s_idx)
1167                         s_ip_idx = 0;
1168                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1169                         goto cont;
1170
1171                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1172                      ifa = ifa->ifa_next, ip_idx++) {
1173                         if (ip_idx < s_ip_idx)
1174                                 continue;
1175                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1176                                              cb->nlh->nlmsg_seq,
1177                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1178                                 goto done;
1179                 }
1180 cont:
1181                 idx++;
1182         }
1183
1184 done:
1185         cb->args[0] = idx;
1186         cb->args[1] = ip_idx;
1187
1188         return skb->len;
1189 }
1190
1191 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1192                       u32 pid)
1193 {
1194         struct sk_buff *skb;
1195         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1196         int err = -ENOBUFS;
1197         struct net *net;
1198
1199         net = dev_net(ifa->ifa_dev->dev);
1200         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1201         if (skb == NULL)
1202                 goto errout;
1203
1204         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1205         if (err < 0) {
1206                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1207                 WARN_ON(err == -EMSGSIZE);
1208                 kfree_skb(skb);
1209                 goto errout;
1210         }
1211         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1212 errout:
1213         if (err < 0)
1214                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1215 }
1216
1217 #ifdef CONFIG_SYSCTL
1218
1219 static void devinet_copy_dflt_conf(struct net *net, int i)
1220 {
1221         struct net_device *dev;
1222
1223         read_lock(&dev_base_lock);
1224         for_each_netdev(net, dev) {
1225                 struct in_device *in_dev;
1226                 rcu_read_lock();
1227                 in_dev = __in_dev_get_rcu(dev);
1228                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1229                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1230                 rcu_read_unlock();
1231         }
1232         read_unlock(&dev_base_lock);
1233 }
1234
1235 static void inet_forward_change(struct net *net)
1236 {
1237         struct net_device *dev;
1238         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1239
1240         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1241         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1242
1243         read_lock(&dev_base_lock);
1244         for_each_netdev(net, dev) {
1245                 struct in_device *in_dev;
1246                 if (on)
1247                         dev_disable_lro(dev);
1248                 rcu_read_lock();
1249                 in_dev = __in_dev_get_rcu(dev);
1250                 if (in_dev)
1251                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1252                 rcu_read_unlock();
1253         }
1254         read_unlock(&dev_base_lock);
1255 }
1256
1257 static int devinet_conf_proc(ctl_table *ctl, int write,
1258                              struct file* filp, void __user *buffer,
1259                              size_t *lenp, loff_t *ppos)
1260 {
1261         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1262
1263         if (write) {
1264                 struct ipv4_devconf *cnf = ctl->extra1;
1265                 struct net *net = ctl->extra2;
1266                 int i = (int *)ctl->data - cnf->data;
1267
1268                 set_bit(i, cnf->state);
1269
1270                 if (cnf == net->ipv4.devconf_dflt)
1271                         devinet_copy_dflt_conf(net, i);
1272         }
1273
1274         return ret;
1275 }
1276
1277 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1278                                void __user *oldval, size_t __user *oldlenp,
1279                                void __user *newval, size_t newlen)
1280 {
1281         struct ipv4_devconf *cnf;
1282         struct net *net;
1283         int *valp = table->data;
1284         int new;
1285         int i;
1286
1287         if (!newval || !newlen)
1288                 return 0;
1289
1290         if (newlen != sizeof(int))
1291                 return -EINVAL;
1292
1293         if (get_user(new, (int __user *)newval))
1294                 return -EFAULT;
1295
1296         if (new == *valp)
1297                 return 0;
1298
1299         if (oldval && oldlenp) {
1300                 size_t len;
1301
1302                 if (get_user(len, oldlenp))
1303                         return -EFAULT;
1304
1305                 if (len) {
1306                         if (len > table->maxlen)
1307                                 len = table->maxlen;
1308                         if (copy_to_user(oldval, valp, len))
1309                                 return -EFAULT;
1310                         if (put_user(len, oldlenp))
1311                                 return -EFAULT;
1312                 }
1313         }
1314
1315         *valp = new;
1316
1317         cnf = table->extra1;
1318         net = table->extra2;
1319         i = (int *)table->data - cnf->data;
1320
1321         set_bit(i, cnf->state);
1322
1323         if (cnf == net->ipv4.devconf_dflt)
1324                 devinet_copy_dflt_conf(net, i);
1325
1326         return 1;
1327 }
1328
1329 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1330                                   struct file* filp, void __user *buffer,
1331                                   size_t *lenp, loff_t *ppos)
1332 {
1333         int *valp = ctl->data;
1334         int val = *valp;
1335         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1336
1337         if (write && *valp != val) {
1338                 struct net *net = ctl->extra2;
1339
1340                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1341                         rtnl_lock();
1342                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1343                                 inet_forward_change(net);
1344                         } else if (*valp) {
1345                                 struct ipv4_devconf *cnf = ctl->extra1;
1346                                 struct in_device *idev =
1347                                         container_of(cnf, struct in_device, cnf);
1348                                 dev_disable_lro(idev->dev);
1349                         }
1350                         rtnl_unlock();
1351                         rt_cache_flush(net, 0);
1352                 }
1353         }
1354
1355         return ret;
1356 }
1357
1358 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1359                          struct file* filp, void __user *buffer,
1360                          size_t *lenp, loff_t *ppos)
1361 {
1362         int *valp = ctl->data;
1363         int val = *valp;
1364         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1365         struct net *net = ctl->extra2;
1366
1367         if (write && *valp != val)
1368                 rt_cache_flush(net, 0);
1369
1370         return ret;
1371 }
1372
1373 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1374                                   void __user *oldval, size_t __user *oldlenp,
1375                                   void __user *newval, size_t newlen)
1376 {
1377         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1378                                       newval, newlen);
1379         struct net *net = table->extra2;
1380
1381         if (ret == 1)
1382                 rt_cache_flush(net, 0);
1383
1384         return ret;
1385 }
1386
1387
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 * .data points at the attribute's slot in the global ipv4_devconf
 * (slot index is NET_IPV4_CONF_<attr> - 1); @mval is the file mode,
 * @proc the /proc handler and @sysctl the strategy routine.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= name, \
		.data		= &ipv4_devconf.data[ \
					NET_IPV4_CONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0444 entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers flush the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1415
1416 static struct devinet_sysctl_table {
1417         struct ctl_table_header *sysctl_header;
1418         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1419         char *dev_name;
1420 } devinet_sysctl = {
1421         .devinet_vars = {
1422                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1423                                              devinet_sysctl_forward,
1424                                              devinet_conf_sysctl),
1425                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1426
1427                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1428                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1429                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1430                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1431                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1432                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1433                                         "accept_source_route"),
1434                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1435                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1436                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1437                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1438                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1439                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1440                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1441                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1442                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1443
1444                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1445                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1446                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1447                                               "force_igmp_version"),
1448                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1449                                               "promote_secondaries"),
1450         },
1451 };
1452
1453 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1454                 int ctl_name, struct ipv4_devconf *p)
1455 {
1456         int i;
1457         struct devinet_sysctl_table *t;
1458
1459 #define DEVINET_CTL_PATH_DEV    3
1460
1461         struct ctl_path devinet_ctl_path[] = {
1462                 { .procname = "net", .ctl_name = CTL_NET, },
1463                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1464                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1465                 { /* to be set */ },
1466                 { },
1467         };
1468
1469         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1470         if (!t)
1471                 goto out;
1472
1473         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1474                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1475                 t->devinet_vars[i].extra1 = p;
1476                 t->devinet_vars[i].extra2 = net;
1477         }
1478
1479         /*
1480          * Make a copy of dev_name, because '.procname' is regarded as const
1481          * by sysctl and we wouldn't want anyone to change it under our feet
1482          * (see SIOCSIFNAME).
1483          */
1484         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1485         if (!t->dev_name)
1486                 goto free;
1487
1488         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1489         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1490
1491         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1492                         t->devinet_vars);
1493         if (!t->sysctl_header)
1494                 goto free_procname;
1495
1496         p->sysctl = t;
1497         return 0;
1498
1499 free_procname:
1500         kfree(t->dev_name);
1501 free:
1502         kfree(t);
1503 out:
1504         return -ENOBUFS;
1505 }
1506
1507 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1508 {
1509         struct devinet_sysctl_table *t = cnf->sysctl;
1510
1511         if (t == NULL)
1512                 return;
1513
1514         cnf->sysctl = NULL;
1515         unregister_sysctl_table(t->sysctl_header);
1516         kfree(t->dev_name);
1517         kfree(t);
1518 }
1519
1520 static void devinet_sysctl_register(struct in_device *idev)
1521 {
1522         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1523                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1524         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1525                         idev->dev->ifindex, &idev->cnf);
1526 }
1527
1528 static void devinet_sysctl_unregister(struct in_device *idev)
1529 {
1530         __devinet_sysctl_unregister(&idev->cnf);
1531         neigh_sysctl_unregister(idev->arp_parms);
1532 }
1533
1534 static struct ctl_table ctl_forward_entry[] = {
1535         {
1536                 .ctl_name       = NET_IPV4_FORWARD,
1537                 .procname       = "ip_forward",
1538                 .data           = &ipv4_devconf.data[
1539                                         NET_IPV4_CONF_FORWARDING - 1],
1540                 .maxlen         = sizeof(int),
1541                 .mode           = 0644,
1542                 .proc_handler   = devinet_sysctl_forward,
1543                 .strategy       = devinet_conf_sysctl,
1544                 .extra1         = &ipv4_devconf,
1545                 .extra2         = &init_net,
1546         },
1547         { },
1548 };
1549
1550 static __net_initdata struct ctl_path net_ipv4_path[] = {
1551         { .procname = "net", .ctl_name = CTL_NET, },
1552         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1553         { },
1554 };
1555 #endif
1556
1557 static __net_init int devinet_init_net(struct net *net)
1558 {
1559         int err;
1560         struct ipv4_devconf *all, *dflt;
1561 #ifdef CONFIG_SYSCTL
1562         struct ctl_table *tbl = ctl_forward_entry;
1563         struct ctl_table_header *forw_hdr;
1564 #endif
1565
1566         err = -ENOMEM;
1567         all = &ipv4_devconf;
1568         dflt = &ipv4_devconf_dflt;
1569
1570         if (net != &init_net) {
1571                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1572                 if (all == NULL)
1573                         goto err_alloc_all;
1574
1575                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1576                 if (dflt == NULL)
1577                         goto err_alloc_dflt;
1578
1579 #ifdef CONFIG_SYSCTL
1580                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1581                 if (tbl == NULL)
1582                         goto err_alloc_ctl;
1583
1584                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1585                 tbl[0].extra1 = all;
1586                 tbl[0].extra2 = net;
1587 #endif
1588         }
1589
1590 #ifdef CONFIG_SYSCTL
1591         err = __devinet_sysctl_register(net, "all",
1592                         NET_PROTO_CONF_ALL, all);
1593         if (err < 0)
1594                 goto err_reg_all;
1595
1596         err = __devinet_sysctl_register(net, "default",
1597                         NET_PROTO_CONF_DEFAULT, dflt);
1598         if (err < 0)
1599                 goto err_reg_dflt;
1600
1601         err = -ENOMEM;
1602         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1603         if (forw_hdr == NULL)
1604                 goto err_reg_ctl;
1605         net->ipv4.forw_hdr = forw_hdr;
1606 #endif
1607
1608         net->ipv4.devconf_all = all;
1609         net->ipv4.devconf_dflt = dflt;
1610         return 0;
1611
1612 #ifdef CONFIG_SYSCTL
1613 err_reg_ctl:
1614         __devinet_sysctl_unregister(dflt);
1615 err_reg_dflt:
1616         __devinet_sysctl_unregister(all);
1617 err_reg_all:
1618         if (tbl != ctl_forward_entry)
1619                 kfree(tbl);
1620 err_alloc_ctl:
1621 #endif
1622         if (dflt != &ipv4_devconf_dflt)
1623                 kfree(dflt);
1624 err_alloc_dflt:
1625         if (all != &ipv4_devconf)
1626                 kfree(all);
1627 err_alloc_all:
1628         return err;
1629 }
1630
1631 static __net_exit void devinet_exit_net(struct net *net)
1632 {
1633 #ifdef CONFIG_SYSCTL
1634         struct ctl_table *tbl;
1635
1636         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1637         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1638         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1639         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1640         kfree(tbl);
1641 #endif
1642         kfree(net->ipv4.devconf_dflt);
1643         kfree(net->ipv4.devconf_all);
1644 }
1645
1646 static __net_initdata struct pernet_operations devinet_ops = {
1647         .init = devinet_init_net,
1648         .exit = devinet_exit_net,
1649 };
1650
1651 void __init devinet_init(void)
1652 {
1653         register_pernet_subsys(&devinet_ops);
1654
1655         register_gifconf(PF_INET, inet_gifconf);
1656         register_netdevice_notifier(&ip_netdev_notifier);
1657
1658         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1659         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1660         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1661 }
1662
1663 EXPORT_SYMBOL(in_dev_finish_destroy);
1664 EXPORT_SYMBOL(inet_select_addr);
1665 EXPORT_SYMBOL(inetdev_by_index);
1666 EXPORT_SYMBOL(register_inetaddr_notifier);
1667 EXPORT_SYMBOL(unregister_inetaddr_notifier);