tcp: Limit cwnd growth when deferring for GSO
[linux-2.6] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/*
 * Read attribute @attr from the default devconf of network namespace @net.
 * @net is parenthesised so that an expression argument (not just a plain
 * identifier) is dereferenced as a whole.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_ANYCAST]           = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101                          int destroy);
/*
 * Per-device sysctl hooks.  When CONFIG_SYSCTL is not set they collapse to
 * empty inline stubs, so callers need no conditional compilation.
 */
#ifndef CONFIG_SYSCTL
static inline void devinet_sysctl_register(struct in_device *idev) { }
static inline void devinet_sysctl_unregister(struct in_device *idev) { }
#else
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#endif
113
114 /* Locks all the inet devices. */
115
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120         if (ifa) {
121                 INIT_RCU_HEAD(&ifa->rcu_head);
122         }
123
124         return ifa;
125 }
126
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130         if (ifa->ifa_dev)
131                 in_dev_put(ifa->ifa_dev);
132         kfree(ifa);
133 }
134
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142         struct net_device *dev = idev->dev;
143
144         BUG_TRAP(!idev->ifa_list);
145         BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148                idev, dev ? dev->name : "NIL");
149 #endif
150         dev_put(dev);
151         if (!idev->dead)
152                 printk("Freeing alive in_device %p\n", idev);
153         else {
154                 kfree(idev);
155         }
156 }
157
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160         struct in_device *in_dev;
161
162         ASSERT_RTNL();
163
164         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165         if (!in_dev)
166                 goto out;
167         INIT_RCU_HEAD(&in_dev->rcu_head);
168         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
169                         sizeof(in_dev->cnf));
170         in_dev->cnf.sysctl = NULL;
171         in_dev->dev = dev;
172         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173                 goto out_kfree;
174         /* Reference in_dev->dev */
175         dev_hold(dev);
176         /* Account for reference dev->ip_ptr (below) */
177         in_dev_hold(in_dev);
178
179         devinet_sysctl_register(in_dev);
180         ip_mc_init_dev(in_dev);
181         if (dev->flags & IFF_UP)
182                 ip_mc_up(in_dev);
183
184         /* we can receive as soon as ip_ptr is set -- do this last */
185         rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187         return in_dev;
188 out_kfree:
189         kfree(in_dev);
190         in_dev = NULL;
191         goto out;
192 }
193
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196         struct in_device *idev = container_of(head, struct in_device, rcu_head);
197         in_dev_put(idev);
198 }
199
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202         struct in_ifaddr *ifa;
203         struct net_device *dev;
204
205         ASSERT_RTNL();
206
207         dev = in_dev->dev;
208
209         in_dev->dead = 1;
210
211         ip_mc_destroy_dev(in_dev);
212
213         while ((ifa = in_dev->ifa_list) != NULL) {
214                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215                 inet_free_ifa(ifa);
216         }
217
218         dev->ip_ptr = NULL;
219
220         devinet_sysctl_unregister(in_dev);
221         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222         arp_ifdown(dev);
223
224         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229         rcu_read_lock();
230         for_primary_ifa(in_dev) {
231                 if (inet_ifa_match(a, ifa)) {
232                         if (!b || inet_ifa_match(b, ifa)) {
233                                 rcu_read_unlock();
234                                 return 1;
235                         }
236                 }
237         } endfor_ifa(in_dev);
238         rcu_read_unlock();
239         return 0;
240 }
241
/*
 * Remove the address *ifap from in_dev's address list and announce it.
 *
 * in_dev:  device whose list is modified.
 * ifap:    link (pointer to the ->ifa_next slot or list head) that points
 *          at the address being removed.
 * destroy: non-zero to also release the removed address via inet_free_ifa().
 * nlh/pid: passed through unchanged to rtmsg_ifa().
 *
 * Asserts that RTNL is held.  When the removed address is a primary, the
 * secondaries that share its mask and match its address are either deleted
 * too (each with its own RTM_DELADDR / NETDEV_DOWN) or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary after the removal has been announced.
 */
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243                          int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245         struct in_ifaddr *promote = NULL;
246         struct in_ifaddr *ifa, *ifa1 = *ifap;
247         struct in_ifaddr *last_prim = in_dev->ifa_list;
248         struct in_ifaddr *prev_prom = NULL;
249         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250
251         ASSERT_RTNL();
252
253         /* 1. Deleting primary ifaddr forces deletion all secondaries
254          * unless alias promotion is set
255          **/
256
257         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259
260                 while ((ifa = *ifap1) != NULL) {
            /* Track the last primary seen whose scope is not below ifa1's;
             * a promoted address is re-inserted right after it. */
261                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262                             ifa1->ifa_scope <= ifa->ifa_scope)
263                                 last_prim = ifa;
264
            /* Skip entries that are not secondaries of ifa1's subnet. */
265                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266                             ifa1->ifa_mask != ifa->ifa_mask ||
267                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
268                                 ifap1 = &ifa->ifa_next;
269                                 prev_prom = ifa;
270                                 continue;
271                         }
272
273                         if (!do_promote) {
                /* No promotion: unlink, announce and free this secondary. */
274                                 *ifap1 = ifa->ifa_next;
275
276                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277                                 blocking_notifier_call_chain(&inetaddr_chain,
278                                                 NETDEV_DOWN, ifa);
279                                 inet_free_ifa(ifa);
280                         } else {
                /* Promotion: remember the first matching secondary and stop. */
281                                 promote = ifa;
282                                 break;
283                         }
284                 }
285         }
286
287         /* 2. Unlink it */
288
289         *ifap = ifa1->ifa_next;
290
291         /* 3. Announce address deletion */
292
293         /* Send message first, then call notifier.
294            At first sight, FIB update triggered by notifier
295            will refer to already deleted ifaddr, that could confuse
296            netlink listeners. It is not true: look, gated sees
297            that route deleted and if it still thinks that ifaddr
298            is valid, it will try to restore deleted routes... Grr.
299            So that, this order is correct.
300          */
301         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303
304         if (promote) {
305
            /* prev_prom is only set when at least one entry was skipped
             * before the promoted one; in that case move the promoted
             * address so that it directly follows last_prim. */
306                 if (prev_prom) {
307                         prev_prom->ifa_next = promote->ifa_next;
308                         promote->ifa_next = last_prim->ifa_next;
309                         last_prim->ifa_next = promote;
310                 }
311
312                 promote->ifa_flags &= ~IFA_F_SECONDARY;
313                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314                 blocking_notifier_call_chain(&inetaddr_chain,
315                                 NETDEV_UP, promote);
            /* Call fib_add_ifaddr() for the addresses after the promoted
             * one that share the removed address's mask and prefix. */
316                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317                         if (ifa1->ifa_mask != ifa->ifa_mask ||
318                             !inet_ifa_match(ifa1->ifa_address, ifa))
319                                         continue;
320                         fib_add_ifaddr(ifa);
321                 }
322
323         }
324         if (destroy)
325                 inet_free_ifa(ifa1);
326 }
327
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329                          int destroy)
330 {
331         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335                              u32 pid)
336 {
337         struct in_device *in_dev = ifa->ifa_dev;
338         struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340         ASSERT_RTNL();
341
342         if (!ifa->ifa_local) {
343                 inet_free_ifa(ifa);
344                 return 0;
345         }
346
347         ifa->ifa_flags &= ~IFA_F_SECONDARY;
348         last_primary = &in_dev->ifa_list;
349
350         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351              ifap = &ifa1->ifa_next) {
352                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353                     ifa->ifa_scope <= ifa1->ifa_scope)
354                         last_primary = &ifa1->ifa_next;
355                 if (ifa1->ifa_mask == ifa->ifa_mask &&
356                     inet_ifa_match(ifa1->ifa_address, ifa)) {
357                         if (ifa1->ifa_local == ifa->ifa_local) {
358                                 inet_free_ifa(ifa);
359                                 return -EEXIST;
360                         }
361                         if (ifa1->ifa_scope != ifa->ifa_scope) {
362                                 inet_free_ifa(ifa);
363                                 return -EINVAL;
364                         }
365                         ifa->ifa_flags |= IFA_F_SECONDARY;
366                 }
367         }
368
369         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370                 net_srandom(ifa->ifa_local);
371                 ifap = last_primary;
372         }
373
374         ifa->ifa_next = *ifap;
375         *ifap = ifa;
376
377         /* Send message first, then call notifier.
378            Notifier will trigger FIB update, so that
379            listeners of netlink will know about new ifaddr */
380         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383         return 0;
384 }
385
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388         return __inet_insert_ifa(ifa, NULL, 0);
389 }
390
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393         struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395         ASSERT_RTNL();
396
397         if (!in_dev) {
398                 inet_free_ifa(ifa);
399                 return -ENOBUFS;
400         }
401         ipv4_devconf_setall(in_dev);
402         if (ifa->ifa_dev != in_dev) {
403                 BUG_TRAP(!ifa->ifa_dev);
404                 in_dev_hold(in_dev);
405                 ifa->ifa_dev = in_dev;
406         }
407         if (ipv4_is_loopback(ifa->ifa_local))
408                 ifa->ifa_scope = RT_SCOPE_HOST;
409         return inet_insert_ifa(ifa);
410 }
411
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414         struct net_device *dev;
415         struct in_device *in_dev = NULL;
416         read_lock(&dev_base_lock);
417         dev = __dev_get_by_index(net, ifindex);
418         if (dev)
419                 in_dev = in_dev_get(dev);
420         read_unlock(&dev_base_lock);
421         return in_dev;
422 }
423
424 /* Called only from RTNL semaphored context. No locks. */
425
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427                                     __be32 mask)
428 {
429         ASSERT_RTNL();
430
431         for_primary_ifa(in_dev) {
432                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433                         return ifa;
434         } endfor_ifa(in_dev);
435         return NULL;
436 }
437
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440         struct net *net = sock_net(skb->sk);
441         struct nlattr *tb[IFA_MAX+1];
442         struct in_device *in_dev;
443         struct ifaddrmsg *ifm;
444         struct in_ifaddr *ifa, **ifap;
445         int err = -EINVAL;
446
447         ASSERT_RTNL();
448
449         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450         if (err < 0)
451                 goto errout;
452
453         ifm = nlmsg_data(nlh);
454         in_dev = inetdev_by_index(net, ifm->ifa_index);
455         if (in_dev == NULL) {
456                 err = -ENODEV;
457                 goto errout;
458         }
459
460         __in_dev_put(in_dev);
461
462         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463              ifap = &ifa->ifa_next) {
464                 if (tb[IFA_LOCAL] &&
465                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
466                         continue;
467
468                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
469                         continue;
470
471                 if (tb[IFA_ADDRESS] &&
472                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
474                         continue;
475
476                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477                 return 0;
478         }
479
480         err = -EADDRNOTAVAIL;
481 errout:
482         return err;
483 }
484
485 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
486 {
487         struct nlattr *tb[IFA_MAX+1];
488         struct in_ifaddr *ifa;
489         struct ifaddrmsg *ifm;
490         struct net_device *dev;
491         struct in_device *in_dev;
492         int err;
493
494         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495         if (err < 0)
496                 goto errout;
497
498         ifm = nlmsg_data(nlh);
499         err = -EINVAL;
500         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
501                 goto errout;
502
503         dev = __dev_get_by_index(net, ifm->ifa_index);
504         err = -ENODEV;
505         if (dev == NULL)
506                 goto errout;
507
508         in_dev = __in_dev_get_rtnl(dev);
509         err = -ENOBUFS;
510         if (in_dev == NULL)
511                 goto errout;
512
513         ifa = inet_alloc_ifa();
514         if (ifa == NULL)
515                 /*
516                  * A potential indev allocation can be left alive, it stays
517                  * assigned to its device and is destroy with it.
518                  */
519                 goto errout;
520
521         ipv4_devconf_setall(in_dev);
522         in_dev_hold(in_dev);
523
524         if (tb[IFA_ADDRESS] == NULL)
525                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
526
527         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
528         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
529         ifa->ifa_flags = ifm->ifa_flags;
530         ifa->ifa_scope = ifm->ifa_scope;
531         ifa->ifa_dev = in_dev;
532
533         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
534         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
535
536         if (tb[IFA_BROADCAST])
537                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
538
539         if (tb[IFA_ANYCAST])
540                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
541
542         if (tb[IFA_LABEL])
543                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
544         else
545                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
546
547         return ifa;
548
549 errout:
550         return ERR_PTR(err);
551 }
552
553 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
554 {
555         struct net *net = sock_net(skb->sk);
556         struct in_ifaddr *ifa;
557
558         ASSERT_RTNL();
559
560         ifa = rtm_to_ifaddr(net, nlh);
561         if (IS_ERR(ifa))
562                 return PTR_ERR(ifa);
563
564         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
565 }
566
567 /*
568  *      Determine a default network mask, based on the IP address.
569  */
570
571 static __inline__ int inet_abc_len(__be32 addr)
572 {
573         int rc = -1;    /* Something else, probably a multicast. */
574
575         if (ipv4_is_zeronet(addr))
576                 rc = 0;
577         else {
578                 __u32 haddr = ntohl(addr);
579
580                 if (IN_CLASSA(haddr))
581                         rc = 8;
582                 else if (IN_CLASSB(haddr))
583                         rc = 16;
584                 else if (IN_CLASSC(haddr))
585                         rc = 24;
586         }
587
588         return rc;
589 }
590
591
/*
 * ioctl entry point for IPv4 interface address operations.
 *
 * net: network namespace used for device lookup.
 * cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *      SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
 *      SIOCSIFNETMASK; any other value yields -EINVAL.
 * arg: user pointer to a struct ifreq; read on entry and, for the "get"
 *      commands, written back after RTNL has been released.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         copying the ifreq from/to user space failed,
 *   -EACCES         a "set" command without CAP_NET_ADMIN,
 *   -EINVAL         unknown command, non-AF_INET address on a "set"
 *                   command, or an address/mask rejected by
 *                   inet_abc_len()/bad_mask(),
 *   -ENODEV         no device with the given name,
 *   -EADDRNOTAVAIL  no matching address for a command that needs one,
 *   -ENOBUFS        a new address could not be allocated,
 *   or whatever dev_change_flags()/inet_set_ifa() return.
 */
592 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
593 {
594         struct ifreq ifr;
595         struct sockaddr_in sin_orig;
596         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
597         struct in_device *in_dev;
598         struct in_ifaddr **ifap = NULL;
599         struct in_ifaddr *ifa = NULL;
600         struct net_device *dev;
601         char *colon;
602         int ret = -EFAULT;
603         int tryaddrmatch = 0;
604
605         /*
606          *      Fetch the caller's info block into kernel space
607          */
608
609         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
610                 goto out;
611         ifr.ifr_name[IFNAMSIZ - 1] = 0;
612
613         /* save original address for comparison */
614         memcpy(&sin_orig, sin, sizeof(*sin));
615
    /* Temporarily cut an alias suffix ("name:label") so that the module
     * load and device lookup below use the bare device name; the ':' is
     * restored once the device has been found. */
616         colon = strchr(ifr.ifr_name, ':');
617         if (colon)
618                 *colon = 0;
619
620 #ifdef CONFIG_KMOD
621         dev_load(net, ifr.ifr_name);
622 #endif
623
    /* First pass over cmd: permission and argument checks only. */
624         switch (cmd) {
625         case SIOCGIFADDR:       /* Get interface address */
626         case SIOCGIFBRDADDR:    /* Get the broadcast address */
627         case SIOCGIFDSTADDR:    /* Get the destination address */
628         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
629                 /* Note that these ioctls will not sleep,
630                    so that we do not impose a lock.
631                    One day we will be forced to put shlock here (I mean SMP)
632                  */
633                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
634                 memset(sin, 0, sizeof(*sin));
635                 sin->sin_family = AF_INET;
636                 break;
637
638         case SIOCSIFFLAGS:
639                 ret = -EACCES;
640                 if (!capable(CAP_NET_ADMIN))
641                         goto out;
642                 break;
643         case SIOCSIFADDR:       /* Set interface address (and family) */
644         case SIOCSIFBRDADDR:    /* Set the broadcast address */
645         case SIOCSIFDSTADDR:    /* Set the destination address */
646         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
647                 ret = -EACCES;
648                 if (!capable(CAP_NET_ADMIN))
649                         goto out;
650                 ret = -EINVAL;
651                 if (sin->sin_family != AF_INET)
652                         goto out;
653                 break;
654         default:
655                 ret = -EINVAL;
656                 goto out;
657         }
658
    /* From here on every exit must go through "done" or "rarok", both of
     * which release RTNL. */
659         rtnl_lock();
660
661         ret = -ENODEV;
662         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
663                 goto done;
664
665         if (colon)
666                 *colon = ':';
667
668         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
669                 if (tryaddrmatch) {
670                         /* Matthias Andree */
671                         /* compare label and address (4.4BSD style) */
672                         /* note: we only do this for a limited set of ioctls
673                            and only if the original address family was AF_INET.
674                            This is checked above. */
675                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676                              ifap = &ifa->ifa_next) {
677                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
678                                     sin_orig.sin_addr.s_addr ==
679                                                         ifa->ifa_address) {
680                                         break; /* found */
681                                 }
682                         }
683                 }
684                 /* we didn't get a match, maybe the application is
685                    4.3BSD-style and passed in junk so we fall back to
686                    comparing just the label */
687                 if (!ifa) {
688                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
689                              ifap = &ifa->ifa_next)
690                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
691                                         break;
692                 }
693         }
694
    /* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an existing
     * address entry. */
695         ret = -EADDRNOTAVAIL;
696         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
697                 goto done;
698
    /* Second pass over cmd: perform the operation. */
699         switch (cmd) {
700         case SIOCGIFADDR:       /* Get interface address */
701                 sin->sin_addr.s_addr = ifa->ifa_local;
702                 goto rarok;
703
704         case SIOCGIFBRDADDR:    /* Get the broadcast address */
705                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
706                 goto rarok;
707
708         case SIOCGIFDSTADDR:    /* Get the destination address */
709                 sin->sin_addr.s_addr = ifa->ifa_address;
710                 goto rarok;
711
712         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
713                 sin->sin_addr.s_addr = ifa->ifa_mask;
714                 goto rarok;
715
716         case SIOCSIFFLAGS:
        /* On an alias name, clearing IFF_UP deletes that alias address;
         * on a plain device name the flags are applied to the device. */
717                 if (colon) {
718                         ret = -EADDRNOTAVAIL;
719                         if (!ifa)
720                                 break;
721                         ret = 0;
722                         if (!(ifr.ifr_flags & IFF_UP))
723                                 inet_del_ifa(in_dev, ifap, 1);
724                         break;
725                 }
726                 ret = dev_change_flags(dev, ifr.ifr_flags);
727                 break;
728
729         case SIOCSIFADDR:       /* Set interface address (and family) */
730                 ret = -EINVAL;
731                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
732                         break;
733
734                 if (!ifa) {
735                         ret = -ENOBUFS;
736                         if ((ifa = inet_alloc_ifa()) == NULL)
737                                 break;
738                         if (colon)
739                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
740                         else
741                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
742                 } else {
743                         ret = 0;
744                         if (ifa->ifa_local == sin->sin_addr.s_addr)
745                                 break;
            /* Unlink without freeing; the same object is updated
             * and re-inserted by inet_set_ifa() below. */
746                         inet_del_ifa(in_dev, ifap, 0);
747                         ifa->ifa_broadcast = 0;
748                         ifa->ifa_anycast = 0;
749                         ifa->ifa_scope = 0;
750                 }
751
752                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
753
        /* Non point-to-point devices get the classful default mask (and
         * a derived broadcast address on broadcast-capable devices);
         * point-to-point devices get a /32. */
754                 if (!(dev->flags & IFF_POINTOPOINT)) {
755                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
756                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
757                         if ((dev->flags & IFF_BROADCAST) &&
758                             ifa->ifa_prefixlen < 31)
759                                 ifa->ifa_broadcast = ifa->ifa_address |
760                                                      ~ifa->ifa_mask;
761                 } else {
762                         ifa->ifa_prefixlen = 32;
763                         ifa->ifa_mask = inet_make_mask(32);
764                 }
765                 ret = inet_set_ifa(dev, ifa);
766                 break;
767
768         case SIOCSIFBRDADDR:    /* Set the broadcast address */
769                 ret = 0;
770                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
771                         inet_del_ifa(in_dev, ifap, 0);
772                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
773                         inet_insert_ifa(ifa);
774                 }
775                 break;
776
777         case SIOCSIFDSTADDR:    /* Set the destination address */
778                 ret = 0;
779                 if (ifa->ifa_address == sin->sin_addr.s_addr)
780                         break;
781                 ret = -EINVAL;
782                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
783                         break;
784                 ret = 0;
785                 inet_del_ifa(in_dev, ifap, 0);
786                 ifa->ifa_address = sin->sin_addr.s_addr;
787                 inet_insert_ifa(ifa);
788                 break;
789
790         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
791
792                 /*
793                  *      The mask we set must be legal.
794                  */
795                 ret = -EINVAL;
796                 if (bad_mask(sin->sin_addr.s_addr, 0))
797                         break;
798                 ret = 0;
799                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
800                         __be32 old_mask = ifa->ifa_mask;
801                         inet_del_ifa(in_dev, ifap, 0);
802                         ifa->ifa_mask = sin->sin_addr.s_addr;
803                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
804
805                         /* See if current broadcast address matches
806                          * with current netmask, then recalculate
807                          * the broadcast address. Otherwise it's a
808                          * funny address, so don't touch it since
809                          * the user seems to know what (s)he's doing...
810                          */
811                         if ((dev->flags & IFF_BROADCAST) &&
812                             (ifa->ifa_prefixlen < 31) &&
813                             (ifa->ifa_broadcast ==
814                              (ifa->ifa_local|~old_mask))) {
815                                 ifa->ifa_broadcast = (ifa->ifa_local |
816                                                       ~sin->sin_addr.s_addr);
817                         }
818                         inet_insert_ifa(ifa);
819                 }
820                 break;
821         }
822 done:
823         rtnl_unlock();
824 out:
825         return ret;
    /* "get" commands land here: drop RTNL, then copy the result to user space. */
826 rarok:
827         rtnl_unlock();
828         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
829         goto out;
830 }
831
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
833 {
834         struct in_device *in_dev = __in_dev_get_rtnl(dev);
835         struct in_ifaddr *ifa;
836         struct ifreq ifr;
837         int done = 0;
838
839         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
840                 goto out;
841
842         for (; ifa; ifa = ifa->ifa_next) {
843                 if (!buf) {
844                         done += sizeof(ifr);
845                         continue;
846                 }
847                 if (len < (int) sizeof(ifr))
848                         break;
849                 memset(&ifr, 0, sizeof(struct ifreq));
850                 if (ifa->ifa_label)
851                         strcpy(ifr.ifr_name, ifa->ifa_label);
852                 else
853                         strcpy(ifr.ifr_name, dev->name);
854
855                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
857                                                                 ifa->ifa_local;
858
859                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
860                         done = -EFAULT;
861                         break;
862                 }
863                 buf  += sizeof(struct ifreq);
864                 len  -= sizeof(struct ifreq);
865                 done += sizeof(struct ifreq);
866         }
867 out:
868         return done;
869 }
870
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
872 {
873         __be32 addr = 0;
874         struct in_device *in_dev;
875         struct net *net = dev_net(dev);
876
877         rcu_read_lock();
878         in_dev = __in_dev_get_rcu(dev);
879         if (!in_dev)
880                 goto no_in_dev;
881
882         for_primary_ifa(in_dev) {
883                 if (ifa->ifa_scope > scope)
884                         continue;
885                 if (!dst || inet_ifa_match(dst, ifa)) {
886                         addr = ifa->ifa_local;
887                         break;
888                 }
889                 if (!addr)
890                         addr = ifa->ifa_local;
891         } endfor_ifa(in_dev);
892 no_in_dev:
893         rcu_read_unlock();
894
895         if (addr)
896                 goto out;
897
898         /* Not loopback addresses on loopback should be preferred
899            in this case. It is importnat that lo is the first interface
900            in dev_base list.
901          */
902         read_lock(&dev_base_lock);
903         rcu_read_lock();
904         for_each_netdev(net, dev) {
905                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
906                         continue;
907
908                 for_primary_ifa(in_dev) {
909                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
910                             ifa->ifa_scope <= scope) {
911                                 addr = ifa->ifa_local;
912                                 goto out_unlock_both;
913                         }
914                 } endfor_ifa(in_dev);
915         }
916 out_unlock_both:
917         read_unlock(&dev_base_lock);
918         rcu_read_unlock();
919 out:
920         return addr;
921 }
922
923 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
924                               __be32 local, int scope)
925 {
926         int same = 0;
927         __be32 addr = 0;
928
929         for_ifa(in_dev) {
930                 if (!addr &&
931                     (local == ifa->ifa_local || !local) &&
932                     ifa->ifa_scope <= scope) {
933                         addr = ifa->ifa_local;
934                         if (same)
935                                 break;
936                 }
937                 if (!same) {
938                         same = (!local || inet_ifa_match(local, ifa)) &&
939                                 (!dst || inet_ifa_match(dst, ifa));
940                         if (same && addr) {
941                                 if (local || !dst)
942                                         break;
943                                 /* Is the selected addr into dst subnet? */
944                                 if (inet_ifa_match(addr, ifa))
945                                         break;
946                                 /* No, then can we use new local src? */
947                                 if (ifa->ifa_scope <= scope) {
948                                         addr = ifa->ifa_local;
949                                         break;
950                                 }
951                                 /* search for large dst subnet for addr */
952                                 same = 0;
953                         }
954                 }
955         } endfor_ifa(in_dev);
956
957         return same? addr : 0;
958 }
959
960 /*
961  * Confirm that local IP address exists using wildcards:
962  * - in_dev: only on this interface, 0=any interface
963  * - dst: only in the same subnet as dst, 0=any dst
964  * - local: address, 0=autoselect the local address
965  * - scope: maximum allowed scope value for the local address
966  */
967 __be32 inet_confirm_addr(struct in_device *in_dev,
968                          __be32 dst, __be32 local, int scope)
969 {
970         __be32 addr = 0;
971         struct net_device *dev;
972         struct net *net;
973
974         if (scope != RT_SCOPE_LINK)
975                 return confirm_addr_indev(in_dev, dst, local, scope);
976
977         net = dev_net(in_dev->dev);
978         read_lock(&dev_base_lock);
979         rcu_read_lock();
980         for_each_netdev(net, dev) {
981                 if ((in_dev = __in_dev_get_rcu(dev))) {
982                         addr = confirm_addr_indev(in_dev, dst, local, scope);
983                         if (addr)
984                                 break;
985                 }
986         }
987         rcu_read_unlock();
988         read_unlock(&dev_base_lock);
989
990         return addr;
991 }
992
993 /*
994  *      Device notifier
995  */
996
997 int register_inetaddr_notifier(struct notifier_block *nb)
998 {
999         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1000 }
1001
1002 int unregister_inetaddr_notifier(struct notifier_block *nb)
1003 {
1004         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1005 }
1006
1007 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1008  * alias numbering and to create unique labels if possible.
1009 */
1010 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1011 {
1012         struct in_ifaddr *ifa;
1013         int named = 0;
1014
1015         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1016                 char old[IFNAMSIZ], *dot;
1017
1018                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1019                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1020                 if (named++ == 0)
1021                         continue;
1022                 dot = strchr(old, ':');
1023                 if (dot == NULL) {
1024                         sprintf(old, ":%d", named);
1025                         dot = old;
1026                 }
1027                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1028                         strcat(ifa->ifa_label, dot);
1029                 } else {
1030                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1031                 }
1032         }
1033 }
1034
1035 /* Called only under RTNL semaphore */
1036
1037 static int inetdev_event(struct notifier_block *this, unsigned long event,
1038                          void *ptr)
1039 {
1040         struct net_device *dev = ptr;
1041         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1042
1043         ASSERT_RTNL();
1044
1045         if (!in_dev) {
1046                 if (event == NETDEV_REGISTER) {
1047                         in_dev = inetdev_init(dev);
1048                         if (!in_dev)
1049                                 return notifier_from_errno(-ENOMEM);
1050                         if (dev->flags & IFF_LOOPBACK) {
1051                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1052                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1053                         }
1054                 }
1055                 goto out;
1056         }
1057
1058         switch (event) {
1059         case NETDEV_REGISTER:
1060                 printk(KERN_DEBUG "inetdev_event: bug\n");
1061                 dev->ip_ptr = NULL;
1062                 break;
1063         case NETDEV_UP:
1064                 if (dev->mtu < 68)
1065                         break;
1066                 if (dev->flags & IFF_LOOPBACK) {
1067                         struct in_ifaddr *ifa;
1068                         if ((ifa = inet_alloc_ifa()) != NULL) {
1069                                 ifa->ifa_local =
1070                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1071                                 ifa->ifa_prefixlen = 8;
1072                                 ifa->ifa_mask = inet_make_mask(8);
1073                                 in_dev_hold(in_dev);
1074                                 ifa->ifa_dev = in_dev;
1075                                 ifa->ifa_scope = RT_SCOPE_HOST;
1076                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1077                                 inet_insert_ifa(ifa);
1078                         }
1079                 }
1080                 ip_mc_up(in_dev);
1081                 break;
1082         case NETDEV_DOWN:
1083                 ip_mc_down(in_dev);
1084                 break;
1085         case NETDEV_CHANGEMTU:
1086                 if (dev->mtu >= 68)
1087                         break;
1088                 /* MTU falled under 68, disable IP */
1089         case NETDEV_UNREGISTER:
1090                 inetdev_destroy(in_dev);
1091                 break;
1092         case NETDEV_CHANGENAME:
1093                 /* Do not notify about label change, this event is
1094                  * not interesting to applications using netlink.
1095                  */
1096                 inetdev_changename(dev, in_dev);
1097
1098                 devinet_sysctl_unregister(in_dev);
1099                 devinet_sysctl_register(in_dev);
1100                 break;
1101         }
1102 out:
1103         return NOTIFY_DONE;
1104 }
1105
1106 static struct notifier_block ip_netdev_notifier = {
1107         .notifier_call =inetdev_event,
1108 };
1109
1110 static inline size_t inet_nlmsg_size(void)
1111 {
1112         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1113                + nla_total_size(4) /* IFA_ADDRESS */
1114                + nla_total_size(4) /* IFA_LOCAL */
1115                + nla_total_size(4) /* IFA_BROADCAST */
1116                + nla_total_size(4) /* IFA_ANYCAST */
1117                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1118 }
1119
1120 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1121                             u32 pid, u32 seq, int event, unsigned int flags)
1122 {
1123         struct ifaddrmsg *ifm;
1124         struct nlmsghdr  *nlh;
1125
1126         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1127         if (nlh == NULL)
1128                 return -EMSGSIZE;
1129
1130         ifm = nlmsg_data(nlh);
1131         ifm->ifa_family = AF_INET;
1132         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1133         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1134         ifm->ifa_scope = ifa->ifa_scope;
1135         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1136
1137         if (ifa->ifa_address)
1138                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1139
1140         if (ifa->ifa_local)
1141                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1142
1143         if (ifa->ifa_broadcast)
1144                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1145
1146         if (ifa->ifa_anycast)
1147                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1148
1149         if (ifa->ifa_label[0])
1150                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1151
1152         return nlmsg_end(skb, nlh);
1153
1154 nla_put_failure:
1155         nlmsg_cancel(skb, nlh);
1156         return -EMSGSIZE;
1157 }
1158
1159 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1160 {
1161         struct net *net = sock_net(skb->sk);
1162         int idx, ip_idx;
1163         struct net_device *dev;
1164         struct in_device *in_dev;
1165         struct in_ifaddr *ifa;
1166         int s_ip_idx, s_idx = cb->args[0];
1167
1168         s_ip_idx = ip_idx = cb->args[1];
1169         idx = 0;
1170         for_each_netdev(net, dev) {
1171                 if (idx < s_idx)
1172                         goto cont;
1173                 if (idx > s_idx)
1174                         s_ip_idx = 0;
1175                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1176                         goto cont;
1177
1178                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1179                      ifa = ifa->ifa_next, ip_idx++) {
1180                         if (ip_idx < s_ip_idx)
1181                                 continue;
1182                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1183                                              cb->nlh->nlmsg_seq,
1184                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1185                                 goto done;
1186                 }
1187 cont:
1188                 idx++;
1189         }
1190
1191 done:
1192         cb->args[0] = idx;
1193         cb->args[1] = ip_idx;
1194
1195         return skb->len;
1196 }
1197
1198 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1199                       u32 pid)
1200 {
1201         struct sk_buff *skb;
1202         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1203         int err = -ENOBUFS;
1204         struct net *net;
1205
1206         net = dev_net(ifa->ifa_dev->dev);
1207         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1208         if (skb == NULL)
1209                 goto errout;
1210
1211         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1212         if (err < 0) {
1213                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1214                 WARN_ON(err == -EMSGSIZE);
1215                 kfree_skb(skb);
1216                 goto errout;
1217         }
1218         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1219 errout:
1220         if (err < 0)
1221                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1222 }
1223
1224 #ifdef CONFIG_SYSCTL
1225
1226 static void devinet_copy_dflt_conf(struct net *net, int i)
1227 {
1228         struct net_device *dev;
1229
1230         read_lock(&dev_base_lock);
1231         for_each_netdev(net, dev) {
1232                 struct in_device *in_dev;
1233                 rcu_read_lock();
1234                 in_dev = __in_dev_get_rcu(dev);
1235                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1236                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1237                 rcu_read_unlock();
1238         }
1239         read_unlock(&dev_base_lock);
1240 }
1241
1242 static void inet_forward_change(struct net *net)
1243 {
1244         struct net_device *dev;
1245         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1246
1247         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1248         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1249
1250         read_lock(&dev_base_lock);
1251         for_each_netdev(net, dev) {
1252                 struct in_device *in_dev;
1253                 rcu_read_lock();
1254                 in_dev = __in_dev_get_rcu(dev);
1255                 if (in_dev)
1256                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1257                 rcu_read_unlock();
1258         }
1259         read_unlock(&dev_base_lock);
1260
1261         rt_cache_flush(0);
1262 }
1263
1264 static int devinet_conf_proc(ctl_table *ctl, int write,
1265                              struct file* filp, void __user *buffer,
1266                              size_t *lenp, loff_t *ppos)
1267 {
1268         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1269
1270         if (write) {
1271                 struct ipv4_devconf *cnf = ctl->extra1;
1272                 struct net *net = ctl->extra2;
1273                 int i = (int *)ctl->data - cnf->data;
1274
1275                 set_bit(i, cnf->state);
1276
1277                 if (cnf == net->ipv4.devconf_dflt)
1278                         devinet_copy_dflt_conf(net, i);
1279         }
1280
1281         return ret;
1282 }
1283
1284 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1285                                void __user *oldval, size_t __user *oldlenp,
1286                                void __user *newval, size_t newlen)
1287 {
1288         struct ipv4_devconf *cnf;
1289         struct net *net;
1290         int *valp = table->data;
1291         int new;
1292         int i;
1293
1294         if (!newval || !newlen)
1295                 return 0;
1296
1297         if (newlen != sizeof(int))
1298                 return -EINVAL;
1299
1300         if (get_user(new, (int __user *)newval))
1301                 return -EFAULT;
1302
1303         if (new == *valp)
1304                 return 0;
1305
1306         if (oldval && oldlenp) {
1307                 size_t len;
1308
1309                 if (get_user(len, oldlenp))
1310                         return -EFAULT;
1311
1312                 if (len) {
1313                         if (len > table->maxlen)
1314                                 len = table->maxlen;
1315                         if (copy_to_user(oldval, valp, len))
1316                                 return -EFAULT;
1317                         if (put_user(len, oldlenp))
1318                                 return -EFAULT;
1319                 }
1320         }
1321
1322         *valp = new;
1323
1324         cnf = table->extra1;
1325         net = table->extra2;
1326         i = (int *)table->data - cnf->data;
1327
1328         set_bit(i, cnf->state);
1329
1330         if (cnf == net->ipv4.devconf_dflt)
1331                 devinet_copy_dflt_conf(net, i);
1332
1333         return 1;
1334 }
1335
1336 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1337                                   struct file* filp, void __user *buffer,
1338                                   size_t *lenp, loff_t *ppos)
1339 {
1340         int *valp = ctl->data;
1341         int val = *valp;
1342         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1343
1344         if (write && *valp != val) {
1345                 struct net *net = ctl->extra2;
1346
1347                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1348                         inet_forward_change(net);
1349                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1350                         rt_cache_flush(0);
1351         }
1352
1353         return ret;
1354 }
1355
1356 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1357                          struct file* filp, void __user *buffer,
1358                          size_t *lenp, loff_t *ppos)
1359 {
1360         int *valp = ctl->data;
1361         int val = *valp;
1362         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1363
1364         if (write && *valp != val)
1365                 rt_cache_flush(0);
1366
1367         return ret;
1368 }
1369
1370 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1371                                   void __user *oldval, size_t __user *oldlenp,
1372                                   void __user *newval, size_t newlen)
1373 {
1374         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1375                                       newval, newlen);
1376
1377         if (ret == 1)
1378                 rt_cache_flush(0);
1379
1380         return ret;
1381 }
1382
1383
/*
 * Build one ctl_table initializer for devconf attribute @attr.  The data
 * pointer refers to the matching slot of the global ipv4_devconf (index
 * NET_IPV4_CONF_<attr> - 1) and extra1 to ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl)		\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= name,					\
		.data		= ipv4_devconf.data +			\
				  NET_IPV4_CONF_ ## attr - 1,		\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.strategy	= sysctl,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Entry with mode 0644 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Entry with mode 0444 using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc,	\
			     devinet_conf_sysctl)

/* Entry with mode 0644 and caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl)		\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Entry with mode 0644 whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush,	\
				     ipv4_doint_and_flush_strategy)
1411
1412 static struct devinet_sysctl_table {
1413         struct ctl_table_header *sysctl_header;
1414         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1415         char *dev_name;
1416 } devinet_sysctl = {
1417         .devinet_vars = {
1418                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1419                                              devinet_sysctl_forward,
1420                                              devinet_conf_sysctl),
1421                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1422
1423                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1424                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1425                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1426                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1427                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1428                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1429                                         "accept_source_route"),
1430                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1431                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1432                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1433                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1434                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1435                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1436                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1437                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1438                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1439
1440                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1441                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1442                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1443                                               "force_igmp_version"),
1444                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1445                                               "promote_secondaries"),
1446         },
1447 };
1448
1449 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1450                 int ctl_name, struct ipv4_devconf *p)
1451 {
1452         int i;
1453         struct devinet_sysctl_table *t;
1454
1455 #define DEVINET_CTL_PATH_DEV    3
1456
1457         struct ctl_path devinet_ctl_path[] = {
1458                 { .procname = "net", .ctl_name = CTL_NET, },
1459                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1460                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1461                 { /* to be set */ },
1462                 { },
1463         };
1464
1465         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1466         if (!t)
1467                 goto out;
1468
1469         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1470                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1471                 t->devinet_vars[i].extra1 = p;
1472                 t->devinet_vars[i].extra2 = net;
1473         }
1474
1475         /*
1476          * Make a copy of dev_name, because '.procname' is regarded as const
1477          * by sysctl and we wouldn't want anyone to change it under our feet
1478          * (see SIOCSIFNAME).
1479          */
1480         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1481         if (!t->dev_name)
1482                 goto free;
1483
1484         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1485         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1486
1487         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1488                         t->devinet_vars);
1489         if (!t->sysctl_header)
1490                 goto free_procname;
1491
1492         p->sysctl = t;
1493         return 0;
1494
1495 free_procname:
1496         kfree(t->dev_name);
1497 free:
1498         kfree(t);
1499 out:
1500         return -ENOBUFS;
1501 }
1502
1503 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1504 {
1505         struct devinet_sysctl_table *t = cnf->sysctl;
1506
1507         if (t == NULL)
1508                 return;
1509
1510         cnf->sysctl = NULL;
1511         unregister_sysctl_table(t->sysctl_header);
1512         kfree(t->dev_name);
1513         kfree(t);
1514 }
1515
1516 static void devinet_sysctl_register(struct in_device *idev)
1517 {
1518         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1519                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1520         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1521                         idev->dev->ifindex, &idev->cnf);
1522 }
1523
1524 static void devinet_sysctl_unregister(struct in_device *idev)
1525 {
1526         __devinet_sysctl_unregister(&idev->cnf);
1527         neigh_sysctl_unregister(idev->arp_parms);
1528 }
1529
1530 static struct ctl_table ctl_forward_entry[] = {
1531         {
1532                 .ctl_name       = NET_IPV4_FORWARD,
1533                 .procname       = "ip_forward",
1534                 .data           = &ipv4_devconf.data[
1535                                         NET_IPV4_CONF_FORWARDING - 1],
1536                 .maxlen         = sizeof(int),
1537                 .mode           = 0644,
1538                 .proc_handler   = devinet_sysctl_forward,
1539                 .strategy       = devinet_conf_sysctl,
1540                 .extra1         = &ipv4_devconf,
1541                 .extra2         = &init_net,
1542         },
1543         { },
1544 };
1545
1546 static __net_initdata struct ctl_path net_ipv4_path[] = {
1547         { .procname = "net", .ctl_name = CTL_NET, },
1548         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1549         { },
1550 };
1551 #endif
1552
1553 static __net_init int devinet_init_net(struct net *net)
1554 {
1555         int err;
1556         struct ipv4_devconf *all, *dflt;
1557 #ifdef CONFIG_SYSCTL
1558         struct ctl_table *tbl = ctl_forward_entry;
1559         struct ctl_table_header *forw_hdr;
1560 #endif
1561
1562         err = -ENOMEM;
1563         all = &ipv4_devconf;
1564         dflt = &ipv4_devconf_dflt;
1565
1566         if (net != &init_net) {
1567                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1568                 if (all == NULL)
1569                         goto err_alloc_all;
1570
1571                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1572                 if (dflt == NULL)
1573                         goto err_alloc_dflt;
1574
1575 #ifdef CONFIG_SYSCTL
1576                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1577                 if (tbl == NULL)
1578                         goto err_alloc_ctl;
1579
1580                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1581                 tbl[0].extra1 = all;
1582                 tbl[0].extra2 = net;
1583 #endif
1584         }
1585
1586 #ifdef CONFIG_SYSCTL
1587         err = __devinet_sysctl_register(net, "all",
1588                         NET_PROTO_CONF_ALL, all);
1589         if (err < 0)
1590                 goto err_reg_all;
1591
1592         err = __devinet_sysctl_register(net, "default",
1593                         NET_PROTO_CONF_DEFAULT, dflt);
1594         if (err < 0)
1595                 goto err_reg_dflt;
1596
1597         err = -ENOMEM;
1598         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1599         if (forw_hdr == NULL)
1600                 goto err_reg_ctl;
1601         net->ipv4.forw_hdr = forw_hdr;
1602 #endif
1603
1604         net->ipv4.devconf_all = all;
1605         net->ipv4.devconf_dflt = dflt;
1606         return 0;
1607
1608 #ifdef CONFIG_SYSCTL
1609 err_reg_ctl:
1610         __devinet_sysctl_unregister(dflt);
1611 err_reg_dflt:
1612         __devinet_sysctl_unregister(all);
1613 err_reg_all:
1614         if (tbl != ctl_forward_entry)
1615                 kfree(tbl);
1616 err_alloc_ctl:
1617 #endif
1618         if (dflt != &ipv4_devconf_dflt)
1619                 kfree(dflt);
1620 err_alloc_dflt:
1621         if (all != &ipv4_devconf)
1622                 kfree(all);
1623 err_alloc_all:
1624         return err;
1625 }
1626
1627 static __net_exit void devinet_exit_net(struct net *net)
1628 {
1629 #ifdef CONFIG_SYSCTL
1630         struct ctl_table *tbl;
1631
1632         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1633         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1634         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1635         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1636         kfree(tbl);
1637 #endif
1638         kfree(net->ipv4.devconf_dflt);
1639         kfree(net->ipv4.devconf_all);
1640 }
1641
1642 static __net_initdata struct pernet_operations devinet_ops = {
1643         .init = devinet_init_net,
1644         .exit = devinet_exit_net,
1645 };
1646
1647 void __init devinet_init(void)
1648 {
1649         register_pernet_subsys(&devinet_ops);
1650
1651         register_gifconf(PF_INET, inet_gifconf);
1652         register_netdevice_notifier(&ip_netdev_notifier);
1653
1654         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1655         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1656         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1657 }
1658
1659 EXPORT_SYMBOL(in_dev_finish_destroy);
1660 EXPORT_SYMBOL(inet_select_addr);
1661 EXPORT_SYMBOL(inetdev_by_index);
1662 EXPORT_SYMBOL(register_inetaddr_notifier);
1663 EXPORT_SYMBOL(unregister_inetaddr_notifier);