[IPV6]: Simplify IPv6 control sockets creation.
[linux-2.6] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Compile-time verbosity of the ND_PRINTKn() helpers.  A helper of level n
 * produces output only when ND_DEBUG >= n; level 0 always does.
 * Set to 3 to get tracing...
 */
#define ND_DEBUG 1

/* printk() that is dropped whenever net_ratelimit() returns zero. */
#define ND_PRINTK(fmt, args...)                         \
        do {                                            \
                if (net_ratelimit()) {                  \
                        printk(fmt, ## args);           \
                }                                       \
        } while(0)

/* Discards its arguments; they are never evaluated. */
#define ND_NOPRINTK(x...)       do { ; } while(0)

#define ND_PRINTK0              ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1              ND_PRINTK
#else
#define ND_PRINTK1              ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2              ND_PRINTK
#else
#define ND_PRINTK2              ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3              ND_PRINTK
#else
#define ND_PRINTK3              ND_NOPRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <net/inet_common.h>
88 #include <linux/proc_fs.h>
89
90 #include <linux/netfilter.h>
91 #include <linux/netfilter_ipv6.h>
92
/*
 * Forward declarations of the callbacks that are referenced by the
 * neigh_ops tables and by nd_tbl before their definitions appear.
 */
static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
100
101 static struct neigh_ops ndisc_generic_ops = {
102         .family =               AF_INET6,
103         .solicit =              ndisc_solicit,
104         .error_report =         ndisc_error_report,
105         .output =               neigh_resolve_output,
106         .connected_output =     neigh_connected_output,
107         .hh_output =            dev_queue_xmit,
108         .queue_xmit =           dev_queue_xmit,
109 };
110
111 static struct neigh_ops ndisc_hh_ops = {
112         .family =               AF_INET6,
113         .solicit =              ndisc_solicit,
114         .error_report =         ndisc_error_report,
115         .output =               neigh_resolve_output,
116         .connected_output =     neigh_resolve_output,
117         .hh_output =            dev_queue_xmit,
118         .queue_xmit =           dev_queue_xmit,
119 };
120
121
122 static struct neigh_ops ndisc_direct_ops = {
123         .family =               AF_INET6,
124         .output =               dev_queue_xmit,
125         .connected_output =     dev_queue_xmit,
126         .hh_output =            dev_queue_xmit,
127         .queue_xmit =           dev_queue_xmit,
128 };
129
130 struct neigh_table nd_tbl = {
131         .family =       AF_INET6,
132         .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
133         .key_len =      sizeof(struct in6_addr),
134         .hash =         ndisc_hash,
135         .constructor =  ndisc_constructor,
136         .pconstructor = pndisc_constructor,
137         .pdestructor =  pndisc_destructor,
138         .proxy_redo =   pndisc_redo,
139         .id =           "ndisc_cache",
140         .parms = {
141                 .tbl =                  &nd_tbl,
142                 .base_reachable_time =  30 * HZ,
143                 .retrans_time =  1 * HZ,
144                 .gc_staletime = 60 * HZ,
145                 .reachable_time =               30 * HZ,
146                 .delay_probe_time =      5 * HZ,
147                 .queue_len =             3,
148                 .ucast_probes =  3,
149                 .mcast_probes =  3,
150                 .anycast_delay =         1 * HZ,
151                 .proxy_delay =          (8 * HZ) / 10,
152                 .proxy_qlen =           64,
153         },
154         .gc_interval =    30 * HZ,
155         .gc_thresh1 =    128,
156         .gc_thresh2 =    512,
157         .gc_thresh3 =   1024,
158 };
159
/*
 * ND options
 *
 * Result of ndisc_parse_options(): pointers into the received option area.
 * nd_opt_array[] is indexed by option type; the *_ri and *_useropts pairs
 * remember the first and the last option of their kind.
 */
struct ndisc_options {
        struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
#ifdef CONFIG_IPV6_ROUTE_INFO
        /* first / last ND_OPT_ROUTE_INFO option seen */
        struct nd_opt_hdr *nd_opts_ri;
        struct nd_opt_hdr *nd_opts_ri_end;
#endif
        /* first / last option accepted by ndisc_is_useropt() */
        struct nd_opt_hdr *nd_useropts;
        struct nd_opt_hdr *nd_useropts_end;
};
170
/*
 * Named shortcuts for slots of ndisc_options.nd_opt_array[]; they are used
 * as member names, e.g. ndopts.nd_opts_src_lladdr.
 */
#define nd_opts_src_lladdr      nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr      nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi              nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end          nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh              nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu             nd_opt_array[ND_OPT_MTU]
177
/*
 * Total size in bytes of an ND option that carries len bytes of payload:
 * the 2-byte type/length header is added and the sum is rounded up to the
 * next multiple of 8.
 */
#define NDISC_OPT_SPACE(len)    (((len) + 2 + 7) & ~7)
179
/*
 * Number of padding bytes that sit between the option length field and
 * the start of the link-layer address for the given ARPHRD_* device type.
 *
 * Only IP-over-InfiniBand needs padding (2 bytes) at the moment; should
 * RFC 3831 IPv6-over-Fibre Channel ever be implemented it may need a pad
 * of 2 as well.
 */
static int ndisc_addr_option_pad(unsigned short type)
{
        if (type == ARPHRD_INFINIBAND)
                return 2;

        return 0;
}
193
194 static inline int ndisc_opt_addr_space(struct net_device *dev)
195 {
196         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
197 }
198
199 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
200                                   unsigned short addr_type)
201 {
202         int space = NDISC_OPT_SPACE(data_len);
203         int pad   = ndisc_addr_option_pad(addr_type);
204
205         opt[0] = type;
206         opt[1] = space>>3;
207
208         memset(opt + 2, 0, pad);
209         opt   += pad;
210         space -= pad;
211
212         memcpy(opt+2, data, data_len);
213         data_len += 2;
214         opt += data_len;
215         if ((space -= data_len) > 0)
216                 memset(opt, 0, space);
217         return opt + space;
218 }
219
220 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
221                                             struct nd_opt_hdr *end)
222 {
223         int type;
224         if (!cur || !end || cur >= end)
225                 return NULL;
226         type = cur->nd_opt_type;
227         do {
228                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
229         } while(cur < end && cur->nd_opt_type != type);
230         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
231 }
232
233 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
234 {
235         return (opt->nd_opt_type == ND_OPT_RDNSS);
236 }
237
238 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
239                                              struct nd_opt_hdr *end)
240 {
241         if (!cur || !end || cur >= end)
242                 return NULL;
243         do {
244                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
245         } while(cur < end && !ndisc_is_useropt(cur));
246         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
247 }
248
249 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
250                                                  struct ndisc_options *ndopts)
251 {
252         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
253
254         if (!nd_opt || opt_len < 0 || !ndopts)
255                 return NULL;
256         memset(ndopts, 0, sizeof(*ndopts));
257         while (opt_len) {
258                 int l;
259                 if (opt_len < sizeof(struct nd_opt_hdr))
260                         return NULL;
261                 l = nd_opt->nd_opt_len << 3;
262                 if (opt_len < l || l == 0)
263                         return NULL;
264                 switch (nd_opt->nd_opt_type) {
265                 case ND_OPT_SOURCE_LL_ADDR:
266                 case ND_OPT_TARGET_LL_ADDR:
267                 case ND_OPT_MTU:
268                 case ND_OPT_REDIRECT_HDR:
269                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
270                                 ND_PRINTK2(KERN_WARNING
271                                            "%s(): duplicated ND6 option found: type=%d\n",
272                                            __func__,
273                                            nd_opt->nd_opt_type);
274                         } else {
275                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
276                         }
277                         break;
278                 case ND_OPT_PREFIX_INFO:
279                         ndopts->nd_opts_pi_end = nd_opt;
280                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
281                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
282                         break;
283 #ifdef CONFIG_IPV6_ROUTE_INFO
284                 case ND_OPT_ROUTE_INFO:
285                         ndopts->nd_opts_ri_end = nd_opt;
286                         if (!ndopts->nd_opts_ri)
287                                 ndopts->nd_opts_ri = nd_opt;
288                         break;
289 #endif
290                 default:
291                         if (ndisc_is_useropt(nd_opt)) {
292                                 ndopts->nd_useropts_end = nd_opt;
293                                 if (!ndopts->nd_useropts)
294                                         ndopts->nd_useropts = nd_opt;
295                         } else {
296                                 /*
297                                  * Unknown options must be silently ignored,
298                                  * to accommodate future extension to the
299                                  * protocol.
300                                  */
301                                 ND_PRINTK2(KERN_NOTICE
302                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
303                                            __func__,
304                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
305                         }
306                 }
307                 opt_len -= l;
308                 nd_opt = ((void *)nd_opt) + l;
309         }
310         return ndopts;
311 }
312
313 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
314                                       struct net_device *dev)
315 {
316         u8 *lladdr = (u8 *)(p + 1);
317         int lladdrlen = p->nd_opt_len << 3;
318         int prepad = ndisc_addr_option_pad(dev->type);
319         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
320                 return NULL;
321         return (lladdr + prepad);
322 }
323
324 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
325 {
326         switch (dev->type) {
327         case ARPHRD_ETHER:
328         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
329         case ARPHRD_FDDI:
330                 ipv6_eth_mc_map(addr, buf);
331                 return 0;
332         case ARPHRD_IEEE802_TR:
333                 ipv6_tr_mc_map(addr,buf);
334                 return 0;
335         case ARPHRD_ARCNET:
336                 ipv6_arcnet_mc_map(addr, buf);
337                 return 0;
338         case ARPHRD_INFINIBAND:
339                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
340                 return 0;
341         default:
342                 if (dir) {
343                         memcpy(buf, dev->broadcast, dev->addr_len);
344                         return 0;
345                 }
346         }
347         return -EINVAL;
348 }
349
350 EXPORT_SYMBOL(ndisc_mc_map);
351
352 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
353 {
354         const u32 *p32 = pkey;
355         u32 addr_hash, i;
356
357         addr_hash = 0;
358         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
359                 addr_hash ^= *p32++;
360
361         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
362 }
363
364 static int ndisc_constructor(struct neighbour *neigh)
365 {
366         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
367         struct net_device *dev = neigh->dev;
368         struct inet6_dev *in6_dev;
369         struct neigh_parms *parms;
370         int is_multicast = ipv6_addr_is_multicast(addr);
371
372         rcu_read_lock();
373         in6_dev = in6_dev_get(dev);
374         if (in6_dev == NULL) {
375                 rcu_read_unlock();
376                 return -EINVAL;
377         }
378
379         parms = in6_dev->nd_parms;
380         __neigh_parms_put(neigh->parms);
381         neigh->parms = neigh_parms_clone(parms);
382         rcu_read_unlock();
383
384         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
385         if (!dev->header_ops) {
386                 neigh->nud_state = NUD_NOARP;
387                 neigh->ops = &ndisc_direct_ops;
388                 neigh->output = neigh->ops->queue_xmit;
389         } else {
390                 if (is_multicast) {
391                         neigh->nud_state = NUD_NOARP;
392                         ndisc_mc_map(addr, neigh->ha, dev, 1);
393                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
394                         neigh->nud_state = NUD_NOARP;
395                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
396                         if (dev->flags&IFF_LOOPBACK)
397                                 neigh->type = RTN_LOCAL;
398                 } else if (dev->flags&IFF_POINTOPOINT) {
399                         neigh->nud_state = NUD_NOARP;
400                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
401                 }
402                 if (dev->header_ops->cache)
403                         neigh->ops = &ndisc_hh_ops;
404                 else
405                         neigh->ops = &ndisc_generic_ops;
406                 if (neigh->nud_state&NUD_VALID)
407                         neigh->output = neigh->ops->connected_output;
408                 else
409                         neigh->output = neigh->ops->output;
410         }
411         in6_dev_put(in6_dev);
412         return 0;
413 }
414
415 static int pndisc_constructor(struct pneigh_entry *n)
416 {
417         struct in6_addr *addr = (struct in6_addr*)&n->key;
418         struct in6_addr maddr;
419         struct net_device *dev = n->dev;
420
421         if (dev == NULL || __in6_dev_get(dev) == NULL)
422                 return -EINVAL;
423         addrconf_addr_solict_mult(addr, &maddr);
424         ipv6_dev_mc_inc(dev, &maddr);
425         return 0;
426 }
427
428 static void pndisc_destructor(struct pneigh_entry *n)
429 {
430         struct in6_addr *addr = (struct in6_addr*)&n->key;
431         struct in6_addr maddr;
432         struct net_device *dev = n->dev;
433
434         if (dev == NULL || __in6_dev_get(dev) == NULL)
435                 return;
436         addrconf_addr_solict_mult(addr, &maddr);
437         ipv6_dev_mc_dec(dev, &maddr);
438 }
439
440 /*
441  *      Send a Neighbour Advertisement
442  */
443 static void __ndisc_send(struct net_device *dev,
444                          struct neighbour *neigh,
445                          struct in6_addr *daddr, struct in6_addr *saddr,
446                          struct icmp6hdr *icmp6h, struct in6_addr *target,
447                          int llinfo)
448 {
449         struct flowi fl;
450         struct dst_entry *dst;
451         struct net *net = dev_net(dev);
452         struct sock *sk = net->ipv6.ndisc_sk;
453         struct sk_buff *skb;
454         struct icmp6hdr *hdr;
455         struct inet6_dev *idev;
456         int len;
457         int err;
458         u8 *opt, type;
459
460         type = icmp6h->icmp6_type;
461
462         icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
463
464         dst = icmp6_dst_alloc(dev, neigh, daddr);
465         if (!dst)
466                 return;
467
468         err = xfrm_lookup(&dst, &fl, NULL, 0);
469         if (err < 0)
470                 return;
471
472         if (!dev->addr_len)
473                 llinfo = 0;
474
475         len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
476         if (llinfo)
477                 len += ndisc_opt_addr_space(dev);
478
479         skb = sock_alloc_send_skb(sk,
480                                   (MAX_HEADER + sizeof(struct ipv6hdr) +
481                                    len + LL_RESERVED_SPACE(dev)),
482                                   1, &err);
483         if (!skb) {
484                 ND_PRINTK0(KERN_ERR
485                            "ICMPv6 ND: %s() failed to allocate an skb.\n",
486                            __func__);
487                 dst_release(dst);
488                 return;
489         }
490
491         skb_reserve(skb, LL_RESERVED_SPACE(dev));
492         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
493
494         skb->transport_header = skb->tail;
495         skb_put(skb, len);
496
497         hdr = (struct icmp6hdr *)skb_transport_header(skb);
498         memcpy(hdr, icmp6h, sizeof(*hdr));
499
500         opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
501         if (target) {
502                 ipv6_addr_copy((struct in6_addr *)opt, target);
503                 opt += sizeof(*target);
504         }
505
506         if (llinfo)
507                 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
508                                        dev->addr_len, dev->type);
509
510         hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
511                                            IPPROTO_ICMPV6,
512                                            csum_partial((__u8 *) hdr,
513                                                         len, 0));
514
515         skb->dst = dst;
516
517         idev = in6_dev_get(dst->dev);
518         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
519
520         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
521                       dst_output);
522         if (!err) {
523                 ICMP6MSGOUT_INC_STATS(idev, type);
524                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
525         }
526
527         if (likely(idev != NULL))
528                 in6_dev_put(idev);
529 }
530
531 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
532                    struct in6_addr *daddr, struct in6_addr *solicited_addr,
533                    int router, int solicited, int override, int inc_opt)
534 {
535         struct in6_addr tmpaddr;
536         struct inet6_ifaddr *ifp;
537         struct in6_addr *src_addr;
538         struct icmp6hdr icmp6h = {
539                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
540         };
541
542         /* for anycast or proxy, solicited_addr != src_addr */
543         ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
544         if (ifp) {
545                 src_addr = solicited_addr;
546                 if (ifp->flags & IFA_F_OPTIMISTIC)
547                         override = 0;
548                 in6_ifa_put(ifp);
549         } else {
550                 if (ipv6_dev_get_saddr(dev, daddr,
551                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
552                                        &tmpaddr))
553                         return;
554                 src_addr = &tmpaddr;
555         }
556
557         icmp6h.icmp6_router = router;
558         icmp6h.icmp6_solicited = solicited;
559         icmp6h.icmp6_override = override;
560
561         __ndisc_send(dev, neigh, daddr, src_addr,
562                      &icmp6h, solicited_addr,
563                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
564 }
565
566 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
567                    struct in6_addr *solicit,
568                    struct in6_addr *daddr, struct in6_addr *saddr)
569 {
570         struct in6_addr addr_buf;
571         struct icmp6hdr icmp6h = {
572                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
573         };
574
575         if (saddr == NULL) {
576                 if (ipv6_get_lladdr(dev, &addr_buf,
577                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
578                         return;
579                 saddr = &addr_buf;
580         }
581
582         __ndisc_send(dev, neigh, daddr, saddr,
583                      &icmp6h, solicit,
584                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
585 }
586
587 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
588                    struct in6_addr *daddr)
589 {
590         struct icmp6hdr icmp6h = {
591                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
592         };
593         int send_sllao = dev->addr_len;
594
595 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
596         /*
597          * According to section 2.2 of RFC 4429, we must not
598          * send router solicitations with a sllao from
599          * optimistic addresses, but we may send the solicitation
600          * if we don't include the sllao.  So here we check
601          * if our address is optimistic, and if so, we
602          * suppress the inclusion of the sllao.
603          */
604         if (send_sllao) {
605                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
606                                                            dev, 1);
607                 if (ifp) {
608                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
609                                 send_sllao = 0;
610                         }
611                         in6_ifa_put(ifp);
612                 } else {
613                         send_sllao = 0;
614                 }
615         }
616 #endif
617         __ndisc_send(dev, NULL, daddr, saddr,
618                      &icmp6h, NULL,
619                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
620 }
621
622
/*
 * error_report callback of the ND neigh_ops tables: signal a link failure
 * for @skb through dst_link_failure() and then free the packet.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
        /*
         *      "The sender MUST return an ICMP
         *       destination unreachable"
         */
        dst_link_failure(skb);
        kfree_skb(skb);
}
632
633 /* Called with locked neigh: either read or both */
634
635 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
636 {
637         struct in6_addr *saddr = NULL;
638         struct in6_addr mcaddr;
639         struct net_device *dev = neigh->dev;
640         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
641         int probes = atomic_read(&neigh->probes);
642
643         if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
644                 saddr = &ipv6_hdr(skb)->saddr;
645
646         if ((probes -= neigh->parms->ucast_probes) < 0) {
647                 if (!(neigh->nud_state & NUD_VALID)) {
648                         ND_PRINTK1(KERN_DEBUG
649                                    "%s(): trying to ucast probe in NUD_INVALID: "
650                                    NIP6_FMT "\n",
651                                    __func__,
652                                    NIP6(*target));
653                 }
654                 ndisc_send_ns(dev, neigh, target, target, saddr);
655         } else if ((probes -= neigh->parms->app_probes) < 0) {
656 #ifdef CONFIG_ARPD
657                 neigh_app_ns(neigh);
658 #endif
659         } else {
660                 addrconf_addr_solict_mult(target, &mcaddr);
661                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
662         }
663 }
664
665 static int pndisc_is_router(const void *pkey,
666                             struct net_device *dev)
667 {
668         struct pneigh_entry *n;
669         int ret = -1;
670
671         read_lock_bh(&nd_tbl.lock);
672         n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
673         if (n)
674                 ret = !!(n->flags & NTF_ROUTER);
675         read_unlock_bh(&nd_tbl.lock);
676
677         return ret;
678 }
679
680 static void ndisc_recv_ns(struct sk_buff *skb)
681 {
682         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
683         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
684         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
685         u8 *lladdr = NULL;
686         u32 ndoptlen = skb->tail - (skb->transport_header +
687                                     offsetof(struct nd_msg, opt));
688         struct ndisc_options ndopts;
689         struct net_device *dev = skb->dev;
690         struct inet6_ifaddr *ifp;
691         struct inet6_dev *idev = NULL;
692         struct neighbour *neigh;
693         int dad = ipv6_addr_any(saddr);
694         int inc;
695         int is_router = -1;
696
697         if (ipv6_addr_is_multicast(&msg->target)) {
698                 ND_PRINTK2(KERN_WARNING
699                            "ICMPv6 NS: multicast target address");
700                 return;
701         }
702
703         /*
704          * RFC2461 7.1.1:
705          * DAD has to be destined for solicited node multicast address.
706          */
707         if (dad &&
708             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
709               daddr->s6_addr32[1] == htonl(0x00000000) &&
710               daddr->s6_addr32[2] == htonl(0x00000001) &&
711               daddr->s6_addr [12] == 0xff )) {
712                 ND_PRINTK2(KERN_WARNING
713                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
714                 return;
715         }
716
717         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
718                 ND_PRINTK2(KERN_WARNING
719                            "ICMPv6 NS: invalid ND options\n");
720                 return;
721         }
722
723         if (ndopts.nd_opts_src_lladdr) {
724                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
725                 if (!lladdr) {
726                         ND_PRINTK2(KERN_WARNING
727                                    "ICMPv6 NS: invalid link-layer address length\n");
728                         return;
729                 }
730
731                 /* RFC2461 7.1.1:
732                  *      If the IP source address is the unspecified address,
733                  *      there MUST NOT be source link-layer address option
734                  *      in the message.
735                  */
736                 if (dad) {
737                         ND_PRINTK2(KERN_WARNING
738                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
739                         return;
740                 }
741         }
742
743         inc = ipv6_addr_is_multicast(daddr);
744
745         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
746         if (ifp) {
747
748                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
749                         if (dad) {
750                                 if (dev->type == ARPHRD_IEEE802_TR) {
751                                         const unsigned char *sadr;
752                                         sadr = skb_mac_header(skb);
753                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
754                                             sadr[9] == dev->dev_addr[1] &&
755                                             sadr[10] == dev->dev_addr[2] &&
756                                             sadr[11] == dev->dev_addr[3] &&
757                                             sadr[12] == dev->dev_addr[4] &&
758                                             sadr[13] == dev->dev_addr[5]) {
759                                                 /* looped-back to us */
760                                                 goto out;
761                                         }
762                                 }
763
764                                 /*
765                                  * We are colliding with another node
766                                  * who is doing DAD
767                                  * so fail our DAD process
768                                  */
769                                 addrconf_dad_failure(ifp);
770                                 return;
771                         } else {
772                                 /*
773                                  * This is not a dad solicitation.
774                                  * If we are an optimistic node,
775                                  * we should respond.
776                                  * Otherwise, we should ignore it.
777                                  */
778                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
779                                         goto out;
780                         }
781                 }
782
783                 idev = ifp->idev;
784         } else {
785                 idev = in6_dev_get(dev);
786                 if (!idev) {
787                         /* XXX: count this drop? */
788                         return;
789                 }
790
791                 if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
792                     (idev->cnf.forwarding &&
793                      (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
794                      (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
795                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
796                             skb->pkt_type != PACKET_HOST &&
797                             inc != 0 &&
798                             idev->nd_parms->proxy_delay != 0) {
799                                 /*
800                                  * for anycast or proxy,
801                                  * sender should delay its response
802                                  * by a random time between 0 and
803                                  * MAX_ANYCAST_DELAY_TIME seconds.
804                                  * (RFC2461) -- yoshfuji
805                                  */
806                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
807                                 if (n)
808                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
809                                 goto out;
810                         }
811                 } else
812                         goto out;
813         }
814
815         if (is_router < 0)
816                 is_router = !!idev->cnf.forwarding;
817
818         if (dad) {
819                 struct in6_addr maddr;
820
821                 ipv6_addr_all_nodes(&maddr);
822                 ndisc_send_na(dev, NULL, &maddr, &msg->target,
823                               is_router, 0, (ifp != NULL), 1);
824                 goto out;
825         }
826
827         if (inc)
828                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
829         else
830                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
831
832         /*
833          *      update / create cache entry
834          *      for the source address
835          */
836         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
837                                !inc || lladdr || !dev->addr_len);
838         if (neigh)
839                 neigh_update(neigh, lladdr, NUD_STALE,
840                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
841                              NEIGH_UPDATE_F_OVERRIDE);
842         if (neigh || !dev->header_ops) {
843                 ndisc_send_na(dev, neigh, saddr, &msg->target,
844                               is_router,
845                               1, (ifp != NULL && inc), inc);
846                 if (neigh)
847                         neigh_release(neigh);
848         }
849
850 out:
851         if (ifp)
852                 in6_ifa_put(ifp);
853         else
854                 in6_dev_put(idev);
855
856         return;
857 }
858
859 static void ndisc_recv_na(struct sk_buff *skb)
860 {
861         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
862         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
863         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
864         u8 *lladdr = NULL;
865         u32 ndoptlen = skb->tail - (skb->transport_header +
866                                     offsetof(struct nd_msg, opt));
867         struct ndisc_options ndopts;
868         struct net_device *dev = skb->dev;
869         struct inet6_ifaddr *ifp;
870         struct neighbour *neigh;
871
872         if (skb->len < sizeof(struct nd_msg)) {
873                 ND_PRINTK2(KERN_WARNING
874                            "ICMPv6 NA: packet too short\n");
875                 return;
876         }
877
878         if (ipv6_addr_is_multicast(&msg->target)) {
879                 ND_PRINTK2(KERN_WARNING
880                            "ICMPv6 NA: target address is multicast.\n");
881                 return;
882         }
883
884         if (ipv6_addr_is_multicast(daddr) &&
885             msg->icmph.icmp6_solicited) {
886                 ND_PRINTK2(KERN_WARNING
887                            "ICMPv6 NA: solicited NA is multicasted.\n");
888                 return;
889         }
890
891         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
892                 ND_PRINTK2(KERN_WARNING
893                            "ICMPv6 NS: invalid ND option\n");
894                 return;
895         }
896         if (ndopts.nd_opts_tgt_lladdr) {
897                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
898                 if (!lladdr) {
899                         ND_PRINTK2(KERN_WARNING
900                                    "ICMPv6 NA: invalid link-layer address length\n");
901                         return;
902                 }
903         }
904         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
905         if (ifp) {
906                 if (ifp->flags & IFA_F_TENTATIVE) {
907                         addrconf_dad_failure(ifp);
908                         return;
909                 }
910                 /* What should we make now? The advertisement
911                    is invalid, but ndisc specs say nothing
912                    about it. It could be misconfiguration, or
913                    an smart proxy agent tries to help us :-)
914                  */
915                 ND_PRINTK1(KERN_WARNING
916                            "ICMPv6 NA: someone advertises our address on %s!\n",
917                            ifp->idev->dev->name);
918                 in6_ifa_put(ifp);
919                 return;
920         }
921         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
922
923         if (neigh) {
924                 u8 old_flags = neigh->flags;
925
926                 if (neigh->nud_state & NUD_FAILED)
927                         goto out;
928
929                 /*
930                  * Don't update the neighbor cache entry on a proxy NA from
931                  * ourselves because either the proxied node is off link or it
932                  * has already sent a NA to us.
933                  */
934                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
935                     ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
936                     pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
937                         /* XXX: idev->cnf.prixy_ndp */
938                         goto out;
939                 }
940
941                 neigh_update(neigh, lladdr,
942                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
943                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
944                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
945                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
946                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
947
948                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
949                         /*
950                          * Change: router to host
951                          */
952                         struct rt6_info *rt;
953                         rt = rt6_get_dflt_router(saddr, dev);
954                         if (rt)
955                                 ip6_del_rt(rt);
956                 }
957
958 out:
959                 neigh_release(neigh);
960         }
961 }
962
963 static void ndisc_recv_rs(struct sk_buff *skb)
964 {
965         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
966         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
967         struct neighbour *neigh;
968         struct inet6_dev *idev;
969         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
970         struct ndisc_options ndopts;
971         u8 *lladdr = NULL;
972
973         if (skb->len < sizeof(*rs_msg))
974                 return;
975
976         idev = in6_dev_get(skb->dev);
977         if (!idev) {
978                 if (net_ratelimit())
979                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
980                 return;
981         }
982
983         /* Don't accept RS if we're not in router mode */
984         if (!idev->cnf.forwarding)
985                 goto out;
986
987         /*
988          * Don't update NCE if src = ::;
989          * this implies that the source node has no ip address assigned yet.
990          */
991         if (ipv6_addr_any(saddr))
992                 goto out;
993
994         /* Parse ND options */
995         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
996                 if (net_ratelimit())
997                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
998                 goto out;
999         }
1000
1001         if (ndopts.nd_opts_src_lladdr) {
1002                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1003                                              skb->dev);
1004                 if (!lladdr)
1005                         goto out;
1006         }
1007
1008         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1009         if (neigh) {
1010                 neigh_update(neigh, lladdr, NUD_STALE,
1011                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1012                              NEIGH_UPDATE_F_OVERRIDE|
1013                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1014                 neigh_release(neigh);
1015         }
1016 out:
1017         in6_dev_put(idev);
1018 }
1019
1020 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1021 {
1022         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1023         struct sk_buff *skb;
1024         struct nlmsghdr *nlh;
1025         struct nduseroptmsg *ndmsg;
1026         struct net *net = dev_net(ra->dev);
1027         int err;
1028         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1029                                     + (opt->nd_opt_len << 3));
1030         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1031
1032         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1033         if (skb == NULL) {
1034                 err = -ENOBUFS;
1035                 goto errout;
1036         }
1037
1038         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1039         if (nlh == NULL) {
1040                 goto nla_put_failure;
1041         }
1042
1043         ndmsg = nlmsg_data(nlh);
1044         ndmsg->nduseropt_family = AF_INET6;
1045         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1046         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1047         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1048         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1049
1050         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1051
1052         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1053                 &ipv6_hdr(ra)->saddr);
1054         nlmsg_end(skb, nlh);
1055
1056         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1057                           GFP_ATOMIC);
1058         if (err < 0)
1059                 goto errout;
1060
1061         return;
1062
1063 nla_put_failure:
1064         nlmsg_free(skb);
1065         err = -EMSGSIZE;
1066 errout:
1067         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1068 }
1069
1070 static void ndisc_router_discovery(struct sk_buff *skb)
1071 {
1072         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1073         struct neighbour *neigh = NULL;
1074         struct inet6_dev *in6_dev;
1075         struct rt6_info *rt = NULL;
1076         int lifetime;
1077         struct ndisc_options ndopts;
1078         int optlen;
1079         unsigned int pref = 0;
1080
1081         __u8 * opt = (__u8 *)(ra_msg + 1);
1082
1083         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1084
1085         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1086                 ND_PRINTK2(KERN_WARNING
1087                            "ICMPv6 RA: source address is not link-local.\n");
1088                 return;
1089         }
1090         if (optlen < 0) {
1091                 ND_PRINTK2(KERN_WARNING
1092                            "ICMPv6 RA: packet too short\n");
1093                 return;
1094         }
1095
1096         /*
1097          *      set the RA_RECV flag in the interface
1098          */
1099
1100         in6_dev = in6_dev_get(skb->dev);
1101         if (in6_dev == NULL) {
1102                 ND_PRINTK0(KERN_ERR
1103                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1104                            skb->dev->name);
1105                 return;
1106         }
1107         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1108                 in6_dev_put(in6_dev);
1109                 return;
1110         }
1111
1112         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1113                 in6_dev_put(in6_dev);
1114                 ND_PRINTK2(KERN_WARNING
1115                            "ICMP6 RA: invalid ND options\n");
1116                 return;
1117         }
1118
1119         if (in6_dev->if_flags & IF_RS_SENT) {
1120                 /*
1121                  *      flag that an RA was received after an RS was sent
1122                  *      out on this interface.
1123                  */
1124                 in6_dev->if_flags |= IF_RA_RCVD;
1125         }
1126
1127         /*
1128          * Remember the managed/otherconf flags from most recently
1129          * received RA message (RFC 2462) -- yoshfuji
1130          */
1131         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1132                                 IF_RA_OTHERCONF)) |
1133                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1134                                         IF_RA_MANAGED : 0) |
1135                                 (ra_msg->icmph.icmp6_addrconf_other ?
1136                                         IF_RA_OTHERCONF : 0);
1137
1138         if (!in6_dev->cnf.accept_ra_defrtr)
1139                 goto skip_defrtr;
1140
1141         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1142
1143 #ifdef CONFIG_IPV6_ROUTER_PREF
1144         pref = ra_msg->icmph.icmp6_router_pref;
1145         /* 10b is handled as if it were 00b (medium) */
1146         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1147             !in6_dev->cnf.accept_ra_rtr_pref)
1148                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1149 #endif
1150
1151         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1152
1153         if (rt)
1154                 neigh = rt->rt6i_nexthop;
1155
1156         if (rt && lifetime == 0) {
1157                 neigh_clone(neigh);
1158                 ip6_del_rt(rt);
1159                 rt = NULL;
1160         }
1161
1162         if (rt == NULL && lifetime) {
1163                 ND_PRINTK3(KERN_DEBUG
1164                            "ICMPv6 RA: adding default router.\n");
1165
1166                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1167                 if (rt == NULL) {
1168                         ND_PRINTK0(KERN_ERR
1169                                    "ICMPv6 RA: %s() failed to add default route.\n",
1170                                    __func__);
1171                         in6_dev_put(in6_dev);
1172                         return;
1173                 }
1174
1175                 neigh = rt->rt6i_nexthop;
1176                 if (neigh == NULL) {
1177                         ND_PRINTK0(KERN_ERR
1178                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1179                                    __func__);
1180                         dst_release(&rt->u.dst);
1181                         in6_dev_put(in6_dev);
1182                         return;
1183                 }
1184                 neigh->flags |= NTF_ROUTER;
1185         } else if (rt) {
1186                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1187         }
1188
1189         if (rt)
1190                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1191
1192         if (ra_msg->icmph.icmp6_hop_limit) {
1193                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1194                 if (rt)
1195                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1196         }
1197
1198 skip_defrtr:
1199
1200         /*
1201          *      Update Reachable Time and Retrans Timer
1202          */
1203
1204         if (in6_dev->nd_parms) {
1205                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1206
1207                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1208                         rtime = (rtime*HZ)/1000;
1209                         if (rtime < HZ/10)
1210                                 rtime = HZ/10;
1211                         in6_dev->nd_parms->retrans_time = rtime;
1212                         in6_dev->tstamp = jiffies;
1213                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1214                 }
1215
1216                 rtime = ntohl(ra_msg->reachable_time);
1217                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1218                         rtime = (rtime*HZ)/1000;
1219
1220                         if (rtime < HZ/10)
1221                                 rtime = HZ/10;
1222
1223                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1224                                 in6_dev->nd_parms->base_reachable_time = rtime;
1225                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1226                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1227                                 in6_dev->tstamp = jiffies;
1228                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1229                         }
1230                 }
1231         }
1232
1233         /*
1234          *      Process options.
1235          */
1236
1237         if (!neigh)
1238                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1239                                        skb->dev, 1);
1240         if (neigh) {
1241                 u8 *lladdr = NULL;
1242                 if (ndopts.nd_opts_src_lladdr) {
1243                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1244                                                      skb->dev);
1245                         if (!lladdr) {
1246                                 ND_PRINTK2(KERN_WARNING
1247                                            "ICMPv6 RA: invalid link-layer address length\n");
1248                                 goto out;
1249                         }
1250                 }
1251                 neigh_update(neigh, lladdr, NUD_STALE,
1252                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1253                              NEIGH_UPDATE_F_OVERRIDE|
1254                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1255                              NEIGH_UPDATE_F_ISROUTER);
1256         }
1257
1258 #ifdef CONFIG_IPV6_ROUTE_INFO
1259         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1260                 struct nd_opt_hdr *p;
1261                 for (p = ndopts.nd_opts_ri;
1262                      p;
1263                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1264                         if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1265                                 continue;
1266                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1267                                       &ipv6_hdr(skb)->saddr);
1268                 }
1269         }
1270 #endif
1271
1272         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1273                 struct nd_opt_hdr *p;
1274                 for (p = ndopts.nd_opts_pi;
1275                      p;
1276                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1277                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1278                 }
1279         }
1280
1281         if (ndopts.nd_opts_mtu) {
1282                 __be32 n;
1283                 u32 mtu;
1284
1285                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1286                 mtu = ntohl(n);
1287
1288                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1289                         ND_PRINTK2(KERN_WARNING
1290                                    "ICMPv6 RA: invalid mtu: %d\n",
1291                                    mtu);
1292                 } else if (in6_dev->cnf.mtu6 != mtu) {
1293                         in6_dev->cnf.mtu6 = mtu;
1294
1295                         if (rt)
1296                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1297
1298                         rt6_mtu_change(skb->dev, mtu);
1299                 }
1300         }
1301
1302         if (ndopts.nd_useropts) {
1303                 struct nd_opt_hdr *p;
1304                 for (p = ndopts.nd_useropts;
1305                      p;
1306                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1307                         ndisc_ra_useropt(skb, p);
1308                 }
1309         }
1310
1311         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1312                 ND_PRINTK2(KERN_WARNING
1313                            "ICMPv6 RA: invalid RA options");
1314         }
1315 out:
1316         if (rt)
1317                 dst_release(&rt->u.dst);
1318         else if (neigh)
1319                 neigh_release(neigh);
1320         in6_dev_put(in6_dev);
1321 }
1322
1323 static void ndisc_redirect_rcv(struct sk_buff *skb)
1324 {
1325         struct inet6_dev *in6_dev;
1326         struct icmp6hdr *icmph;
1327         struct in6_addr *dest;
1328         struct in6_addr *target;        /* new first hop to destination */
1329         struct neighbour *neigh;
1330         int on_link = 0;
1331         struct ndisc_options ndopts;
1332         int optlen;
1333         u8 *lladdr = NULL;
1334
1335         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1336                 ND_PRINTK2(KERN_WARNING
1337                            "ICMPv6 Redirect: source address is not link-local.\n");
1338                 return;
1339         }
1340
1341         optlen = skb->tail - skb->transport_header;
1342         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1343
1344         if (optlen < 0) {
1345                 ND_PRINTK2(KERN_WARNING
1346                            "ICMPv6 Redirect: packet too short\n");
1347                 return;
1348         }
1349
1350         icmph = icmp6_hdr(skb);
1351         target = (struct in6_addr *) (icmph + 1);
1352         dest = target + 1;
1353
1354         if (ipv6_addr_is_multicast(dest)) {
1355                 ND_PRINTK2(KERN_WARNING
1356                            "ICMPv6 Redirect: destination address is multicast.\n");
1357                 return;
1358         }
1359
1360         if (ipv6_addr_equal(dest, target)) {
1361                 on_link = 1;
1362         } else if (ipv6_addr_type(target) !=
1363                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1364                 ND_PRINTK2(KERN_WARNING
1365                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1366                 return;
1367         }
1368
1369         in6_dev = in6_dev_get(skb->dev);
1370         if (!in6_dev)
1371                 return;
1372         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1373                 in6_dev_put(in6_dev);
1374                 return;
1375         }
1376
1377         /* RFC2461 8.1:
1378          *      The IP source address of the Redirect MUST be the same as the current
1379          *      first-hop router for the specified ICMP Destination Address.
1380          */
1381
1382         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1383                 ND_PRINTK2(KERN_WARNING
1384                            "ICMPv6 Redirect: invalid ND options\n");
1385                 in6_dev_put(in6_dev);
1386                 return;
1387         }
1388         if (ndopts.nd_opts_tgt_lladdr) {
1389                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1390                                              skb->dev);
1391                 if (!lladdr) {
1392                         ND_PRINTK2(KERN_WARNING
1393                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1394                         in6_dev_put(in6_dev);
1395                         return;
1396                 }
1397         }
1398
1399         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1400         if (neigh) {
1401                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1402                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1403                              on_link);
1404                 neigh_release(neigh);
1405         }
1406         in6_dev_put(in6_dev);
1407 }
1408
/*
 *	Build and send an ICMPv6 Redirect.
 *
 *	@skb:    the packet that triggers the redirect; the Redirect is
 *	         addressed to its IPv6 source and carries its IPv6
 *	         destination as the ICMP destination address.
 *	@neigh:  neighbour entry whose link-layer address is placed in the
 *	         target link-layer address option when the entry is valid.
 *	@target: address written into the ICMP target field.
 *
 *	The Redirect is sent from a link-local address of skb->dev.  Nothing
 *	is sent if no such address exists, if the target is neither the
 *	packet's destination nor a link-local unicast address, if no route
 *	back to the source is found, if that route goes through a gateway,
 *	or if xrlim_allow() refuses.
 */
void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
			 struct in6_addr *target)
{
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct sock *sk = net->ipv6.ndisc_sk;
	/* Fixed part: ICMPv6 header + target address + destination address. */
	int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
	struct sk_buff *buff;
	struct icmp6hdr *icmph;
	struct in6_addr saddr_buf;
	struct in6_addr *addrp;
	struct rt6_info *rt;
	struct dst_entry *dst;
	struct inet6_dev *idev;
	struct flowi fl;
	u8 *opt;
	int rd_len;
	int err;
	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;

	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
		ND_PRINTK2(KERN_WARNING
			   "ICMPv6 Redirect: no link-local address on %s\n",
			   dev->name);
		return;
	}

	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
	    ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		ND_PRINTK2(KERN_WARNING
			"ICMPv6 Redirect: target address is not link-local unicast.\n");
		return;
	}

	/* Route back to the sender of the triggering packet. */
	icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);

	dst = ip6_route_output(net, NULL, &fl);
	if (dst == NULL)
		return;

	/*
	 * NOTE(review): dst is not released here on failure; presumably
	 * xfrm_lookup() drops it itself in that case -- confirm.
	 */
	err = xfrm_lookup(&dst, &fl, NULL, 0);
	if (err)
		return;

	rt = (struct rt6_info *) dst;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		ND_PRINTK2(KERN_WARNING
			   "ICMPv6 Redirect: destination is not a neighbour.\n");
		dst_release(dst);
		return;
	}
	if (!xrlim_allow(dst, 1*HZ)) {
		dst_release(dst);
		return;
	}

	if (dev->addr_len) {
		/* Snapshot the link-layer address under the neighbour lock. */
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID) {
			memcpy(ha_buf, neigh->ha, dev->addr_len);
			read_unlock_bh(&neigh->lock);
			ha = ha_buf;
			len += ndisc_opt_addr_space(dev);
		} else
			read_unlock_bh(&neigh->lock);
	}

	/*
	 * Size of the redirected-header option: the 8-byte option header
	 * plus as much of the original packet as fits below IPV6_MIN_MTU,
	 * rounded down to a multiple of 8.
	 */
	rd_len = min_t(unsigned int,
		     IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
	rd_len &= ~0x7;
	len += rd_len;

	buff = sock_alloc_send_skb(sk,
				   (MAX_HEADER + sizeof(struct ipv6hdr) +
				    len + LL_RESERVED_SPACE(dev)),
				   1, &err);
	if (buff == NULL) {
		ND_PRINTK0(KERN_ERR
			   "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
			   __func__);
		dst_release(dst);
		return;
	}

	skb_reserve(buff, LL_RESERVED_SPACE(dev));
	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
		   IPPROTO_ICMPV6, len);

	/* The ICMPv6 message starts at the current tail of buff. */
	skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
	skb_put(buff, len);
	icmph = icmp6_hdr(buff);

	memset(icmph, 0, sizeof(struct icmp6hdr));
	icmph->icmp6_type = NDISC_REDIRECT;

	/*
	 *	copy target and destination addresses
	 */

	addrp = (struct in6_addr *)(icmph + 1);
	ipv6_addr_copy(addrp, target);
	addrp++;
	ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);

	opt = (u8*) (addrp + 1);

	/*
	 *	include target_address option
	 */

	if (ha)
		opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
					     dev->addr_len, dev->type);

	/*
	 *	build redirect option and copy skb over to the new packet.
	 */

	/* 8-byte option header: type, length in 8-octet units, 6 zero bytes. */
	memset(opt, 0, 8);
	*(opt++) = ND_OPT_REDIRECT_HDR;
	*(opt++) = (rd_len >> 3);
	opt += 6;

	/* rd_len includes the 8-byte option header written above. */
	memcpy(opt, ipv6_hdr(skb), rd_len - 8);

	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
					     len, IPPROTO_ICMPV6,
					     csum_partial((u8 *) icmph, len, 0));

	/* The route reference is handed over to the outgoing buffer. */
	buff->dst = dst;
	idev = in6_dev_get(dst->dev);
	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
		      dst_output);
	if (!err) {
		ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
	}

	if (likely(idev != NULL))
		in6_dev_put(idev);
}
1553
/*
 *	Run Neighbour Solicitation processing on @skb, then free it.
 *
 *	NOTE(review): presumably the callback used for solicitations that
 *	ndisc_recv_ns() queued with pneigh_enqueue() -- confirm against the
 *	nd_tbl definition.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);
	kfree_skb(skb);
}
1559
1560 int ndisc_rcv(struct sk_buff *skb)
1561 {
1562         struct nd_msg *msg;
1563
1564         if (!pskb_may_pull(skb, skb->len))
1565                 return 0;
1566
1567         msg = (struct nd_msg *)skb_transport_header(skb);
1568
1569         __skb_push(skb, skb->data - skb_transport_header(skb));
1570
1571         if (ipv6_hdr(skb)->hop_limit != 255) {
1572                 ND_PRINTK2(KERN_WARNING
1573                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1574                            ipv6_hdr(skb)->hop_limit);
1575                 return 0;
1576         }
1577
1578         if (msg->icmph.icmp6_code != 0) {
1579                 ND_PRINTK2(KERN_WARNING
1580                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1581                            msg->icmph.icmp6_code);
1582                 return 0;
1583         }
1584
1585         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1586
1587         switch (msg->icmph.icmp6_type) {
1588         case NDISC_NEIGHBOUR_SOLICITATION:
1589                 ndisc_recv_ns(skb);
1590                 break;
1591
1592         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1593                 ndisc_recv_na(skb);
1594                 break;
1595
1596         case NDISC_ROUTER_SOLICITATION:
1597                 ndisc_recv_rs(skb);
1598                 break;
1599
1600         case NDISC_ROUTER_ADVERTISEMENT:
1601                 ndisc_router_discovery(skb);
1602                 break;
1603
1604         case NDISC_REDIRECT:
1605                 ndisc_redirect_rcv(skb);
1606                 break;
1607         }
1608
1609         return 0;
1610 }
1611
1612 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1613 {
1614         struct net_device *dev = ptr;
1615         struct net *net = dev_net(dev);
1616
1617         switch (event) {
1618         case NETDEV_CHANGEADDR:
1619                 neigh_changeaddr(&nd_tbl, dev);
1620                 fib6_run_gc(~0UL, net);
1621                 break;
1622         case NETDEV_DOWN:
1623                 neigh_ifdown(&nd_tbl, dev);
1624                 fib6_run_gc(~0UL, net);
1625                 break;
1626         default:
1627                 break;
1628         }
1629
1630         return NOTIFY_DONE;
1631 }
1632
1633 static struct notifier_block ndisc_netdev_notifier = {
1634         .notifier_call = ndisc_netdev_event,
1635 };
1636
1637 #ifdef CONFIG_SYSCTL
1638 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1639                                          const char *func, const char *dev_name)
1640 {
1641         static char warncomm[TASK_COMM_LEN];
1642         static int warned;
1643         if (strcmp(warncomm, current->comm) && warned < 5) {
1644                 strcpy(warncomm, current->comm);
1645                 printk(KERN_WARNING
1646                         "process `%s' is using deprecated sysctl (%s) "
1647                         "net.ipv6.neigh.%s.%s; "
1648                         "Use net.ipv6.neigh.%s.%s_ms "
1649                         "instead.\n",
1650                         warncomm, func,
1651                         dev_name, ctl->procname,
1652                         dev_name, ctl->procname);
1653                 warned++;
1654         }
1655 }
1656
1657 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1658 {
1659         struct net_device *dev = ctl->extra1;
1660         struct inet6_dev *idev;
1661         int ret;
1662
1663         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1664             (strcmp(ctl->procname, "base_reachable_time") == 0))
1665                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1666
1667         if (strcmp(ctl->procname, "retrans_time") == 0)
1668                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1669
1670         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1671                 ret = proc_dointvec_jiffies(ctl, write,
1672                                             filp, buffer, lenp, ppos);
1673
1674         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1675                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1676                 ret = proc_dointvec_ms_jiffies(ctl, write,
1677                                                filp, buffer, lenp, ppos);
1678         else
1679                 ret = -1;
1680
1681         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1682                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1683                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1684                 idev->tstamp = jiffies;
1685                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1686                 in6_dev_put(idev);
1687         }
1688         return ret;
1689 }
1690
1691 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1692                                         int nlen, void __user *oldval,
1693                                         size_t __user *oldlenp,
1694                                         void __user *newval, size_t newlen)
1695 {
1696         struct net_device *dev = ctl->extra1;
1697         struct inet6_dev *idev;
1698         int ret;
1699
1700         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1701             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1702                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1703
1704         switch (ctl->ctl_name) {
1705         case NET_NEIGH_REACHABLE_TIME:
1706                 ret = sysctl_jiffies(ctl, name, nlen,
1707                                      oldval, oldlenp, newval, newlen);
1708                 break;
1709         case NET_NEIGH_RETRANS_TIME_MS:
1710         case NET_NEIGH_REACHABLE_TIME_MS:
1711                  ret = sysctl_ms_jiffies(ctl, name, nlen,
1712                                          oldval, oldlenp, newval, newlen);
1713                  break;
1714         default:
1715                 ret = 0;
1716         }
1717
1718         if (newval && newlen && ret > 0 &&
1719             dev && (idev = in6_dev_get(dev)) != NULL) {
1720                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1721                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1722                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1723                 idev->tstamp = jiffies;
1724                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1725                 in6_dev_put(idev);
1726         }
1727
1728         return ret;
1729 }
1730
1731 #endif
1732
1733 static int ndisc_net_init(struct net *net)
1734 {
1735         struct ipv6_pinfo *np;
1736         struct sock *sk;
1737         int err;
1738
1739         err = inet_ctl_sock_create(&sk, PF_INET6,
1740                                    SOCK_RAW, IPPROTO_ICMPV6, net);
1741         if (err < 0) {
1742                 ND_PRINTK0(KERN_ERR
1743                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1744                            err);
1745                 return err;
1746         }
1747
1748         net->ipv6.ndisc_sk = sk;
1749
1750         np = inet6_sk(sk);
1751         np->hop_limit = 255;
1752         /* Do not loopback ndisc messages */
1753         np->mc_loop = 0;
1754
1755         return 0;
1756 }
1757
1758 static void ndisc_net_exit(struct net *net)
1759 {
1760         inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1761 }
1762
1763 static struct pernet_operations ndisc_net_ops = {
1764         .init = ndisc_net_init,
1765         .exit = ndisc_net_exit,
1766 };
1767
1768 int __init ndisc_init(void)
1769 {
1770         int err;
1771
1772         err = register_pernet_subsys(&ndisc_net_ops);
1773         if (err)
1774                 return err;
1775         /*
1776          * Initialize the neighbour table
1777          */
1778         neigh_table_init(&nd_tbl);
1779
1780 #ifdef CONFIG_SYSCTL
1781         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1782                                     NET_IPV6_NEIGH, "ipv6",
1783                                     &ndisc_ifinfo_sysctl_change,
1784                                     &ndisc_ifinfo_sysctl_strategy);
1785         if (err)
1786                 goto out_unregister_pernet;
1787 #endif
1788         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1789         if (err)
1790                 goto out_unregister_sysctl;
1791 out:
1792         return err;
1793
1794 out_unregister_sysctl:
1795 #ifdef CONFIG_SYSCTL
1796         neigh_sysctl_unregister(&nd_tbl.parms);
1797 out_unregister_pernet:
1798 #endif
1799         unregister_pernet_subsys(&ndisc_net_ops);
1800         goto out;
1801 }
1802
1803 void ndisc_cleanup(void)
1804 {
1805         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1806 #ifdef CONFIG_SYSCTL
1807         neigh_sysctl_unregister(&nd_tbl.parms);
1808 #endif
1809         neigh_table_clear(&nd_tbl);
1810         unregister_pernet_subsys(&ndisc_net_ops);
1811 }