Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Debug verbosity for Neighbour Discovery messages.
 * ND_PRINTK<n>() produces output only when ND_DEBUG >= n; level 0 is
 * always active.  Set to 3 to get tracing...
 */
#define ND_DEBUG 1

/* Rate-limited printk, and a variant that swallows its arguments. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
#define ND_NOPRINTK(x...) do { ; } while(0)

#define ND_PRINTK0 ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1 ND_PRINTK
#else
#define ND_PRINTK1 ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2 ND_PRINTK
#else
#define ND_PRINTK2 ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3 ND_PRINTK
#else
#define ND_PRINTK3 ND_NOPRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <linux/proc_fs.h>
88
89 #include <linux/netfilter.h>
90 #include <linux/netfilter_ipv6.h>
91
92 static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
93 static int ndisc_constructor(struct neighbour *neigh);
94 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
95 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
96 static int pndisc_constructor(struct pneigh_entry *n);
97 static void pndisc_destructor(struct pneigh_entry *n);
98 static void pndisc_redo(struct sk_buff *skb);
99
100 static struct neigh_ops ndisc_generic_ops = {
101         .family =               AF_INET6,
102         .solicit =              ndisc_solicit,
103         .error_report =         ndisc_error_report,
104         .output =               neigh_resolve_output,
105         .connected_output =     neigh_connected_output,
106         .hh_output =            dev_queue_xmit,
107         .queue_xmit =           dev_queue_xmit,
108 };
109
110 static struct neigh_ops ndisc_hh_ops = {
111         .family =               AF_INET6,
112         .solicit =              ndisc_solicit,
113         .error_report =         ndisc_error_report,
114         .output =               neigh_resolve_output,
115         .connected_output =     neigh_resolve_output,
116         .hh_output =            dev_queue_xmit,
117         .queue_xmit =           dev_queue_xmit,
118 };
119
120
121 static struct neigh_ops ndisc_direct_ops = {
122         .family =               AF_INET6,
123         .output =               dev_queue_xmit,
124         .connected_output =     dev_queue_xmit,
125         .hh_output =            dev_queue_xmit,
126         .queue_xmit =           dev_queue_xmit,
127 };
128
129 struct neigh_table nd_tbl = {
130         .family =       AF_INET6,
131         .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
132         .key_len =      sizeof(struct in6_addr),
133         .hash =         ndisc_hash,
134         .constructor =  ndisc_constructor,
135         .pconstructor = pndisc_constructor,
136         .pdestructor =  pndisc_destructor,
137         .proxy_redo =   pndisc_redo,
138         .id =           "ndisc_cache",
139         .parms = {
140                 .tbl =                  &nd_tbl,
141                 .base_reachable_time =  30 * HZ,
142                 .retrans_time =  1 * HZ,
143                 .gc_staletime = 60 * HZ,
144                 .reachable_time =               30 * HZ,
145                 .delay_probe_time =      5 * HZ,
146                 .queue_len =             3,
147                 .ucast_probes =  3,
148                 .mcast_probes =  3,
149                 .anycast_delay =         1 * HZ,
150                 .proxy_delay =          (8 * HZ) / 10,
151                 .proxy_qlen =           64,
152         },
153         .gc_interval =    30 * HZ,
154         .gc_thresh1 =    128,
155         .gc_thresh2 =    512,
156         .gc_thresh3 =   1024,
157 };
158
159 /* ND options */
160 struct ndisc_options {
161         struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
162 #ifdef CONFIG_IPV6_ROUTE_INFO
163         struct nd_opt_hdr *nd_opts_ri;
164         struct nd_opt_hdr *nd_opts_ri_end;
165 #endif
166         struct nd_opt_hdr *nd_useropts;
167         struct nd_opt_hdr *nd_useropts_end;
168 };
169
/* Named aliases for slots of ndisc_options.nd_opt_array. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
/* prefix information may repeat: first and last occurrence */
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
176
/*
 * Bytes occupied by an option carrying 'len' bytes of payload: two bytes
 * of option header are added and the total is rounded up to a multiple
 * of 8.
 */
#define NDISC_OPT_SPACE(len) (((len) + 2 + 7) & ~7)
178
179 /*
180  * Return the padding between the option length and the start of the
181  * link addr.  Currently only IP-over-InfiniBand needs this, although
182  * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
183  * also need a pad of 2.
184  */
185 static int ndisc_addr_option_pad(unsigned short type)
186 {
187         switch (type) {
188         case ARPHRD_INFINIBAND: return 2;
189         default:                return 0;
190         }
191 }
192
193 static inline int ndisc_opt_addr_space(struct net_device *dev)
194 {
195         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
196 }
197
198 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
199                                   unsigned short addr_type)
200 {
201         int space = NDISC_OPT_SPACE(data_len);
202         int pad   = ndisc_addr_option_pad(addr_type);
203
204         opt[0] = type;
205         opt[1] = space>>3;
206
207         memset(opt + 2, 0, pad);
208         opt   += pad;
209         space -= pad;
210
211         memcpy(opt+2, data, data_len);
212         data_len += 2;
213         opt += data_len;
214         if ((space -= data_len) > 0)
215                 memset(opt, 0, space);
216         return opt + space;
217 }
218
219 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
220                                             struct nd_opt_hdr *end)
221 {
222         int type;
223         if (!cur || !end || cur >= end)
224                 return NULL;
225         type = cur->nd_opt_type;
226         do {
227                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
228         } while(cur < end && cur->nd_opt_type != type);
229         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
230 }
231
232 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
233 {
234         return (opt->nd_opt_type == ND_OPT_RDNSS);
235 }
236
237 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
238                                              struct nd_opt_hdr *end)
239 {
240         if (!cur || !end || cur >= end)
241                 return NULL;
242         do {
243                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
244         } while(cur < end && !ndisc_is_useropt(cur));
245         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
246 }
247
248 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
249                                                  struct ndisc_options *ndopts)
250 {
251         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
252
253         if (!nd_opt || opt_len < 0 || !ndopts)
254                 return NULL;
255         memset(ndopts, 0, sizeof(*ndopts));
256         while (opt_len) {
257                 int l;
258                 if (opt_len < sizeof(struct nd_opt_hdr))
259                         return NULL;
260                 l = nd_opt->nd_opt_len << 3;
261                 if (opt_len < l || l == 0)
262                         return NULL;
263                 switch (nd_opt->nd_opt_type) {
264                 case ND_OPT_SOURCE_LL_ADDR:
265                 case ND_OPT_TARGET_LL_ADDR:
266                 case ND_OPT_MTU:
267                 case ND_OPT_REDIRECT_HDR:
268                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
269                                 ND_PRINTK2(KERN_WARNING
270                                            "%s(): duplicated ND6 option found: type=%d\n",
271                                            __func__,
272                                            nd_opt->nd_opt_type);
273                         } else {
274                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
275                         }
276                         break;
277                 case ND_OPT_PREFIX_INFO:
278                         ndopts->nd_opts_pi_end = nd_opt;
279                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
280                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
281                         break;
282 #ifdef CONFIG_IPV6_ROUTE_INFO
283                 case ND_OPT_ROUTE_INFO:
284                         ndopts->nd_opts_ri_end = nd_opt;
285                         if (!ndopts->nd_opts_ri)
286                                 ndopts->nd_opts_ri = nd_opt;
287                         break;
288 #endif
289                 default:
290                         if (ndisc_is_useropt(nd_opt)) {
291                                 ndopts->nd_useropts_end = nd_opt;
292                                 if (!ndopts->nd_useropts)
293                                         ndopts->nd_useropts = nd_opt;
294                         } else {
295                                 /*
296                                  * Unknown options must be silently ignored,
297                                  * to accommodate future extension to the
298                                  * protocol.
299                                  */
300                                 ND_PRINTK2(KERN_NOTICE
301                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
302                                            __func__,
303                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
304                         }
305                 }
306                 opt_len -= l;
307                 nd_opt = ((void *)nd_opt) + l;
308         }
309         return ndopts;
310 }
311
312 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
313                                       struct net_device *dev)
314 {
315         u8 *lladdr = (u8 *)(p + 1);
316         int lladdrlen = p->nd_opt_len << 3;
317         int prepad = ndisc_addr_option_pad(dev->type);
318         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
319                 return NULL;
320         return (lladdr + prepad);
321 }
322
323 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
324 {
325         switch (dev->type) {
326         case ARPHRD_ETHER:
327         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
328         case ARPHRD_FDDI:
329                 ipv6_eth_mc_map(addr, buf);
330                 return 0;
331         case ARPHRD_IEEE802_TR:
332                 ipv6_tr_mc_map(addr,buf);
333                 return 0;
334         case ARPHRD_ARCNET:
335                 ipv6_arcnet_mc_map(addr, buf);
336                 return 0;
337         case ARPHRD_INFINIBAND:
338                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
339                 return 0;
340         default:
341                 if (dir) {
342                         memcpy(buf, dev->broadcast, dev->addr_len);
343                         return 0;
344                 }
345         }
346         return -EINVAL;
347 }
348
349 EXPORT_SYMBOL(ndisc_mc_map);
350
351 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
352 {
353         const u32 *p32 = pkey;
354         u32 addr_hash, i;
355
356         addr_hash = 0;
357         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
358                 addr_hash ^= *p32++;
359
360         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
361 }
362
363 static int ndisc_constructor(struct neighbour *neigh)
364 {
365         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
366         struct net_device *dev = neigh->dev;
367         struct inet6_dev *in6_dev;
368         struct neigh_parms *parms;
369         int is_multicast = ipv6_addr_is_multicast(addr);
370
371         rcu_read_lock();
372         in6_dev = in6_dev_get(dev);
373         if (in6_dev == NULL) {
374                 rcu_read_unlock();
375                 return -EINVAL;
376         }
377
378         parms = in6_dev->nd_parms;
379         __neigh_parms_put(neigh->parms);
380         neigh->parms = neigh_parms_clone(parms);
381         rcu_read_unlock();
382
383         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
384         if (!dev->header_ops) {
385                 neigh->nud_state = NUD_NOARP;
386                 neigh->ops = &ndisc_direct_ops;
387                 neigh->output = neigh->ops->queue_xmit;
388         } else {
389                 if (is_multicast) {
390                         neigh->nud_state = NUD_NOARP;
391                         ndisc_mc_map(addr, neigh->ha, dev, 1);
392                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
393                         neigh->nud_state = NUD_NOARP;
394                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
395                         if (dev->flags&IFF_LOOPBACK)
396                                 neigh->type = RTN_LOCAL;
397                 } else if (dev->flags&IFF_POINTOPOINT) {
398                         neigh->nud_state = NUD_NOARP;
399                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
400                 }
401                 if (dev->header_ops->cache)
402                         neigh->ops = &ndisc_hh_ops;
403                 else
404                         neigh->ops = &ndisc_generic_ops;
405                 if (neigh->nud_state&NUD_VALID)
406                         neigh->output = neigh->ops->connected_output;
407                 else
408                         neigh->output = neigh->ops->output;
409         }
410         in6_dev_put(in6_dev);
411         return 0;
412 }
413
414 static int pndisc_constructor(struct pneigh_entry *n)
415 {
416         struct in6_addr *addr = (struct in6_addr*)&n->key;
417         struct in6_addr maddr;
418         struct net_device *dev = n->dev;
419
420         if (dev == NULL || __in6_dev_get(dev) == NULL)
421                 return -EINVAL;
422         addrconf_addr_solict_mult(addr, &maddr);
423         ipv6_dev_mc_inc(dev, &maddr);
424         return 0;
425 }
426
427 static void pndisc_destructor(struct pneigh_entry *n)
428 {
429         struct in6_addr *addr = (struct in6_addr*)&n->key;
430         struct in6_addr maddr;
431         struct net_device *dev = n->dev;
432
433         if (dev == NULL || __in6_dev_get(dev) == NULL)
434                 return;
435         addrconf_addr_solict_mult(addr, &maddr);
436         ipv6_dev_mc_dec(dev, &maddr);
437 }
438
439 /*
440  *      Send a Neighbour Advertisement
441  */
442 static void __ndisc_send(struct net_device *dev,
443                          struct neighbour *neigh,
444                          struct in6_addr *daddr, struct in6_addr *saddr,
445                          struct icmp6hdr *icmp6h, struct in6_addr *target,
446                          int llinfo)
447 {
448         struct flowi fl;
449         struct dst_entry *dst;
450         struct net *net = dev->nd_net;
451         struct sock *sk = net->ipv6.ndisc_sk;
452         struct sk_buff *skb;
453         struct icmp6hdr *hdr;
454         struct inet6_dev *idev;
455         int len;
456         int err;
457         u8 *opt, type;
458
459         type = icmp6h->icmp6_type;
460
461         icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);
462
463         dst = icmp6_dst_alloc(dev, neigh, daddr);
464         if (!dst)
465                 return;
466
467         err = xfrm_lookup(&dst, &fl, NULL, 0);
468         if (err < 0)
469                 return;
470
471         if (!dev->addr_len)
472                 llinfo = 0;
473
474         len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
475         if (llinfo)
476                 len += ndisc_opt_addr_space(dev);
477
478         skb = sock_alloc_send_skb(sk,
479                                   (MAX_HEADER + sizeof(struct ipv6hdr) +
480                                    len + LL_RESERVED_SPACE(dev)),
481                                   1, &err);
482         if (!skb) {
483                 ND_PRINTK0(KERN_ERR
484                            "ICMPv6 ND: %s() failed to allocate an skb.\n",
485                            __func__);
486                 dst_release(dst);
487                 return;
488         }
489
490         skb_reserve(skb, LL_RESERVED_SPACE(dev));
491         ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
492
493         skb->transport_header = skb->tail;
494         skb_put(skb, len);
495
496         hdr = (struct icmp6hdr *)skb_transport_header(skb);
497         memcpy(hdr, icmp6h, sizeof(*hdr));
498
499         opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
500         if (target) {
501                 ipv6_addr_copy((struct in6_addr *)opt, target);
502                 opt += sizeof(*target);
503         }
504
505         if (llinfo)
506                 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
507                                        dev->addr_len, dev->type);
508
509         hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
510                                            IPPROTO_ICMPV6,
511                                            csum_partial((__u8 *) hdr,
512                                                         len, 0));
513
514         skb->dst = dst;
515
516         idev = in6_dev_get(dst->dev);
517         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
518
519         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
520                       dst_output);
521         if (!err) {
522                 ICMP6MSGOUT_INC_STATS(idev, type);
523                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
524         }
525
526         if (likely(idev != NULL))
527                 in6_dev_put(idev);
528 }
529
530 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
531                    struct in6_addr *daddr, struct in6_addr *solicited_addr,
532                    int router, int solicited, int override, int inc_opt)
533 {
534         struct in6_addr tmpaddr;
535         struct inet6_ifaddr *ifp;
536         struct in6_addr *src_addr;
537         struct icmp6hdr icmp6h = {
538                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
539         };
540
541         /* for anycast or proxy, solicited_addr != src_addr */
542         ifp = ipv6_get_ifaddr(dev->nd_net, solicited_addr, dev, 1);
543         if (ifp) {
544                 src_addr = solicited_addr;
545                 if (ifp->flags & IFA_F_OPTIMISTIC)
546                         override = 0;
547                 in6_ifa_put(ifp);
548         } else {
549                 if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
550                         return;
551                 src_addr = &tmpaddr;
552         }
553
554         icmp6h.icmp6_router = router;
555         icmp6h.icmp6_solicited = solicited;
556         icmp6h.icmp6_override = override;
557
558         __ndisc_send(dev, neigh, daddr, src_addr,
559                      &icmp6h, solicited_addr,
560                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
561 }
562
563 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
564                    struct in6_addr *solicit,
565                    struct in6_addr *daddr, struct in6_addr *saddr)
566 {
567         struct in6_addr addr_buf;
568         struct icmp6hdr icmp6h = {
569                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
570         };
571
572         if (saddr == NULL) {
573                 if (ipv6_get_lladdr(dev, &addr_buf,
574                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
575                         return;
576                 saddr = &addr_buf;
577         }
578
579         __ndisc_send(dev, neigh, daddr, saddr,
580                      &icmp6h, solicit,
581                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
582 }
583
584 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
585                    struct in6_addr *daddr)
586 {
587         struct icmp6hdr icmp6h = {
588                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
589         };
590         int send_sllao = dev->addr_len;
591
592 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
593         /*
594          * According to section 2.2 of RFC 4429, we must not
595          * send router solicitations with a sllao from
596          * optimistic addresses, but we may send the solicitation
597          * if we don't include the sllao.  So here we check
598          * if our address is optimistic, and if so, we
599          * suppress the inclusion of the sllao.
600          */
601         if (send_sllao) {
602                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev->nd_net, saddr,
603                                                            dev, 1);
604                 if (ifp) {
605                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
606                                 send_sllao = 0;
607                         }
608                         in6_ifa_put(ifp);
609                 } else {
610                         send_sllao = 0;
611                 }
612         }
613 #endif
614         __ndisc_send(dev, NULL, daddr, saddr,
615                      &icmp6h, NULL,
616                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
617 }
618
619
/*
 * error_report callback of the ND neigh_ops: signal a link failure for
 * the packet's destination and free the packet.  @neigh is not used.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 *	"The sender MUST return an ICMP
	 *	 destination unreachable"
	 */
	dst_link_failure(skb);

	/* the skb is consumed here */
	kfree_skb(skb);
}
629
630 /* Called with locked neigh: either read or both */
631
632 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
633 {
634         struct in6_addr *saddr = NULL;
635         struct in6_addr mcaddr;
636         struct net_device *dev = neigh->dev;
637         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
638         int probes = atomic_read(&neigh->probes);
639
640         if (skb && ipv6_chk_addr(dev->nd_net, &ipv6_hdr(skb)->saddr, dev, 1))
641                 saddr = &ipv6_hdr(skb)->saddr;
642
643         if ((probes -= neigh->parms->ucast_probes) < 0) {
644                 if (!(neigh->nud_state & NUD_VALID)) {
645                         ND_PRINTK1(KERN_DEBUG
646                                    "%s(): trying to ucast probe in NUD_INVALID: "
647                                    NIP6_FMT "\n",
648                                    __func__,
649                                    NIP6(*target));
650                 }
651                 ndisc_send_ns(dev, neigh, target, target, saddr);
652         } else if ((probes -= neigh->parms->app_probes) < 0) {
653 #ifdef CONFIG_ARPD
654                 neigh_app_ns(neigh);
655 #endif
656         } else {
657                 addrconf_addr_solict_mult(target, &mcaddr);
658                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
659         }
660 }
661
662 static void ndisc_recv_ns(struct sk_buff *skb)
663 {
664         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
665         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
666         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
667         u8 *lladdr = NULL;
668         u32 ndoptlen = skb->tail - (skb->transport_header +
669                                     offsetof(struct nd_msg, opt));
670         struct ndisc_options ndopts;
671         struct net_device *dev = skb->dev;
672         struct inet6_ifaddr *ifp;
673         struct inet6_dev *idev = NULL;
674         struct neighbour *neigh;
675         struct pneigh_entry *pneigh = NULL;
676         int dad = ipv6_addr_any(saddr);
677         int inc;
678         int is_router;
679
680         if (ipv6_addr_is_multicast(&msg->target)) {
681                 ND_PRINTK2(KERN_WARNING
682                            "ICMPv6 NS: multicast target address");
683                 return;
684         }
685
686         /*
687          * RFC2461 7.1.1:
688          * DAD has to be destined for solicited node multicast address.
689          */
690         if (dad &&
691             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
692               daddr->s6_addr32[1] == htonl(0x00000000) &&
693               daddr->s6_addr32[2] == htonl(0x00000001) &&
694               daddr->s6_addr [12] == 0xff )) {
695                 ND_PRINTK2(KERN_WARNING
696                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
697                 return;
698         }
699
700         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
701                 ND_PRINTK2(KERN_WARNING
702                            "ICMPv6 NS: invalid ND options\n");
703                 return;
704         }
705
706         if (ndopts.nd_opts_src_lladdr) {
707                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
708                 if (!lladdr) {
709                         ND_PRINTK2(KERN_WARNING
710                                    "ICMPv6 NS: invalid link-layer address length\n");
711                         return;
712                 }
713
714                 /* RFC2461 7.1.1:
715                  *      If the IP source address is the unspecified address,
716                  *      there MUST NOT be source link-layer address option
717                  *      in the message.
718                  */
719                 if (dad) {
720                         ND_PRINTK2(KERN_WARNING
721                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
722                         return;
723                 }
724         }
725
726         inc = ipv6_addr_is_multicast(daddr);
727
728         ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1);
729         if (ifp) {
730
731                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
732                         if (dad) {
733                                 if (dev->type == ARPHRD_IEEE802_TR) {
734                                         const unsigned char *sadr;
735                                         sadr = skb_mac_header(skb);
736                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
737                                             sadr[9] == dev->dev_addr[1] &&
738                                             sadr[10] == dev->dev_addr[2] &&
739                                             sadr[11] == dev->dev_addr[3] &&
740                                             sadr[12] == dev->dev_addr[4] &&
741                                             sadr[13] == dev->dev_addr[5]) {
742                                                 /* looped-back to us */
743                                                 goto out;
744                                         }
745                                 }
746
747                                 /*
748                                  * We are colliding with another node
749                                  * who is doing DAD
750                                  * so fail our DAD process
751                                  */
752                                 addrconf_dad_failure(ifp);
753                                 return;
754                         } else {
755                                 /*
756                                  * This is not a dad solicitation.
757                                  * If we are an optimistic node,
758                                  * we should respond.
759                                  * Otherwise, we should ignore it.
760                                  */
761                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
762                                         goto out;
763                         }
764                 }
765
766                 idev = ifp->idev;
767         } else {
768                 idev = in6_dev_get(dev);
769                 if (!idev) {
770                         /* XXX: count this drop? */
771                         return;
772                 }
773
774                 if (ipv6_chk_acast_addr(dev, &msg->target) ||
775                     (idev->cnf.forwarding &&
776                      (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
777                      (pneigh = pneigh_lookup(&nd_tbl, dev->nd_net,
778                                              &msg->target, dev, 0)) != NULL)) {
779                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
780                             skb->pkt_type != PACKET_HOST &&
781                             inc != 0 &&
782                             idev->nd_parms->proxy_delay != 0) {
783                                 /*
784                                  * for anycast or proxy,
785                                  * sender should delay its response
786                                  * by a random time between 0 and
787                                  * MAX_ANYCAST_DELAY_TIME seconds.
788                                  * (RFC2461) -- yoshfuji
789                                  */
790                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
791                                 if (n)
792                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
793                                 goto out;
794                         }
795                 } else
796                         goto out;
797         }
798
799         is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding);
800
801         if (dad) {
802                 struct in6_addr maddr;
803
804                 ipv6_addr_all_nodes(&maddr);
805                 ndisc_send_na(dev, NULL, &maddr, &msg->target,
806                               is_router, 0, (ifp != NULL), 1);
807                 goto out;
808         }
809
810         if (inc)
811                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
812         else
813                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
814
815         /*
816          *      update / create cache entry
817          *      for the source address
818          */
819         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
820                                !inc || lladdr || !dev->addr_len);
821         if (neigh)
822                 neigh_update(neigh, lladdr, NUD_STALE,
823                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
824                              NEIGH_UPDATE_F_OVERRIDE);
825         if (neigh || !dev->header_ops) {
826                 ndisc_send_na(dev, neigh, saddr, &msg->target,
827                               is_router,
828                               1, (ifp != NULL && inc), inc);
829                 if (neigh)
830                         neigh_release(neigh);
831         }
832
833 out:
834         if (ifp)
835                 in6_ifa_put(ifp);
836         else
837                 in6_dev_put(idev);
838
839         return;
840 }
841
842 static void ndisc_recv_na(struct sk_buff *skb)
843 {
844         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
845         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
846         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
847         u8 *lladdr = NULL;
848         u32 ndoptlen = skb->tail - (skb->transport_header +
849                                     offsetof(struct nd_msg, opt));
850         struct ndisc_options ndopts;
851         struct net_device *dev = skb->dev;
852         struct inet6_ifaddr *ifp;
853         struct neighbour *neigh;
854
855         if (skb->len < sizeof(struct nd_msg)) {
856                 ND_PRINTK2(KERN_WARNING
857                            "ICMPv6 NA: packet too short\n");
858                 return;
859         }
860
861         if (ipv6_addr_is_multicast(&msg->target)) {
862                 ND_PRINTK2(KERN_WARNING
863                            "ICMPv6 NA: target address is multicast.\n");
864                 return;
865         }
866
867         if (ipv6_addr_is_multicast(daddr) &&
868             msg->icmph.icmp6_solicited) {
869                 ND_PRINTK2(KERN_WARNING
870                            "ICMPv6 NA: solicited NA is multicasted.\n");
871                 return;
872         }
873
874         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
875                 ND_PRINTK2(KERN_WARNING
876                            "ICMPv6 NS: invalid ND option\n");
877                 return;
878         }
879         if (ndopts.nd_opts_tgt_lladdr) {
880                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
881                 if (!lladdr) {
882                         ND_PRINTK2(KERN_WARNING
883                                    "ICMPv6 NA: invalid link-layer address length\n");
884                         return;
885                 }
886         }
887         ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1);
888         if (ifp) {
889                 if (ifp->flags & IFA_F_TENTATIVE) {
890                         addrconf_dad_failure(ifp);
891                         return;
892                 }
893                 /* What should we make now? The advertisement
894                    is invalid, but ndisc specs say nothing
895                    about it. It could be misconfiguration, or
896                    an smart proxy agent tries to help us :-)
897                  */
898                 ND_PRINTK1(KERN_WARNING
899                            "ICMPv6 NA: someone advertises our address on %s!\n",
900                            ifp->idev->dev->name);
901                 in6_ifa_put(ifp);
902                 return;
903         }
904         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
905
906         if (neigh) {
907                 u8 old_flags = neigh->flags;
908
909                 if (neigh->nud_state & NUD_FAILED)
910                         goto out;
911
912                 /*
913                  * Don't update the neighbor cache entry on a proxy NA from
914                  * ourselves because either the proxied node is off link or it
915                  * has already sent a NA to us.
916                  */
917                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
918                     ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
919                     pneigh_lookup(&nd_tbl, dev->nd_net, &msg->target, dev, 0)) {
920                         /* XXX: idev->cnf.prixy_ndp */
921                         goto out;
922                 }
923
924                 neigh_update(neigh, lladdr,
925                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
926                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
927                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
928                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
929                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
930
931                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
932                         /*
933                          * Change: router to host
934                          */
935                         struct rt6_info *rt;
936                         rt = rt6_get_dflt_router(saddr, dev);
937                         if (rt)
938                                 ip6_del_rt(rt);
939                 }
940
941 out:
942                 neigh_release(neigh);
943         }
944 }
945
946 static void ndisc_recv_rs(struct sk_buff *skb)
947 {
948         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
949         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
950         struct neighbour *neigh;
951         struct inet6_dev *idev;
952         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
953         struct ndisc_options ndopts;
954         u8 *lladdr = NULL;
955
956         if (skb->len < sizeof(*rs_msg))
957                 return;
958
959         idev = in6_dev_get(skb->dev);
960         if (!idev) {
961                 if (net_ratelimit())
962                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
963                 return;
964         }
965
966         /* Don't accept RS if we're not in router mode */
967         if (!idev->cnf.forwarding)
968                 goto out;
969
970         /*
971          * Don't update NCE if src = ::;
972          * this implies that the source node has no ip address assigned yet.
973          */
974         if (ipv6_addr_any(saddr))
975                 goto out;
976
977         /* Parse ND options */
978         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
979                 if (net_ratelimit())
980                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
981                 goto out;
982         }
983
984         if (ndopts.nd_opts_src_lladdr) {
985                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
986                                              skb->dev);
987                 if (!lladdr)
988                         goto out;
989         }
990
991         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
992         if (neigh) {
993                 neigh_update(neigh, lladdr, NUD_STALE,
994                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
995                              NEIGH_UPDATE_F_OVERRIDE|
996                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
997                 neigh_release(neigh);
998         }
999 out:
1000         in6_dev_put(idev);
1001 }
1002
1003 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1004 {
1005         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1006         struct sk_buff *skb;
1007         struct nlmsghdr *nlh;
1008         struct nduseroptmsg *ndmsg;
1009         struct net *net = ra->dev->nd_net;
1010         int err;
1011         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1012                                     + (opt->nd_opt_len << 3));
1013         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1014
1015         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1016         if (skb == NULL) {
1017                 err = -ENOBUFS;
1018                 goto errout;
1019         }
1020
1021         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1022         if (nlh == NULL) {
1023                 goto nla_put_failure;
1024         }
1025
1026         ndmsg = nlmsg_data(nlh);
1027         ndmsg->nduseropt_family = AF_INET6;
1028         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1029         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1030         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1031         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1032
1033         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1034
1035         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1036                 &ipv6_hdr(ra)->saddr);
1037         nlmsg_end(skb, nlh);
1038
1039         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1040                           GFP_ATOMIC);
1041         if (err < 0)
1042                 goto errout;
1043
1044         return;
1045
1046 nla_put_failure:
1047         nlmsg_free(skb);
1048         err = -EMSGSIZE;
1049 errout:
1050         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1051 }
1052
1053 static void ndisc_router_discovery(struct sk_buff *skb)
1054 {
1055         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1056         struct neighbour *neigh = NULL;
1057         struct inet6_dev *in6_dev;
1058         struct rt6_info *rt = NULL;
1059         int lifetime;
1060         struct ndisc_options ndopts;
1061         int optlen;
1062         unsigned int pref = 0;
1063
1064         __u8 * opt = (__u8 *)(ra_msg + 1);
1065
1066         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1067
1068         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1069                 ND_PRINTK2(KERN_WARNING
1070                            "ICMPv6 RA: source address is not link-local.\n");
1071                 return;
1072         }
1073         if (optlen < 0) {
1074                 ND_PRINTK2(KERN_WARNING
1075                            "ICMPv6 RA: packet too short\n");
1076                 return;
1077         }
1078
1079         /*
1080          *      set the RA_RECV flag in the interface
1081          */
1082
1083         in6_dev = in6_dev_get(skb->dev);
1084         if (in6_dev == NULL) {
1085                 ND_PRINTK0(KERN_ERR
1086                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1087                            skb->dev->name);
1088                 return;
1089         }
1090         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1091                 in6_dev_put(in6_dev);
1092                 return;
1093         }
1094
1095         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1096                 in6_dev_put(in6_dev);
1097                 ND_PRINTK2(KERN_WARNING
1098                            "ICMP6 RA: invalid ND options\n");
1099                 return;
1100         }
1101
1102         if (in6_dev->if_flags & IF_RS_SENT) {
1103                 /*
1104                  *      flag that an RA was received after an RS was sent
1105                  *      out on this interface.
1106                  */
1107                 in6_dev->if_flags |= IF_RA_RCVD;
1108         }
1109
1110         /*
1111          * Remember the managed/otherconf flags from most recently
1112          * received RA message (RFC 2462) -- yoshfuji
1113          */
1114         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1115                                 IF_RA_OTHERCONF)) |
1116                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1117                                         IF_RA_MANAGED : 0) |
1118                                 (ra_msg->icmph.icmp6_addrconf_other ?
1119                                         IF_RA_OTHERCONF : 0);
1120
1121         if (!in6_dev->cnf.accept_ra_defrtr)
1122                 goto skip_defrtr;
1123
1124         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1125
1126 #ifdef CONFIG_IPV6_ROUTER_PREF
1127         pref = ra_msg->icmph.icmp6_router_pref;
1128         /* 10b is handled as if it were 00b (medium) */
1129         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1130             !in6_dev->cnf.accept_ra_rtr_pref)
1131                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1132 #endif
1133
1134         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1135
1136         if (rt)
1137                 neigh = rt->rt6i_nexthop;
1138
1139         if (rt && lifetime == 0) {
1140                 neigh_clone(neigh);
1141                 ip6_del_rt(rt);
1142                 rt = NULL;
1143         }
1144
1145         if (rt == NULL && lifetime) {
1146                 ND_PRINTK3(KERN_DEBUG
1147                            "ICMPv6 RA: adding default router.\n");
1148
1149                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1150                 if (rt == NULL) {
1151                         ND_PRINTK0(KERN_ERR
1152                                    "ICMPv6 RA: %s() failed to add default route.\n",
1153                                    __func__);
1154                         in6_dev_put(in6_dev);
1155                         return;
1156                 }
1157
1158                 neigh = rt->rt6i_nexthop;
1159                 if (neigh == NULL) {
1160                         ND_PRINTK0(KERN_ERR
1161                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1162                                    __func__);
1163                         dst_release(&rt->u.dst);
1164                         in6_dev_put(in6_dev);
1165                         return;
1166                 }
1167                 neigh->flags |= NTF_ROUTER;
1168         } else if (rt) {
1169                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1170         }
1171
1172         if (rt)
1173                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1174
1175         if (ra_msg->icmph.icmp6_hop_limit) {
1176                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1177                 if (rt)
1178                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1179         }
1180
1181 skip_defrtr:
1182
1183         /*
1184          *      Update Reachable Time and Retrans Timer
1185          */
1186
1187         if (in6_dev->nd_parms) {
1188                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1189
1190                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1191                         rtime = (rtime*HZ)/1000;
1192                         if (rtime < HZ/10)
1193                                 rtime = HZ/10;
1194                         in6_dev->nd_parms->retrans_time = rtime;
1195                         in6_dev->tstamp = jiffies;
1196                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1197                 }
1198
1199                 rtime = ntohl(ra_msg->reachable_time);
1200                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1201                         rtime = (rtime*HZ)/1000;
1202
1203                         if (rtime < HZ/10)
1204                                 rtime = HZ/10;
1205
1206                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1207                                 in6_dev->nd_parms->base_reachable_time = rtime;
1208                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1209                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1210                                 in6_dev->tstamp = jiffies;
1211                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1212                         }
1213                 }
1214         }
1215
1216         /*
1217          *      Process options.
1218          */
1219
1220         if (!neigh)
1221                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1222                                        skb->dev, 1);
1223         if (neigh) {
1224                 u8 *lladdr = NULL;
1225                 if (ndopts.nd_opts_src_lladdr) {
1226                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1227                                                      skb->dev);
1228                         if (!lladdr) {
1229                                 ND_PRINTK2(KERN_WARNING
1230                                            "ICMPv6 RA: invalid link-layer address length\n");
1231                                 goto out;
1232                         }
1233                 }
1234                 neigh_update(neigh, lladdr, NUD_STALE,
1235                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1236                              NEIGH_UPDATE_F_OVERRIDE|
1237                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1238                              NEIGH_UPDATE_F_ISROUTER);
1239         }
1240
1241 #ifdef CONFIG_IPV6_ROUTE_INFO
1242         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1243                 struct nd_opt_hdr *p;
1244                 for (p = ndopts.nd_opts_ri;
1245                      p;
1246                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1247                         if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1248                                 continue;
1249                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1250                                       &ipv6_hdr(skb)->saddr);
1251                 }
1252         }
1253 #endif
1254
1255         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1256                 struct nd_opt_hdr *p;
1257                 for (p = ndopts.nd_opts_pi;
1258                      p;
1259                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1260                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1261                 }
1262         }
1263
1264         if (ndopts.nd_opts_mtu) {
1265                 __be32 n;
1266                 u32 mtu;
1267
1268                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1269                 mtu = ntohl(n);
1270
1271                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1272                         ND_PRINTK2(KERN_WARNING
1273                                    "ICMPv6 RA: invalid mtu: %d\n",
1274                                    mtu);
1275                 } else if (in6_dev->cnf.mtu6 != mtu) {
1276                         in6_dev->cnf.mtu6 = mtu;
1277
1278                         if (rt)
1279                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1280
1281                         rt6_mtu_change(skb->dev, mtu);
1282                 }
1283         }
1284
1285         if (ndopts.nd_useropts) {
1286                 struct nd_opt_hdr *p;
1287                 for (p = ndopts.nd_useropts;
1288                      p;
1289                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1290                         ndisc_ra_useropt(skb, p);
1291                 }
1292         }
1293
1294         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1295                 ND_PRINTK2(KERN_WARNING
1296                            "ICMPv6 RA: invalid RA options");
1297         }
1298 out:
1299         if (rt)
1300                 dst_release(&rt->u.dst);
1301         else if (neigh)
1302                 neigh_release(neigh);
1303         in6_dev_put(in6_dev);
1304 }
1305
1306 static void ndisc_redirect_rcv(struct sk_buff *skb)
1307 {
1308         struct inet6_dev *in6_dev;
1309         struct icmp6hdr *icmph;
1310         struct in6_addr *dest;
1311         struct in6_addr *target;        /* new first hop to destination */
1312         struct neighbour *neigh;
1313         int on_link = 0;
1314         struct ndisc_options ndopts;
1315         int optlen;
1316         u8 *lladdr = NULL;
1317
1318         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1319                 ND_PRINTK2(KERN_WARNING
1320                            "ICMPv6 Redirect: source address is not link-local.\n");
1321                 return;
1322         }
1323
1324         optlen = skb->tail - skb->transport_header;
1325         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1326
1327         if (optlen < 0) {
1328                 ND_PRINTK2(KERN_WARNING
1329                            "ICMPv6 Redirect: packet too short\n");
1330                 return;
1331         }
1332
1333         icmph = icmp6_hdr(skb);
1334         target = (struct in6_addr *) (icmph + 1);
1335         dest = target + 1;
1336
1337         if (ipv6_addr_is_multicast(dest)) {
1338                 ND_PRINTK2(KERN_WARNING
1339                            "ICMPv6 Redirect: destination address is multicast.\n");
1340                 return;
1341         }
1342
1343         if (ipv6_addr_equal(dest, target)) {
1344                 on_link = 1;
1345         } else if (ipv6_addr_type(target) !=
1346                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1347                 ND_PRINTK2(KERN_WARNING
1348                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1349                 return;
1350         }
1351
1352         in6_dev = in6_dev_get(skb->dev);
1353         if (!in6_dev)
1354                 return;
1355         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1356                 in6_dev_put(in6_dev);
1357                 return;
1358         }
1359
1360         /* RFC2461 8.1:
1361          *      The IP source address of the Redirect MUST be the same as the current
1362          *      first-hop router for the specified ICMP Destination Address.
1363          */
1364
1365         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1366                 ND_PRINTK2(KERN_WARNING
1367                            "ICMPv6 Redirect: invalid ND options\n");
1368                 in6_dev_put(in6_dev);
1369                 return;
1370         }
1371         if (ndopts.nd_opts_tgt_lladdr) {
1372                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1373                                              skb->dev);
1374                 if (!lladdr) {
1375                         ND_PRINTK2(KERN_WARNING
1376                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1377                         in6_dev_put(in6_dev);
1378                         return;
1379                 }
1380         }
1381
1382         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1383         if (neigh) {
1384                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1385                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1386                              on_link);
1387                 neigh_release(neigh);
1388         }
1389         in6_dev_put(in6_dev);
1390 }
1391
1392 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1393                          struct in6_addr *target)
1394 {
1395         struct net_device *dev = skb->dev;
1396         struct net *net = dev->nd_net;
1397         struct sock *sk = net->ipv6.ndisc_sk;
1398         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1399         struct sk_buff *buff;
1400         struct icmp6hdr *icmph;
1401         struct in6_addr saddr_buf;
1402         struct in6_addr *addrp;
1403         struct rt6_info *rt;
1404         struct dst_entry *dst;
1405         struct inet6_dev *idev;
1406         struct flowi fl;
1407         u8 *opt;
1408         int rd_len;
1409         int err;
1410         int hlen;
1411         u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1412
1413         if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1414                 ND_PRINTK2(KERN_WARNING
1415                            "ICMPv6 Redirect: no link-local address on %s\n",
1416                            dev->name);
1417                 return;
1418         }
1419
1420         if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1421             ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1422                 ND_PRINTK2(KERN_WARNING
1423                         "ICMPv6 Redirect: target address is not link-local unicast.\n");
1424                 return;
1425         }
1426
1427         icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
1428                          &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1429
1430         dst = ip6_route_output(net, NULL, &fl);
1431         if (dst == NULL)
1432                 return;
1433
1434         err = xfrm_lookup(&dst, &fl, NULL, 0);
1435         if (err)
1436                 return;
1437
1438         rt = (struct rt6_info *) dst;
1439
1440         if (rt->rt6i_flags & RTF_GATEWAY) {
1441                 ND_PRINTK2(KERN_WARNING
1442                            "ICMPv6 Redirect: destination is not a neighbour.\n");
1443                 dst_release(dst);
1444                 return;
1445         }
1446         if (!xrlim_allow(dst, 1*HZ)) {
1447                 dst_release(dst);
1448                 return;
1449         }
1450
1451         if (dev->addr_len) {
1452                 read_lock_bh(&neigh->lock);
1453                 if (neigh->nud_state & NUD_VALID) {
1454                         memcpy(ha_buf, neigh->ha, dev->addr_len);
1455                         read_unlock_bh(&neigh->lock);
1456                         ha = ha_buf;
1457                         len += ndisc_opt_addr_space(dev);
1458                 } else
1459                         read_unlock_bh(&neigh->lock);
1460         }
1461
1462         rd_len = min_t(unsigned int,
1463                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1464         rd_len &= ~0x7;
1465         len += rd_len;
1466
1467         buff = sock_alloc_send_skb(sk,
1468                                    (MAX_HEADER + sizeof(struct ipv6hdr) +
1469                                     len + LL_RESERVED_SPACE(dev)),
1470                                    1, &err);
1471         if (buff == NULL) {
1472                 ND_PRINTK0(KERN_ERR
1473                            "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1474                            __func__);
1475                 dst_release(dst);
1476                 return;
1477         }
1478
1479         hlen = 0;
1480
1481         skb_reserve(buff, LL_RESERVED_SPACE(dev));
1482         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1483                    IPPROTO_ICMPV6, len);
1484
1485         skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1486         skb_put(buff, len);
1487         icmph = icmp6_hdr(buff);
1488
1489         memset(icmph, 0, sizeof(struct icmp6hdr));
1490         icmph->icmp6_type = NDISC_REDIRECT;
1491
1492         /*
1493          *      copy target and destination addresses
1494          */
1495
1496         addrp = (struct in6_addr *)(icmph + 1);
1497         ipv6_addr_copy(addrp, target);
1498         addrp++;
1499         ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1500
1501         opt = (u8*) (addrp + 1);
1502
1503         /*
1504          *      include target_address option
1505          */
1506
1507         if (ha)
1508                 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1509                                              dev->addr_len, dev->type);
1510
1511         /*
1512          *      build redirect option and copy skb over to the new packet.
1513          */
1514
1515         memset(opt, 0, 8);
1516         *(opt++) = ND_OPT_REDIRECT_HDR;
1517         *(opt++) = (rd_len >> 3);
1518         opt += 6;
1519
1520         memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1521
1522         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1523                                              len, IPPROTO_ICMPV6,
1524                                              csum_partial((u8 *) icmph, len, 0));
1525
1526         buff->dst = dst;
1527         idev = in6_dev_get(dst->dev);
1528         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1529         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1530                       dst_output);
1531         if (!err) {
1532                 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1533                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1534         }
1535
1536         if (likely(idev != NULL))
1537                 in6_dev_put(idev);
1538 }
1539
/*
 * pndisc_redo - process a previously queued Neighbour Solicitation.
 * @skb: the queued packet; it is freed here once it has been handled.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	/* Run the normal NS handler on the queued packet ... */
	ndisc_recv_ns(skb);

	/* ... and drop it afterwards. */
	kfree_skb(skb);
}
1545
1546 int ndisc_rcv(struct sk_buff *skb)
1547 {
1548         struct nd_msg *msg;
1549
1550         if (!pskb_may_pull(skb, skb->len))
1551                 return 0;
1552
1553         msg = (struct nd_msg *)skb_transport_header(skb);
1554
1555         __skb_push(skb, skb->data - skb_transport_header(skb));
1556
1557         if (ipv6_hdr(skb)->hop_limit != 255) {
1558                 ND_PRINTK2(KERN_WARNING
1559                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1560                            ipv6_hdr(skb)->hop_limit);
1561                 return 0;
1562         }
1563
1564         if (msg->icmph.icmp6_code != 0) {
1565                 ND_PRINTK2(KERN_WARNING
1566                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1567                            msg->icmph.icmp6_code);
1568                 return 0;
1569         }
1570
1571         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1572
1573         switch (msg->icmph.icmp6_type) {
1574         case NDISC_NEIGHBOUR_SOLICITATION:
1575                 ndisc_recv_ns(skb);
1576                 break;
1577
1578         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1579                 ndisc_recv_na(skb);
1580                 break;
1581
1582         case NDISC_ROUTER_SOLICITATION:
1583                 ndisc_recv_rs(skb);
1584                 break;
1585
1586         case NDISC_ROUTER_ADVERTISEMENT:
1587                 ndisc_router_discovery(skb);
1588                 break;
1589
1590         case NDISC_REDIRECT:
1591                 ndisc_redirect_rcv(skb);
1592                 break;
1593         }
1594
1595         return 0;
1596 }
1597
1598 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1599 {
1600         struct net_device *dev = ptr;
1601         struct net *net = dev->nd_net;
1602
1603         switch (event) {
1604         case NETDEV_CHANGEADDR:
1605                 neigh_changeaddr(&nd_tbl, dev);
1606                 fib6_run_gc(~0UL, net);
1607                 break;
1608         case NETDEV_DOWN:
1609                 neigh_ifdown(&nd_tbl, dev);
1610                 fib6_run_gc(~0UL, net);
1611                 break;
1612         default:
1613                 break;
1614         }
1615
1616         return NOTIFY_DONE;
1617 }
1618
1619 static struct notifier_block ndisc_netdev_notifier = {
1620         .notifier_call = ndisc_netdev_event,
1621 };
1622
1623 #ifdef CONFIG_SYSCTL
1624 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1625                                          const char *func, const char *dev_name)
1626 {
1627         static char warncomm[TASK_COMM_LEN];
1628         static int warned;
1629         if (strcmp(warncomm, current->comm) && warned < 5) {
1630                 strcpy(warncomm, current->comm);
1631                 printk(KERN_WARNING
1632                         "process `%s' is using deprecated sysctl (%s) "
1633                         "net.ipv6.neigh.%s.%s; "
1634                         "Use net.ipv6.neigh.%s.%s_ms "
1635                         "instead.\n",
1636                         warncomm, func,
1637                         dev_name, ctl->procname,
1638                         dev_name, ctl->procname);
1639                 warned++;
1640         }
1641 }
1642
1643 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1644 {
1645         struct net_device *dev = ctl->extra1;
1646         struct inet6_dev *idev;
1647         int ret;
1648
1649         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1650             (strcmp(ctl->procname, "base_reachable_time") == 0))
1651                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1652
1653         if (strcmp(ctl->procname, "retrans_time") == 0)
1654                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1655
1656         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1657                 ret = proc_dointvec_jiffies(ctl, write,
1658                                             filp, buffer, lenp, ppos);
1659
1660         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1661                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1662                 ret = proc_dointvec_ms_jiffies(ctl, write,
1663                                                filp, buffer, lenp, ppos);
1664         else
1665                 ret = -1;
1666
1667         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1668                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1669                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1670                 idev->tstamp = jiffies;
1671                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1672                 in6_dev_put(idev);
1673         }
1674         return ret;
1675 }
1676
1677 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1678                                         int nlen, void __user *oldval,
1679                                         size_t __user *oldlenp,
1680                                         void __user *newval, size_t newlen)
1681 {
1682         struct net_device *dev = ctl->extra1;
1683         struct inet6_dev *idev;
1684         int ret;
1685
1686         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1687             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1688                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1689
1690         switch (ctl->ctl_name) {
1691         case NET_NEIGH_REACHABLE_TIME:
1692                 ret = sysctl_jiffies(ctl, name, nlen,
1693                                      oldval, oldlenp, newval, newlen);
1694                 break;
1695         case NET_NEIGH_RETRANS_TIME_MS:
1696         case NET_NEIGH_REACHABLE_TIME_MS:
1697                  ret = sysctl_ms_jiffies(ctl, name, nlen,
1698                                          oldval, oldlenp, newval, newlen);
1699                  break;
1700         default:
1701                 ret = 0;
1702         }
1703
1704         if (newval && newlen && ret > 0 &&
1705             dev && (idev = in6_dev_get(dev)) != NULL) {
1706                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1707                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1708                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1709                 idev->tstamp = jiffies;
1710                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1711                 in6_dev_put(idev);
1712         }
1713
1714         return ret;
1715 }
1716
1717 #endif
1718
1719 static int ndisc_net_init(struct net *net)
1720 {
1721         struct socket *sock;
1722         struct ipv6_pinfo *np;
1723         struct sock *sk;
1724         int err;
1725
1726         err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock);
1727         if (err < 0) {
1728                 ND_PRINTK0(KERN_ERR
1729                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1730                            err);
1731                 return err;
1732         }
1733
1734         net->ipv6.ndisc_sk = sk = sock->sk;
1735         sk_change_net(sk, net);
1736
1737         np = inet6_sk(sk);
1738         sk->sk_allocation = GFP_ATOMIC;
1739         np->hop_limit = 255;
1740         /* Do not loopback ndisc messages */
1741         np->mc_loop = 0;
1742         sk->sk_prot->unhash(sk);
1743
1744         return 0;
1745 }
1746
1747 static void ndisc_net_exit(struct net *net)
1748 {
1749         sk_release_kernel(net->ipv6.ndisc_sk);
1750 }
1751
1752 static struct pernet_operations ndisc_net_ops = {
1753         .init = ndisc_net_init,
1754         .exit = ndisc_net_exit,
1755 };
1756
1757 int __init ndisc_init(void)
1758 {
1759         int err;
1760
1761         err = register_pernet_subsys(&ndisc_net_ops);
1762         if (err)
1763                 return err;
1764         /*
1765          * Initialize the neighbour table
1766          */
1767         neigh_table_init(&nd_tbl);
1768
1769 #ifdef CONFIG_SYSCTL
1770         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1771                                     NET_IPV6_NEIGH, "ipv6",
1772                                     &ndisc_ifinfo_sysctl_change,
1773                                     &ndisc_ifinfo_sysctl_strategy);
1774         if (err)
1775                 goto out_unregister_pernet;
1776 #endif
1777         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1778         if (err)
1779                 goto out_unregister_sysctl;
1780 out:
1781         return err;
1782
1783 out_unregister_sysctl:
1784 #ifdef CONFIG_SYSCTL
1785         neigh_sysctl_unregister(&nd_tbl.parms);
1786 out_unregister_pernet:
1787 #endif
1788         unregister_pernet_subsys(&ndisc_net_ops);
1789         goto out;
1790 }
1791
1792 void ndisc_cleanup(void)
1793 {
1794         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1795 #ifdef CONFIG_SYSCTL
1796         neigh_sysctl_unregister(&nd_tbl.parms);
1797 #endif
1798         neigh_table_clear(&nd_tbl);
1799         unregister_pernet_subsys(&ndisc_net_ops);
1800 }