2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
70 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
77 * On SMP we have one ICMP socket per-cpu.
/* Per-CPU pointer to the ICMPv6 control socket; starts out NULL and is
 * filled in by icmpv6_init(). */
79 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
/* Shorthand for the current CPU's __icmpv6_socket entry. */
80 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration: icmpv6_rcv is referenced by icmpv6_protocol below. */
82 static int icmpv6_rcv(struct sk_buff **pskb);
/* Protocol descriptor registered for IPPROTO_ICMPV6 in icmpv6_init();
 * inbound packets are handed to icmpv6_rcv. */
84 static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_FINAL,
/*
 * Try to take the slock of this CPU's ICMPv6 socket without spinning
 * (spin_trylock).  The trylock can fail when the output path itself raises
 * an ICMPv6 error while the lock is already held (see the comment below).
 * NOTE(review): the return statements and any bottom-half disabling are not
 * visible in this listing; callers treat a non-zero return as "could not
 * lock" -- confirm against the full source.
 */
89 static __inline__ int icmpv6_xmit_lock(void)
93 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
/* Release the slock of this CPU's ICMPv6 socket with spin_unlock_bh();
 * counterpart of icmpv6_xmit_lock(). */
104 static __inline__ void icmpv6_xmit_unlock(void)
106 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
110 * Slightly more convenient version of icmpv6_send.
/*
 * Send an ICMPV6_PARAMPROB error in response to skb.
 * code is forwarded unchanged; pos is passed as the info argument of
 * icmpv6_send(), and skb->dev is passed as the device.
 */
112 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
114 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
119 * Figure out whether we may reply to this packet with an ICMP error.
121 * We do not reply if:
122 * - it was an ICMP error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
/*
 * Inspect skb to decide whether it may be answered with an ICMPv6 error.
 * Starting just past the IPv6 header, extension headers are skipped with
 * ipv6_skip_exthdr(); if the resulting next header is IPPROTO_ICMPV6, the
 * ICMPv6 type byte is fetched with skb_header_pointer() and its
 * ICMPV6_INFOMSG_MASK bit is tested (bit clear = error message).
 * NOTE(review): the return statements and the use of `len` are not visible
 * in this listing; icmpv6_send() refuses to reply when this returns
 * non-zero -- confirm exact return values against the full source.
 */
129 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the IPv6 header, relative to skb->data. */
131 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
132 int len = skb->len - ptr;
133 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
138 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
141 if (nexthdr == IPPROTO_ICMPV6) {
143 tp = skb_header_pointer(skb,
144 ptr+offsetof(struct icmp6hdr, icmp6_type),
145 sizeof(_type), &_type);
147 !(*tp & ICMPV6_INFOMSG_MASK))
/* Base timeout used by icmpv6_xrlim_allow() for rate limiting, initially
 * 1*HZ; it is the .data of the "ratelimit" entry in ipv6_icmp_table. */
153 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
156 * Check the ICMP output rate limit
/*
 * Check the output rate limit for an ICMPv6 message of the given type.
 * Types with ICMPV6_INFOMSG_MASK set and ICMPV6_PKT_TOOBIG are tested
 * before any route lookup.  Otherwise the output route is looked up with
 * ip6_route_output(); loopback devices are special-cased, and for other
 * routes xrlim_allow() is called with a timeout derived from
 * sysctl_icmpv6_time and the route's destination prefix length.
 * NOTE(review): the rest of the signature, the return statements and the
 * release of dst are missing from this listing.  icmpv6_send() aborts when
 * this returns 0, so non-zero presumably means "allowed" -- confirm.
 */
158 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
161 struct dst_entry *dst;
164 /* Informational messages are not limited. */
165 if (type & ICMPV6_INFOMSG_MASK)
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type == ICMPV6_PKT_TOOBIG)
173 * Look up the output route.
174 * XXX: perhaps the expire for routing entries cloned by
175 * this lookup should be more aggressive (not longer than timeout).
177 dst = ip6_route_output(sk, fl);
179 IP6_INC_STATS(ip6_dst_idev(dst),
180 IPSTATS_MIB_OUTNOROUTES);
181 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
184 struct rt6_info *rt = (struct rt6_info *)dst;
185 int tmo = sysctl_icmpv6_time;
187 /* Give more bandwidth to wider prefixes. */
/* The timeout is shifted right by one bit for every full 32 bits by which
 * the prefix length falls short of 128. */
188 if (rt->rt6i_dst.plen < 128)
189 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
191 res = xrlim_allow(dst, tmo);
198 * an inline helper for the "simple" if statement below
199 * checks if parameter problem report is caused by an
200 * unrecognized IPv6 option that has the Option Type
201 * highest-order two bits set to 10
/*
 * Test the byte at `offset` in skb: returns non-zero when its two
 * highest-order bits are 10, i.e. (*op & 0xC0) == 0x80.
 * `offset` is given relative to skb->nh.raw and is converted to an offset
 * from skb->data before the byte is fetched with skb_header_pointer().
 * NOTE(review): the handling of a NULL result from skb_header_pointer() is
 * not visible in this listing.
 */
204 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
208 offset += skb->nh.raw - skb->data;
209 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
212 return (*op & 0xC0) == 0x80;
/*
 * Finalise the ICMPv6 header of the data queued on sk->sk_write_queue and
 * push the frames out with ip6_push_pending_frames().
 *
 * thdr is copied over the header at skb->h.raw of the first queued skb and
 * the checksum field is zeroed before the checksum is computed.  With a
 * single queued skb its skb->csum is extended by the header; with several,
 * the skb->csum values of all queued skbs are summed first.  In both cases
 * the final value comes from csum_ipv6_magic().
 * NOTE(review): the remaining csum_ipv6_magic() arguments, the handling of
 * an empty queue and the return value are not visible in this listing.
 */
215 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
218 struct icmp6hdr *icmp6h;
221 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
224 icmp6h = (struct icmp6hdr*) skb->h.raw;
225 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
226 icmp6h->icmp6_cksum = 0;
228 if (skb_queue_len(&sk->sk_write_queue) == 1) {
229 skb->csum = csum_partial((char *)icmp6h,
230 sizeof(struct icmp6hdr), skb->csum);
231 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
238 skb_queue_walk(&sk->sk_write_queue, skb) {
239 tmp_csum = csum_add(tmp_csum, skb->csum);
242 tmp_csum = csum_partial((char *)icmp6h,
243 sizeof(struct icmp6hdr), tmp_csum);
244 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
249 ip6_push_pending_frames(sk);
/*
 * Fragment-fill callback handed to ip6_append_data() by icmpv6_send() and
 * icmpv6_echo_reply().
 *
 * `from` is a struct icmpv6_msg describing the original skb.  Data is
 * copied and checksummed from that skb starting at msg->offset + offset
 * (presumably into `to` for `len` bytes -- the rest of the call is missing
 * from this listing), and the partial checksum is folded into skb->csum
 * with csum_block_add().  When msg->type does not have ICMPV6_INFOMSG_MASK
 * set, nf_ct_attach() is called on the new skb with the original skb.
 * NOTE(review): the return value is not visible here.
 */
260 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
262 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
263 struct sk_buff *org_skb = msg->skb;
266 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
268 skb->csum = csum_block_add(skb->csum, csum, odd);
269 if (!(msg->type & ICMPV6_INFOMSG_MASK))
270 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap(): with CONFIG_IPV6_MIP6, look for an IPV6_TLV_HAO option
 * with ipv6_find_tlv() starting at opt->dsthao and, when one is found
 * (off >= 0), exchange the IPv6 header's source address with the address
 * stored in that option.  The one-line variant at the end of this block has
 * an empty body.
 * NOTE(review): the guard around ipv6_find_tlv() and the #else/#endif lines
 * are missing from this listing.
 */
274 #ifdef CONFIG_IPV6_MIP6
275 static void mip6_addr_swap(struct sk_buff *skb)
277 struct ipv6hdr *iph = skb->nh.ipv6h;
278 struct inet6_skb_parm *opt = IP6CB(skb);
279 struct ipv6_destopt_hao *hao;
284 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
285 if (likely(off >= 0)) {
286 hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
/* Three-way swap of iph->saddr and hao->addr through tmp. */
287 ipv6_addr_copy(&tmp, &iph->saddr);
288 ipv6_addr_copy(&iph->saddr, &hao->addr);
289 ipv6_addr_copy(&hao->addr, &tmp);
294 static inline void mip6_addr_swap(struct sk_buff *skb) {}
298 * Send an ICMP message in response to a packet in error
/*
 * Send an ICMPv6 message of the given type/code in response to skb.
 *
 * skb:        the packet being answered; its IPv6 header (skb->nh.ipv6h) is
 *             first checked to lie between skb->head and skb->tail.
 * type, code: copied into the outgoing ICMPv6 header and into the flow key.
 * info:       stored in the header's icmp6_pointer field after htonl().
 * dev:        NOTE(review): not referenced in the lines visible here.
 *
 * Visible steps, in order: checks on the destination and source address
 * types, the is_ineligible() test, flow setup with the original source as
 * the destination, taking the per-CPU socket lock, rate limiting, route and
 * xfrm lookup, hop-limit selection, clamping of the quoted payload,
 * ip6_append_data(), icmpv6_push_pending_frames() and MIB accounting.
 * NOTE(review): many lines (returns, labels, else branches, several
 * declarations) are missing from this listing, so the exact control flow
 * must be confirmed against the full source.
 */
300 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
301 struct net_device *dev)
303 struct inet6_dev *idev = NULL;
304 struct ipv6hdr *hdr = skb->nh.ipv6h;
306 struct ipv6_pinfo *np;
307 struct in6_addr *saddr = NULL;
308 struct dst_entry *dst;
309 struct icmp6hdr tmp_hdr;
311 struct icmpv6_msg msg;
/* The whole IPv6 header must lie inside the skb's buffer. */
318 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
322 * Make sure we respect the rules
323 * i.e. RFC 1885 2.4(e)
324 * Rule (e.1) is enforced by not using icmpv6_send
325 * in any code that processes icmp errors.
327 addr_type = ipv6_addr_type(&hdr->daddr);
329 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
/* For multicast destinations or packets not addressed to this host, only
 * ICMPV6_PKT_TOOBIG and the ICMPV6_PARAMPROB/ICMPV6_UNK_OPTION case accepted
 * by opt_unrec() fall outside the inner condition. */
336 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
337 if (type != ICMPV6_PKT_TOOBIG &&
338 !(type == ICMPV6_PARAMPROB &&
339 code == ICMPV6_UNK_OPTION &&
340 (opt_unrec(skb, info))))
346 addr_type = ipv6_addr_type(&hdr->saddr);
/* A link-local source records the receiving interface index in iif. */
352 if (addr_type & IPV6_ADDR_LINKLOCAL)
353 iif = skb->dev->ifindex;
356 * Must not send error if the source does not uniquely
357 * identify a single node (RFC2463 Section 2.4).
358 * We check unspecified / multicast addresses here,
359 * and anycast addresses will be checked later.
361 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
362 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
367 * Never answer to a ICMP packet.
369 if (is_ineligible(skb)) {
370 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Build the flow key: the reply goes to the original packet's source. */
376 memset(&fl, 0, sizeof(fl));
377 fl.proto = IPPROTO_ICMPV6;
378 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
380 ipv6_addr_copy(&fl.fl6_src, saddr);
382 fl.fl_icmp_type = type;
383 fl.fl_icmp_code = code;
384 security_skb_classify_flow(skb, &fl);
386 if (icmpv6_xmit_lock())
389 sk = icmpv6_socket->sk;
392 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Header template that icmpv6_push_pending_frames() copies into the
 * queued packet. */
395 tmp_hdr.icmp6_type = type;
396 tmp_hdr.icmp6_code = code;
397 tmp_hdr.icmp6_cksum = 0;
398 tmp_hdr.icmp6_pointer = htonl(info);
400 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
401 fl.oif = np->mcast_oif;
403 err = ip6_dst_lookup(sk, &dst, &fl);
408 * We won't send icmp if the destination is known
411 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
412 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
413 goto out_dst_release;
416 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit candidates: socket settings, the route metric, then the device
 * default.  NOTE(review): the conditions linking these assignments are
 * missing from this listing. */
419 if (ipv6_addr_is_multicast(&fl.fl6_dst))
420 hlimit = np->mcast_hops;
422 hlimit = np->hop_limit;
424 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
426 hlimit = ipv6_get_hoplimit(dst->dev);
/* The quoted data starts at the original packet's network header. */
433 msg.offset = skb->nh.raw - skb->data;
/* Clamp the quoted data so that IPv6 header + ICMPv6 header + payload does
 * not exceed IPV6_MIN_MTU. */
436 len = skb->len - msg.offset;
437 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
439 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
440 goto out_dst_release;
443 idev = in6_dev_get(skb->dev);
445 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
446 len + sizeof(struct icmp6hdr),
447 sizeof(struct icmp6hdr),
448 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
451 ip6_flush_pending_frames(sk);
454 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counter for types in the DEST_UNREACH..PARAMPROB range,
 * plus the overall output message counter. */
456 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
457 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
458 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
461 if (likely(idev != NULL))
466 icmpv6_xmit_unlock();
/*
 * Answer the ICMPv6 echo request in skb with an echo reply.
 *
 * The received ICMPv6 header (skb->h.raw) is copied into tmp_hdr and its
 * type is changed to ICMPV6_ECHO_REPLY.  The flow is addressed to the
 * request's source and uses the receiving device's ifindex as oif.  After
 * taking the per-CPU socket lock, the route and xfrm lookups and hop-limit
 * selection mirror icmpv6_send(); skb->len bytes of payload are then queued
 * through ip6_append_data()/icmpv6_getfrag and pushed with
 * icmpv6_push_pending_frames(), and the output counters are incremented.
 * NOTE(review): returns, labels, the remaining msg.* assignments and the
 * statement guarded by ipv6_unicast_destination() are missing from this
 * listing; confirm details against the full source.
 */
469 static void icmpv6_echo_reply(struct sk_buff *skb)
472 struct inet6_dev *idev;
473 struct ipv6_pinfo *np;
474 struct in6_addr *saddr = NULL;
475 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
476 struct icmp6hdr tmp_hdr;
478 struct icmpv6_msg msg;
479 struct dst_entry *dst;
/* Candidate source address for the reply: the request's destination. */
484 saddr = &skb->nh.ipv6h->daddr;
486 if (!ipv6_unicast_destination(skb))
/* Start from the request's header and only change the type. */
489 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
490 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
492 memset(&fl, 0, sizeof(fl));
493 fl.proto = IPPROTO_ICMPV6;
494 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
496 ipv6_addr_copy(&fl.fl6_src, saddr);
497 fl.oif = skb->dev->ifindex;
498 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
499 security_skb_classify_flow(skb, &fl);
501 if (icmpv6_xmit_lock())
504 sk = icmpv6_socket->sk;
507 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
508 fl.oif = np->mcast_oif;
510 err = ip6_dst_lookup(sk, &dst, &fl);
513 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
516 if (ipv6_addr_is_multicast(&fl.fl6_dst))
517 hlimit = np->mcast_hops;
519 hlimit = np->hop_limit;
521 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
523 hlimit = ipv6_get_hoplimit(dst->dev);
529 idev = in6_dev_get(skb->dev);
533 msg.type = ICMPV6_ECHO_REPLY;
535 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
536 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
537 (struct rt6_info*)dst, MSG_DONTWAIT);
540 ip6_flush_pending_frames(sk);
543 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
545 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
546 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
549 if (likely(idev != NULL))
553 icmpv6_xmit_unlock();
/*
 * Deliver a received ICMPv6 error to the protocol and raw sockets that the
 * quoted (inner) packet belongs to.
 *
 * skb->data is expected to point at the quoted IPv6 header.  That header is
 * pulled, extension headers are skipped when the first next-header value is
 * an extension header, and the header up to inner_offset plus 8 further
 * bytes is pulled.  The final next-header value, masked with
 * MAX_INET_PROTOS - 1, indexes both inet6_protos[] (whose err_handler is
 * called if present) and raw_v6_htable[] (whose matching sockets get
 * rawv6_err()).  type, code and info are passed through unchanged.
 * NOTE(review): returns, the RCU read-side markers and the rest of the raw
 * socket loop are missing from this listing.
 */
556 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
558 struct in6_addr *saddr, *daddr;
559 struct inet6_protocol *ipprot;
565 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
568 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
569 if (ipv6_ext_hdr(nexthdr)) {
570 /* now skip over extension headers */
571 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
575 inner_offset = sizeof(struct ipv6hdr);
578 /* Checking header including 8 bytes of inner protocol header. */
579 if (!pskb_may_pull(skb, inner_offset+8))
582 saddr = &skb->nh.ipv6h->saddr;
583 daddr = &skb->nh.ipv6h->daddr;
585 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
586 Without this we will not able f.e. to make source routed
588 Corresponding argument (opt) to notifiers is already added.
592 hash = nexthdr & (MAX_INET_PROTOS - 1);
595 ipprot = rcu_dereference(inet6_protos[hash]);
596 if (ipprot && ipprot->err_handler)
597 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
600 read_lock(&raw_v6_lock);
601 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
602 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
604 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
608 read_unlock(&raw_v6_lock);
612 * Handle icmp messages
/*
 * Receive handler for ICMPv6, installed as icmpv6_protocol.handler.
 *
 * Visible processing order: count the message (ICMP6_MIB_INMSGS), verify
 * the checksum according to skb->ip_summed, pull the ICMPv6 header, bump
 * the per-type input counter, then dispatch on the type:
 *   - ICMPV6_ECHO_REQUEST: icmpv6_echo_reply()
 *   - ICMPV6_PKT_TOOBIG: rt6_pmtu_discovery() using the quoted IPv6 header
 *     and the MTU from the ICMPv6 header, then icmpv6_notify() ("Drop
 *     through to notify")
 *   - ICMPV6_DEST_UNREACH / TIME_EXCEED / PARAMPROB: icmpv6_notify()
 *   - ICMPV6_MGM_QUERY / ICMPV6_MGM_REPORT: igmp6_event_query() /
 *     igmp6_event_report()
 *   - unknown types: a debug message is logged; types without
 *     ICMPV6_INFOMSG_MASK are still passed to icmpv6_notify().
 * ICMP6_MIB_INERRORS is incremented near the end of the function,
 * presumably on the error/discard path.
 * NOTE(review): the switch statement, break/goto/return lines, the NDISC
 * handling and the checksum failure path are missing from this listing.
 */
615 static int icmpv6_rcv(struct sk_buff **pskb)
617 struct sk_buff *skb = *pskb;
618 struct net_device *dev = skb->dev;
619 struct inet6_dev *idev = __in6_dev_get(dev);
620 struct in6_addr *saddr, *daddr;
621 struct ipv6hdr *orig_hdr;
622 struct icmp6hdr *hdr;
625 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
627 saddr = &skb->nh.ipv6h->saddr;
628 daddr = &skb->nh.ipv6h->daddr;
630 /* Perform checksum. */
631 switch (skb->ip_summed) {
632 case CHECKSUM_COMPLETE:
633 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
638 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
640 if (__skb_checksum_complete(skb)) {
641 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
642 NIP6(*saddr), NIP6(*daddr));
647 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
650 hdr = (struct icmp6hdr *) skb->h.raw;
652 type = hdr->icmp6_type;
654 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
655 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
656 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
657 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
660 case ICMPV6_ECHO_REQUEST:
661 icmpv6_echo_reply(skb);
664 case ICMPV6_ECHO_REPLY:
665 /* we couldn't care less */
668 case ICMPV6_PKT_TOOBIG:
669 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
670 standard destination cache. Seems, only "advanced"
671 destination cache will allow to solve this problem
674 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
676 hdr = (struct icmp6hdr *) skb->h.raw;
677 orig_hdr = (struct ipv6hdr *) (hdr + 1);
678 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
679 ntohl(hdr->icmp6_mtu));
682 * Drop through to notify
685 case ICMPV6_DEST_UNREACH:
686 case ICMPV6_TIME_EXCEED:
687 case ICMPV6_PARAMPROB:
688 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
691 case NDISC_ROUTER_SOLICITATION:
692 case NDISC_ROUTER_ADVERTISEMENT:
693 case NDISC_NEIGHBOUR_SOLICITATION:
694 case NDISC_NEIGHBOUR_ADVERTISEMENT:
699 case ICMPV6_MGM_QUERY:
700 igmp6_event_query(skb);
703 case ICMPV6_MGM_REPORT:
704 igmp6_event_report(skb);
707 case ICMPV6_MGM_REDUCTION:
708 case ICMPV6_NI_QUERY:
709 case ICMPV6_NI_REPLY:
710 case ICMPV6_MLD2_REPORT:
711 case ICMPV6_DHAAD_REQUEST:
712 case ICMPV6_DHAAD_REPLY:
713 case ICMPV6_MOBILE_PREFIX_SOL:
714 case ICMPV6_MOBILE_PREFIX_ADV:
718 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
721 if (type & ICMPV6_INFOMSG_MASK)
725 * error of unknown type.
726 * must pass to upper level
729 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
735 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
741 * Special lock-class for __icmpv6_socket:
/*
 * icmpv6_socket_sk_dst_lock_key is the lockdep class key applied to the
 * sk_dst_lock of every ICMPv6 control socket.
 *
 * icmpv6_init() creates one kernel-side raw IPPROTO_ICMPV6 socket per
 * possible CPU and stores it in __icmpv6_socket.  For each socket it sets
 * sk_allocation to GFP_ATOMIC, assigns the lock class, sizes a buffer for
 * two 64K packets plus sk_buff overhead, and calls the protocol's
 * unhash().  It then registers icmpv6_protocol for IPPROTO_ICMPV6.  The
 * trailing loop releases the sockets of CPUs with index below i,
 * presumably as the failure path.
 * NOTE(review): the `ops` parameter is not used in the visible lines, and
 * the return statements, labels and the assignment target of the buffer
 * size are missing from this listing.
 */
743 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
745 int __init icmpv6_init(struct net_proto_family *ops)
750 for_each_possible_cpu(i) {
751 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
752 &per_cpu(__icmpv6_socket, i));
755 "Failed to initialize the ICMP6 control socket "
761 sk = per_cpu(__icmpv6_socket, i)->sk;
762 sk->sk_allocation = GFP_ATOMIC;
764 * Split off their lock-class, because sk->sk_dst_lock
765 * gets used from softirqs, which is safe for
766 * __icmpv6_socket (because those never get directly used
767 * via userspace syscalls), but unsafe for normal sockets.
769 lockdep_set_class(&sk->sk_dst_lock,
770 &icmpv6_socket_sk_dst_lock_key);
772 /* Enough space for 2 64K ICMP packets, including
773 * sk_buff struct overhead.
776 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
778 sk->sk_prot->unhash(sk);
782 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
783 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
791 for (j = 0; j < i; j++) {
792 if (!cpu_possible(j))
794 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * Tear down what icmpv6_init() set up: release the ICMPv6 control socket of
 * every possible CPU, then unregister icmpv6_protocol for IPPROTO_ICMPV6.
 */
800 void icmpv6_cleanup(void)
804 for_each_possible_cpu(i) {
805 sock_release(per_cpu(__icmpv6_socket, i));
807 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Constant table of struct icmp6_err entries, one per ICMPv6 code as the
 * per-entry comments indicate.  Presumably this is the tab_unreach[] array
 * whose .err and .fatal members icmpv6_err_convert() reads -- the member
 * declarations, values and array name are missing from this listing.
 */
810 static const struct icmp6_err {
818 { /* ADM_PROHIBITED */
822 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * Translate an ICMPv6 error (type, code) into an error number stored
 * through *err.
 *
 * For ICMPV6_DEST_UNREACH with code <= ICMPV6_PORT_UNREACH, both *err and
 * the local `fatal` flag are taken from tab_unreach[code].  Separate cases
 * exist for ICMPV6_PKT_TOOBIG, ICMPV6_PARAMPROB and ICMPV6_TIME_EXCEED.
 * NOTE(review): those case bodies, the default values and the return
 * statement are missing from this listing; presumably `fatal` is returned.
 * A negative `code` is not rejected by the visible bounds check.
 */
836 int icmpv6_err_convert(int type, int code, int *err)
843 case ICMPV6_DEST_UNREACH:
845 if (code <= ICMPV6_PORT_UNREACH) {
846 *err = tab_unreach[code].err;
847 fatal = tab_unreach[code].fatal;
851 case ICMPV6_PKT_TOOBIG:
855 case ICMPV6_PARAMPROB:
860 case ICMPV6_TIME_EXCEED:
869 ctl_table ipv6_icmp_table[] = {
871 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
872 .procname = "ratelimit",
873 .data = &sysctl_icmpv6_time,
874 .maxlen = sizeof(int),
876 .proc_handler = &proc_dointvec