2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/netfilter.h>
48 #include <linux/sysctl.h>
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/*
 * Statistics block of type struct icmpv6_mib, declared through
 * DEFINE_SNMP_STAT and marked __read_mostly.
 * NOTE(review): presumably the global ICMPv6 MIB counters behind the
 * ICMP6_INC_STATS* calls in this file -- confirm against the macro
 * definitions, which are not visible here.
 */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
74 * The ICMP socket(s). This is the most convenient way to flow control
75 * our ICMP output as well as maintain a clean interface throughout
76 * all layers. All Socketless IP sends will soon be gone.
78 * On SMP we have one ICMP socket per-cpu.
/*
 * One struct socket pointer per CPU, initially NULL; the sockets are
 * created in icmpv6_init() and released in icmpv6_cleanup().
 * icmpv6_socket is shorthand for the current CPU's entry
 * (__get_cpu_var), so it is only meaningful while the caller cannot
 * migrate to another CPU -- NOTE(review): confirm that callers run with
 * bottom halves/preemption disabled; that is not visible in this listing.
 */
80 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
81 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/*
 * Forward declaration of the receive handler plus the inet6_protocol
 * descriptor that icmpv6_init() registers for IPPROTO_ICMPV6 and
 * icmpv6_cleanup() unregisters.  The descriptor routes incoming packets
 * to icmpv6_rcv() and carries the INET6_PROTO_FINAL flag.
 * NOTE(review): the closing "};" of the initializer is missing from this
 * listing.
 */
83 static int icmpv6_rcv(struct sk_buff **pskb);
85 static struct inet6_protocol icmpv6_protocol = {
86 .handler = icmpv6_rcv,
87 .flags = INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the lock of the current CPU's ICMPv6 socket.
 *
 * Uses spin_trylock() on icmpv6_socket->sk->sk_lock.slock instead of a
 * blocking acquire; the failure branch is annotated unlikely() and, per the
 * comment inside it, covers re-entry from the output path.  Callers in this
 * file treat a non-zero return as "could not lock, give up".
 *
 * NOTE(review): this listing has dropped lines (braces, the statements
 * before the trylock, both return statements and the comment terminator),
 * so the exact return values and any bottom-half handling are not visible
 * here -- confirm against the full file.
 */
90 static __inline__ int icmpv6_xmit_lock(void)
94 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
95 /* This can happen if the output path (f.e. SIT or
96 * ip6ip6 tunnel) signals dst_link_failure() for an
97 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock taken by icmpv6_xmit_lock().
 *
 * Drops icmpv6_socket->sk->sk_lock.slock for the current CPU's ICMPv6
 * socket using the _bh variant of spin_unlock.
 */
105 static __inline__ void icmpv6_xmit_unlock(void)
107 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
111 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - report a Parameter Problem for @skb.
 * @skb:  offending packet
 * @code: ICMPv6 code to use with type ICMPV6_PARAMPROB
 * @pos:  value passed to icmpv6_send() as its info argument
 *
 * Thin wrapper: calls icmpv6_send() with type ICMPV6_PARAMPROB and
 * skb->dev as the device.
 * NOTE(review): anything following the icmpv6_send() call (e.g. freeing
 * @skb) is not visible in this listing.
 */
113 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
115 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
120 * Figure out whether we may reply to this packet with an ICMP error.
122 * We do not reply if:
123 * - it was an ICMP error message.
124 * - it is truncated, so that it is known, that protocol is ICMPV6
125 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - decide whether @skb must not be answered with an ICMP error.
 * @skb: received packet; skb->nh.ipv6h must point at its IPv6 header
 *
 * Starts at the offset just past the fixed IPv6 header, skips extension
 * headers with ipv6_skip_exthdr(), and if the resulting next-header value is
 * IPPROTO_ICMPV6 reads the icmp6_type byte through skb_header_pointer()
 * (which copes with non-linear data by copying into the local _type).
 * The visible test "!(*tp & ICMPV6_INFOMSG_MASK)" is true for ICMPv6
 * messages whose type does not have the informational bit set.
 *
 * NOTE(review): the local declarations of tp/_type, the length/offset
 * sanity checks, the NULL test on tp and every return statement are
 * missing from this listing; the return convention (callers treat non-zero
 * as "do not reply") is inferred from icmpv6_send() -- confirm.
 */
130 static int is_ineligible(struct sk_buff *skb)
132 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
133 int len = skb->len - ptr;
134 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
139 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
142 if (nexthdr == IPPROTO_ICMPV6) {
144 tp = skb_header_pointer(skb,
145 ptr+offsetof(struct icmp6hdr, icmp6_type),
146 sizeof(_type), &_type);
148 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base rate-limit timeout handed to xrlim_allow() by icmpv6_xrlim_allow();
 * defaults to 1*HZ and is exported as the "ratelimit" entry of
 * ipv6_icmp_table.
 */
154 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
157 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - apply the ICMPv6 output rate limit.
 * @sk:   socket used for the route lookup
 * @type: ICMPv6 type of the message about to be sent
 * (a flow argument, used below as "fl", is on a signature line that is
 *  missing from this listing)
 *
 * Informational types (ICMPV6_INFOMSG_MASK set) and ICMPV6_PKT_TOOBIG are
 * exempted by the two early tests.  Otherwise the output route is looked up
 * with ip6_route_output(); a lookup problem is counted in
 * IPSTATS_MIB_OUTNOROUTES, routes over an IFF_LOOPBACK device get their own
 * branch, and everything else is passed to xrlim_allow() with a timeout
 * derived from sysctl_icmpv6_time.  That timeout is halved once for every
 * full 32 bits by which the route's destination prefix is shorter than 128
 * (shift count is (128 - plen) >> 5).
 *
 * NOTE(review): the bodies of the early tests, the loopback branch, the
 * "res" declaration, the dst release and the return statement are not
 * visible here; callers treat a zero result as "suppress the message".
 */
159 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
162 struct dst_entry *dst;
165 /* Informational messages are not limited. */
166 if (type & ICMPV6_INFOMSG_MASK)
169 /* Do not limit pmtu discovery, it would break it. */
170 if (type == ICMPV6_PKT_TOOBIG)
174 * Look up the output route.
175 * XXX: perhaps the expire for routing entries cloned by
176 * this lookup should be more aggressive (not longer than timeout).
178 dst = ip6_route_output(sk, fl);
180 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
181 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
184 struct rt6_info *rt = (struct rt6_info *)dst;
185 int tmo = sysctl_icmpv6_time;
187 /* Give more bandwidth to wider prefixes. */
188 if (rt->rt6i_dst.plen < 128)
189 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
191 res = xrlim_allow(dst, tmo);
198 * an inline helper for the "simple" if statement below
199 * checks if parameter problem report is caused by an
200 * unrecognized IPv6 option that has the Option Type
201 * highest-order two bits set to 10
/*
 * opt_unrec - test the action bits of the IPv6 option at @offset.
 * @skb:    packet containing the option
 * @offset: position of the option type byte relative to the network header
 *          (skb->nh.raw); it is rebased to skb->data before the read
 *
 * Fetches one byte with skb_header_pointer() and returns non-zero when its
 * two highest-order bits are binary 10 (mask 0xC0, value 0x80).
 *
 * NOTE(review): the declarations of op/_optval and the handling of a NULL
 * result from skb_header_pointer() are missing from this listing.
 */
204 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
208 offset += skb->nh.raw - skb->data;
209 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
212 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finish the ICMPv6 header and send the queue.
 * @sk:   socket whose sk_write_queue holds the message built by
 *        ip6_append_data()
 * @fl:   flow; fl6_src and proto feed the pseudo-header checksum
 * @thdr: template ICMPv6 header copied into the first queued buffer
 * @len:  ICMPv6 length used in the pseudo-header checksum
 *
 * Copies @thdr over the header at skb->h.raw of the first queued buffer,
 * zeroes the checksum field and then computes the checksum.  With a single
 * queued buffer the header is folded into that buffer's skb->csum; with
 * several, the per-buffer csum values are summed first and the header is
 * added afterwards.  A computed checksum of 0 is replaced by -1 (all ones)
 * before ip6_push_pending_frames() transmits the queue.
 *
 * NOTE(review): the skb/tmp_csum/err declarations, the early exit for an
 * empty queue, the remaining csum_ipv6_magic() arguments, the "else" and
 * the return statement are missing from this listing.
 */
215 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
218 struct icmp6hdr *icmp6h;
221 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
224 icmp6h = (struct icmp6hdr*) skb->h.raw;
225 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
226 icmp6h->icmp6_cksum = 0;
228 if (skb_queue_len(&sk->sk_write_queue) == 1) {
229 skb->csum = csum_partial((char *)icmp6h,
230 sizeof(struct icmp6hdr), skb->csum);
231 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
238 skb_queue_walk(&sk->sk_write_queue, skb) {
239 tmp_csum = csum_add(tmp_csum, skb->csum);
242 tmp_csum = csum_partial((char *)icmp6h,
243 sizeof(struct icmp6hdr), tmp_csum);
244 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
246 len, fl->proto, tmp_csum);
247 icmp6h->icmp6_cksum = tmp_csum;
249 if (icmp6h->icmp6_cksum == 0)
250 icmp6h->icmp6_cksum = -1;
251 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment-fill callback given to ip6_append_data().
 * @from:   really a struct icmpv6_msg describing the source packet
 * @to:     destination buffer for this fragment
 * @offset: offset of this fragment within the payload being built
 * @len:    number of bytes requested
 * @odd:    passed to csum_block_add() when merging the checksum
 * @skb:    buffer being filled; its csum is updated
 *
 * Copies data out of msg->skb starting at msg->offset + @offset while
 * computing its checksum, and accumulates that into skb->csum.  For
 * message types without ICMPV6_INFOMSG_MASK set, nf_ct_attach() links the
 * new buffer to the original packet.
 *
 * NOTE(review): the csum declaration, the remaining arguments of
 * skb_copy_and_csum_bits() and the return statement are missing from this
 * listing.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - exchange the IPv6 source address with the address held
 * in the packet's IPV6_TLV_HAO destination option.
 * @skb: packet whose header is modified in place
 *
 * With CONFIG_IPV6_MIP6 the option is located via ipv6_find_tlv() starting
 * from IP6CB(skb)->dsthao; when found (off >= 0) iph->saddr and hao->addr
 * are swapped through a temporary.  The last line is the empty stub variant.
 *
 * NOTE(review): the "tmp"/"off" declarations, any guard on opt->dsthao and
 * the #else/#endif lines separating the real function from the stub are
 * missing from this listing.
 */
276 #ifdef CONFIG_IPV6_MIP6
277 static void mip6_addr_swap(struct sk_buff *skb)
279 struct ipv6hdr *iph = skb->nh.ipv6h;
280 struct inet6_skb_parm *opt = IP6CB(skb);
281 struct ipv6_destopt_hao *hao;
286 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
287 if (likely(off >= 0)) {
288 hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
289 ipv6_addr_copy(&tmp, &iph->saddr);
290 ipv6_addr_copy(&iph->saddr, &hao->addr);
291 ipv6_addr_copy(&hao->addr, &tmp);
296 static inline void mip6_addr_swap(struct sk_buff *skb) {}
300 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 message about @skb.
 * @skb:  packet being reported; skb->nh.ipv6h supplies the addresses and
 *        the data from the network header onward is quoted in the message
 * @type: ICMPv6 type, stored in the outgoing header and in the flow
 * @code: ICMPv6 code, stored likewise
 * @info: 32-bit value written in network byte order to icmp6_pointer
 * @dev:  NOTE(review): not referenced on any line visible in this listing
 *
 * Order of the visible steps:
 *   1. check that the IPv6 header lies inside skb->head..skb->tail;
 *   2. classify the original destination; for multicast or non-PACKET_HOST
 *      packets, single out ICMPV6_PKT_TOOBIG and the PARAMPROB /
 *      ICMPV6_UNK_OPTION / opt_unrec() combination;
 *   3. classify the original source (link-local sources pin the interface
 *      index; unspecified and multicast sources are logged);
 *   4. ask is_ineligible() whether the packet is itself an ICMP error;
 *   5. fill the flow (reply goes to the original source), take the per-CPU
 *      socket lock and apply icmpv6_xrlim_allow();
 *   6. route lookup, anycast check, xfrm_lookup(), hop-limit selection;
 *   7. clamp the quoted length so the whole message fits in IPV6_MIN_MTU;
 *   8. ip6_append_data() with icmpv6_getfrag, then either flush on error or
 *      icmpv6_push_pending_frames();
 *   9. update the output counters and drop the lock.
 *
 * NOTE(review): many lines are missing from this listing (several local
 * declarations such as fl/sk/np/len/hlimit/tclass/iif/err/addr_type, the
 * return/goto statements guarded by the tests, else branches, cleanup
 * labels, the last ip6_append_data() argument).  The step list describes
 * only what is visible; confirm control flow against the full file.
 */
302 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
303 struct net_device *dev)
305 struct inet6_dev *idev = NULL;
306 struct ipv6hdr *hdr = skb->nh.ipv6h;
308 struct ipv6_pinfo *np;
309 struct in6_addr *saddr = NULL;
310 struct dst_entry *dst;
311 struct icmp6hdr tmp_hdr;
313 struct icmpv6_msg msg;
/* The whole IPv6 header must sit inside the buffer's head..tail range. */
320 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
324 * Make sure we respect the rules
325 * i.e. RFC 1885 2.4(e)
326 * Rule (e.1) is enforced by not using icmpv6_send
327 * in any code that processes icmp errors.
329 addr_type = ipv6_addr_type(&hdr->daddr);
331 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
338 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
339 if (type != ICMPV6_PKT_TOOBIG &&
340 !(type == ICMPV6_PARAMPROB &&
341 code == ICMPV6_UNK_OPTION &&
342 (opt_unrec(skb, info))))
/* From here on addr_type describes the original SOURCE address. */
348 addr_type = ipv6_addr_type(&hdr->saddr);
354 if (addr_type & IPV6_ADDR_LINKLOCAL)
355 iif = skb->dev->ifindex;
358 * Must not send error if the source does not uniquely
359 * identify a single node (RFC2463 Section 2.4).
360 * We check unspecified / multicast addresses here,
361 * and anycast addresses will be checked later.
363 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
364 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
369 * Never answer to a ICMP packet.
371 if (is_ineligible(skb)) {
372 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: ICMPv6, addressed to the original sender. */
378 memset(&fl, 0, sizeof(fl));
379 fl.proto = IPPROTO_ICMPV6;
380 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
382 ipv6_addr_copy(&fl.fl6_src, saddr);
384 fl.fl_icmp_type = type;
385 fl.fl_icmp_code = code;
386 security_skb_classify_flow(skb, &fl);
388 if (icmpv6_xmit_lock())
391 sk = icmpv6_socket->sk;
394 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Template header; the checksum is filled in by icmpv6_push_pending_frames(). */
397 tmp_hdr.icmp6_type = type;
398 tmp_hdr.icmp6_code = code;
399 tmp_hdr.icmp6_cksum = 0;
400 tmp_hdr.icmp6_pointer = htonl(info);
402 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
403 fl.oif = np->mcast_oif;
405 err = ip6_dst_lookup(sk, &dst, &fl);
410 * We won't send icmp if the destination is known
413 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
414 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
415 goto out_dst_release;
418 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
421 if (ipv6_addr_is_multicast(&fl.fl6_dst))
422 hlimit = np->mcast_hops;
424 hlimit = np->hop_limit;
426 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
428 hlimit = ipv6_get_hoplimit(dst->dev);
/* Quote the original packet starting at its network header ... */
435 msg.offset = skb->nh.raw - skb->data;
438 len = skb->len - msg.offset;
/* ... but no more than fits in a minimum-MTU packet after both headers. */
439 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
441 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
442 goto out_dst_release;
445 idev = in6_dev_get(skb->dev);
447 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
448 len + sizeof(struct icmp6hdr),
449 sizeof(struct icmp6hdr),
450 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
453 ip6_flush_pending_frames(sk);
456 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counter is indexed by the distance from DEST_UNREACH. */
458 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
459 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
460 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
463 if (likely(idev != NULL))
468 icmpv6_xmit_unlock();
/*
 * icmpv6_echo_reply - answer an ICMPv6 Echo Request.
 * @skb: the received request; skb->h.raw points at its ICMPv6 header
 *
 * Copies the request's ICMPv6 header into a template and changes only the
 * type to ICMPV6_ECHO_REPLY, so the remaining header fields are echoed
 * back.  The reply flow is addressed to the request's source and bound to
 * the receiving interface (fl.oif = skb->dev->ifindex).  After taking the
 * per-CPU socket lock it performs the route and xfrm lookups, selects a hop
 * limit, appends skb->len bytes of payload through icmpv6_getfrag, and
 * either flushes on error or completes the message with
 * icmpv6_push_pending_frames().  Output counters ICMP6_MIB_OUTECHOREPLIES
 * and ICMP6_MIB_OUTMSGS are bumped before the lock is released.
 *
 * NOTE(review): this listing is missing several lines (declarations of
 * sk/fl/err/hlimit/tclass, the statement guarded by the
 * ipv6_unicast_destination() test, goto/return statements, else branches,
 * the msg.skb/msg.offset assignments and the cleanup labels); confirm the
 * control flow against the full file.
 */
471 static void icmpv6_echo_reply(struct sk_buff *skb)
474 struct inet6_dev *idev;
475 struct ipv6_pinfo *np;
476 struct in6_addr *saddr = NULL;
477 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
478 struct icmp6hdr tmp_hdr;
480 struct icmpv6_msg msg;
481 struct dst_entry *dst;
/* Candidate source for the reply: the address the request was sent to. */
486 saddr = &skb->nh.ipv6h->daddr;
488 if (!ipv6_unicast_destination(skb))
/* Echo the request header verbatim except for the type. */
491 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
492 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
494 memset(&fl, 0, sizeof(fl));
495 fl.proto = IPPROTO_ICMPV6;
496 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
498 ipv6_addr_copy(&fl.fl6_src, saddr);
499 fl.oif = skb->dev->ifindex;
500 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
501 security_skb_classify_flow(skb, &fl);
503 if (icmpv6_xmit_lock())
506 sk = icmpv6_socket->sk;
509 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
510 fl.oif = np->mcast_oif;
512 err = ip6_dst_lookup(sk, &dst, &fl);
515 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
518 if (ipv6_addr_is_multicast(&fl.fl6_dst))
519 hlimit = np->mcast_hops;
521 hlimit = np->hop_limit;
523 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
525 hlimit = ipv6_get_hoplimit(dst->dev);
531 idev = in6_dev_get(skb->dev);
535 msg.type = ICMPV6_ECHO_REPLY;
537 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
538 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
539 (struct rt6_info*)dst, MSG_DONTWAIT);
542 ip6_flush_pending_frames(sk);
545 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
547 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
548 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
551 if (likely(idev != NULL))
555 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - hand a received ICMPv6 error to the affected protocol.
 * @skb:  buffer whose data now starts at the IPv6 header quoted inside the
 *        ICMPv6 error (it is read via (struct ipv6hdr *)skb->data)
 * @type: ICMPv6 type of the error
 * @code: ICMPv6 code of the error
 * @info: extra 32-bit field from the ICMPv6 header, passed through as-is
 *
 * Makes sure the quoted IPv6 header is pullable, skips any extension
 * headers to find the inner protocol and its offset, and requires 8 more
 * bytes of the inner header.  The inner protocol number, masked with
 * MAX_INET_PROTOS - 1, selects both the registered inet6_protocol (whose
 * err_handler is called if present) and the raw-socket hash chain, on which
 * every matching raw socket gets rawv6_err() under raw_v6_lock.
 *
 * NOTE(review): the declarations of sk/inner_offset/hash/nexthdr, the
 * return statements after the failed pulls, the else branch, the RCU
 * read-side markers, the remaining __raw_v6_lookup() arguments and the
 * loop advance are missing from this listing.
 */
558 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
560 struct in6_addr *saddr, *daddr;
561 struct inet6_protocol *ipprot;
567 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
570 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
571 if (ipv6_ext_hdr(nexthdr)) {
572 /* now skip over extension headers */
573 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
577 inner_offset = sizeof(struct ipv6hdr);
580 /* Checking header including 8 bytes of inner protocol header. */
581 if (!pskb_may_pull(skb, inner_offset+8))
584 saddr = &skb->nh.ipv6h->saddr;
585 daddr = &skb->nh.ipv6h->daddr;
587 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
588 Without this we will not able f.e. to make source routed
590 Corresponding argument (opt) to notifiers is already added.
594 hash = nexthdr & (MAX_INET_PROTOS - 1);
597 ipprot = rcu_dereference(inet6_protos[hash]);
598 if (ipprot && ipprot->err_handler)
599 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
602 read_lock(&raw_v6_lock);
603 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
604 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
606 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
610 read_unlock(&raw_v6_lock);
614 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler registered in icmpv6_protocol.
 * @pskb: pointer to the received buffer
 *
 * Counts the message (ICMP6_MIB_INMSGS), validates the ICMPv6 checksum
 * according to skb->ip_summed (a failure is logged with both addresses),
 * pulls the ICMPv6 header and bumps the per-type input counter: error
 * types DEST_UNREACH..PARAMPROB are indexed from ICMP6_MIB_INDESTUNREACHS,
 * types ECHO_REQUEST..NDISC_REDIRECT from ICMP6_MIB_INECHOS.
 *
 * Dispatch on the type, as far as visible here:
 *   - ECHO_REQUEST            -> icmpv6_echo_reply()
 *   - ECHO_REPLY              -> nothing
 *   - PKT_TOOBIG              -> rt6_pmtu_discovery() on the quoted header's
 *                                addresses with the advertised MTU, then
 *                                continues into the notify case
 *   - DEST_UNREACH, TIME_EXCEED, PARAMPROB -> icmpv6_notify()
 *   - MGM_QUERY / MGM_REPORT  -> igmp6_event_query() / igmp6_event_report()
 *   - unknown types are logged; those without ICMPV6_INFOMSG_MASK set are
 *     still forwarded to icmpv6_notify()
 * ICMP6_MIB_INERRORS is incremented on the error path at the end.
 *
 * NOTE(review): hdr is re-read from skb->h.raw after pskb_may_pull() in the
 * PKT_TOOBIG case, presumably because the pull may relocate the data --
 * confirm.  This listing is missing many lines (the "type" declaration,
 * the switch on type, break/goto statements, the neighbour-discovery
 * handler call, the default label, the labels and the return statements),
 * so the exact control flow and return value must be checked against the
 * full file.
 */
617 static int icmpv6_rcv(struct sk_buff **pskb)
619 struct sk_buff *skb = *pskb;
620 struct net_device *dev = skb->dev;
621 struct inet6_dev *idev = __in6_dev_get(dev);
622 struct in6_addr *saddr, *daddr;
623 struct ipv6hdr *orig_hdr;
624 struct icmp6hdr *hdr;
627 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
629 saddr = &skb->nh.ipv6h->saddr;
630 daddr = &skb->nh.ipv6h->daddr;
632 /* Perform checksum. */
633 switch (skb->ip_summed) {
634 case CHECKSUM_COMPLETE:
635 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
640 skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
642 if (__skb_checksum_complete(skb)) {
643 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
644 NIP6(*saddr), NIP6(*daddr));
649 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
652 hdr = (struct icmp6hdr *) skb->h.raw;
654 type = hdr->icmp6_type;
656 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
657 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
658 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
659 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
662 case ICMPV6_ECHO_REQUEST:
663 icmpv6_echo_reply(skb);
666 case ICMPV6_ECHO_REPLY:
667 /* we couldn't care less */
670 case ICMPV6_PKT_TOOBIG:
671 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
672 standard destination cache. Seems, only "advanced"
673 destination cache will allow to solve this problem
676 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
678 hdr = (struct icmp6hdr *) skb->h.raw;
679 orig_hdr = (struct ipv6hdr *) (hdr + 1);
680 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
681 ntohl(hdr->icmp6_mtu));
684 * Drop through to notify
687 case ICMPV6_DEST_UNREACH:
688 case ICMPV6_TIME_EXCEED:
689 case ICMPV6_PARAMPROB:
690 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
693 case NDISC_ROUTER_SOLICITATION:
694 case NDISC_ROUTER_ADVERTISEMENT:
695 case NDISC_NEIGHBOUR_SOLICITATION:
696 case NDISC_NEIGHBOUR_ADVERTISEMENT:
701 case ICMPV6_MGM_QUERY:
702 igmp6_event_query(skb);
705 case ICMPV6_MGM_REPORT:
706 igmp6_event_report(skb);
709 case ICMPV6_MGM_REDUCTION:
710 case ICMPV6_NI_QUERY:
711 case ICMPV6_NI_REPLY:
712 case ICMPV6_MLD2_REPORT:
713 case ICMPV6_DHAAD_REQUEST:
714 case ICMPV6_DHAAD_REPLY:
715 case ICMPV6_MOBILE_PREFIX_SOL:
716 case ICMPV6_MOBILE_PREFIX_ADV:
720 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
723 if (type & ICMPV6_INFOMSG_MASK)
727 * error of unknown type.
728 * must pass to upper level
731 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
737 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
743 * Special lock-class for __icmpv6_socket:
/*
 * lockdep class key assigned to sk->sk_dst_lock of every per-CPU ICMPv6
 * socket by icmpv6_init() via lockdep_set_class().
 */
745 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * icmpv6_init - create the per-CPU ICMPv6 sockets and register the protocol.
 * @ops: NOTE(review): not referenced on any line visible in this listing
 *
 * For every possible CPU a kernel raw socket (PF_INET6, SOCK_RAW,
 * IPPROTO_ICMPV6) is created into that CPU's __icmpv6_socket slot.  Each
 * socket is switched to GFP_ATOMIC allocation, gets its sk_dst_lock moved
 * to the dedicated lockdep class, receives a buffer-size setting computed
 * as 2 * (64 KiB + sizeof(struct sk_buff)), and is removed from the
 * protocol hash via sk->sk_prot->unhash().  Afterwards icmpv6_protocol is
 * registered for IPPROTO_ICMPV6.  The trailing loop releases the sockets of
 * CPUs 0..i-1, skipping indices that are not possible CPUs; it is the
 * unwind path for a failure part-way through.
 *
 * NOTE(review): the declarations of sk/err/i/j, the error test and printk
 * call around the "Failed to initialize" string, the left-hand side of the
 * buffer-size assignment, the goto/label lines and the return statements
 * are missing from this listing; the return convention cannot be stated
 * from what is visible.
 */
747 int __init icmpv6_init(struct net_proto_family *ops)
752 for_each_possible_cpu(i) {
753 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
754 &per_cpu(__icmpv6_socket, i));
757 "Failed to initialize the ICMP6 control socket "
763 sk = per_cpu(__icmpv6_socket, i)->sk;
764 sk->sk_allocation = GFP_ATOMIC;
766 * Split off their lock-class, because sk->sk_dst_lock
767 * gets used from softirqs, which is safe for
768 * __icmpv6_socket (because those never get directly used
769 * via userspace syscalls), but unsafe for normal sockets.
771 lockdep_set_class(&sk->sk_dst_lock,
772 &icmpv6_socket_sk_dst_lock_key);
774 /* Enough space for 2 64K ICMP packets, including
775 * sk_buff struct overhead.
778 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
780 sk->sk_prot->unhash(sk);
784 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
785 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
793 for (j = 0; j < i; j++) {
794 if (!cpu_possible(j))
796 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init().
 *
 * Releases the ICMPv6 socket of every possible CPU and then unregisters
 * icmpv6_protocol for IPPROTO_ICMPV6.
 * NOTE(review): the declaration of the loop variable i is missing from this
 * listing.
 */
802 void icmpv6_cleanup(void)
806 for_each_possible_cpu(i) {
807 sock_release(per_cpu(__icmpv6_socket, i));
809 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Constant table of struct icmp6_err entries.  icmpv6_err_convert() indexes
 * it (as tab_unreach[]) by the ICMPv6 Destination Unreachable code and reads
 * each entry's .err and .fatal members.
 * NOTE(review): the member declarations, the table name line and all entry
 * values are missing from this listing; only two per-entry labels survive.
 */
812 static const struct icmp6_err {
820 { /* ADM_PROHIBITED */
824 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno-style value.
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @err:  output; receives the translated error value
 *
 * For ICMPV6_DEST_UNREACH with a code up to ICMPV6_PORT_UNREACH the result
 * and the "fatal" flag come from tab_unreach[code].  ICMPV6_PKT_TOOBIG,
 * ICMPV6_PARAMPROB and ICMPV6_TIME_EXCEED have their own cases.
 *
 * NOTE(review): only the upper bound of @code is checked on the visible
 * lines; @code is a signed int, so confirm that callers never pass a
 * negative value.  The "fatal" declaration, the switch statement, the
 * bodies of the last three cases and the return statement are missing from
 * this listing.
 */
838 int icmpv6_err_convert(int type, int code, int *err)
845 case ICMPV6_DEST_UNREACH:
847 if (code <= ICMPV6_PORT_UNREACH) {
848 *err = tab_unreach[code].err;
849 fatal = tab_unreach[code].fatal;
853 case ICMPV6_PKT_TOOBIG:
857 case ICMPV6_PARAMPROB:
862 case ICMPV6_TIME_EXCEED:
871 ctl_table ipv6_icmp_table[] = {
873 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
874 .procname = "ratelimit",
875 .data = &sysctl_icmpv6_time,
876 .maxlen = sizeof(int),
878 .proc_handler = &proc_dointvec