2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/netfilter.h>
48 #include <linux/sysctl.h>
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
74 * The ICMP socket(s). This is the most convenient way to flow control
75 * our ICMP output as well as maintain a clean interface throughout
76 * all layers. All Socketless IP sends will soon be gone.
78 * On SMP we have one ICMP socket per-cpu.
80 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
81 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
83 static int icmpv6_rcv(struct sk_buff **pskb);
85 static struct inet6_protocol icmpv6_protocol = {
86 .handler = icmpv6_rcv,
87 .flags = INET6_PROTO_FINAL,
90 static __inline__ int icmpv6_xmit_lock(void)
94 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
95 /* This can happen if the output path (f.e. SIT or
96 * ip6ip6 tunnel) signals dst_link_failure() for an
97 * outgoing ICMP6 packet.
105 static __inline__ void icmpv6_xmit_unlock(void)
107 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
111 * Slightly more convenient version of icmpv6_send.
113 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
115 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
120 * Figure out whether we may reply to this packet with an ICMP error.
122 * We do not reply if:
123 * - it was an ICMP error message.
124 * - it is truncated, so that it is known, that protocol is ICMPV6
125 * (i.e. in the middle of some exthdr)
130 static int is_ineligible(struct sk_buff *skb)
132 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
133 int len = skb->len - ptr;
134 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
139 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
142 if (nexthdr == IPPROTO_ICMPV6) {
144 tp = skb_header_pointer(skb,
145 ptr+offsetof(struct icmp6hdr, icmp6_type),
146 sizeof(_type), &_type);
148 !(*tp & ICMPV6_INFOMSG_MASK))
154 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
157 * Check the ICMP output rate limit
159 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
162 struct dst_entry *dst;
165 /* Informational messages are not limited. */
166 if (type & ICMPV6_INFOMSG_MASK)
169 /* Do not limit pmtu discovery, it would break it. */
170 if (type == ICMPV6_PKT_TOOBIG)
174 * Look up the output route.
175 * XXX: perhaps the expire for routing entries cloned by
176 * this lookup should be more aggressive (not longer than timeout).
178 dst = ip6_route_output(sk, fl);
180 IP6_INC_STATS(ip6_dst_idev(dst),
181 IPSTATS_MIB_OUTNOROUTES);
182 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
185 struct rt6_info *rt = (struct rt6_info *)dst;
186 int tmo = sysctl_icmpv6_time;
188 /* Give more bandwidth to wider prefixes. */
189 if (rt->rt6i_dst.plen < 128)
190 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
192 res = xrlim_allow(dst, tmo);
199 * an inline helper for the "simple" if statement below
200 * checks if parameter problem report is caused by an
201 * unrecognized IPv6 option that has the Option Type
202 * highest-order two bits set to 10
205 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
209 offset += skb->nh.raw - skb->data;
210 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
213 return (*op & 0xC0) == 0x80;
216 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
219 struct icmp6hdr *icmp6h;
222 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
225 icmp6h = (struct icmp6hdr*) skb->h.raw;
226 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
227 icmp6h->icmp6_cksum = 0;
229 if (skb_queue_len(&sk->sk_write_queue) == 1) {
230 skb->csum = csum_partial((char *)icmp6h,
231 sizeof(struct icmp6hdr), skb->csum);
232 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
239 skb_queue_walk(&sk->sk_write_queue, skb) {
240 tmp_csum = csum_add(tmp_csum, skb->csum);
243 tmp_csum = csum_partial((char *)icmp6h,
244 sizeof(struct icmp6hdr), tmp_csum);
245 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
250 ip6_push_pending_frames(sk);
261 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
263 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
264 struct sk_buff *org_skb = msg->skb;
267 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
269 skb->csum = csum_block_add(skb->csum, csum, odd);
270 if (!(msg->type & ICMPV6_INFOMSG_MASK))
271 nf_ct_attach(skb, org_skb);
275 #ifdef CONFIG_IPV6_MIP6
276 static void mip6_addr_swap(struct sk_buff *skb)
278 struct ipv6hdr *iph = skb->nh.ipv6h;
279 struct inet6_skb_parm *opt = IP6CB(skb);
280 struct ipv6_destopt_hao *hao;
285 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
286 if (likely(off >= 0)) {
287 hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
288 ipv6_addr_copy(&tmp, &iph->saddr);
289 ipv6_addr_copy(&iph->saddr, &hao->addr);
290 ipv6_addr_copy(&hao->addr, &tmp);
295 static inline void mip6_addr_swap(struct sk_buff *skb) {}
299 * Send an ICMP message in response to a packet in error
301 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
302 struct net_device *dev)
304 struct inet6_dev *idev = NULL;
305 struct ipv6hdr *hdr = skb->nh.ipv6h;
307 struct ipv6_pinfo *np;
308 struct in6_addr *saddr = NULL;
309 struct dst_entry *dst;
310 struct icmp6hdr tmp_hdr;
312 struct icmpv6_msg msg;
319 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
323 * Make sure we respect the rules
324 * i.e. RFC 1885 2.4(e)
325 * Rule (e.1) is enforced by not using icmpv6_send
326 * in any code that processes icmp errors.
328 addr_type = ipv6_addr_type(&hdr->daddr);
330 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
337 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
338 if (type != ICMPV6_PKT_TOOBIG &&
339 !(type == ICMPV6_PARAMPROB &&
340 code == ICMPV6_UNK_OPTION &&
341 (opt_unrec(skb, info))))
347 addr_type = ipv6_addr_type(&hdr->saddr);
353 if (addr_type & IPV6_ADDR_LINKLOCAL)
354 iif = skb->dev->ifindex;
357 * Must not send error if the source does not uniquely
358 * identify a single node (RFC2463 Section 2.4).
359 * We check unspecified / multicast addresses here,
360 * and anycast addresses will be checked later.
362 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
363 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
368 * Never answer to an ICMP packet.
370 if (is_ineligible(skb)) {
371 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
377 memset(&fl, 0, sizeof(fl));
378 fl.proto = IPPROTO_ICMPV6;
379 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
381 ipv6_addr_copy(&fl.fl6_src, saddr);
383 fl.fl_icmp_type = type;
384 fl.fl_icmp_code = code;
385 security_skb_classify_flow(skb, &fl);
387 if (icmpv6_xmit_lock())
390 sk = icmpv6_socket->sk;
393 if (!icmpv6_xrlim_allow(sk, type, &fl))
396 tmp_hdr.icmp6_type = type;
397 tmp_hdr.icmp6_code = code;
398 tmp_hdr.icmp6_cksum = 0;
399 tmp_hdr.icmp6_pointer = htonl(info);
401 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
402 fl.oif = np->mcast_oif;
404 err = ip6_dst_lookup(sk, &dst, &fl);
409 * We won't send icmp if the destination is known
412 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
413 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
414 goto out_dst_release;
417 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
420 if (ipv6_addr_is_multicast(&fl.fl6_dst))
421 hlimit = np->mcast_hops;
423 hlimit = np->hop_limit;
425 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
427 hlimit = ipv6_get_hoplimit(dst->dev);
434 msg.offset = skb->nh.raw - skb->data;
437 len = skb->len - msg.offset;
438 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
440 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
441 goto out_dst_release;
444 idev = in6_dev_get(skb->dev);
446 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
447 len + sizeof(struct icmp6hdr),
448 sizeof(struct icmp6hdr),
449 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
452 ip6_flush_pending_frames(sk);
455 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
457 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
458 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
459 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
462 if (likely(idev != NULL))
467 icmpv6_xmit_unlock();
470 static void icmpv6_echo_reply(struct sk_buff *skb)
473 struct inet6_dev *idev;
474 struct ipv6_pinfo *np;
475 struct in6_addr *saddr = NULL;
476 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
477 struct icmp6hdr tmp_hdr;
479 struct icmpv6_msg msg;
480 struct dst_entry *dst;
485 saddr = &skb->nh.ipv6h->daddr;
487 if (!ipv6_unicast_destination(skb))
490 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
491 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
493 memset(&fl, 0, sizeof(fl));
494 fl.proto = IPPROTO_ICMPV6;
495 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
497 ipv6_addr_copy(&fl.fl6_src, saddr);
498 fl.oif = skb->dev->ifindex;
499 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
500 security_skb_classify_flow(skb, &fl);
502 if (icmpv6_xmit_lock())
505 sk = icmpv6_socket->sk;
508 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
509 fl.oif = np->mcast_oif;
511 err = ip6_dst_lookup(sk, &dst, &fl);
514 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
517 if (ipv6_addr_is_multicast(&fl.fl6_dst))
518 hlimit = np->mcast_hops;
520 hlimit = np->hop_limit;
522 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
524 hlimit = ipv6_get_hoplimit(dst->dev);
530 idev = in6_dev_get(skb->dev);
534 msg.type = ICMPV6_ECHO_REPLY;
536 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
537 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
538 (struct rt6_info*)dst, MSG_DONTWAIT);
541 ip6_flush_pending_frames(sk);
544 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
546 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
547 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
550 if (likely(idev != NULL))
554 icmpv6_xmit_unlock();
557 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
559 struct in6_addr *saddr, *daddr;
560 struct inet6_protocol *ipprot;
566 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
569 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
570 if (ipv6_ext_hdr(nexthdr)) {
571 /* now skip over extension headers */
572 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
576 inner_offset = sizeof(struct ipv6hdr);
579 /* Check the header, including 8 bytes of the inner protocol header. */
580 if (!pskb_may_pull(skb, inner_offset+8))
583 saddr = &skb->nh.ipv6h->saddr;
584 daddr = &skb->nh.ipv6h->daddr;
586 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
587 Without this we will not be able f.e. to make source routed
589 Corresponding argument (opt) to notifiers is already added.
593 hash = nexthdr & (MAX_INET_PROTOS - 1);
596 ipprot = rcu_dereference(inet6_protos[hash]);
597 if (ipprot && ipprot->err_handler)
598 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
601 read_lock(&raw_v6_lock);
602 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
603 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
605 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
609 read_unlock(&raw_v6_lock);
613 * Handle icmp messages
616 static int icmpv6_rcv(struct sk_buff **pskb)
618 struct sk_buff *skb = *pskb;
619 struct net_device *dev = skb->dev;
620 struct inet6_dev *idev = __in6_dev_get(dev);
621 struct in6_addr *saddr, *daddr;
622 struct ipv6hdr *orig_hdr;
623 struct icmp6hdr *hdr;
626 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
628 saddr = &skb->nh.ipv6h->saddr;
629 daddr = &skb->nh.ipv6h->daddr;
631 /* Perform checksum. */
632 switch (skb->ip_summed) {
633 case CHECKSUM_COMPLETE:
634 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
639 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
641 if (__skb_checksum_complete(skb)) {
642 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
643 NIP6(*saddr), NIP6(*daddr));
648 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
651 hdr = (struct icmp6hdr *) skb->h.raw;
653 type = hdr->icmp6_type;
655 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
656 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
657 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
658 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
661 case ICMPV6_ECHO_REQUEST:
662 icmpv6_echo_reply(skb);
665 case ICMPV6_ECHO_REPLY:
666 /* we couldn't care less */
669 case ICMPV6_PKT_TOOBIG:
670 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
671 standard destination cache. Seems, only "advanced"
672 destination cache will allow to solve this problem
675 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
677 hdr = (struct icmp6hdr *) skb->h.raw;
678 orig_hdr = (struct ipv6hdr *) (hdr + 1);
679 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
680 ntohl(hdr->icmp6_mtu));
683 * Drop through to notify
686 case ICMPV6_DEST_UNREACH:
687 case ICMPV6_TIME_EXCEED:
688 case ICMPV6_PARAMPROB:
689 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
692 case NDISC_ROUTER_SOLICITATION:
693 case NDISC_ROUTER_ADVERTISEMENT:
694 case NDISC_NEIGHBOUR_SOLICITATION:
695 case NDISC_NEIGHBOUR_ADVERTISEMENT:
700 case ICMPV6_MGM_QUERY:
701 igmp6_event_query(skb);
704 case ICMPV6_MGM_REPORT:
705 igmp6_event_report(skb);
708 case ICMPV6_MGM_REDUCTION:
709 case ICMPV6_NI_QUERY:
710 case ICMPV6_NI_REPLY:
711 case ICMPV6_MLD2_REPORT:
712 case ICMPV6_DHAAD_REQUEST:
713 case ICMPV6_DHAAD_REPLY:
714 case ICMPV6_MOBILE_PREFIX_SOL:
715 case ICMPV6_MOBILE_PREFIX_ADV:
719 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
722 if (type & ICMPV6_INFOMSG_MASK)
726 * error of unknown type.
727 * must pass to upper level
730 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
736 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
742 * Special lock-class for __icmpv6_socket:
744 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
746 int __init icmpv6_init(struct net_proto_family *ops)
751 for_each_possible_cpu(i) {
752 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
753 &per_cpu(__icmpv6_socket, i));
756 "Failed to initialize the ICMP6 control socket "
762 sk = per_cpu(__icmpv6_socket, i)->sk;
763 sk->sk_allocation = GFP_ATOMIC;
765 * Split off their lock-class, because sk->sk_dst_lock
766 * gets used from softirqs, which is safe for
767 * __icmpv6_socket (because those never get directly used
768 * via userspace syscalls), but unsafe for normal sockets.
770 lockdep_set_class(&sk->sk_dst_lock,
771 &icmpv6_socket_sk_dst_lock_key);
773 /* Enough space for 2 64K ICMP packets, including
774 * sk_buff struct overhead.
777 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
779 sk->sk_prot->unhash(sk);
783 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
784 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
792 for (j = 0; j < i; j++) {
793 if (!cpu_possible(j))
795 sock_release(per_cpu(__icmpv6_socket, j));
801 void icmpv6_cleanup(void)
805 for_each_possible_cpu(i) {
806 sock_release(per_cpu(__icmpv6_socket, i));
808 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
811 static const struct icmp6_err {
819 { /* ADM_PROHIBITED */
823 { /* Was NOT_NEIGHBOUR, now reserved */
837 int icmpv6_err_convert(int type, int code, int *err)
844 case ICMPV6_DEST_UNREACH:
846 if (code <= ICMPV6_PORT_UNREACH) {
847 *err = tab_unreach[code].err;
848 fatal = tab_unreach[code].fatal;
852 case ICMPV6_PKT_TOOBIG:
856 case ICMPV6_PARAMPROB:
861 case ICMPV6_TIME_EXCEED:
870 ctl_table ipv6_icmp_table[] = {
872 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
873 .procname = "ratelimit",
874 .data = &sysctl_icmpv6_time,
875 .maxlen = sizeof(int),
877 .proc_handler = &proc_dointvec