2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to a icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
/*
 * ICMPv6 SNMP counters: one set for general ICMPv6 statistics and one that
 * is incremented per message type (see ICMP6MSGIN_INC_STATS_BH in
 * icmpv6_rcv()). Both symbols are exported.
 */
70 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
71 EXPORT_SYMBOL(icmpv6_statistics);
72 DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly;
73 EXPORT_SYMBOL(icmpv6msg_statistics);
76 * The ICMP socket(s). This is the most convenient way to flow control
77 * our ICMP output as well as maintain a clean interface throughout
78 * all layers. All Socketless IP sends will soon be gone.
80 * On SMP we have one ICMP socket per-cpu.
/*
 * __icmpv6_socket holds one socket pointer per CPU (NULL until icmpv6_init()
 * creates the sockets). icmpv6_socket expands to the current CPU's entry.
 */
82 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
83 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration; icmpv6_rcv() is defined further down in this file. */
85 static int icmpv6_rcv(struct sk_buff *skb);
/*
 * Protocol descriptor for ICMPv6. icmpv6_init() registers it under
 * IPPROTO_ICMPV6 and icmpv6_cleanup() removes it again; received packets
 * are handed to icmpv6_rcv().
 */
87 static struct inet6_protocol icmpv6_protocol = {
88 .handler = icmpv6_rcv,
89 .flags = INET6_PROTO_FINAL,
/*
 * Try to take the spinlock of the current CPU's ICMPv6 socket with
 * spin_trylock(), i.e. without waiting for it. The comment inside the
 * failure branch names the situation in which the attempt fails.
 * NOTE(review): callers use the result as "non-zero means do not send";
 * the return statements are not visible in this listing - confirm.
 */
92 static __inline__ int icmpv6_xmit_lock(void)
96 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
97 /* This can happen if the output path (f.e. SIT or
98 * ip6ip6 tunnel) signals dst_link_failure() for an
99 * outgoing ICMP6 packet.
/*
 * Counterpart of icmpv6_xmit_lock(): release the current CPU's ICMPv6
 * socket spinlock with spin_unlock_bh().
 */
107 static __inline__ void icmpv6_xmit_unlock(void)
109 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
113 * Slightly more convenient version of icmpv6_send.
/*
 * Send an ICMPV6_PARAMPROB message about skb.
 * code is used as the ICMPv6 code, pos is passed as the info argument of
 * icmpv6_send(), and skb->dev is passed as the device.
 */
115 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
117 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
122 * Determine whether we may reply to this packet with an ICMP error.
124 * We do not reply if:
125 * - it was an ICMP error message.
126 * - it is truncated, so that it is known, that protocol is ICMPV6
127 * (i.e. in the middle of some exthdr)
/*
 * Inspect the offending packet before an error is sent about it.
 *
 * Starts at the offset just past the fixed IPv6 header, skips extension
 * headers with ipv6_skip_exthdr() and, when the resulting next-header value
 * is IPPROTO_ICMPV6, reads the ICMPv6 type byte through skb_header_pointer()
 * and tests it against ICMPV6_INFOMSG_MASK.
 * icmpv6_send() treats a non-zero result as "do not reply".
 */
132 static int is_ineligible(struct sk_buff *skb)
134 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
135 int len = skb->len - ptr;
136 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
144 if (nexthdr == IPPROTO_ICMPV6) {
146 tp = skb_header_pointer(skb,
147 ptr+offsetof(struct icmp6hdr, icmp6_type),
148 sizeof(_type), &_type);
150 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base timeout used by icmpv6_xrlim_allow() for rate limiting; defaults to
 * 1*HZ and is the variable behind the "ratelimit" entry of ipv6_icmp_table.
 */
156 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
159 * Check the ICMP output rate limit
/*
 * Decide whether an outgoing ICMPv6 message of the given type may be sent.
 *
 * Informational types (ICMPV6_INFOMSG_MASK set) and ICMPV6_PKT_TOOBIG are
 * special-cased ahead of the route lookup. Otherwise the output route for
 * the flow is looked up; a loopback output device is handled separately,
 * and for other routes the decision comes from xrlim_allow() with a timeout
 * derived from sysctl_icmpv6_time.
 * icmpv6_send() does not send when this returns zero.
 */
161 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
164 struct dst_entry *dst;
167 /* Informational messages are not limited. */
168 if (type & ICMPV6_INFOMSG_MASK)
171 /* Do not limit pmtu discovery, it would break it. */
172 if (type == ICMPV6_PKT_TOOBIG)
176 * Look up the output route.
177 * XXX: perhaps the expire for routing entries cloned by
178 * this lookup should be more aggressive (not longer than timeout).
180 dst = ip6_route_output(sk, fl);
182 IP6_INC_STATS(ip6_dst_idev(dst),
183 IPSTATS_MIB_OUTNOROUTES);
184 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
187 struct rt6_info *rt = (struct rt6_info *)dst;
188 int tmo = sysctl_icmpv6_time;
190 /* Give more bandwidth to wider prefixes. */
/*
 * The shift count is (128 - plen) / 32, so the timeout is halved once for
 * every full 32 bits by which the prefix is shorter than 128.
 */
191 if (rt->rt6i_dst.plen < 128)
192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
194 res = xrlim_allow(dst, tmo);
201 * an inline helper for the "simple" if statement below
202 * checks if parameter problem report is caused by an
203 * unrecognized IPv6 option that has the Option Type
204 * highest-order two bits set to 10
/*
 * Read one byte at offset, taken relative to the network header of skb,
 * and report whether its two highest-order bits are 10 (mask 0xC0,
 * value 0x80).
 */
207 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
211 offset += skb_network_offset(skb);
212 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
215 return (*op & 0xC0) == 0x80;
/*
 * Finalise the ICMPv6 header of the data queued on sk->sk_write_queue and
 * hand the queue to ip6_push_pending_frames().
 *
 * The header from thdr is copied into the buffer at the head of the queue
 * with its checksum field zeroed. With exactly one queued buffer the
 * checksum starts from that buffer's skb->csum; with several, the csum
 * values of all queued buffers are added up first. In both cases the header
 * bytes are folded in with csum_partial() and the final value is produced by
 * csum_ipv6_magic().
 */
218 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
221 struct icmp6hdr *icmp6h;
224 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
227 icmp6h = icmp6_hdr(skb);
228 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
229 icmp6h->icmp6_cksum = 0;
231 if (skb_queue_len(&sk->sk_write_queue) == 1) {
232 skb->csum = csum_partial((char *)icmp6h,
233 sizeof(struct icmp6hdr), skb->csum);
234 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
241 skb_queue_walk(&sk->sk_write_queue, skb) {
242 tmp_csum = csum_add(tmp_csum, skb->csum);
245 tmp_csum = csum_partial((char *)icmp6h,
246 sizeof(struct icmp6hdr), tmp_csum);
247 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
252 ip6_push_pending_frames(sk);
/*
 * Fill callback passed to ip6_append_data() by icmpv6_send() and
 * icmpv6_echo_reply().
 *
 * from points to a struct icmpv6_msg. Data is taken from the original
 * packet (msg->skb) starting at msg->offset + offset via
 * skb_copy_and_csum_bits(), and the resulting checksum is added into
 * skb->csum. For messages whose type does not have ICMPV6_INFOMSG_MASK set,
 * nf_ct_attach() is called with the new and the original skb.
 */
263 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
265 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
266 struct sk_buff *org_skb = msg->skb;
269 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
271 skb->csum = csum_block_add(skb->csum, csum, odd);
272 if (!(msg->type & ICMPV6_INFOMSG_MASK))
273 nf_ct_attach(skb, org_skb);
/*
 * With Mobile IPv6 configured (built in or as a module): look for the
 * IPV6_TLV_HAO option starting at opt->dsthao and, if it is found, swap the
 * IPv6 source address of the packet with the address stored in the option.
 * NOTE(review): the empty inline variant at the end is presumably the
 * #else branch; the directive is not visible in this listing.
 */
277 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
278 static void mip6_addr_swap(struct sk_buff *skb)
280 struct ipv6hdr *iph = ipv6_hdr(skb);
281 struct inet6_skb_parm *opt = IP6CB(skb);
282 struct ipv6_destopt_hao *hao;
287 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
288 if (likely(off >= 0)) {
289 hao = (struct ipv6_destopt_hao *)
290 (skb_network_header(skb) + off);
291 ipv6_addr_copy(&tmp, &iph->saddr);
292 ipv6_addr_copy(&iph->saddr, &hao->addr);
293 ipv6_addr_copy(&hao->addr, &tmp);
298 static inline void mip6_addr_swap(struct sk_buff *skb) {}
302 * Send an ICMP message in response to a packet in error
/*
 * Build and send an ICMPv6 message about skb.
 *
 * skb:  packet that triggered the message
 * type: ICMPv6 type of the message to send
 * code: ICMPv6 code of the message to send
 * info: stored, converted with htonl(), in the icmp6_pointer header field
 * dev:  NOTE(review): not referenced in the lines visible here - confirm
 *
 * Steps visible in this function: check that the IPv6 header lies inside
 * the skb, classify the destination and source addresses, refuse
 * unspecified/multicast sources and packets rejected by is_ineligible(),
 * fill in the flow, take the per-CPU socket lock, apply the rate limit,
 * resolve the route (giving up on anycast destinations) and the xfrm
 * policy, pick a hop limit, then append at most
 * IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr) bytes of
 * the offending packet and push the frames out. The symbol is exported.
 */
304 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
305 struct net_device *dev)
307 struct inet6_dev *idev = NULL;
308 struct ipv6hdr *hdr = ipv6_hdr(skb);
310 struct ipv6_pinfo *np;
311 struct in6_addr *saddr = NULL;
312 struct dst_entry *dst;
313 struct icmp6hdr tmp_hdr;
315 struct icmpv6_msg msg;
322 if ((u8 *)hdr < skb->head ||
323 (skb->network_header + sizeof(*hdr)) > skb->tail)
327 * Make sure we respect the rules
328 * i.e. RFC 1885 2.4(e)
329 * Rule (e.1) is enforced by not using icmpv6_send
330 * in any code that processes icmp errors.
332 addr_type = ipv6_addr_type(&hdr->daddr);
334 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
/*
 * For a multicast destination, or a packet not addressed to this host,
 * only Packet Too Big and Parameter Problem with code ICMPV6_UNK_OPTION
 * (when opt_unrec() matches) are exempt from the condition below.
 */
341 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
342 if (type != ICMPV6_PKT_TOOBIG &&
343 !(type == ICMPV6_PARAMPROB &&
344 code == ICMPV6_UNK_OPTION &&
345 (opt_unrec(skb, info))))
351 addr_type = ipv6_addr_type(&hdr->saddr);
/* Link-local source: remember the index of the receiving interface. */
357 if (addr_type & IPV6_ADDR_LINKLOCAL)
358 iif = skb->dev->ifindex;
361 * Must not send error if the source does not uniquely
362 * identify a single node (RFC2463 Section 2.4).
363 * We check unspecified / multicast addresses here,
364 * and anycast addresses will be checked later.
366 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
367 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
372 * Never answer to a ICMP packet.
374 if (is_ineligible(skb)) {
375 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* The reply goes back to the source address of the offending packet. */
381 memset(&fl, 0, sizeof(fl));
382 fl.proto = IPPROTO_ICMPV6;
383 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
385 ipv6_addr_copy(&fl.fl6_src, saddr);
387 fl.fl_icmp_type = type;
388 fl.fl_icmp_code = code;
389 security_skb_classify_flow(skb, &fl);
391 if (icmpv6_xmit_lock())
394 sk = icmpv6_socket->sk;
397 if (!icmpv6_xrlim_allow(sk, type, &fl))
400 tmp_hdr.icmp6_type = type;
401 tmp_hdr.icmp6_code = code;
402 tmp_hdr.icmp6_cksum = 0;
403 tmp_hdr.icmp6_pointer = htonl(info);
405 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
406 fl.oif = np->mcast_oif;
408 err = ip6_dst_lookup(sk, &dst, &fl);
413 * We won't send icmp if the destination is known
416 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
417 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
418 goto out_dst_release;
421 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
424 if (ipv6_addr_is_multicast(&fl.fl6_dst))
425 hlimit = np->mcast_hops;
427 hlimit = np->hop_limit;
429 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
431 hlimit = ipv6_get_hoplimit(dst->dev);
/* Quote the offending packet starting at its network header. */
438 msg.offset = skb_network_offset(skb);
441 len = skb->len - msg.offset;
442 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
444 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
445 goto out_dst_release;
448 idev = in6_dev_get(skb->dev);
450 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
451 len + sizeof(struct icmp6hdr),
452 sizeof(struct icmp6hdr),
453 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
456 ip6_flush_pending_frames(sk);
459 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
461 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
464 if (likely(idev != NULL))
469 icmpv6_xmit_unlock();
472 EXPORT_SYMBOL(icmpv6_send);
/*
 * Answer an echo request (called from icmpv6_rcv() for
 * ICMPV6_ECHO_REQUEST).
 *
 * The received ICMPv6 header is copied and its type changed to
 * ICMPV6_ECHO_REPLY. The reply flow uses the request's source address as
 * destination and the receiving interface as output interface. After the
 * route and xfrm lookups and the hop-limit selection, skb->len bytes of the
 * request are appended through ip6_append_data()/icmpv6_getfrag() and the
 * frames are pushed out with icmpv6_push_pending_frames().
 */
474 static void icmpv6_echo_reply(struct sk_buff *skb)
477 struct inet6_dev *idev;
478 struct ipv6_pinfo *np;
479 struct in6_addr *saddr = NULL;
480 struct icmp6hdr *icmph = icmp6_hdr(skb);
481 struct icmp6hdr tmp_hdr;
483 struct icmpv6_msg msg;
484 struct dst_entry *dst;
489 saddr = &ipv6_hdr(skb)->daddr;
491 if (!ipv6_unicast_destination(skb))
494 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
495 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
497 memset(&fl, 0, sizeof(fl));
498 fl.proto = IPPROTO_ICMPV6;
499 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
501 ipv6_addr_copy(&fl.fl6_src, saddr);
502 fl.oif = skb->dev->ifindex;
503 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
504 security_skb_classify_flow(skb, &fl);
506 if (icmpv6_xmit_lock())
509 sk = icmpv6_socket->sk;
512 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
513 fl.oif = np->mcast_oif;
515 err = ip6_dst_lookup(sk, &dst, &fl);
518 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
521 if (ipv6_addr_is_multicast(&fl.fl6_dst))
522 hlimit = np->mcast_hops;
524 hlimit = np->hop_limit;
526 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
528 hlimit = ipv6_get_hoplimit(dst->dev);
534 idev = in6_dev_get(skb->dev);
538 msg.type = ICMPV6_ECHO_REPLY;
540 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
541 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
542 (struct rt6_info*)dst, MSG_DONTWAIT);
545 ip6_flush_pending_frames(sk);
548 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
551 if (likely(idev != NULL))
555 icmpv6_xmit_unlock();
/*
 * Pass a received ICMPv6 error on to the handlers of the inner protocol.
 *
 * skb->data is read as an IPv6 header. Extension headers, if any, are
 * skipped to find the inner protocol and its offset, and at least 8 bytes
 * of the inner protocol header must be pullable. Then the err_handler of
 * the inet6 protocol registered for that next-header value (if there is
 * one) is called, followed by rawv6_err() for raw sockets found through
 * __raw_v6_lookup() under raw_v6_lock.
 */
558 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
560 struct in6_addr *saddr, *daddr;
561 struct inet6_protocol *ipprot;
567 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
570 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
571 if (ipv6_ext_hdr(nexthdr)) {
572 /* now skip over extension headers */
573 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
577 inner_offset = sizeof(struct ipv6hdr);
580 /* Check the header, including 8 bytes of the inner protocol header. */
581 if (!pskb_may_pull(skb, inner_offset+8))
584 saddr = &ipv6_hdr(skb)->saddr;
585 daddr = &ipv6_hdr(skb)->daddr;
587 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
588 Without this we will not able f.e. to make source routed
590 Corresponding argument (opt) to notifiers is already added.
594 hash = nexthdr & (MAX_INET_PROTOS - 1);
597 ipprot = rcu_dereference(inet6_protos[hash]);
598 if (ipprot && ipprot->err_handler)
599 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
602 read_lock(&raw_v6_lock);
603 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
604 while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr,
606 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
610 read_unlock(&raw_v6_lock);
614 * Handle icmp messages
/*
 * Receive handler for ICMPv6 (installed as icmpv6_protocol.handler).
 *
 * Counts the message, validates the checksum depending on skb->ip_summed,
 * pulls the ICMPv6 header and dispatches on the message type:
 *  - echo request: icmpv6_echo_reply()
 *  - packet too big: rt6_pmtu_discovery() with the MTU from the header,
 *    then notification like the other errors
 *  - destination unreachable / time exceeded / parameter problem:
 *    icmpv6_notify()
 *  - MGM query / report: igmp6_event_query() / igmp6_event_report()
 *  - unknown types: logged; those without ICMPV6_INFOMSG_MASK set are
 *    passed to icmpv6_notify()
 * NOTE(review): what the neighbour-discovery case labels do, and the
 * return values, are on lines missing from this listing.
 */
617 static int icmpv6_rcv(struct sk_buff *skb)
619 struct net_device *dev = skb->dev;
620 struct inet6_dev *idev = __in6_dev_get(dev);
621 struct in6_addr *saddr, *daddr;
622 struct ipv6hdr *orig_hdr;
623 struct icmp6hdr *hdr;
626 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
628 saddr = &ipv6_hdr(skb)->saddr;
629 daddr = &ipv6_hdr(skb)->daddr;
631 /* Perform checksum. */
632 switch (skb->ip_summed) {
633 case CHECKSUM_COMPLETE:
634 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
639 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
641 if (__skb_checksum_complete(skb)) {
642 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
643 NIP6(*saddr), NIP6(*daddr));
648 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
651 hdr = icmp6_hdr(skb);
653 type = hdr->icmp6_type;
655 ICMP6MSGIN_INC_STATS_BH(idev, type);
658 case ICMPV6_ECHO_REQUEST:
659 icmpv6_echo_reply(skb);
662 case ICMPV6_ECHO_REPLY:
663 /* we couldn't care less */
666 case ICMPV6_PKT_TOOBIG:
667 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
668 standard destination cache. Seems, only "advanced"
669 destination cache will allow to solve this problem
672 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
674 hdr = icmp6_hdr(skb);
675 orig_hdr = (struct ipv6hdr *) (hdr + 1);
676 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
677 ntohl(hdr->icmp6_mtu));
680 * Drop through to notify
683 case ICMPV6_DEST_UNREACH:
684 case ICMPV6_TIME_EXCEED:
685 case ICMPV6_PARAMPROB:
686 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
689 case NDISC_ROUTER_SOLICITATION:
690 case NDISC_ROUTER_ADVERTISEMENT:
691 case NDISC_NEIGHBOUR_SOLICITATION:
692 case NDISC_NEIGHBOUR_ADVERTISEMENT:
697 case ICMPV6_MGM_QUERY:
698 igmp6_event_query(skb);
701 case ICMPV6_MGM_REPORT:
702 igmp6_event_report(skb);
705 case ICMPV6_MGM_REDUCTION:
706 case ICMPV6_NI_QUERY:
707 case ICMPV6_NI_REPLY:
708 case ICMPV6_MLD2_REPORT:
709 case ICMPV6_DHAAD_REQUEST:
710 case ICMPV6_DHAAD_REPLY:
711 case ICMPV6_MOBILE_PREFIX_SOL:
712 case ICMPV6_MOBILE_PREFIX_ADV:
716 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
719 if (type & ICMPV6_INFOMSG_MASK)
723 * error of unknown type.
724 * must pass to upper level
727 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
734 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
740 * Special lock-class for __icmpv6_socket:
/* Lockdep class given to sk_dst_lock of each control socket in icmpv6_init(). */
742 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * Create one kernel raw ICMPv6 socket per possible CPU and register the
 * ICMPv6 protocol.
 *
 * Each socket gets GFP_ATOMIC as its allocation mode, a dedicated lockdep
 * class for sk_dst_lock, and has its protocol's unhash hook called.
 * Afterwards icmpv6_protocol is registered for IPPROTO_ICMPV6.
 * The loop at the end releases the sockets of possible CPUs below i.
 * NOTE(review): that loop looks like the error-unwind path, but its label
 * and the return statements are not visible in this listing.
 */
744 int __init icmpv6_init(struct net_proto_family *ops)
749 for_each_possible_cpu(i) {
750 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
751 &per_cpu(__icmpv6_socket, i));
754 "Failed to initialize the ICMP6 control socket "
760 sk = per_cpu(__icmpv6_socket, i)->sk;
761 sk->sk_allocation = GFP_ATOMIC;
763 * Split off their lock-class, because sk->sk_dst_lock
764 * gets used from softirqs, which is safe for
765 * __icmpv6_socket (because those never get directly used
766 * via userspace syscalls), but unsafe for normal sockets.
768 lockdep_set_class(&sk->sk_dst_lock,
769 &icmpv6_socket_sk_dst_lock_key);
771 /* Enough space for 2 64K ICMP packets, including
772 * sk_buff struct overhead.
775 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
777 sk->sk_prot->unhash(sk);
781 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
782 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
790 for (j = 0; j < i; j++) {
791 if (!cpu_possible(j))
793 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * Undo icmpv6_init(): release the control socket of every possible CPU and
 * unregister icmpv6_protocol from IPPROTO_ICMPV6.
 */
799 void icmpv6_cleanup(void)
803 for_each_possible_cpu(i) {
804 sock_release(per_cpu(__icmpv6_socket, i));
806 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Constant table of ICMPv6 error entries, one per code; the per-entry
 * comments name the code each entry stands for.
 * NOTE(review): presumably this is the tab_unreach[] table whose err and
 * fatal members icmpv6_err_convert() reads - the member declarations and
 * the table name are not visible in this listing.
 */
809 static const struct icmp6_err {
817 { /* ADM_PROHIBITED */
821 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * Translate an ICMPv6 error (type, code) into an error number stored
 * through err. The symbol is exported.
 *
 * For ICMPV6_DEST_UNREACH with code <= ICMPV6_PORT_UNREACH the error number
 * and the fatal flag are read from tab_unreach[code]. Packet Too Big,
 * Parameter Problem and Time Exceeded have their own cases.
 * NOTE(review): the values assigned in those cases and the return statement
 * are not visible in this listing; presumably the fatal flag is returned.
 */
835 int icmpv6_err_convert(int type, int code, int *err)
842 case ICMPV6_DEST_UNREACH:
844 if (code <= ICMPV6_PORT_UNREACH) {
845 *err = tab_unreach[code].err;
846 fatal = tab_unreach[code].fatal;
850 case ICMPV6_PKT_TOOBIG:
854 case ICMPV6_PARAMPROB:
859 case ICMPV6_TIME_EXCEED:
867 EXPORT_SYMBOL(icmpv6_err_convert);
870 ctl_table ipv6_icmp_table[] = {
872 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
873 .procname = "ratelimit",
874 .data = &sysctl_icmpv6_time,
875 .maxlen = sizeof(int),
877 .proc_handler = &proc_dointvec