2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to a icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
/*
 * Global ICMPv6 SNMP (MIB) statistics object of type struct icmpv6_mib.
 * The symbol is exported so that code outside this file can reference it.
 */
70 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
71 EXPORT_SYMBOL(icmpv6_statistics);
74 * The ICMP socket(s). This is the most convenient way to flow control
75 * our ICMP output as well as maintain a clean interface throughout
76 * all layers. All Socketless IP sends will soon be gone.
78 * On SMP we have one ICMP socket per-cpu.
/*
 * Per-CPU pointer to the ICMPv6 control socket, initially NULL; the
 * sockets themselves are created in icmpv6_init().  The icmpv6_socket
 * macro evaluates to the pointer that belongs to the CPU the caller is
 * currently running on.
 */
80 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
81 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration: icmpv6_rcv() is referenced by the descriptor below. */
83 static int icmpv6_rcv(struct sk_buff **pskb);
/*
 * Protocol descriptor for ICMPv6 with icmpv6_rcv() as its receive handler.
 * It is registered for IPPROTO_ICMPV6 in icmpv6_init() and removed again
 * in icmpv6_cleanup().
 */
85 static struct inet6_protocol icmpv6_protocol = {
86 .handler = icmpv6_rcv,
87 .flags = INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the lock of this CPU's ICMPv6 socket
 *
 * Uses spin_trylock() on the socket's sk_lock.slock, so it never spins
 * waiting for the lock.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing.  Callers test "if (icmpv6_xmit_lock())" before bailing out, so
 * presumably a non-zero value means the lock was NOT obtained - confirm
 * against the complete source.
 */
90 static __inline__ int icmpv6_xmit_lock(void)
94 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
95 /* This can happen if the output path (f.e. SIT or
96 * ip6ip6 tunnel) signals dst_link_failure() for an
97 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock of this CPU's ICMPv6 socket
 *
 * Counterpart of icmpv6_xmit_lock(): drops sk_lock.slock with
 * spin_unlock_bh().
 */
105 static __inline__ void icmpv6_xmit_unlock(void)
107 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
111 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - send an ICMPV6_PARAMPROB message about @skb
 * @skb:  the offending packet
 * @code: ICMPv6 code to put in the message
 * @pos:  passed to icmpv6_send() as its info argument
 *
 * The device handed to icmpv6_send() is skb->dev.
 * NOTE(review): any statements following the icmpv6_send() call are not
 * visible in this listing.
 */
113 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
115 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
120 * Figure out whether we may reply to this packet with an ICMP error.
122 * We do not reply, if:
123 * - it was an ICMP error message.
124 * - it is truncated, so that it is known, that protocol is ICMPV6
125 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - inspect @skb to decide whether it may be answered
 * @skb: packet under consideration
 *
 * Starting right after the fixed IPv6 header, extension headers are
 * skipped with ipv6_skip_exthdr().  If the resulting next-header value is
 * IPPROTO_ICMPV6, the ICMPv6 type byte is fetched with
 * skb_header_pointer() and tested against ICMPV6_INFOMSG_MASK.
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing.  icmpv6_send() treats a non-zero result as "do not reply".
 */
130 static int is_ineligible(struct sk_buff *skb)
132 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
133 int len = skb->len - ptr;
134 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
139 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
142 if (nexthdr == IPPROTO_ICMPV6) {
144 tp = skb_header_pointer(skb,
145 ptr+offsetof(struct icmp6hdr, icmp6_type),
146 sizeof(_type), &_type);
148 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base interval used by the ICMPv6 output rate limiter, in jiffies
 * (default 1*HZ, i.e. one second).  Exposed through the "ratelimit"
 * entry of ipv6_icmp_table.
 */
154 static int sysctl_icmpv6_time __read_mostly = 1*HZ;
157 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - decide whether a message of @type may be sent now
 * @sk:   socket used for the route lookup
 * @type: ICMPv6 type of the message about to be sent
 *
 * Informational messages and ICMPV6_PKT_TOOBIG are tested first and,
 * according to the comments in the body, are not rate limited.  For
 * everything else the output route is looked up with ip6_route_output().
 * In the rate-limited case the interval starts at sysctl_icmpv6_time and,
 * for destination prefixes shorter than 128 bits, is shifted right by
 * (128 - plen) >> 5, i.e. halved for every 32 bits the prefix is shorter;
 * the result is handed to xrlim_allow().
 *
 * NOTE(review): the rest of the parameter list (the flow used as "fl"),
 * the route error test, the branch bodies and the return statement are
 * on lines missing from this listing.
 */
159 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
162 struct dst_entry *dst;
165 /* Informational messages are not limited. */
166 if (type & ICMPV6_INFOMSG_MASK)
169 /* Do not limit pmtu discovery, it would break it. */
170 if (type == ICMPV6_PKT_TOOBIG)
174 * Look up the output route.
175 * XXX: perhaps the expire for routing entries cloned by
176 * this lookup should be more aggressive (not longer than timeout).
178 dst = ip6_route_output(sk, fl);
180 IP6_INC_STATS(ip6_dst_idev(dst),
181 IPSTATS_MIB_OUTNOROUTES);
182 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
185 struct rt6_info *rt = (struct rt6_info *)dst;
186 int tmo = sysctl_icmpv6_time;
188 /* Give more bandwidth to wider prefixes. */
189 if (rt->rt6i_dst.plen < 128)
190 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
192 res = xrlim_allow(dst, tmo);
199 * an inline helper for the "simple" if statement below
200 * checks if parameter problem report is caused by an
201 * unrecognized IPv6 option that has the Option Type
202 * highest-order two bits set to 10
/*
 * opt_unrec - test the option type byte at @offset in @skb
 * @skb:    packet containing the option
 * @offset: position of the option type byte, relative to the network
 *          header (skb_network_offset() is added before the read)
 *
 * Reads one byte through skb_header_pointer() and returns non-zero when
 * its two highest-order bits are 10, i.e. (*op & 0xC0) == 0x80.
 *
 * NOTE(review): the handling of a failed skb_header_pointer() call is on
 * a line missing from this listing.
 */
205 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
209 offset += skb_network_offset(skb);
210 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
213 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finish the ICMPv6 header and transmit
 * @sk:   socket whose write queue holds the message
 * @fl:   flow of the message; fl->fl6_src feeds the pseudo-header checksum
 * @thdr: header template copied into the first queued skb
 * @len:  NOTE(review): not used on any line visible in this listing
 *
 * The first skb on sk->sk_write_queue receives a copy of @thdr with the
 * checksum field cleared.  With a single queued skb the header is folded
 * into that skb's csum; with several, the csum values of all queued skbs
 * are added up first and the header is folded into the total.  The final
 * checksum comes from csum_ipv6_magic(), after which the queue is sent
 * with ip6_push_pending_frames().
 *
 * NOTE(review): the empty-queue exit path, the remaining
 * csum_ipv6_magic() arguments and the return value are on lines missing
 * from this listing.
 */
216 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
219 struct icmp6hdr *icmp6h;
222 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
225 icmp6h = icmp6_hdr(skb);
226 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
227 icmp6h->icmp6_cksum = 0;
229 if (skb_queue_len(&sk->sk_write_queue) == 1) {
230 skb->csum = csum_partial((char *)icmp6h,
231 sizeof(struct icmp6hdr), skb->csum);
232 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
239 skb_queue_walk(&sk->sk_write_queue, skb) {
240 tmp_csum = csum_add(tmp_csum, skb->csum);
243 tmp_csum = csum_partial((char *)icmp6h,
244 sizeof(struct icmp6hdr), tmp_csum);
245 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
250 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment-fill callback handed to ip6_append_data()
 * @from:   really a struct icmpv6_msg describing the source packet
 * @to:     destination buffer
 * @offset: offset of this fragment; added to msg->offset to locate the
 *          bytes in the original skb
 * @len:    number of bytes wanted
 * @odd:    passed to csum_block_add() when merging the partial checksum
 * @skb:    skb being built; its csum accumulates the copied data
 *
 * Data is copied out of msg->skb with skb_copy_and_csum_bits().  For
 * message types without ICMPV6_INFOMSG_MASK set (error messages),
 * nf_ct_attach() is called with the new and the original skb.
 *
 * NOTE(review): the return value is on a line missing from this listing.
 */
261 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
263 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
264 struct sk_buff *org_skb = msg->skb;
267 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
269 skb->csum = csum_block_add(skb->csum, csum, odd);
270 if (!(msg->type & ICMPV6_INFOMSG_MASK))
271 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - exchange the IPv6 source address with the Home Address
 * @skb: packet to modify in place
 *
 * With CONFIG_IPV6_MIP6 the Home Address option is located with
 * ipv6_find_tlv(), starting from the offset recorded in IP6CB(skb)->dsthao.
 * When it is found, the address stored in the option and the source
 * address of the IPv6 header are swapped through a temporary.
 *
 * The second definition is an empty inline stub.
 * NOTE(review): the preprocessor lines separating the two variants are
 * missing from this listing; presumably the stub is the variant used
 * without CONFIG_IPV6_MIP6.
 */
275 #ifdef CONFIG_IPV6_MIP6
276 static void mip6_addr_swap(struct sk_buff *skb)
278 struct ipv6hdr *iph = ipv6_hdr(skb);
279 struct inet6_skb_parm *opt = IP6CB(skb);
280 struct ipv6_destopt_hao *hao;
285 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
286 if (likely(off >= 0)) {
287 hao = (struct ipv6_destopt_hao *)
288 (skb_network_header(skb) + off);
289 ipv6_addr_copy(&tmp, &iph->saddr);
290 ipv6_addr_copy(&iph->saddr, &hao->addr);
291 ipv6_addr_copy(&hao->addr, &tmp);
296 static inline void mip6_addr_swap(struct sk_buff *skb) {}
300 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 message about @skb
 * @skb:  the packet that triggered the message
 * @type: ICMPv6 type of the message to send
 * @code: ICMPv6 code of the message to send
 * @info: type-specific value; stored in network byte order in the
 *        icmp6_pointer field of the new header
 * @dev:  NOTE(review): not used on any line visible in this listing
 *
 * Processing order as far as it is visible here:
 *  - check that the IPv6 header of @skb lies inside the skb buffer;
 *  - classify the destination address and consult ipv6_chk_addr() for it;
 *    for a multicast destination, or a packet whose pkt_type is not
 *    PACKET_HOST, only ICMPV6_PKT_TOOBIG and an ICMPV6_PARAMPROB with
 *    code ICMPV6_UNK_OPTION for which opt_unrec() holds are exempt from
 *    the restriction applied at that point;
 *  - classify the source address: a link-local source records
 *    skb->dev->ifindex in iif, an unspecified or multicast source is
 *    logged as "addr_any/mcast source";
 *  - is_ineligible() filters packets that must not be answered;
 *  - fill in the flow, take the per-CPU socket lock, apply the rate
 *    limit, look up the route (anycast routes are refused) and run
 *    xfrm_lookup();
 *  - pick the hop limit from the socket, the route metric or the device;
 *  - cap the quoted data so the message fits into IPV6_MIN_MTU, queue it
 *    with ip6_append_data() and send it with
 *    icmpv6_push_pending_frames();
 *  - update the output counters and release the socket lock.
 *
 * NOTE(review): many lines (early returns, goto targets, several
 * assignments and conditions) are missing from this listing, so the exact
 * exit behaviour of each check cannot be read off the visible code.
 */
302 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
303 struct net_device *dev)
305 struct inet6_dev *idev = NULL;
306 struct ipv6hdr *hdr = ipv6_hdr(skb);
308 struct ipv6_pinfo *np;
309 struct in6_addr *saddr = NULL;
310 struct dst_entry *dst;
311 struct icmp6hdr tmp_hdr;
313 struct icmpv6_msg msg;
/* Sanity check: the whole IPv6 header must lie between head and tail. */
320 if ((u8 *)hdr < skb->head ||
321 (skb->network_header + sizeof(*hdr)) > skb->tail)
325 * Make sure we respect the rules
326 * i.e. RFC 1885 2.4(e)
327 * Rule (e.1) is enforced by not using icmpv6_send
328 * in any code that processes icmp errors.
330 addr_type = ipv6_addr_type(&hdr->daddr);
332 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
339 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
340 if (type != ICMPV6_PKT_TOOBIG &&
341 !(type == ICMPV6_PARAMPROB &&
342 code == ICMPV6_UNK_OPTION &&
343 (opt_unrec(skb, info))))
349 addr_type = ipv6_addr_type(&hdr->saddr);
355 if (addr_type & IPV6_ADDR_LINKLOCAL)
356 iif = skb->dev->ifindex;
359 * Must not send error if the source does not uniquely
360 * identify a single node (RFC2463 Section 2.4).
361 * We check unspecified / multicast addresses here,
362 * and anycast addresses will be checked later.
364 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
365 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
370 * Never answer to a ICMP packet.
372 if (is_ineligible(skb)) {
373 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* The reply goes back to the source address of the offending packet. */
379 memset(&fl, 0, sizeof(fl));
380 fl.proto = IPPROTO_ICMPV6;
381 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
383 ipv6_addr_copy(&fl.fl6_src, saddr);
385 fl.fl_icmp_type = type;
386 fl.fl_icmp_code = code;
387 security_skb_classify_flow(skb, &fl);
389 if (icmpv6_xmit_lock())
392 sk = icmpv6_socket->sk;
395 if (!icmpv6_xrlim_allow(sk, type, &fl))
398 tmp_hdr.icmp6_type = type;
399 tmp_hdr.icmp6_code = code;
400 tmp_hdr.icmp6_cksum = 0;
401 tmp_hdr.icmp6_pointer = htonl(info);
403 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
404 fl.oif = np->mcast_oif;
406 err = ip6_dst_lookup(sk, &dst, &fl);
411 * We won't send icmp if the destination is known
414 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
415 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
416 goto out_dst_release;
419 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
422 if (ipv6_addr_is_multicast(&fl.fl6_dst))
423 hlimit = np->mcast_hops;
425 hlimit = np->hop_limit;
427 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
429 hlimit = ipv6_get_hoplimit(dst->dev);
436 msg.offset = skb_network_offset(skb);
/* Quote at most what fits into a minimum-MTU packet after both headers. */
439 len = skb->len - msg.offset;
440 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
442 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
443 goto out_dst_release;
446 idev = in6_dev_get(skb->dev);
448 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
449 len + sizeof(struct icmp6hdr),
450 sizeof(struct icmp6hdr),
451 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
454 ip6_flush_pending_frames(sk);
457 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counters are laid out consecutively from DEST_UNREACH. */
459 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
460 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
461 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
464 if (likely(idev != NULL))
469 icmpv6_xmit_unlock();
472 EXPORT_SYMBOL(icmpv6_send);
/*
 * icmpv6_echo_reply - answer the ICMPv6 echo request in @skb
 * @skb: the received echo request
 *
 * The reply header is a copy of the request's ICMPv6 header with only the
 * type replaced by ICMPV6_ECHO_REPLY.  The flow is addressed to the
 * request's source address and bound to the receiving interface
 * (skb->dev->ifindex).  The request's destination address is the
 * candidate source address, and ipv6_unicast_destination() is tested
 * before it is copied into the flow.
 *
 * After taking the per-CPU socket lock, the route is looked up, run
 * through xfrm_lookup() and a hop limit is selected.  The data is queued
 * with ip6_append_data() using icmpv6_getfrag() and sent with
 * icmpv6_push_pending_frames(); on the visible lines the OUTECHOREPLIES
 * and OUTMSGS counters are then bumped and the lock is released.
 *
 * NOTE(review): error exits, the conditions between the hop-limit
 * choices, the effect of the ipv6_unicast_destination() test and the
 * remaining msg fields are on lines missing from this listing.
 */
474 static void icmpv6_echo_reply(struct sk_buff *skb)
477 struct inet6_dev *idev;
478 struct ipv6_pinfo *np;
479 struct in6_addr *saddr = NULL;
480 struct icmp6hdr *icmph = icmp6_hdr(skb);
481 struct icmp6hdr tmp_hdr;
483 struct icmpv6_msg msg;
484 struct dst_entry *dst;
489 saddr = &ipv6_hdr(skb)->daddr;
491 if (!ipv6_unicast_destination(skb))
494 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
495 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
497 memset(&fl, 0, sizeof(fl));
498 fl.proto = IPPROTO_ICMPV6;
499 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
501 ipv6_addr_copy(&fl.fl6_src, saddr);
502 fl.oif = skb->dev->ifindex;
503 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
504 security_skb_classify_flow(skb, &fl);
506 if (icmpv6_xmit_lock())
509 sk = icmpv6_socket->sk;
512 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
513 fl.oif = np->mcast_oif;
515 err = ip6_dst_lookup(sk, &dst, &fl);
518 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
521 if (ipv6_addr_is_multicast(&fl.fl6_dst))
522 hlimit = np->mcast_hops;
524 hlimit = np->hop_limit;
526 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
528 hlimit = ipv6_get_hoplimit(dst->dev);
534 idev = in6_dev_get(skb->dev);
538 msg.type = ICMPV6_ECHO_REPLY;
540 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
541 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
542 (struct rt6_info*)dst, MSG_DONTWAIT);
545 ip6_flush_pending_frames(sk);
548 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
550 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
551 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
554 if (likely(idev != NULL))
558 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - hand a received ICMPv6 error to the upper layers
 * @skb:  received message; skb->data is interpreted as the IPv6 header
 *        carried inside the ICMPv6 message
 * @type: ICMPv6 type of the received message
 * @code: ICMPv6 code of the received message
 * @info: type-specific header word, passed through unchanged
 *
 * The carried IPv6 header is made linear with pskb_may_pull(), its
 * extension headers are skipped, and the header plus 8 bytes of the
 * inner protocol header are pulled as well.  The inner protocol number,
 * masked to MAX_INET_PROTOS - 1, indexes inet6_protos[]; if the entry has
 * an err_handler it is called.  Then, under raw_v6_lock held for reading,
 * every matching raw socket in the same hash bucket is notified via
 * rawv6_err().
 *
 * NOTE(review): the early-return paths, the RCU read-side markers and the
 * tail of the raw-socket loop are on lines missing from this listing.
 */
561 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
563 struct in6_addr *saddr, *daddr;
564 struct inet6_protocol *ipprot;
570 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
573 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
574 if (ipv6_ext_hdr(nexthdr)) {
575 /* now skip over extension headers */
576 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
580 inner_offset = sizeof(struct ipv6hdr);
583 /* Checkin header including 8 bytes of inner protocol header. */
584 if (!pskb_may_pull(skb, inner_offset+8))
587 saddr = &ipv6_hdr(skb)->saddr;
588 daddr = &ipv6_hdr(skb)->daddr;
590 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
591 Without this we will not able f.e. to make source routed
593 Corresponding argument (opt) to notifiers is already added.
597 hash = nexthdr & (MAX_INET_PROTOS - 1);
600 ipprot = rcu_dereference(inet6_protos[hash]);
601 if (ipprot && ipprot->err_handler)
602 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
605 read_lock(&raw_v6_lock);
606 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
607 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
609 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
613 read_unlock(&raw_v6_lock);
617 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler for ICMPv6 (see icmpv6_protocol)
 * @pskb: pointer to the received skb
 *
 * Visible processing order:
 *  - count the message in ICMP6_MIB_INMSGS;
 *  - verify the checksum: a CHECKSUM_COMPLETE value is tested with
 *    csum_ipv6_magic(); on the other visible path the pseudo-header sum
 *    is seeded into skb->csum and __skb_checksum_complete() is run, with
 *    a failure being logged;
 *  - pull the ICMPv6 header and update the per-type input counters
 *    (error types from ICMPV6_DEST_UNREACH to ICMPV6_PARAMPROB, and
 *    types from ICMPV6_ECHO_REQUEST to NDISC_REDIRECT);
 *  - dispatch on the type: echo requests go to icmpv6_echo_reply();
 *    Packet Too Big feeds the reported MTU and the quoted header's
 *    addresses to rt6_pmtu_discovery() and then, like destination
 *    unreachable, time exceeded and parameter problem, goes to
 *    icmpv6_notify(); multicast listener queries and reports go to
 *    igmp6_event_query() and igmp6_event_report(); an unrecognised type
 *    is logged, its ICMPV6_INFOMSG_MASK bit is tested and, per the
 *    comment in the body, an unknown error type is passed to
 *    icmpv6_notify().
 *
 * ICMP6_MIB_INERRORS is incremented near the end of the function.
 *
 * NOTE(review): the switch statement itself, the break/goto statements,
 * the neighbour-discovery call and the return values are on lines
 * missing from this listing.
 */
620 static int icmpv6_rcv(struct sk_buff **pskb)
622 struct sk_buff *skb = *pskb;
623 struct net_device *dev = skb->dev;
624 struct inet6_dev *idev = __in6_dev_get(dev);
625 struct in6_addr *saddr, *daddr;
626 struct ipv6hdr *orig_hdr;
627 struct icmp6hdr *hdr;
630 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
632 saddr = &ipv6_hdr(skb)->saddr;
633 daddr = &ipv6_hdr(skb)->daddr;
635 /* Perform checksum. */
636 switch (skb->ip_summed) {
637 case CHECKSUM_COMPLETE:
638 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
643 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
645 if (__skb_checksum_complete(skb)) {
646 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
647 NIP6(*saddr), NIP6(*daddr));
652 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
655 hdr = icmp6_hdr(skb);
657 type = hdr->icmp6_type;
659 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
660 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
661 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
662 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
665 case ICMPV6_ECHO_REQUEST:
666 icmpv6_echo_reply(skb);
669 case ICMPV6_ECHO_REPLY:
670 /* we couldn't care less */
673 case ICMPV6_PKT_TOOBIG:
674 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
675 standard destination cache. Seems, only "advanced"
676 destination cache will allow to solve this problem
679 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
681 hdr = icmp6_hdr(skb);
682 orig_hdr = (struct ipv6hdr *) (hdr + 1);
683 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
684 ntohl(hdr->icmp6_mtu));
687 * Drop through to notify
690 case ICMPV6_DEST_UNREACH:
691 case ICMPV6_TIME_EXCEED:
692 case ICMPV6_PARAMPROB:
693 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
696 case NDISC_ROUTER_SOLICITATION:
697 case NDISC_ROUTER_ADVERTISEMENT:
698 case NDISC_NEIGHBOUR_SOLICITATION:
699 case NDISC_NEIGHBOUR_ADVERTISEMENT:
704 case ICMPV6_MGM_QUERY:
705 igmp6_event_query(skb);
708 case ICMPV6_MGM_REPORT:
709 igmp6_event_report(skb);
712 case ICMPV6_MGM_REDUCTION:
713 case ICMPV6_NI_QUERY:
714 case ICMPV6_NI_REPLY:
715 case ICMPV6_MLD2_REPORT:
716 case ICMPV6_DHAAD_REQUEST:
717 case ICMPV6_DHAAD_REPLY:
718 case ICMPV6_MOBILE_PREFIX_SOL:
719 case ICMPV6_MOBILE_PREFIX_ADV:
723 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
726 if (type & ICMPV6_INFOMSG_MASK)
730 * error of unknown type.
731 * must pass to upper level
734 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
741 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
747 * Special lock-class for __icmpv6_socket:
/*
 * Lockdep class key that icmpv6_init() assigns to sk->sk_dst_lock of
 * every per-CPU ICMPv6 control socket.
 */
749 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * icmpv6_init - create the per-CPU ICMPv6 sockets and register the protocol
 * @ops: NOTE(review): not used on any line visible in this listing
 *
 * For every possible CPU a kernel-internal PF_INET6/SOCK_RAW socket for
 * IPPROTO_ICMPV6 is created and stored in __icmpv6_socket.  Each socket
 * gets GFP_ATOMIC as its allocation mode, its own lockdep class for
 * sk_dst_lock, a buffer-size setting computed as room for two 64K packets
 * plus sk_buff overhead, and is removed from the protocol hash via
 * sk_prot->unhash().  Afterwards icmpv6_protocol is registered for
 * IPPROTO_ICMPV6.
 *
 * On the failure path the sockets of the CPUs handled so far (indices
 * below i that are possible CPUs) are released again.
 *
 * NOTE(review): the error tests, the field receiving the buffer size,
 * the labels and the return values are on lines missing from this
 * listing.
 */
751 int __init icmpv6_init(struct net_proto_family *ops)
756 for_each_possible_cpu(i) {
757 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
758 &per_cpu(__icmpv6_socket, i));
761 "Failed to initialize the ICMP6 control socket "
767 sk = per_cpu(__icmpv6_socket, i)->sk;
768 sk->sk_allocation = GFP_ATOMIC;
770 * Split off their lock-class, because sk->sk_dst_lock
771 * gets used from softirqs, which is safe for
772 * __icmpv6_socket (because those never get directly used
773 * via userspace syscalls), but unsafe for normal sockets.
775 lockdep_set_class(&sk->sk_dst_lock,
776 &icmpv6_socket_sk_dst_lock_key);
778 /* Enough space for 2 64K ICMP packets, including
779 * sk_buff struct overhead.
782 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
784 sk->sk_prot->unhash(sk);
788 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
789 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
797 for (j = 0; j < i; j++) {
798 if (!cpu_possible(j))
800 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init()
 *
 * Releases the ICMPv6 control socket of every possible CPU and then
 * unregisters icmpv6_protocol for IPPROTO_ICMPV6.
 */
806 void icmpv6_cleanup(void)
810 for_each_possible_cpu(i) {
811 sock_release(per_cpu(__icmpv6_socket, i));
813 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Constant table of struct icmp6_err entries, one per code as labelled by
 * the entry comments.
 *
 * NOTE(review): the member declarations, the array name and most entries
 * are on lines missing from this listing.  icmpv6_err_convert() reads
 * .err and .fatal from tab_unreach[code]; presumably that is this table -
 * confirm against the complete source.
 */
816 static const struct icmp6_err {
824 { /* ADM_PROHIBITED */
828 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno value
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @err:  output; receives the errno value
 *
 * For ICMPV6_DEST_UNREACH with a code up to ICMPV6_PORT_UNREACH both the
 * errno and the fatal flag come from tab_unreach[code].  Separate cases
 * exist for ICMPV6_PKT_TOOBIG, ICMPV6_PARAMPROB and ICMPV6_TIME_EXCEED.
 *
 * NOTE(review): the bodies of those three cases, the default errno and
 * the return statement are on lines missing from this listing;
 * presumably the function returns the fatal flag - confirm.
 */
842 int icmpv6_err_convert(int type, int code, int *err)
849 case ICMPV6_DEST_UNREACH:
851 if (code <= ICMPV6_PORT_UNREACH) {
852 *err = tab_unreach[code].err;
853 fatal = tab_unreach[code].fatal;
857 case ICMPV6_PKT_TOOBIG:
861 case ICMPV6_PARAMPROB:
866 case ICMPV6_TIME_EXCEED:
874 EXPORT_SYMBOL(icmpv6_err_convert);
877 ctl_table ipv6_icmp_table[] = {
879 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
880 .procname = "ratelimit",
881 .data = &sysctl_icmpv6_time,
882 .maxlen = sizeof(int),
884 .proc_handler = &proc_dointvec