2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/*
 * SNMP counter blocks for ICMPv6. icmpv6_statistics holds the aggregate
 * counters (struct icmpv6_mib); icmpv6msg_statistics holds the
 * struct icmpv6msg_mib counters, which icmpv6_rcv() bumps with the received
 * message type. Both symbols are exported.
 */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
72 EXPORT_SYMBOL(icmpv6_statistics);
73 DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly;
74 EXPORT_SYMBOL(icmpv6msg_statistics);
77 * The ICMP socket(s). This is the most convenient way to flow control
78 * our ICMP output as well as maintain a clean interface throughout
79 * all layers. All Socketless IP sends will soon be gone.
81 * On SMP we have one ICMP socket per-cpu.
/*
 * Per-CPU pointer to the ICMPv6 control socket, NULL until icmpv6_init()
 * creates it. The icmpv6_socket macro names the current CPU's entry.
 */
83 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
84 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/*
 * Protocol descriptor for ICMPv6. icmpv6_rcv() is forward-declared because it
 * is defined further down but referenced here as the receive handler. The
 * descriptor is registered for IPPROTO_ICMPV6 by icmpv6_init() and removed by
 * icmpv6_cleanup().
 */
86 static int icmpv6_rcv(struct sk_buff *skb);
88 static struct inet6_protocol icmpv6_protocol = {
89 .handler = icmpv6_rcv,
90 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the current CPU's ICMPv6 socket spinlock.
 *
 * Uses spin_trylock() rather than a blocking lock; a failed attempt is the
 * unlikely case and corresponds to the re-entrancy situation described in the
 * comment inside the branch. Callers test the result with
 * "if (icmpv6_xmit_lock())" before touching the socket.
 *
 * NOTE(review): the return statements are not visible in this listing, and
 * neither is any bottom-half disabling; the matching unlock helper uses
 * spin_unlock_bh(), so BHs are presumably disabled before the trylock --
 * confirm against the full source.
 */
93 static __inline__ int icmpv6_xmit_lock(void)
97 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
98 /* This can happen if the output path (f.e. SIT or
99 * ip6ip6 tunnel) signals dst_link_failure() for an
100 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the current CPU's ICMPv6 socket spinlock taken
 * by icmpv6_xmit_lock(), using the _bh unlock variant.
 */
108 static __inline__ void icmpv6_xmit_unlock(void)
110 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
114 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - send an ICMPv6 Parameter Problem for @skb.
 * @skb:  offending packet
 * @code: Parameter Problem code
 * @pos:  value passed as the "info" argument of icmpv6_send()
 *
 * Thin wrapper: calls icmpv6_send() with type ICMPV6_PARAMPROB and the
 * packet's own device (skb->dev).
 */
116 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
118 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
123 * Figure out whether we may reply to this packet with an icmp error.
125 * We do not reply, if:
126 * - it was icmp error message.
127 * - it is truncated, so that it is known, that protocol is ICMPV6
128 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - decide whether @skb must not be answered with an ICMPv6
 * error.
 *
 * Walks past the IPv6 header and its extension headers; if the resulting
 * protocol is ICMPv6, the message type byte is fetched and tested against
 * ICMPV6_INFOMSG_MASK (a clear mask bit is what the visible condition looks
 * for, i.e. the packet is itself an ICMPv6 error).
 *
 * NOTE(review): the return statements and the remaining parts of the
 * conditions are not visible in this listing. icmpv6_send() treats a non-zero
 * result as "do not reply".
 */
133 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the fixed IPv6 header, relative to skb->data. */
135 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
/* Number of bytes remaining in the packet after that header. */
136 int len = skb->len - ptr;
137 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Advance ptr over any extension headers; nexthdr is updated by the callee. */
142 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
145 if (nexthdr == IPPROTO_ICMPV6) {
/* Read just the ICMPv6 type field; _type is the local bounce buffer. */
147 tp = skb_header_pointer(skb,
148 ptr+offsetof(struct icmp6hdr, icmp6_type),
149 sizeof(_type), &_type);
151 !(*tp & ICMPV6_INFOMSG_MASK))
158 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - apply the ICMPv6 output rate limit.
 * @sk:   socket used for the route lookup
 * @type: ICMPv6 type about to be sent
 * (a flow argument, used below as "fl", follows in the full signature)
 *
 * Informational messages and Packet Too Big are exempt, per the comments
 * below. Otherwise the output route is looked up and, for the ordinary case,
 * xrlim_allow() is asked with a timeout derived from the icmpv6_time sysctl.
 * The timeout is shortened for shorter destination prefixes: it is shifted
 * right by (128 - plen) / 32, so e.g. a /64 route shifts by 2 and a /0 route
 * by 4.
 *
 * NOTE(review): the return statements, the condition guarding the
 * OUTNOROUTES increment and the body of the loopback branch are not visible
 * in this listing.
 */
160 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
163 struct dst_entry *dst;
166 /* Informational messages are not limited. */
167 if (type & ICMPV6_INFOMSG_MASK)
170 /* Do not limit pmtu discovery, it would break it. */
171 if (type == ICMPV6_PKT_TOOBIG)
175 * Look up the output route.
176 * XXX: perhaps the expire for routing entries cloned by
177 * this lookup should be more aggressive (not longer than timeout).
179 dst = ip6_route_output(sk, fl);
181 IP6_INC_STATS(ip6_dst_idev(dst),
182 IPSTATS_MIB_OUTNOROUTES);
183 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
186 struct rt6_info *rt = (struct rt6_info *)dst;
/* Base interval comes from the "ratelimit" sysctl of the initial namespace. */
187 int tmo = init_net.ipv6.sysctl.icmpv6_time;
189 /* Give more bandwidth to wider prefixes. */
190 if (rt->rt6i_dst.plen < 128)
191 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 res = xrlim_allow(dst, tmo);
200 * an inline helper for the "simple" if statement below
201 * checks if parameter problem report is caused by an
202 * unrecognized IPv6 option that has the Option Type
203 * highest-order two bits set to 10
/*
 * opt_unrec - test the Option Type byte that a Parameter Problem points at.
 * @skb:    packet being examined
 * @offset: offset of the option byte, relative to the network header
 *
 * Returns non-zero when the two highest-order bits of that byte are 10
 * ((byte & 0xC0) == 0x80).
 *
 * NOTE(review): handling of a failed skb_header_pointer() is not visible in
 * this listing.
 */
206 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Convert the header-relative offset into one relative to skb->data. */
210 offset += skb_network_offset(skb);
211 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
214 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finish the ICMPv6 header of the queued message
 * and hand the queue to ip6_push_pending_frames().
 * @sk:   socket whose sk_write_queue holds the pending fragments
 * @fl:   flow; its source address feeds the pseudo-header checksum
 * @thdr: template ICMPv6 header copied into the first queued skb
 * @len:  message length (presumably the length fed to csum_ipv6_magic();
 *        the remaining arguments of those calls are not visible here)
 *
 * The header is copied in with its checksum field zeroed. With a single
 * queued skb the header is folded into that skb's running checksum; with
 * several, the per-skb checksums are summed first and the header is folded
 * into the total. In both cases csum_ipv6_magic() produces the final value.
 *
 * NOTE(review): the early-exit path for an empty queue and the return value
 * are not visible in this listing.
 */
217 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
220 struct icmp6hdr *icmp6h;
/* Nothing to do when no data has been queued. */
223 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
226 icmp6h = icmp6_hdr(skb);
227 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field must be zero while the checksum is being computed. */
228 icmp6h->icmp6_cksum = 0;
/* Single fragment: skb->csum already covers the whole payload. */
230 if (skb_queue_len(&sk->sk_write_queue) == 1) {
231 skb->csum = csum_partial((char *)icmp6h,
232 sizeof(struct icmp6hdr), skb->csum);
233 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Several fragments: accumulate every fragment's partial checksum. */
240 skb_queue_walk(&sk->sk_write_queue, skb) {
241 tmp_csum = csum_add(tmp_csum, skb->csum);
244 tmp_csum = csum_partial((char *)icmp6h,
245 sizeof(struct icmp6hdr), tmp_csum);
246 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
251 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment-fill callback passed to ip6_append_data().
 * @from:   opaque pointer, really a struct icmpv6_msg describing the source
 * @to:     destination buffer
 * @offset: offset of this fragment within the message payload
 * @len:    number of bytes requested
 * @odd:    forwarded to csum_block_add() when merging checksums
 * @skb:    skb being built; its csum field accumulates the payload checksum
 *
 * Copies data out of the original packet (msg->skb) starting at
 * msg->offset + offset while checksumming it, then merges that checksum into
 * skb->csum. For error messages (type without ICMPV6_INFOMSG_MASK set)
 * nf_ct_attach() is called with the new and the original skb.
 *
 * NOTE(review): the return value is not visible in this listing.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - exchange the IPv6 source address of @skb with the address
 * stored in its Home Address destination option.
 *
 * Only built with Mobile IPv6 support (built in or as a module); the last
 * line of this block is the empty stub used otherwise. The option is located
 * with ipv6_find_tlv() starting at the offset recorded in IP6CB(skb)->dsthao,
 * and the swap goes through a temporary address.
 *
 * NOTE(review): the check that guards the lookup and the #else/#endif lines
 * are not visible in this listing.
 */
276 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
277 static void mip6_addr_swap(struct sk_buff *skb)
279 struct ipv6hdr *iph = ipv6_hdr(skb);
280 struct inet6_skb_parm *opt = IP6CB(skb);
281 struct ipv6_destopt_hao *hao;
286 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
287 if (likely(off >= 0)) {
/* off is relative to the network header. */
288 hao = (struct ipv6_destopt_hao *)
289 (skb_network_header(skb) + off);
290 ipv6_addr_copy(&tmp, &iph->saddr);
291 ipv6_addr_copy(&iph->saddr, &hao->addr);
292 ipv6_addr_copy(&hao->addr, &tmp);
297 static inline void mip6_addr_swap(struct sk_buff *skb) {}
301 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 error in response to @skb.
 * @skb:  offending packet; its IPv6 header is read through ipv6_hdr()
 * @type: ICMPv6 type of the message to send
 * @code: ICMPv6 code of the message to send
 * @info: type-specific 32-bit value, stored as htonl(info) in the header
 * @dev:  NOTE(review): no use of this parameter is visible in this listing
 *
 * Visible sequence: check that the IPv6 header lies inside the skb buffer,
 * apply the eligibility rules (destination and source address classes, never
 * answering an ICMP error), describe the reply flow, take the per-CPU socket
 * lock, apply the rate limit, resolve the route and xfrm policy, choose a hop
 * limit, cap the quoted data so the reply fits in IPV6_MIN_MTU, queue it with
 * ip6_append_data() and send it via icmpv6_push_pending_frames(). The lock is
 * released with icmpv6_xmit_unlock() before returning.
 *
 * NOTE(review): many statements (returns, gotos, labels, some conditions) are
 * missing from this listing; the comments below describe only what is shown.
 */
303 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
304 struct net_device *dev)
306 struct inet6_dev *idev = NULL;
307 struct ipv6hdr *hdr = ipv6_hdr(skb);
309 struct ipv6_pinfo *np;
310 struct in6_addr *saddr = NULL;
311 struct dst_entry *dst;
312 struct dst_entry *dst2;
313 struct icmp6hdr tmp_hdr;
316 struct icmpv6_msg msg;
/* Sanity check: the IPv6 header must lie between skb->head and skb->tail. */
323 if ((u8 *)hdr < skb->head ||
324 (skb->network_header + sizeof(*hdr)) > skb->tail)
328 * Make sure we respect the rules
329 * i.e. RFC 1885 2.4(e)
330 * Rule (e.1) is enforced by not using icmpv6_send
331 * in any code that processes icmp errors.
333 addr_type = ipv6_addr_type(&hdr->daddr);
/* Is the original destination one of our own addresses on that device? */
335 if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
/*
 * Multicast destination, or packet not addressed to this host: the inner
 * test singles out everything except Packet Too Big and the Parameter
 * Problem / unrecognised-option case accepted by opt_unrec().
 */
342 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
343 if (type != ICMPV6_PKT_TOOBIG &&
344 !(type == ICMPV6_PARAMPROB &&
345 code == ICMPV6_UNK_OPTION &&
346 (opt_unrec(skb, info))))
352 addr_type = ipv6_addr_type(&hdr->saddr);
/* Link-local source: remember the interface the packet arrived on. */
358 if (addr_type & IPV6_ADDR_LINKLOCAL)
359 iif = skb->dev->ifindex;
362 * Must not send error if the source does not uniquely
363 * identify a single node (RFC2463 Section 2.4).
364 * We check unspecified / multicast addresses here,
365 * and anycast addresses will be checked later.
367 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
368 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
373 * Never answer to a ICMP packet.
375 if (is_ineligible(skb)) {
376 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: ICMPv6, addressed to the offender's source. */
382 memset(&fl, 0, sizeof(fl));
383 fl.proto = IPPROTO_ICMPV6;
384 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
386 ipv6_addr_copy(&fl.fl6_src, saddr);
388 fl.fl_icmp_type = type;
389 fl.fl_icmp_code = code;
390 security_skb_classify_flow(skb, &fl);
392 if (icmpv6_xmit_lock())
395 sk = icmpv6_socket->sk;
398 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Template header; icmpv6_push_pending_frames() computes the real checksum. */
401 tmp_hdr.icmp6_type = type;
402 tmp_hdr.icmp6_code = code;
403 tmp_hdr.icmp6_cksum = 0;
404 tmp_hdr.icmp6_pointer = htonl(info);
406 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
407 fl.oif = np->mcast_oif;
409 err = ip6_dst_lookup(sk, &dst, &fl);
414 * We won't send icmp if the destination is known
417 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
418 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
419 goto out_dst_release;
422 /* No need to clone since we're just using its address. */
425 err = xfrm_lookup(&dst, &fl, sk, 0);
/*
 * Second policy lookup on the reverse-decoded session, with
 * XFRM_LOOKUP_ICMP. NOTE(review): fl2 is decoded here but the visible
 * lookups pass &fl; the intervening lines are missing -- confirm.
 */
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
441 if (ip6_dst_lookup(sk, &dst2, &fl))
444 err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
445 if (err == -ENOENT) {
/* Hop limit candidates: socket setting, route metric, device default. */
458 if (ipv6_addr_is_multicast(&fl.fl6_dst))
459 hlimit = np->mcast_hops;
461 hlimit = np->hop_limit;
463 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
465 hlimit = ipv6_get_hoplimit(dst->dev);
/* Quote the offending packet starting at its network header. */
472 msg.offset = skb_network_offset(skb);
475 len = skb->len - msg.offset;
/* Cap the quoted data so header + quote fit in the minimum IPv6 MTU. */
476 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
478 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
479 goto out_dst_release;
/* Takes a reference on the device's inet6_dev; tested for NULL below. */
482 idev = in6_dev_get(skb->dev);
484 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
485 len + sizeof(struct icmp6hdr),
486 sizeof(struct icmp6hdr),
487 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
490 ip6_flush_pending_frames(sk);
493 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
496 if (likely(idev != NULL))
501 icmpv6_xmit_unlock();
504 EXPORT_SYMBOL(icmpv6_send);
/*
 * icmpv6_echo_reply - answer the ICMPv6 Echo Request carried in @skb.
 *
 * The reply header is a copy of the request's ICMPv6 header with the type
 * changed to ICMPV6_ECHO_REPLY. The flow is addressed to the request's source
 * and bound to the receiving interface. Under the per-CPU socket lock the
 * function resolves the route and xfrm policy, picks a hop limit, queues
 * skb->len bytes of payload plus an ICMPv6 header with ip6_append_data()
 * (MSG_DONTWAIT), and sends through icmpv6_push_pending_frames().
 *
 * NOTE(review): returns, gotos, labels and several conditions are missing
 * from this listing; the comments below describe only what is shown.
 */
506 static void icmpv6_echo_reply(struct sk_buff *skb)
509 struct inet6_dev *idev;
510 struct ipv6_pinfo *np;
511 struct in6_addr *saddr = NULL;
512 struct icmp6hdr *icmph = icmp6_hdr(skb);
513 struct icmp6hdr tmp_hdr;
515 struct icmpv6_msg msg;
516 struct dst_entry *dst;
/*
 * Candidate source address: the address the request was sent to. The body of
 * the non-unicast branch below is not visible (presumably it drops this
 * choice -- TODO confirm).
 */
521 saddr = &ipv6_hdr(skb)->daddr;
523 if (!ipv6_unicast_destination(skb))
/* Echo back identifier/sequence by cloning the request header. */
526 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
527 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
529 memset(&fl, 0, sizeof(fl));
530 fl.proto = IPPROTO_ICMPV6;
531 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
533 ipv6_addr_copy(&fl.fl6_src, saddr);
/* Reply leaves through the interface the request arrived on. */
534 fl.oif = skb->dev->ifindex;
535 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
536 security_skb_classify_flow(skb, &fl);
538 if (icmpv6_xmit_lock())
541 sk = icmpv6_socket->sk;
544 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
545 fl.oif = np->mcast_oif;
547 err = ip6_dst_lookup(sk, &dst, &fl);
550 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit candidates: socket setting, route metric, device default. */
553 if (ipv6_addr_is_multicast(&fl.fl6_dst))
554 hlimit = np->mcast_hops;
556 hlimit = np->hop_limit;
558 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
560 hlimit = ipv6_get_hoplimit(dst->dev);
566 idev = in6_dev_get(skb->dev);
570 msg.type = ICMPV6_ECHO_REPLY;
572 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
573 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
574 (struct rt6_info*)dst, MSG_DONTWAIT);
577 ip6_flush_pending_frames(sk);
580 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
583 if (likely(idev != NULL))
587 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - hand a received ICMPv6 error to the protocol it concerns.
 * @skb:  packet whose data pointer is at the quoted (inner) IPv6 header
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @info: 32-bit field from the ICMPv6 header, in network byte order
 *
 * Makes sure the inner IPv6 header is linear, skips any inner extension
 * headers to find the transport protocol and its offset, and requires the
 * first 8 bytes of the inner transport header to be present. The protocol
 * number is masked with (MAX_INET_PROTOS - 1) to index inet6_protos; if the
 * registered protocol has an err_handler it is called, and raw sockets are
 * notified through raw6_icmp_error().
 *
 * NOTE(review): early returns, the RCU lock/unlock calls around
 * rcu_dereference() and the else-branch structure are not visible in this
 * listing.
 */
590 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
592 struct inet6_protocol *ipprot;
597 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
600 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
601 if (ipv6_ext_hdr(nexthdr)) {
602 /* now skip over extension headers */
603 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
/* No extension headers: the transport header directly follows. */
607 inner_offset = sizeof(struct ipv6hdr);
610 /* Checkin header including 8 bytes of inner protocol header. */
611 if (!pskb_may_pull(skb, inner_offset+8))
614 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
615 Without this we will not able f.e. to make source routed
617 Corresponding argument (opt) to notifiers is already added.
621 hash = nexthdr & (MAX_INET_PROTOS - 1);
624 ipprot = rcu_dereference(inet6_protos[hash]);
625 if (ipprot && ipprot->err_handler)
626 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
629 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
633 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler for ICMPv6 (installed via icmpv6_protocol).
 * @skb: received packet
 *
 * Visible sequence:
 *  1. xfrm input policy check; on failure a reverse policy check is tried
 *     against the quoted inner packet, with the network header temporarily
 *     moved past the ICMPv6 header and then restored.
 *  2. ICMP6_MIB_INMSGS is counted and the checksum is verified over the
 *     IPv6 pseudo-header (source, destination, length, IPPROTO_ICMPV6).
 *  3. The ICMPv6 header is pulled, the per-type input counter is bumped, and
 *     the message is dispatched on its type: echo requests are answered,
 *     Packet Too Big updates the path MTU and then falls through to the error
 *     notification shared with Destination Unreachable, Time Exceeded and
 *     Parameter Problem; MLD queries and reports go to the igmp6 handlers;
 *     unknown types are logged and, when they are errors (INFOMSG bit clear),
 *     also passed to icmpv6_notify().
 *  4. ICMP6_MIB_INERRORS is counted on what appears to be the failure path.
 *
 * NOTE(review): break/return/goto statements, labels and the handler call for
 * the NDISC_* cases are missing from this listing.
 */
636 static int icmpv6_rcv(struct sk_buff *skb)
638 struct net_device *dev = skb->dev;
639 struct inet6_dev *idev = __in6_dev_get(dev);
640 struct in6_addr *saddr, *daddr;
641 struct ipv6hdr *orig_hdr;
642 struct icmp6hdr *hdr;
645 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
/* Inspect the flags of the last xfrm state that was applied to the packet. */
648 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
/* Need the ICMPv6 header plus the quoted IPv6 header for the reverse check. */
652 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
/* Save the network header offset, then point it at the quoted packet. */
655 nh = skb_network_offset(skb);
656 skb_set_network_header(skb, sizeof(*hdr));
658 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
661 skb_set_network_header(skb, nh);
664 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
666 saddr = &ipv6_hdr(skb)->saddr;
667 daddr = &ipv6_hdr(skb)->daddr;
669 /* Perform checksum. */
670 switch (skb->ip_summed) {
671 case CHECKSUM_COMPLETE:
672 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
677 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
679 if (__skb_checksum_complete(skb)) {
680 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
681 NIP6(*saddr), NIP6(*daddr));
/* Advance skb->data past the ICMPv6 header. */
686 if (!pskb_pull(skb, sizeof(*hdr)))
689 hdr = icmp6_hdr(skb);
691 type = hdr->icmp6_type;
693 ICMP6MSGIN_INC_STATS_BH(idev, type);
696 case ICMPV6_ECHO_REQUEST:
697 icmpv6_echo_reply(skb);
700 case ICMPV6_ECHO_REPLY:
701 /* we couldn't care less */
704 case ICMPV6_PKT_TOOBIG:
705 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
706 standard destination cache. Seems, only "advanced"
707 destination cache will allow to solve this problem
710 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
712 hdr = icmp6_hdr(skb);
713 orig_hdr = (struct ipv6hdr *) (hdr + 1);
714 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
715 ntohl(hdr->icmp6_mtu));
718 * Drop through to notify
721 case ICMPV6_DEST_UNREACH:
722 case ICMPV6_TIME_EXCEED:
723 case ICMPV6_PARAMPROB:
724 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
727 case NDISC_ROUTER_SOLICITATION:
728 case NDISC_ROUTER_ADVERTISEMENT:
729 case NDISC_NEIGHBOUR_SOLICITATION:
730 case NDISC_NEIGHBOUR_ADVERTISEMENT:
735 case ICMPV6_MGM_QUERY:
736 igmp6_event_query(skb);
739 case ICMPV6_MGM_REPORT:
740 igmp6_event_report(skb);
743 case ICMPV6_MGM_REDUCTION:
744 case ICMPV6_NI_QUERY:
745 case ICMPV6_NI_REPLY:
746 case ICMPV6_MLD2_REPORT:
747 case ICMPV6_DHAAD_REQUEST:
748 case ICMPV6_DHAAD_REPLY:
749 case ICMPV6_MOBILE_PREFIX_SOL:
750 case ICMPV6_MOBILE_PREFIX_ADV:
754 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
757 if (type & ICMPV6_INFOMSG_MASK)
761 * error of unknown type.
762 * must pass to upper level
765 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
772 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
779 * Special lock-class for __icmpv6_socket:
/*
 * icmpv6_init - create the per-CPU ICMPv6 control sockets and register the
 * ICMPv6 protocol handler.
 * @ops: NOTE(review): no use of this parameter is visible in this listing
 *
 * For every possible CPU a kernel raw socket (PF_INET6, SOCK_RAW,
 * IPPROTO_ICMPV6) is created and stored in __icmpv6_socket. Each socket is
 * switched to GFP_ATOMIC allocations, its sk_dst_lock gets the dedicated
 * lockdep class declared just below, a buffer size of
 * 2 * (64 KiB + sizeof(struct sk_buff)) is assigned (the field on the
 * left-hand side is not visible here), and the socket is unhashed via
 * sk->sk_prot->unhash(). Afterwards icmpv6_protocol is registered for
 * IPPROTO_ICMPV6. The trailing loop releases the sockets of CPUs below the
 * index reached, skipping ids for which cpu_possible() is false.
 *
 * NOTE(review): return values, labels and the error-path control flow are
 * missing from this listing.
 */
781 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
783 int __init icmpv6_init(struct net_proto_family *ops)
788 for_each_possible_cpu(i) {
789 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
790 &per_cpu(__icmpv6_socket, i));
793 "Failed to initialize the ICMP6 control socket "
799 sk = per_cpu(__icmpv6_socket, i)->sk;
800 sk->sk_allocation = GFP_ATOMIC;
802 * Split off their lock-class, because sk->sk_dst_lock
803 * gets used from softirqs, which is safe for
804 * __icmpv6_socket (because those never get directly used
805 * via userspace syscalls), but unsafe for normal sockets.
807 lockdep_set_class(&sk->sk_dst_lock,
808 &icmpv6_socket_sk_dst_lock_key);
810 /* Enough space for 2 64K ICMP packets, including
811 * sk_buff struct overhead.
814 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
816 sk->sk_prot->unhash(sk);
820 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
821 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
829 for (j = 0; j < i; j++) {
830 if (!cpu_possible(j))
832 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init(): release every possible CPU's ICMPv6
 * control socket, then unregister the ICMPv6 protocol handler.
 */
838 void icmpv6_cleanup(void)
842 for_each_possible_cpu(i) {
843 sock_release(per_cpu(__icmpv6_socket, i));
845 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Table of struct icmp6_err entries, one per Destination Unreachable code.
 * icmpv6_err_convert() indexes it as tab_unreach[code] and reads the .err and
 * .fatal members.
 *
 * NOTE(review): the member declarations, most entries and the array name are
 * missing from this listing.
 */
848 static const struct icmp6_err {
856 { /* ADM_PROHIBITED */
860 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an errno value.
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @err:  output; receives the errno chosen for this error
 *
 * For Destination Unreachable, codes up to ICMPV6_PORT_UNREACH are looked up
 * in tab_unreach, which supplies both the errno and a "fatal" flag. Packet
 * Too Big, Parameter Problem and Time Exceeded have their own cases.
 *
 * NOTE(review): the bodies of the other cases, the default values and the
 * return statement are missing from this listing; the function presumably
 * returns the fatal flag -- confirm. A negative @code would pass the visible
 * upper-bound check if code is used as a signed index -- confirm callers only
 * pass header byte values.
 */
874 int icmpv6_err_convert(int type, int code, int *err)
881 case ICMPV6_DEST_UNREACH:
883 if (code <= ICMPV6_PORT_UNREACH) {
884 *err = tab_unreach[code].err;
885 fatal = tab_unreach[code].fatal;
889 case ICMPV6_PKT_TOOBIG:
893 case ICMPV6_PARAMPROB:
898 case ICMPV6_TIME_EXCEED:
906 EXPORT_SYMBOL(icmpv6_err_convert);
/*
 * Template sysctl table for ICMPv6. The visible entry, "ratelimit", exposes
 * init_net.ipv6.sysctl.icmpv6_time as an int handled by proc_dointvec; the
 * same value is read by icmpv6_xrlim_allow() as the base rate-limit timeout.
 * ipv6_icmp_sysctl_init() duplicates this template with kmemdup().
 *
 * NOTE(review): the .mode field and the table terminator are missing from
 * this listing.
 */
909 ctl_table ipv6_icmp_table_template[] = {
911 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
912 .procname = "ratelimit",
913 .data = &init_net.ipv6.sysctl.icmpv6_time,
914 .maxlen = sizeof(int),
916 .proc_handler = &proc_dointvec
921 struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
923 struct ctl_table *table;
925 table = kmemdup(ipv6_icmp_table_template,
926 sizeof(ipv6_icmp_table_template),