2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sockios.h>
41 #include <linux/net.h>
42 #include <linux/skbuff.h>
43 #include <linux/init.h>
44 #include <linux/netfilter.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/*
 * Global ICMPv6 SNMP counters: icmpv6_statistics is bumped through
 * ICMP6_INC_STATS_BH() and icmpv6msg_statistics (indexed by message type)
 * through ICMP6MSGIN_INC_STATS_BH() in icmpv6_rcv(). Both symbols are
 * exported for use outside this file.
 */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
72 EXPORT_SYMBOL(icmpv6_statistics);
73 DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly;
74 EXPORT_SYMBOL(icmpv6msg_statistics);
77 * The ICMP socket(s). This is the most convenient way to flow control
78 * our ICMP output as well as maintain a clean interface throughout
79 * all layers. All Socketless IP sends will soon be gone.
81 * On SMP we have one ICMP socket per-cpu.
/*
 * One control socket pointer per CPU. The icmpv6_socket macro evaluates to
 * the current CPU's entry, so it is only meaningful while the caller cannot
 * migrate to another CPU.
 */
83 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
84 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration: the protocol descriptor below references the handler. */
86 static int icmpv6_rcv(struct sk_buff *skb);
/*
 * Protocol descriptor registered for IPPROTO_ICMPV6 by icmpv6_init() and
 * removed again by icmpv6_cleanup().
 */
88 static struct inet6_protocol icmpv6_protocol = {
89 .handler = icmpv6_rcv,
90 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the current CPU's ICMPv6 socket lock.
 *
 * Uses spin_trylock() on sk_lock.slock rather than a blocking lock, because
 * the lock may already be held when an output path reports a link failure
 * for an ICMPv6 packet that is being sent.
 *
 * Callers in this file test the result with "if (icmpv6_xmit_lock())" before
 * touching the socket; presumably non-zero means the lock was NOT obtained.
 * NOTE(review): the return statements are not part of this listing - confirm.
 */
93 static __inline__ int icmpv6_xmit_lock(void)
97 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
98 /* This can happen if the output path (f.e. SIT or
99 * ip6ip6 tunnel) signals dst_link_failure() for an
100 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - release the lock taken by icmpv6_xmit_lock().
 *
 * Drops sk_lock.slock of the current CPU's ICMPv6 socket with the _bh
 * variant of spin_unlock.
 */
108 static __inline__ void icmpv6_xmit_unlock(void)
110 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
114 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - report a Parameter Problem for @skb.
 *
 * skb:  the offending packet
 * code: ICMPv6 Parameter Problem code
 * pos:  passed to icmpv6_send() as the info value (the message pointer field)
 *
 * The error is sent via icmpv6_send() with skb->dev as the device argument.
 */
116 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
118 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
123 * Figure out whether we may reply to this packet with an icmp error.
125 * We do not reply if:
126 * - it was an icmp error message.
127 * - it is truncated, so that it is known, that protocol is ICMPV6
128 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - check whether @skb must not be answered with an ICMPv6
 * error.
 *
 * Walks past the extension headers of the packet; if the final next-header
 * is ICMPv6, the ICMPv6 type byte is fetched and tested against
 * ICMPV6_INFOMSG_MASK (a clear mask bit marks an error message).
 *
 * NOTE(review): the return statements and some checks are not part of this
 * listing; icmpv6_send() treats a non-zero result as "do not reply".
 */
133 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the fixed IPv6 header, relative to skb->data. */
135 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
/* Number of bytes remaining after that offset. */
136 int len = skb->len - ptr;
137 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
/* Advance ptr over any extension headers; nexthdr is updated in place. */
142 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
145 if (nexthdr == IPPROTO_ICMPV6) {
/* Read only the type byte; skb_header_pointer() copies into _type if needed. */
147 tp = skb_header_pointer(skb,
148 ptr+offsetof(struct icmp6hdr, icmp6_type),
149 sizeof(_type), &_type);
151 !(*tp & ICMPV6_INFOMSG_MASK))
158 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - decide whether an ICMPv6 message of @type may be sent
 * under the output rate limit.
 *
 * Informational messages and Packet Too Big get their own early checks (see
 * the comments below). For everything else the output route for the flow is
 * looked up; routes over a loopback device get a separate branch, and other
 * routes are passed to xrlim_allow() with a timeout derived from the
 * icmpv6_time sysctl value.
 *
 * NOTE(review): the second line of the parameter list and the statements
 * that set and return the result are not part of this listing.
 */
160 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
163 struct dst_entry *dst;
166 /* Informational messages are not limited. */
167 if (type & ICMPV6_INFOMSG_MASK)
170 /* Do not limit pmtu discovery, it would break it. */
171 if (type == ICMPV6_PKT_TOOBIG)
175 * Look up the output route.
176 * XXX: perhaps the expire for routing entries cloned by
177 * this lookup should be more aggressive (not longer than timeout).
179 dst = ip6_route_output(sk, fl);
181 IP6_INC_STATS(ip6_dst_idev(dst),
182 IPSTATS_MIB_OUTNOROUTES);
183 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
186 struct rt6_info *rt = (struct rt6_info *)dst;
/* Base interval comes from the "ratelimit" sysctl (init_net only). */
187 int tmo = init_net.ipv6.sysctl.icmpv6_time;
189 /* Give more bandwidth to wider prefixes. */
/* Each full 32 bits by which the prefix is shorter than /128 halves tmo. */
190 if (rt->rt6i_dst.plen < 128)
191 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
193 res = xrlim_allow(dst, tmo);
200 * An inline helper for the "simple" if statement below:
201 * checks whether a parameter problem report is caused by an
202 * unrecognized IPv6 option whose Option Type has its
203 * highest-order two bits set to 10.
/*
 * opt_unrec - test the option type byte at @offset within the network header.
 *
 * skb:    packet being examined
 * offset: byte offset of the option, relative to the network header
 *
 * Returns non-zero when the two highest-order bits of that byte are 10.
 */
206 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Convert the network-header-relative offset to one relative to skb->data. */
210 offset += skb_network_offset(skb);
211 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
/* 0xC0 selects the top two bits; 0x80 is the bit pattern 10. */
214 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finalize the ICMPv6 header and checksum of the
 * data queued on @sk, then hand the queue to ip6_push_pending_frames().
 *
 * sk:   socket whose sk_write_queue holds the pending fragments
 * fl:   flow; fl6_src is used for the pseudo-header checksum
 * thdr: template ICMPv6 header copied into the first queued skb
 * len:  presumably the ICMPv6 message length for the pseudo-header -
 *       NOTE(review): its use sits on lines absent from this listing
 *
 * With a single queued skb the checksum is computed from that skb's csum;
 * with several, the per-skb checksums are summed first.
 */
217 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
220 struct icmp6hdr *icmp6h;
/* Nothing queued: there is no header to fill in. */
223 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
/* The ICMPv6 header lives in the first skb of the queue. */
226 icmp6h = icmp6_hdr(skb);
227 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field must be zero while the checksum is computed. */
228 icmp6h->icmp6_cksum = 0;
230 if (skb_queue_len(&sk->sk_write_queue) == 1) {
231 skb->csum = csum_partial((char *)icmp6h,
232 sizeof(struct icmp6hdr), skb->csum);
233 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Multiple fragments: accumulate every skb's payload checksum. */
240 skb_queue_walk(&sk->sk_write_queue, skb) {
241 tmp_csum = csum_add(tmp_csum, skb->csum);
244 tmp_csum = csum_partial((char *)icmp6h,
245 sizeof(struct icmp6hdr), tmp_csum);
246 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
251 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - fragment-fill callback handed to ip6_append_data().
 *
 * from:   really a struct icmpv6_msg describing the original packet
 * to:     destination buffer (NOTE(review): its use is on a line absent
 *         from this listing)
 * offset: offset into the quoted data; added to msg->offset
 * len:    number of bytes to copy (same caveat as @to)
 * odd:    passed to csum_block_add() when folding the chunk checksum
 * skb:    the skb being built; its csum is updated
 *
 * For error messages (type without ICMPV6_INFOMSG_MASK) the original skb is
 * also passed to nf_ct_attach() together with the new one.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
/*
 * mip6_addr_swap - with Mobile IPv6 support configured, exchange the IPv6
 * source address of @skb with the address stored in its Home Address
 * destination option, when that option is found. Without Mobile IPv6
 * support the function is an empty stub.
 *
 * NOTE(review): the guard that checks opt->dsthao and the #else/#endif lines
 * are not part of this listing.
 */
276 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
277 static void mip6_addr_swap(struct sk_buff *skb)
279 struct ipv6hdr *iph = ipv6_hdr(skb);
280 struct inet6_skb_parm *opt = IP6CB(skb);
281 struct ipv6_destopt_hao *hao;
/* Locate the HAO TLV, starting at the offset recorded in the control block. */
286 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
287 if (likely(off >= 0)) {
288 hao = (struct ipv6_destopt_hao *)
289 (skb_network_header(skb) + off);
/* Three-way swap of iph->saddr and hao->addr through tmp. */
290 ipv6_addr_copy(&tmp, &iph->saddr);
291 ipv6_addr_copy(&iph->saddr, &hao->addr);
292 ipv6_addr_copy(&hao->addr, &tmp);
297 static inline void mip6_addr_swap(struct sk_buff *skb) {}
301 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 message about the packet @skb.
 *
 * skb:  the packet that triggered the message; part of it is quoted back
 * type: ICMPv6 type of the message to send
 * code: ICMPv6 code of the message to send
 * info: type-specific value, stored in network byte order in the header's
 *       pointer field
 * dev:  NOTE(review): no use of this parameter is visible in this listing
 *
 * Visible steps, in order:
 *   1. sanity-check that the IPv6 header lies inside the skb buffer;
 *   2. classify the destination and source addresses of the offending packet
 *      and refuse unspecified/multicast sources and packets that are
 *      themselves ineligible (is_ineligible());
 *   3. fill in the flow, take the per-CPU socket lock and apply the rate
 *      limit (icmpv6_xrlim_allow());
 *   4. resolve the route (ip6_dst_lookup() + xfrm_lookup(), with a
 *      reverse-session relookup path) and refuse anycast routes;
 *   5. choose the hop limit, clamp the quoted length so the reply fits in
 *      IPV6_MIN_MTU, queue the data with ip6_append_data() and push it with
 *      icmpv6_push_pending_frames();
 *   6. release the socket lock.
 *
 * NOTE(review): many statements (returns, goto labels, error branches,
 * counters) are absent from this listing; the inline notes below describe
 * only what the visible lines establish.
 */
303 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
304 struct net_device *dev)
306 struct inet6_dev *idev = NULL;
307 struct ipv6hdr *hdr = ipv6_hdr(skb);
309 struct ipv6_pinfo *np;
310 struct in6_addr *saddr = NULL;
311 struct dst_entry *dst;
312 struct dst_entry *dst2;
313 struct icmp6hdr tmp_hdr;
316 struct icmpv6_msg msg;
/* The whole fixed IPv6 header must lie between skb->head and skb->tail. */
323 if ((u8 *)hdr < skb->head ||
324 (skb->network_header + sizeof(*hdr)) > skb->tail)
328 * Make sure we respect the rules
329 * i.e. RFC 1885 2.4(e)
330 * Rule (e.1) is enforced by not using icmpv6_send
331 * in any code that processes icmp errors.
333 addr_type = ipv6_addr_type(&hdr->daddr);
/* Is the original destination one of our own addresses on skb->dev? */
335 if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0))
/*
 * Multicast destination or packet not addressed to this host: the condition
 * below is true for every type except Packet Too Big and the
 * unrecognized-option Parameter Problem case (see opt_unrec()).
 */
342 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
343 if (type != ICMPV6_PKT_TOOBIG &&
344 !(type == ICMPV6_PARAMPROB &&
345 code == ICMPV6_UNK_OPTION &&
346 (opt_unrec(skb, info))))
/* From here on addr_type describes the SOURCE of the offending packet. */
352 addr_type = ipv6_addr_type(&hdr->saddr);
/* A link-local source needs the receiving interface recorded in iif. */
358 if (addr_type & IPV6_ADDR_LINKLOCAL)
359 iif = skb->dev->ifindex;
362 * Must not send error if the source does not uniquely
363 * identify a single node (RFC2463 Section 2.4).
364 * We check unspecified / multicast addresses here,
365 * and anycast addresses will be checked later.
367 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
368 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
373 * Never answer to a ICMP packet.
375 if (is_ineligible(skb)) {
376 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: it goes back to the source of the offending packet. */
382 memset(&fl, 0, sizeof(fl));
383 fl.proto = IPPROTO_ICMPV6;
384 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
386 ipv6_addr_copy(&fl.fl6_src, saddr);
388 fl.fl_icmp_type = type;
389 fl.fl_icmp_code = code;
390 security_skb_classify_flow(skb, &fl);
/* Socket use below happens under the per-CPU ICMPv6 socket lock. */
392 if (icmpv6_xmit_lock())
395 sk = icmpv6_socket->sk;
398 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Template header; the checksum is filled in by icmpv6_push_pending_frames(). */
401 tmp_hdr.icmp6_type = type;
402 tmp_hdr.icmp6_code = code;
403 tmp_hdr.icmp6_cksum = 0;
404 tmp_hdr.icmp6_pointer = htonl(info);
406 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
407 fl.oif = np->mcast_oif;
409 err = ip6_dst_lookup(sk, &dst, &fl);
414 * We won't send icmp if the destination is known
417 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
418 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
419 goto out_dst_release;
422 /* No need to clone since we're just using its address. */
425 err = xfrm_lookup(&dst, &fl, sk, 0);
/* Relookup path: derive a flow from the offending packet, reversed. */
438 if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
439 goto relookup_failed;
441 if (ip6_dst_lookup(sk, &dst2, &fl))
442 goto relookup_failed;
444 err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP);
451 goto out_dst_release;
/*
 * Hop limit candidates: socket multicast/unicast setting, then the route
 * metric, then the device default (the conditions between them are not
 * part of this listing).
 */
460 if (ipv6_addr_is_multicast(&fl.fl6_dst))
461 hlimit = np->mcast_hops;
463 hlimit = np->hop_limit;
465 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
467 hlimit = ipv6_get_hoplimit(dst->dev);
/* Quote the offending packet starting at its network header. */
474 msg.offset = skb_network_offset(skb);
/* Clamp the quoted length so header + ICMPv6 header + quote fit in IPV6_MIN_MTU. */
477 len = skb->len - msg.offset;
478 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
480 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
481 goto out_dst_release;
484 idev = in6_dev_get(skb->dev);
486 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
487 len + sizeof(struct icmp6hdr),
488 sizeof(struct icmp6hdr),
489 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
492 ip6_flush_pending_frames(sk);
495 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
498 if (likely(idev != NULL))
503 icmpv6_xmit_unlock();
506 EXPORT_SYMBOL(icmpv6_send);
/*
 * icmpv6_echo_reply - answer the Echo Request contained in @skb.
 *
 * The reply header is a copy of the request header with the type changed to
 * ICMPV6_ECHO_REPLY; the flow goes back to the request's source, out of the
 * interface the request arrived on. The payload is queued with
 * ip6_append_data() (using icmpv6_getfrag() to copy from @skb) and sent with
 * icmpv6_push_pending_frames(), all under the per-CPU ICMPv6 socket lock.
 *
 * NOTE(review): error branches, goto labels and reference drops are absent
 * from this listing.
 */
508 static void icmpv6_echo_reply(struct sk_buff *skb)
511 struct inet6_dev *idev;
512 struct ipv6_pinfo *np;
513 struct in6_addr *saddr = NULL;
514 struct icmp6hdr *icmph = icmp6_hdr(skb);
515 struct icmp6hdr tmp_hdr;
517 struct icmpv6_msg msg;
518 struct dst_entry *dst;
/* Candidate source for the reply: the address the request was sent to. */
523 saddr = &ipv6_hdr(skb)->daddr;
525 if (!ipv6_unicast_destination(skb))
/* Start from the request header, then only change the type. */
528 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
529 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
531 memset(&fl, 0, sizeof(fl));
532 fl.proto = IPPROTO_ICMPV6;
533 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
535 ipv6_addr_copy(&fl.fl6_src, saddr);
536 fl.oif = skb->dev->ifindex;
537 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
538 security_skb_classify_flow(skb, &fl);
540 if (icmpv6_xmit_lock())
543 sk = icmpv6_socket->sk;
546 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
547 fl.oif = np->mcast_oif;
549 err = ip6_dst_lookup(sk, &dst, &fl);
552 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit candidates: socket setting, route metric, device default. */
555 if (ipv6_addr_is_multicast(&fl.fl6_dst))
556 hlimit = np->mcast_hops;
558 hlimit = np->hop_limit;
560 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
562 hlimit = ipv6_get_hoplimit(dst->dev);
568 idev = in6_dev_get(skb->dev);
572 msg.type = ICMPV6_ECHO_REPLY;
/* Queue skb->len payload bytes plus room for the ICMPv6 header. */
574 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
575 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
576 (struct rt6_info*)dst, MSG_DONTWAIT);
579 ip6_flush_pending_frames(sk);
582 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
585 if (likely(idev != NULL))
589 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - deliver a received ICMPv6 error to the protocol that sent
 * the offending packet.
 *
 * skb:  received message; skb->data is read as the embedded (inner) IPv6
 *       header
 * type: ICMPv6 type of the received error
 * code: ICMPv6 code of the received error
 * info: type-specific header word, passed through unchanged
 *
 * The inner packet's upper-layer protocol is found by skipping its extension
 * headers; that protocol's err_handler (if registered in inet6_protos) is
 * called, and raw6_icmp_error() is called with the same arguments.
 */
592 static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
594 struct inet6_protocol *ipprot;
/* The inner IPv6 header must be present in the linear data area. */
599 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
602 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
603 if (ipv6_ext_hdr(nexthdr)) {
604 /* now skip over extension headers */
605 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
/* No extension headers: the upper-layer header follows the IPv6 header. */
609 inner_offset = sizeof(struct ipv6hdr);
612 /* Checking header including 8 bytes of inner protocol header. */
613 if (!pskb_may_pull(skb, inner_offset+8))
616 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
617 Without this we will not able f.e. to make source routed
619 Corresponding argument (opt) to notifiers is already added.
623 hash = nexthdr & (MAX_INET_PROTOS - 1);
626 ipprot = rcu_dereference(inet6_protos[hash]);
627 if (ipprot && ipprot->err_handler)
628 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
631 raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
635 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler for IPPROTO_ICMPV6 (installed through
 * icmpv6_protocol.handler).
 *
 * Visible steps: IPsec policy check (with a reverse-policy fallback that
 * temporarily moves the network header past the ICMPv6 header), input
 * counters, checksum verification, then dispatch on the ICMPv6 type:
 *   - Echo Request is answered via icmpv6_echo_reply();
 *   - Packet Too Big updates the path MTU via rt6_pmtu_discovery() and, like
 *     Destination Unreachable, Time Exceeded and Parameter Problem, is
 *     passed to icmpv6_notify();
 *   - MLD query/report go to igmp6_event_query()/igmp6_event_report();
 *   - unknown types are logged, and unknown error types are also passed to
 *     icmpv6_notify().
 *
 * NOTE(review): break/return statements, labels and the neighbour discovery
 * call are absent from this listing, so the return value is not described.
 */
638 static int icmpv6_rcv(struct sk_buff *skb)
640 struct net_device *dev = skb->dev;
641 struct inet6_dev *idev = __in6_dev_get(dev);
642 struct in6_addr *saddr, *daddr;
643 struct ipv6hdr *orig_hdr;
644 struct icmp6hdr *hdr;
647 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
/* Inspects the flags of the last transform state recorded on the packet. */
650 if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags &
/* Need the ICMPv6 header plus the embedded IPv6 header for the reverse check. */
654 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr)))
/* Temporarily point the network header at the embedded packet, then restore. */
657 nh = skb_network_offset(skb);
658 skb_set_network_header(skb, sizeof(*hdr));
660 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
663 skb_set_network_header(skb, nh);
666 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
668 saddr = &ipv6_hdr(skb)->saddr;
669 daddr = &ipv6_hdr(skb)->daddr;
671 /* Perform checksum. */
672 switch (skb->ip_summed) {
673 case CHECKSUM_COMPLETE:
674 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
679 skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len,
681 if (__skb_checksum_complete(skb)) {
682 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
683 NIP6(*saddr), NIP6(*daddr));
/* Advance skb->data past the ICMPv6 header. */
688 if (!pskb_pull(skb, sizeof(*hdr)))
691 hdr = icmp6_hdr(skb);
693 type = hdr->icmp6_type;
/* Per-type input counter. */
695 ICMP6MSGIN_INC_STATS_BH(idev, type);
698 case ICMPV6_ECHO_REQUEST:
699 icmpv6_echo_reply(skb);
702 case ICMPV6_ECHO_REPLY:
703 /* we couldn't care less */
706 case ICMPV6_PKT_TOOBIG:
707 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
708 standard destination cache. Seems, only "advanced"
709 destination cache will allow to solve this problem
712 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
714 hdr = icmp6_hdr(skb);
715 orig_hdr = (struct ipv6hdr *) (hdr + 1);
716 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
717 ntohl(hdr->icmp6_mtu));
720 * Drop through to notify
723 case ICMPV6_DEST_UNREACH:
724 case ICMPV6_TIME_EXCEED:
725 case ICMPV6_PARAMPROB:
726 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
729 case NDISC_ROUTER_SOLICITATION:
730 case NDISC_ROUTER_ADVERTISEMENT:
731 case NDISC_NEIGHBOUR_SOLICITATION:
732 case NDISC_NEIGHBOUR_ADVERTISEMENT:
737 case ICMPV6_MGM_QUERY:
738 igmp6_event_query(skb);
741 case ICMPV6_MGM_REPORT:
742 igmp6_event_report(skb);
745 case ICMPV6_MGM_REDUCTION:
746 case ICMPV6_NI_QUERY:
747 case ICMPV6_NI_REPLY:
748 case ICMPV6_MLD2_REPORT:
749 case ICMPV6_DHAAD_REQUEST:
750 case ICMPV6_DHAAD_REPLY:
751 case ICMPV6_MOBILE_PREFIX_SOL:
752 case ICMPV6_MOBILE_PREFIX_ADV:
756 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
759 if (type & ICMPV6_INFOMSG_MASK)
763 * error of unknown type.
764 * must pass to upper level
767 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
774 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
781 * Special lock-class for __icmpv6_socket:
/* Lockdep class key assigned to sk_dst_lock of every ICMPv6 control socket in icmpv6_init(). */
783 static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
/*
 * icmpv6_init - create the per-CPU ICMPv6 control sockets and register the
 * ICMPv6 protocol handler.
 *
 * ops: NOTE(review): no use of this parameter is visible in this listing
 *
 * For every possible CPU a kernel raw socket (PF_INET6, SOCK_RAW,
 * IPPROTO_ICMPV6) is created, switched to GFP_ATOMIC allocation, given its
 * own lockdep class for sk_dst_lock, and unhashed through its protocol's
 * unhash hook. If a later step fails, the sockets created so far (indices
 * below i) are released again.
 *
 * NOTE(review): return statements and goto labels are absent from this
 * listing; presumably a negative errno is returned on failure - confirm.
 */
785 int __init icmpv6_init(struct net_proto_family *ops)
790 for_each_possible_cpu(i) {
791 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
792 &per_cpu(__icmpv6_socket, i));
795 "Failed to initialize the ICMP6 control socket "
801 sk = per_cpu(__icmpv6_socket, i)->sk;
/* Allocations made through this socket must not sleep. */
802 sk->sk_allocation = GFP_ATOMIC;
804 * Split off their lock-class, because sk->sk_dst_lock
805 * gets used from softirqs, which is safe for
806 * __icmpv6_socket (because those never get directly used
807 * via userspace syscalls), but unsafe for normal sockets.
809 lockdep_set_class(&sk->sk_dst_lock,
810 &icmpv6_socket_sk_dst_lock_key);
812 /* Enough space for 2 64K ICMP packets, including
813 * sk_buff struct overhead.
816 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
818 sk->sk_prot->unhash(sk);
822 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
823 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
831 for (j = 0; j < i; j++) {
832 if (!cpu_possible(j))
834 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init(): release the control socket of every
 * possible CPU, then unregister the ICMPv6 protocol handler.
 */
840 void icmpv6_cleanup(void)
844 for_each_possible_cpu(i) {
845 sock_release(per_cpu(__icmpv6_socket, i));
847 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Table of per-code entries for Destination Unreachable messages.
 * icmpv6_err_convert() indexes tab_unreach[] by ICMPv6 code and reads the
 * .err and .fatal members.
 * NOTE(review): the member declarations, the initializer values and the line
 * naming the array are absent from this listing; presumably this struct is
 * the tab_unreach[] definition - confirm against the full file.
 */
850 static const struct icmp6_err {
858 { /* ADM_PROHIBITED */
862 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate a received ICMPv6 error into an error value
 * for the upper layer.
 *
 * type: ICMPv6 type of the received error
 * code: ICMPv6 code of the received error
 * err:  output; receives the translated error value
 *
 * For Destination Unreachable with a code up to ICMPV6_PORT_UNREACH, the
 * error and its "fatal" flag are taken from tab_unreach[code].
 *
 * NOTE(review): the bodies of the other cases and the return statement are
 * absent from this listing; presumably the fatal flag is returned - confirm.
 */
876 int icmpv6_err_convert(int type, int code, int *err)
883 case ICMPV6_DEST_UNREACH:
/* Bounds check before indexing the table. */
885 if (code <= ICMPV6_PORT_UNREACH) {
886 *err = tab_unreach[code].err;
887 fatal = tab_unreach[code].fatal;
891 case ICMPV6_PKT_TOOBIG:
895 case ICMPV6_PARAMPROB:
900 case ICMPV6_TIME_EXCEED:
908 EXPORT_SYMBOL(icmpv6_err_convert);
/*
 * Sysctl table template for ICMPv6. The "ratelimit" entry exposes the int
 * init_net.ipv6.sysctl.icmpv6_time (the base interval that
 * icmpv6_xrlim_allow() reads) through proc_dointvec.
 * ipv6_icmp_sysctl_init() duplicates this template with kmemdup().
 * NOTE(review): the .mode line and the table terminator are absent from this
 * listing.
 */
911 ctl_table ipv6_icmp_table_template[] = {
913 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
914 .procname = "ratelimit",
915 .data = &init_net.ipv6.sysctl.icmpv6_time,
916 .maxlen = sizeof(int),
918 .proc_handler = &proc_dointvec
923 struct ctl_table *ipv6_icmp_sysctl_init(struct net *net)
925 struct ctl_table *table;
927 table = kmemdup(ipv6_icmp_table_template,
928 sizeof(ipv6_icmp_table_template),