2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/netfilter.h>
48 #include <linux/sysctl.h>
51 #include <linux/inet.h>
52 #include <linux/netdevice.h>
53 #include <linux/icmpv6.h>
59 #include <net/ip6_checksum.h>
60 #include <net/protocol.h>
62 #include <net/rawv6.h>
63 #include <net/transp_v6.h>
64 #include <net/ip6_route.h>
65 #include <net/addrconf.h>
68 #include <asm/uaccess.h>
69 #include <asm/system.h>
/*
 * ICMPv6 SNMP MIB counters (struct icmpv6_mib), marked __read_mostly.
 * NOTE(review): presumably the storage behind the ICMP6_INC_STATS_BH() and
 * ICMP6_INC_STATS_OFFSET_BH() calls used in this file - confirm against
 * the macro definitions.
 */
71 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
74 * The ICMP socket(s). This is the most convenient way to flow control
75 * our ICMP output as well as maintain a clean interface throughout
76 * all layers. All Socketless IP sends will soon be gone.
78 * On SMP we have one ICMP socket per-cpu.
/*
 * One socket pointer per CPU, initially NULL; the sockets themselves are
 * created in icmpv6_init() and released in icmpv6_cleanup().
 * "icmpv6_socket" is shorthand for the entry belonging to the CPU the
 * code is currently running on (__get_cpu_var).
 */
80 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
81 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/* Forward declaration so icmpv6_rcv can be named in icmpv6_protocol. */
83 static int icmpv6_rcv(struct sk_buff **pskb);
/*
 * Protocol descriptor for ICMPv6 with icmpv6_rcv() as its receive handler.
 * It is registered for IPPROTO_ICMPV6 in icmpv6_init() and unregistered in
 * icmpv6_cleanup().
 */
85 static struct inet6_protocol icmpv6_protocol = {
86 .handler = icmpv6_rcv,
87 .flags = INET6_PROTO_FINAL,
/*
 * icmpv6_xmit_lock - try to take the lock of this CPU's ICMPv6 socket.
 *
 * The lock is attempted with spin_trylock() on
 * icmpv6_socket->sk->sk_lock.slock, so this never spins. The in-line
 * comment explains when the attempt can fail: the output path may itself
 * report a link failure for an ICMPv6 packet that is being sent.
 *
 * Callers in this file use the result as "if (icmpv6_xmit_lock())".
 * NOTE(review): the return statements and neighbouring lines are elided
 * in this listing (embedded numbers jump from 90 to 94 and stop at 97);
 * presumably a non-zero return means the lock was NOT taken - confirm.
 */
90 static __inline__ int icmpv6_xmit_lock(void)
94 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
95 /* This can happen if the output path (f.e. SIT or
96 * ip6ip6 tunnel) signals dst_link_failure() for an
97 * outgoing ICMP6 packet.
/*
 * icmpv6_xmit_unlock - counterpart of icmpv6_xmit_lock().
 *
 * Releases icmpv6_socket->sk->sk_lock.slock with spin_unlock_bh().
 */
105 static __inline__ void icmpv6_xmit_unlock(void)
107 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
111 * Slightly more convenient version of icmpv6_send.
/*
 * icmpv6_param_prob - report a Parameter Problem for @skb.
 * @skb:  the offending packet
 * @code: ICMPv6 code to use with ICMPV6_PARAMPROB
 * @pos:  passed as the "info" argument of icmpv6_send(), which stores it
 *        in the pointer field of the outgoing header
 *
 * The device handed to icmpv6_send() is skb->dev.
 * NOTE(review): the lines following the icmpv6_send() call are elided in
 * this listing, so anything done to @skb afterwards is not visible here.
 */
113 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
115 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
120 * Figure out whether we may reply to this packet with an icmp error.
122 * We do not reply if:
123 * - it was an icmp error message.
124 * - it is truncated, so that it is known, that protocol is ICMPV6
125 * (i.e. in the middle of some exthdr)
/*
 * is_ineligible - test whether @skb must not be answered with an ICMPv6
 * error.
 *
 * Starting right behind the IPv6 header, the extension headers are skipped
 * with ipv6_skip_exthdr(). If the resulting next header is ICMPv6, the
 * type byte is fetched with skb_header_pointer() and checked for the
 * ICMPV6_INFOMSG_MASK bit being clear, i.e. for an ICMPv6 error message.
 *
 * icmpv6_send() drops the reply when this returns non-zero.
 * NOTE(review): the return statements and several declarations are elided
 * in this listing; the exact result for truncated packets is not visible.
 */
130 static int is_ineligible(struct sk_buff *skb)
/* Offset of the first byte after the fixed IPv6 header, relative to skb->data. */
132 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
/* Number of bytes remaining after that offset. */
133 int len = skb->len - ptr;
134 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
/* Updates nexthdr to the protocol found behind the extension headers. */
139 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
142 if (nexthdr == IPPROTO_ICMPV6) {
/* Read only the type byte; _type is the bounce buffer for non-linear skbs. */
144 tp = skb_header_pointer(skb,
145 ptr+offsetof(struct icmp6hdr, icmp6_type),
146 sizeof(_type), &_type);
148 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base interval for the ICMPv6 output rate limit, initialised to 1*HZ.
 * It is read by icmpv6_xrlim_allow() and exported through the "ratelimit"
 * entry of ipv6_icmp_table.
 */
154 static int sysctl_icmpv6_time = 1*HZ;
157 * Check the ICMP output rate limit
/*
 * icmpv6_xrlim_allow - decide whether an outgoing ICMPv6 message passes
 * the rate limiter.
 * @sk:   socket used for the route lookup
 * @type: ICMPv6 type of the message about to be sent
 *
 * The remaining parameter (the flow "fl" handed to ip6_route_output()) is
 * declared on a line that is elided in this listing.
 *
 * Informational messages and Packet Too Big are handled before any route
 * lookup. Otherwise the output route is looked up; a failed lookup is
 * counted as IPSTATS_MIB_OUTNOROUTES, loopback devices get their own
 * branch, and everything else is passed to xrlim_allow() with an interval
 * derived from sysctl_icmpv6_time.
 *
 * NOTE(review): the assignments to "res" for the early cases, the
 * condition guarding the OUTNOROUTES branch and the return statement are
 * elided here; icmpv6_send() treats a zero result as "do not send".
 */
159 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
162 struct dst_entry *dst;
165 /* Informational messages are not limited. */
166 if (type & ICMPV6_INFOMSG_MASK)
169 /* Do not limit pmtu discovery, it would break it. */
170 if (type == ICMPV6_PKT_TOOBIG)
174 * Look up the output route.
175 * XXX: perhaps the expire for routing entries cloned by
176 * this lookup should be more aggressive (not longer than timeout).
178 dst = ip6_route_output(sk, fl);
180 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
181 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
184 struct rt6_info *rt = (struct rt6_info *)dst;
185 int tmo = sysctl_icmpv6_time;
187 /* Give more bandwidth to wider prefixes. */
188 if (rt->rt6i_dst.plen < 128)
/* Halve the interval once for every full 32 bits the prefix is shorter than 128. */
189 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
191 res = xrlim_allow(dst, tmo);
198 * an inline helper for the "simple" if statement below
199 * checks if parameter problem report is caused by an
200 * unrecognized IPv6 option that has the Option Type
201 * highest-order two bits set to 10
/*
 * opt_unrec - test the option type byte that a Parameter Problem points at.
 * @skb:    packet containing the option
 * @offset: position of the option type byte, relative to the network
 *          header (it is converted to an skb->data offset below)
 *
 * Returns non-zero when the two highest-order bits of that byte are "10"
 * ((byte & 0xC0) == 0x80).
 * NOTE(review): the handling of a failed skb_header_pointer() is on lines
 * elided in this listing.
 */
204 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
/* Rebase the offset from the network header to skb->data. */
208 offset += skb->nh.raw - skb->data;
209 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
212 return (*op & 0xC0) == 0x80;
/*
 * icmpv6_push_pending_frames - finish and transmit the queued ICMPv6 message.
 * @sk:   socket whose write queue holds the pending data
 * @fl:   flow supplying source address and protocol for the checksum
 * @thdr: prepared ICMPv6 header, copied into the first queued skb
 * @len:  length value passed to csum_ipv6_magic()
 *
 * The header of the first skb on sk->sk_write_queue is overwritten with
 * @thdr and its checksum field cleared. The checksum is then computed
 * either from that single skb, or by summing skb->csum over every queued
 * skb before adding the header. A computed checksum of 0 is stored as -1,
 * and finally ip6_push_pending_frames() is called.
 *
 * NOTE(review): the result for an empty queue, the return statement and
 * the braces delimiting the two checksum branches are elided in this
 * listing.
 */
215 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
218 struct icmp6hdr *icmp6h;
/* Nothing queued: there is no header to fill in. */
221 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
224 icmp6h = (struct icmp6hdr*) skb->h.raw;
225 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field must be zero while the sum is being computed. */
226 icmp6h->icmp6_cksum = 0;
/* Single skb: add the header to that skb's running checksum. */
228 if (skb_queue_len(&sk->sk_write_queue) == 1) {
229 skb->csum = csum_partial((char *)icmp6h,
230 sizeof(struct icmp6hdr), skb->csum);
231 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Several skbs: accumulate every per-skb checksum first. */
238 skb_queue_walk(&sk->sk_write_queue, skb) {
239 tmp_csum = csum_add(tmp_csum, skb->csum);
242 tmp_csum = csum_partial((char *)icmp6h,
243 sizeof(struct icmp6hdr), tmp_csum);
244 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
246 len, fl->proto, tmp_csum);
247 icmp6h->icmp6_cksum = tmp_csum;
/* Never transmit a checksum of 0; -1 is stored in its place. */
249 if (icmp6h->icmp6_cksum == 0)
250 icmp6h->icmp6_cksum = -1;
251 ip6_push_pending_frames(sk);
/*
 * icmpv6_getfrag - copy callback handed to ip6_append_data().
 * @from:   really a struct icmpv6_msg describing the source skb
 * @to:     destination buffer
 * @offset: offset within the message payload
 * @len:    number of bytes to copy
 * @odd:    passed to csum_block_add() when folding in the block checksum
 * @skb:    skb being built; its csum is updated here
 *
 * Copies from msg->skb starting at msg->offset + @offset while computing
 * the checksum of the copied bytes, and folds that into @skb->csum. For
 * messages whose type lacks the ICMPV6_INFOMSG_MASK bit (errors) the new
 * skb is attached to the original via nf_ct_attach().
 *
 * NOTE(review): the remaining arguments of skb_copy_and_csum_bits() and
 * the return statement are elided in this listing.
 */
262 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
264 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
265 struct sk_buff *org_skb = msg->skb;
268 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
270 skb->csum = csum_block_add(skb->csum, csum, odd);
271 if (!(msg->type & ICMPV6_INFOMSG_MASK))
272 nf_ct_attach(skb, org_skb);
277 * Send an ICMP message in response to a packet in error
/*
 * icmpv6_send - build and transmit an ICMPv6 message about @skb.
 * @skb:  the packet that triggered the message
 * @type: ICMPv6 type to send
 * @code: ICMPv6 code to send
 * @info: stored in the header's pointer field in network byte order
 * @dev:  not referenced in the lines visible in this listing
 *
 * Visible sequence of checks and steps:
 *  1. The IPv6 header of @skb must lie inside the skb's head..tail area.
 *  2. Destination checks: for a multicast destination or a packet that was
 *     not addressed to this host, only Packet Too Big and Parameter
 *     Problem / unknown option (with opt_unrec() true) pass the filter.
 *  3. Source checks: a link-local source pins the interface index; an
 *     unspecified or multicast source is rejected.
 *  4. is_ineligible() filters out packets that must never be answered.
 *  5. A flow towards the original source is set up, the per-CPU socket is
 *     locked and the rate limiter consulted.
 *  6. Route lookup, anycast check, xfrm lookup, hop limit and traffic
 *     class selection.
 *  7. The payload is the original packet from its network header on,
 *     capped so that the whole message fits in IPV6_MIN_MTU.
 *  8. ip6_append_data() with icmpv6_getfrag(), then
 *     icmpv6_push_pending_frames(), statistics, and unlock.
 *
 * NOTE(review): most branch bodies, gotos, labels and returns are elided
 * in this listing, so the exact control flow between these steps is not
 * visible here.
 */
279 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
280 struct net_device *dev)
282 struct inet6_dev *idev = NULL;
283 struct ipv6hdr *hdr = skb->nh.ipv6h;
285 struct ipv6_pinfo *np;
286 struct in6_addr *saddr = NULL;
287 struct dst_entry *dst;
288 struct icmp6hdr tmp_hdr;
290 struct icmpv6_msg msg;
/* Sanity check: the whole IPv6 header must be inside the linear buffer. */
297 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
301 * Make sure we respect the rules
302 * i.e. RFC 1885 2.4(e)
303 * Rule (e.1) is enforced by not using icmpv6_send
304 * in any code that processes icmp errors.
306 addr_type = ipv6_addr_type(&hdr->daddr);
308 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
315 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
/* Only these two kinds of error may be sent in this situation. */
316 if (type != ICMPV6_PKT_TOOBIG &&
317 !(type == ICMPV6_PARAMPROB &&
318 code == ICMPV6_UNK_OPTION &&
319 (opt_unrec(skb, info))))
/* From here on addr_type describes the SOURCE address of the packet. */
325 addr_type = ipv6_addr_type(&hdr->saddr);
331 if (addr_type & IPV6_ADDR_LINKLOCAL)
332 iif = skb->dev->ifindex;
335 * Must not send error if the source does not uniquely
336 * identify a single node (RFC2463 Section 2.4).
337 * We check unspecified / multicast addresses here,
338 * and anycast addresses will be checked later.
340 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
341 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
346 * Never answer to a ICMP packet.
348 if (is_ineligible(skb)) {
349 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: ICMPv6 towards the original sender. */
353 memset(&fl, 0, sizeof(fl));
354 fl.proto = IPPROTO_ICMPV6;
355 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
357 ipv6_addr_copy(&fl.fl6_src, saddr);
359 fl.fl_icmp_type = type;
360 fl.fl_icmp_code = code;
362 if (icmpv6_xmit_lock())
365 sk = icmpv6_socket->sk;
368 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Header template; the checksum is filled in by icmpv6_push_pending_frames(). */
371 tmp_hdr.icmp6_type = type;
372 tmp_hdr.icmp6_code = code;
373 tmp_hdr.icmp6_cksum = 0;
374 tmp_hdr.icmp6_pointer = htonl(info);
376 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
377 fl.oif = np->mcast_oif;
379 err = ip6_dst_lookup(sk, &dst, &fl);
384 * We won't send icmp if the destination is known
387 if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
388 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
389 goto out_dst_release;
392 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit candidates: socket settings, route metric, device default. */
395 if (ipv6_addr_is_multicast(&fl.fl6_dst))
396 hlimit = np->mcast_hops;
398 hlimit = np->hop_limit;
400 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
402 hlimit = ipv6_get_hoplimit(dst->dev);
404 tclass = np->cork.tclass;
/* Payload starts at the network header of the offending packet. */
409 msg.offset = skb->nh.raw - skb->data;
412 len = skb->len - msg.offset;
/* Cap the payload so IPv6 header + ICMPv6 header + payload fit in IPV6_MIN_MTU. */
413 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
415 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
416 goto out_dst_release;
419 idev = in6_dev_get(skb->dev);
421 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
422 len + sizeof(struct icmp6hdr),
423 sizeof(struct icmp6hdr),
424 hlimit, tclass, NULL, &fl, (struct rt6_info*)dst,
427 ip6_flush_pending_frames(sk);
430 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counter, indexed by the distance from ICMPV6_DEST_UNREACH. */
432 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
433 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
434 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
437 if (likely(idev != NULL))
442 icmpv6_xmit_unlock();
/*
 * icmpv6_echo_reply - answer an Echo Request contained in @skb.
 * @skb: the received Echo Request; skb->h.raw is read as its ICMPv6 header
 *
 * The reply header is a copy of the request header with the type changed
 * to ICMPV6_ECHO_REPLY. The flow goes back to the request's source
 * address and leaves through the interface the request arrived on. The
 * rest mirrors icmpv6_send(): lock the per-CPU socket, look up route and
 * xfrm policy, choose hop limit and traffic class, append the data with
 * icmpv6_getfrag(), push the frames, update statistics and unlock.
 *
 * NOTE(review): the branch bodies around ipv6_unicast_destination(), the
 * error paths, the remaining msg fields and the labels are elided in
 * this listing.
 */
445 static void icmpv6_echo_reply(struct sk_buff *skb)
448 struct inet6_dev *idev;
449 struct ipv6_pinfo *np;
450 struct in6_addr *saddr = NULL;
451 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
452 struct icmp6hdr tmp_hdr;
454 struct icmpv6_msg msg;
455 struct dst_entry *dst;
/* Candidate source address: the address the request was sent to. */
460 saddr = &skb->nh.ipv6h->daddr;
462 if (!ipv6_unicast_destination(skb))
/* Start from the request header, then only change the type. */
465 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
466 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
468 memset(&fl, 0, sizeof(fl));
469 fl.proto = IPPROTO_ICMPV6;
470 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
472 ipv6_addr_copy(&fl.fl6_src, saddr);
473 fl.oif = skb->dev->ifindex;
474 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
476 if (icmpv6_xmit_lock())
479 sk = icmpv6_socket->sk;
482 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
483 fl.oif = np->mcast_oif;
485 err = ip6_dst_lookup(sk, &dst, &fl);
488 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
/* Hop limit candidates: socket settings, route metric, device default. */
491 if (ipv6_addr_is_multicast(&fl.fl6_dst))
492 hlimit = np->mcast_hops;
494 hlimit = np->hop_limit;
496 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
498 hlimit = ipv6_get_hoplimit(dst->dev);
500 tclass = np->cork.tclass;
504 idev = in6_dev_get(skb->dev);
508 msg.type = ICMPV6_ECHO_REPLY;
/* Payload length is skb->len; the ICMPv6 header is accounted for separately. */
510 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
511 sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl,
512 (struct rt6_info*)dst, MSG_DONTWAIT);
515 ip6_flush_pending_frames(sk);
518 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
520 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
521 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
524 if (likely(idev != NULL))
528 icmpv6_xmit_unlock();
/*
 * icmpv6_notify - hand a received ICMPv6 error to the affected protocol.
 * @skb:  received message; skb->data is read as the IPv6 header of the
 *        packet embedded in the error (icmpv6_rcv() has already pulled
 *        the ICMPv6 header)
 * @type: ICMPv6 type of the received message
 * @code: ICMPv6 code of the received message
 * @info: extra 32-bit value from the ICMPv6 header, passed through as-is
 *
 * The embedded packet's extension headers are skipped to find the inner
 * protocol and its offset; the header plus 8 bytes of the inner protocol
 * header must be pullable. The error is then delivered to the err_handler
 * registered in inet6_protos[] for that protocol, and to every matching
 * raw socket via rawv6_err() under raw_v6_lock.
 *
 * NOTE(review): the bail-out statements after the failed checks, the RCU
 * lock calls and the remaining arguments of __raw_v6_lookup() are elided
 * in this listing.
 */
531 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
533 struct in6_addr *saddr, *daddr;
534 struct inet6_protocol *ipprot;
540 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
543 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
544 if (ipv6_ext_hdr(nexthdr)) {
545 /* now skip over extension headers */
546 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
550 inner_offset = sizeof(struct ipv6hdr);
553 /* Check the header including 8 bytes of inner protocol header. */
554 if (!pskb_may_pull(skb, inner_offset+8))
557 saddr = &skb->nh.ipv6h->saddr;
558 daddr = &skb->nh.ipv6h->daddr;
560 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
561 Without this we will not able f.e. to make source routed
563 Corresponding argument (opt) to notifiers is already added.
567 hash = nexthdr & (MAX_INET_PROTOS - 1);
570 ipprot = rcu_dereference(inet6_protos[hash]);
571 if (ipprot && ipprot->err_handler)
572 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
575 read_lock(&raw_v6_lock);
576 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
577 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
579 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
583 read_unlock(&raw_v6_lock);
587 * Handle icmp messages
/*
 * icmpv6_rcv - receive handler for ICMPv6 (see icmpv6_protocol.handler).
 * @pskb: pointer to the received skb pointer
 *
 * Counts the message, verifies the checksum according to skb->ip_summed,
 * pulls the ICMPv6 header off the skb, updates the per-type input
 * counters and then dispatches on the message type:
 *  - Echo Request        -> icmpv6_echo_reply()
 *  - Packet Too Big      -> rt6_pmtu_discovery() with the embedded
 *                           packet's addresses, then notify
 *  - Dest Unreach / Time Exceeded / Param Problem -> icmpv6_notify()
 *  - MLD query / report  -> igmp6_event_query() / igmp6_event_report()
 *  - unknown types       -> debug message; per the comment remnant below,
 *                           unknown error types go to icmpv6_notify()
 *
 * NOTE(review): the case labels of the ip_summed switch, the ndisc
 * handler call, break/goto statements, the error label and the return
 * value are elided in this listing. ICMP6_MIB_INERRORS is incremented on
 * a path whose entry conditions are not visible here.
 */
590 static int icmpv6_rcv(struct sk_buff **pskb)
592 struct sk_buff *skb = *pskb;
593 struct net_device *dev = skb->dev;
594 struct inet6_dev *idev = __in6_dev_get(dev);
595 struct in6_addr *saddr, *daddr;
596 struct ipv6hdr *orig_hdr;
597 struct icmp6hdr *hdr;
600 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
602 saddr = &skb->nh.ipv6h->saddr;
603 daddr = &skb->nh.ipv6h->daddr;
605 /* Perform checksum. */
606 switch (skb->ip_summed) {
608 if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
613 skb->csum = ~csum_ipv6_magic(saddr, daddr, skb->len,
615 if (__skb_checksum_complete(skb)) {
616 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n",
617 NIP6(*saddr), NIP6(*daddr));
/* Advance skb->data past the ICMPv6 header; hdr still points at it via h.raw. */
622 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
625 hdr = (struct icmp6hdr *) skb->h.raw;
627 type = hdr->icmp6_type;
/* Per-type input counters, indexed by the distance from the first type of each range. */
629 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
630 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
631 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
632 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
635 case ICMPV6_ECHO_REQUEST:
636 icmpv6_echo_reply(skb);
639 case ICMPV6_ECHO_REPLY:
640 /* we couldn't care less */
643 case ICMPV6_PKT_TOOBIG:
644 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
645 standard destination cache. Seems, only "advanced"
646 destination cache will allow to solve this problem
649 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
651 hdr = (struct icmp6hdr *) skb->h.raw;
652 orig_hdr = (struct ipv6hdr *) (hdr + 1);
653 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
654 ntohl(hdr->icmp6_mtu));
657 * Drop through to notify
660 case ICMPV6_DEST_UNREACH:
661 case ICMPV6_TIME_EXCEED:
662 case ICMPV6_PARAMPROB:
663 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
666 case NDISC_ROUTER_SOLICITATION:
667 case NDISC_ROUTER_ADVERTISEMENT:
668 case NDISC_NEIGHBOUR_SOLICITATION:
669 case NDISC_NEIGHBOUR_ADVERTISEMENT:
674 case ICMPV6_MGM_QUERY:
675 igmp6_event_query(skb);
678 case ICMPV6_MGM_REPORT:
679 igmp6_event_report(skb);
682 case ICMPV6_MGM_REDUCTION:
683 case ICMPV6_NI_QUERY:
684 case ICMPV6_NI_REPLY:
685 case ICMPV6_MLD2_REPORT:
686 case ICMPV6_DHAAD_REQUEST:
687 case ICMPV6_DHAAD_REPLY:
688 case ICMPV6_MOBILE_PREFIX_SOL:
689 case ICMPV6_MOBILE_PREFIX_ADV:
693 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
696 if (type & ICMPV6_INFOMSG_MASK)
700 * error of unknown type.
701 * must pass to upper level
704 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
710 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
/*
 * icmpv6_init - create the per-CPU ICMPv6 sockets and register the protocol.
 * @ops: not referenced in the lines visible in this listing
 *
 * For every possible CPU a raw PF_INET6 / IPPROTO_ICMPV6 kernel socket is
 * created and stored in __icmpv6_socket. Each socket allocates with
 * GFP_ATOMIC, gets a buffer size large enough for two 64K packets plus
 * sk_buff overhead, and is unhashed. Afterwards icmpv6_protocol is
 * registered for IPPROTO_ICMPV6. On failure the sockets of the CPUs below
 * the failing index are released again.
 *
 * NOTE(review): the error checks, the field receiving the buffer size,
 * the labels and the return values are elided in this listing.
 */
715 int __init icmpv6_init(struct net_proto_family *ops)
720 for_each_possible_cpu(i) {
721 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
722 &per_cpu(__icmpv6_socket, i));
725 "Failed to initialize the ICMP6 control socket "
731 sk = per_cpu(__icmpv6_socket, i)->sk;
732 sk->sk_allocation = GFP_ATOMIC;
734 /* Enough space for 2 64K ICMP packets, including
735 * sk_buff struct overhead.
738 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
740 sk->sk_prot->unhash(sk);
744 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
745 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
753 for (j = 0; j < i; j++) {
754 if (!cpu_possible(j))
756 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * icmpv6_cleanup - undo icmpv6_init().
 *
 * Releases the ICMPv6 socket of every possible CPU and then removes
 * icmpv6_protocol from the IPPROTO_ICMPV6 slot.
 */
762 void icmpv6_cleanup(void)
766 for_each_possible_cpu(i) {
767 sock_release(per_cpu(__icmpv6_socket, i));
769 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Table used by icmpv6_err_convert() as tab_unreach[code]: one entry per
 * Destination Unreachable code, each providing an "err" value and a
 * "fatal" flag.
 * NOTE(review): the member declarations, the entry values and the array
 * name line are elided in this listing; only two entry labels are visible.
 */
772 static const struct icmp6_err {
780 { /* ADM_PROHIBITED */
784 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * icmpv6_err_convert - translate an ICMPv6 error into an error number.
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @err:  output; receives the error value chosen for @type / @code
 *
 * For Destination Unreachable, codes up to ICMPV6_PORT_UNREACH are looked
 * up in tab_unreach[], which supplies both the error value and the fatal
 * flag. Packet Too Big, Parameter Problem and Time Exceeded have their
 * own cases.
 *
 * NOTE(review): the bodies of the other cases, the default values and
 * the return statement are elided in this listing; presumably the fatal
 * flag is the return value - confirm.
 * NOTE(review): @code is a signed int and only its upper bound is checked
 * in the visible lines; confirm that callers never pass a negative code.
 */
798 int icmpv6_err_convert(int type, int code, int *err)
805 case ICMPV6_DEST_UNREACH:
807 if (code <= ICMPV6_PORT_UNREACH) {
808 *err = tab_unreach[code].err;
809 fatal = tab_unreach[code].fatal;
813 case ICMPV6_PKT_TOOBIG:
817 case ICMPV6_PARAMPROB:
822 case ICMPV6_TIME_EXCEED:
831 ctl_table ipv6_icmp_table[] = {
833 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
834 .procname = "ratelimit",
835 .data = &sysctl_icmpv6_time,
836 .maxlen = sizeof(int),
838 .proc_handler = &proc_dointvec