2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to an icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to send parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
/*
 * File-wide ICMPv6 SNMP counter block (struct icmpv6_mib).
 * NOTE(review): presumably the storage behind the ICMP6_INC_STATS*
 * macros used further down in this file - confirm in the SNMP headers.
 */
70 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
77 * On SMP we have one ICMP socket per-cpu.
/*
 * One kernel-internal ICMPv6 socket pointer per CPU, NULL until
 * icmpv6_init() creates the sockets with sock_create_kern().
 * icmpv6_socket expands to the per-CPU slot via __get_cpu_var(), so it
 * is only meaningful while the caller cannot migrate to another CPU.
 */
79 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
80 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
/*
 * Protocol descriptor registered for IPPROTO_ICMPV6 in icmpv6_init()
 * (inet6_add_protocol) and removed in icmpv6_cleanup(). Incoming ICMPv6
 * packets are dispatched to icmpv6_rcv(), which is forward-declared here
 * because its definition appears later in the file.
 */
82 static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
84 static struct inet6_protocol icmpv6_protocol = {
85 .handler = icmpv6_rcv,
86 .flags = INET6_PROTO_FINAL,
/*
 * Try to take the spinlock of this CPU's ICMPv6 control socket without
 * spinning (spin_trylock). Callers (icmpv6_send, icmpv6_echo_reply) test
 * the result as a boolean and pair a successful call with
 * icmpv6_xmit_unlock().
 *
 * NOTE(review): the return statements and any bottom-half handling are
 * not visible in this excerpt; presumably a non-zero return means the
 * lock was NOT obtained and the caller must give up - confirm.
 */
89 static __inline__ int icmpv6_xmit_lock(void)
93 if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
/*
 * Release the lock taken by icmpv6_xmit_lock() on this CPU's ICMPv6
 * control socket. The _bh unlock variant is used.
 * NOTE(review): the matching bottom-half disable is presumably done in
 * icmpv6_xmit_lock() on a line not shown here - confirm.
 */
104 static __inline__ void icmpv6_xmit_unlock(void)
106 spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
110 * Slightly more convenient version of icmpv6_send.
/*
 * Send an ICMPv6 Parameter Problem error for @skb.
 * @skb:  offending packet; its receiving device (skb->dev) is passed on
 * @code: Parameter Problem code
 * @pos:  passed to icmpv6_send() as the "info" value, which that
 *        function stores in the icmp6_pointer header field
 *
 * NOTE(review): any statement following the icmpv6_send() call (e.g.
 * disposal of @skb) is not visible in this excerpt - check the full file
 * before relying on who owns @skb afterwards.
 */
112 void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
114 icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
119 * Figure out whether we may reply to this packet with an icmp error.
121 * We do not reply, if:
122 * - it was icmp error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
/*
 * Decide whether @skb must not be answered with an ICMPv6 error.
 *
 * Visible logic: start right after the fixed IPv6 header, skip the
 * extension headers with ipv6_skip_exthdr(), and if the final next-header
 * is ICMPv6, fetch the ICMPv6 type byte with skb_header_pointer() (which
 * copes with non-linear data by copying into _type). The packet is
 * treated as ineligible when the type does not have ICMPV6_INFOMSG_MASK
 * set, i.e. when it is itself an ICMPv6 error.
 *
 * NOTE(review): the bounds checks, the NULL test on tp and the return
 * statements are on lines missing from this excerpt; icmpv6_send() treats
 * a non-zero result as "do not reply".
 */
129 static int is_ineligible(struct sk_buff *skb)
131 int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
132 int len = skb->len - ptr;
133 __u8 nexthdr = skb->nh.ipv6h->nexthdr;
138 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr);
141 if (nexthdr == IPPROTO_ICMPV6) {
143 tp = skb_header_pointer(skb,
144 ptr+offsetof(struct icmp6hdr, icmp6_type),
145 sizeof(_type), &_type);
147 !(*tp & ICMPV6_INFOMSG_MASK))
/*
 * Base rate-limit interval for ICMPv6 errors, initialised to one second
 * expressed in jiffies (1*HZ). Read by icmpv6_xrlim_allow() and exported
 * through the "ratelimit" entry of ipv6_icmp_table.
 */
153 static int sysctl_icmpv6_time = 1*HZ;
156 * Check the ICMP output rate limit
/*
 * Rate-limit check for an outgoing ICMPv6 message of @type.
 *
 * Visible behaviour:
 *  - informational messages (ICMPV6_INFOMSG_MASK set) and Packet Too Big
 *    are special-cased before any route lookup (per the existing comments
 *    they are not limited);
 *  - otherwise the output route for the flow is looked up; the no-route
 *    branch bumps IPSTATS_MIB_OUTNOROUTES, and routes over a loopback
 *    device get their own branch;
 *  - for all other routes the interval starts at sysctl_icmpv6_time and is
 *    halved once for every full 32 bits by which the route's destination
 *    prefix is shorter than 128 ((128 - plen) >> 5 shifts), then passed to
 *    xrlim_allow().
 *
 * NOTE(review): the second line of the parameter list, the declaration of
 * res, the early returns and the release of dst are not visible in this
 * excerpt. icmpv6_send() treats a zero result as "drop the message".
 */
158 static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
161 struct dst_entry *dst;
164 /* Informational messages are not limited. */
165 if (type & ICMPV6_INFOMSG_MASK)
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type == ICMPV6_PKT_TOOBIG)
173 * Look up the output route.
174 * XXX: perhaps the expire for routing entries cloned by
175 * this lookup should be more aggressive (not longer than timeout).
177 dst = ip6_route_output(sk, fl);
179 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
180 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
183 struct rt6_info *rt = (struct rt6_info *)dst;
184 int tmo = sysctl_icmpv6_time;
186 /* Give more bandwidth to wider prefixes. */
187 if (rt->rt6i_dst.plen < 128)
188 tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
190 res = xrlim_allow(dst, tmo);
197 * an inline helper for the "simple" if statement below
198 * checks if parameter problem report is caused by an
199 * unrecognized IPv6 option that has the Option Type
200 * highest-order two bits set to 10
/*
 * Test the option-type byte located @offset bytes into the network header
 * of @skb. @offset is first rebased from skb->nh.raw to skb->data, then one
 * byte is fetched with skb_header_pointer(). Returns true when the two
 * highest-order bits of that byte are binary 10 ((byte & 0xC0) == 0x80).
 *
 * NOTE(review): the handling of a NULL result from skb_header_pointer()
 * is on a line not visible in this excerpt.
 */
203 static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
207 offset += skb->nh.raw - skb->data;
208 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
211 return (*op & 0xC0) == 0x80;
/*
 * Finalise and transmit the ICMPv6 message queued on sk->sk_write_queue.
 * @sk:   control socket whose write queue holds the pending skb(s)
 * @fl:   flow; fl->fl6_src and fl->proto feed the pseudo-header checksum
 * @thdr: template ICMPv6 header copied into the first queued skb
 * @len:  length handed to csum_ipv6_magic() in the multi-skb case
 *
 * NOTE(review): the return statements, the initialisation of tmp_csum and
 * the remaining csum_ipv6_magic() arguments are not visible here.
 */
214 static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
217 struct icmp6hdr *icmp6h;
/* Nothing queued: there is no header to fill in. */
220 if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
/* The ICMPv6 header lives at the transport-header pointer of the first skb. */
223 icmp6h = (struct icmp6hdr*) skb->h.raw;
224 memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
/* The checksum field must be zero while the checksum is computed. */
225 icmp6h->icmp6_cksum = 0;
/* Single skb: extend its own running checksum with the header. */
227 if (skb_queue_len(&sk->sk_write_queue) == 1) {
228 skb->csum = csum_partial((char *)icmp6h,
229 sizeof(struct icmp6hdr), skb->csum);
230 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
/* Several skbs: accumulate every skb's partial checksum, then add the header. */
237 skb_queue_walk(&sk->sk_write_queue, skb) {
238 tmp_csum = csum_add(tmp_csum, skb->csum);
241 tmp_csum = csum_partial((char *)icmp6h,
242 sizeof(struct icmp6hdr), tmp_csum);
243 tmp_csum = csum_ipv6_magic(&fl->fl6_src,
245 len, fl->proto, tmp_csum);
246 icmp6h->icmp6_cksum = tmp_csum;
/* A computed checksum of zero is replaced by all-ones before sending. */
248 if (icmp6h->icmp6_cksum == 0)
249 icmp6h->icmp6_cksum = -1;
250 ip6_push_pending_frames(sk);
/*
 * Data-copy callback handed to ip6_append_data() by icmpv6_send() and
 * icmpv6_echo_reply().
 * @from:   really a struct icmpv6_msg describing the source skb and the
 *          offset inside it at which the payload starts
 * @offset: offset into that payload; added to msg->offset for the copy
 * @odd:    position argument given to csum_block_add() when folding the
 *          copied chunk's checksum into skb->csum
 * @skb:    skb being filled; its csum field accumulates the checksum
 *
 * NOTE(review): the destination/length arguments of
 * skb_copy_and_csum_bits() and the return value are on lines missing from
 * this excerpt.
 */
260 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
262 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
263 struct sk_buff *org_skb = msg->skb;
266 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
268 skb->csum = csum_block_add(skb->csum, csum, odd);
273 * Send an ICMP message in response to a packet in error
/*
 * Build and transmit an ICMPv6 error of @type/@code in response to @skb.
 * @skb:  the offending packet; its network header must lie inside the
 *        skb's buffer
 * @type: ICMPv6 type of the error to send
 * @code: ICMPv6 code
 * @info: 32-bit value stored (in network byte order) in icmp6_pointer
 * @dev:  device argument supplied by the caller; it is not referenced on
 *        any line visible in this excerpt (skb->dev is used instead)
 *
 * The reply goes to the source address of @skb and carries as much of the
 * offending packet as fits in IPV6_MIN_MTU after the IPv6 and ICMPv6
 * headers.
 *
 * NOTE(review): many control-flow lines (returns, gotos, labels, some
 * declarations) are missing from this excerpt; the inline notes describe
 * only what the visible statements do.
 */
275 void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
276 struct net_device *dev)
278 struct inet6_dev *idev = NULL;
279 struct ipv6hdr *hdr = skb->nh.ipv6h;
281 struct ipv6_pinfo *np;
282 struct in6_addr *saddr = NULL;
283 struct dst_entry *dst;
284 struct icmp6hdr tmp_hdr;
286 struct icmpv6_msg msg;
/* Sanity check: the whole IPv6 header must sit between skb->head and skb->tail. */
293 if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
297 * Make sure we respect the rules
298 * i.e. RFC 1885 2.4(e)
299 * Rule (e.1) is enforced by not using icmpv6_send
300 * in any code that processes icmp errors.
302 addr_type = ipv6_addr_type(&hdr->daddr);
/* Is the packet's destination one of our own addresses on the receiving device? */
304 if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
/*
 * Packet sent to a multicast address, or not addressed to this host at
 * link level: only Packet Too Big and "unrecognised option" Parameter
 * Problem (option type bits 10, see opt_unrec) pass this test.
 */
311 if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
312 if (type != ICMPV6_PKT_TOOBIG &&
313 !(type == ICMPV6_PARAMPROB &&
314 code == ICMPV6_UNK_OPTION &&
315 (opt_unrec(skb, info))))
321 addr_type = ipv6_addr_type(&hdr->saddr);
/* Link-local source: remember the receiving interface index. */
327 if (addr_type & IPV6_ADDR_LINKLOCAL)
328 iif = skb->dev->ifindex;
331 * Must not send if we know that source is Anycast also.
332 * for now we don't know that.
334 if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
335 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
340 * Never answer to a ICMP packet.
342 if (is_ineligible(skb)) {
343 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
/* Describe the reply flow: ICMPv6, destined to the offender's source address. */
347 memset(&fl, 0, sizeof(fl));
348 fl.proto = IPPROTO_ICMPV6;
349 ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
351 ipv6_addr_copy(&fl.fl6_src, saddr);
353 fl.fl_icmp_type = type;
354 fl.fl_icmp_code = code;
/* From here on this CPU's control socket is used under its lock. */
356 if (icmpv6_xmit_lock())
359 sk = icmpv6_socket->sk;
/* Apply the output rate limit (see icmpv6_xrlim_allow). */
362 if (!icmpv6_xrlim_allow(sk, type, &fl))
/* Template header; the checksum is filled in by icmpv6_push_pending_frames(). */
365 tmp_hdr.icmp6_type = type;
366 tmp_hdr.icmp6_code = code;
367 tmp_hdr.icmp6_cksum = 0;
368 tmp_hdr.icmp6_pointer = htonl(info);
370 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
371 fl.oif = np->mcast_oif;
373 err = ip6_dst_lookup(sk, &dst, &fl);
/*
 * NOTE(review): confirm whether xfrm_lookup() already drops the dst
 * reference when it fails; if it does, jumping to out_dst_release here
 * would release it a second time.
 */
376 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
377 goto out_dst_release;
/* Hop limit candidates: socket multicast/unicast setting, route metric, device default. */
379 if (ipv6_addr_is_multicast(&fl.fl6_dst))
380 hlimit = np->mcast_hops;
382 hlimit = np->hop_limit;
384 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
386 hlimit = ipv6_get_hoplimit(dst->dev);
/* Payload = offending packet starting at its network header ... */
389 msg.offset = skb->nh.raw - skb->data;
391 len = skb->len - msg.offset;
/* ... truncated so the whole error fits in the IPv6 minimum MTU. */
392 len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
394 LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n");
395 goto out_dst_release;
/* Takes a reference on the inet6_dev; the idev != NULL check below guards its release. */
398 idev = in6_dev_get(skb->dev);
400 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
401 len + sizeof(struct icmp6hdr),
402 sizeof(struct icmp6hdr),
403 hlimit, NULL, &fl, (struct rt6_info*)dst,
406 ip6_flush_pending_frames(sk);
409 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
/* Per-type output counter for the error range, then the total output counter. */
411 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
412 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
413 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
416 if (likely(idev != NULL))
421 icmpv6_xmit_unlock();
/*
 * Answer the ICMPv6 Echo Request contained in @skb with an Echo Reply.
 *
 * The reply header is a copy of the request header with only the type
 * changed, so the remaining header fields of the request are echoed back.
 * The reply is addressed to the request's source address and its output
 * interface is preset to the interface the request arrived on.
 *
 * NOTE(review): returns, gotos, labels and the initialisation of msg are
 * on lines missing from this excerpt; the inline notes cover only the
 * visible statements.
 */
424 static void icmpv6_echo_reply(struct sk_buff *skb)
427 struct inet6_dev *idev;
428 struct ipv6_pinfo *np;
429 struct in6_addr *saddr = NULL;
430 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
431 struct icmp6hdr tmp_hdr;
433 struct icmpv6_msg msg;
434 struct dst_entry *dst;
/* Default reply source: the address the request was sent to. */
438 saddr = &skb->nh.ipv6h->daddr;
/* NOTE(review): the statement controlled by this test is not visible here. */
440 if (!ipv6_unicast_destination(skb))
443 memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
444 tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
446 memset(&fl, 0, sizeof(fl));
447 fl.proto = IPPROTO_ICMPV6;
448 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
450 ipv6_addr_copy(&fl.fl6_src, saddr);
451 fl.oif = skb->dev->ifindex;
452 fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
/* From here on this CPU's control socket is used under its lock. */
454 if (icmpv6_xmit_lock())
457 sk = icmpv6_socket->sk;
460 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
461 fl.oif = np->mcast_oif;
463 err = ip6_dst_lookup(sk, &dst, &fl);
/*
 * NOTE(review): confirm whether xfrm_lookup() already drops the dst
 * reference when it fails; if it does, jumping to out_dst_release here
 * would release it a second time.
 */
466 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
467 goto out_dst_release;
/* Hop limit candidates: socket multicast/unicast setting, route metric, device default. */
469 if (ipv6_addr_is_multicast(&fl.fl6_dst))
470 hlimit = np->mcast_hops;
472 hlimit = np->hop_limit;
474 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
476 hlimit = ipv6_get_hoplimit(dst->dev);
/* Takes a reference on the inet6_dev; the idev != NULL check below guards its release. */
478 idev = in6_dev_get(skb->dev);
/* Queue the request's remaining skb->len bytes behind a fresh ICMPv6 header. */
483 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
484 sizeof(struct icmp6hdr), hlimit, NULL, &fl,
485 (struct rt6_info*)dst, MSG_DONTWAIT);
488 ip6_flush_pending_frames(sk);
491 err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
493 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
494 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
497 if (likely(idev != NULL))
502 icmpv6_xmit_unlock();
/*
 * Deliver a received ICMPv6 error to whoever sent the packet quoted in it.
 * @skb:  skb whose data starts at the quoted (inner) IPv6 header
 * @type: ICMPv6 type of the received error
 * @code: ICMPv6 code of the received error
 * @info: the 32-bit field following the ICMPv6 checksum, as passed by
 *        icmpv6_rcv()
 *
 * The upper-layer protocol of the quoted packet is located (skipping any
 * extension headers), then its registered err_handler and every matching
 * raw socket are notified.
 *
 * NOTE(review): the early returns, the declarations of the remaining
 * locals and the RCU read-side markers are on lines missing from this
 * excerpt.
 */
505 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
507 struct in6_addr *saddr, *daddr;
508 struct inet6_protocol *ipprot;
/* The quoted IPv6 header must be fully present in the linear area. */
514 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
517 nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
518 if (ipv6_ext_hdr(nexthdr)) {
519 /* now skip over extension headers */
520 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
/* No extension headers: the inner protocol header follows the IPv6 header. */
524 inner_offset = sizeof(struct ipv6hdr);
527 /* Check the header, including 8 bytes of inner protocol header. */
528 if (!pskb_may_pull(skb, inner_offset+8))
/* Outer addresses, used below to match raw sockets. */
531 saddr = &skb->nh.ipv6h->saddr;
532 daddr = &skb->nh.ipv6h->daddr;
534 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
535 Without this we will not able f.e. to make source routed
537 Corresponding argument (opt) to notifiers is already added.
541 hash = nexthdr & (MAX_INET_PROTOS - 1);
544 ipprot = rcu_dereference(inet6_protos[hash]);
545 if (ipprot && ipprot->err_handler)
546 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
549 read_lock(&raw_v6_lock);
550 if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
551 while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr,
552 skb->dev->ifindex))) {
553 rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
557 read_unlock(&raw_v6_lock);
561 * Handle icmp messages
/*
 * Receive handler for ICMPv6 (installed through icmpv6_protocol.handler).
 * @pskb:   address of the received skb pointer
 * @nhoffp: next-header offset argument of the inet6 handler interface; not
 *          referenced on any visible line
 *
 * Visible flow: count the message, verify the ICMPv6 checksum, pull the
 * ICMPv6 header, update the per-type input counters and dispatch on the
 * message type. Echo requests are answered via icmpv6_echo_reply(); Packet
 * Too Big feeds rt6_pmtu_discovery() and then, like Destination
 * Unreachable, Time Exceeded and Parameter Problem, is passed to
 * icmpv6_notify(); MLD query/report go to the igmp6_event_* handlers; an
 * unknown type is logged and, unless ICMPV6_INFOMSG_MASK marks it as
 * informational, handed to icmpv6_notify() as well.
 *
 * NOTE(review): break/goto/return statements, the neighbour-discovery
 * call and the labels around the ICMP6_MIB_INERRORS counter are on lines
 * missing from this excerpt.
 */
564 static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
566 struct sk_buff *skb = *pskb;
567 struct net_device *dev = skb->dev;
568 struct inet6_dev *idev = __in6_dev_get(dev);
569 struct in6_addr *saddr, *daddr;
570 struct ipv6hdr *orig_hdr;
571 struct icmp6hdr *hdr;
574 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
576 saddr = &skb->nh.ipv6h->saddr;
577 daddr = &skb->nh.ipv6h->daddr;
579 /* Perform checksum. */
/*
 * Hardware-provided checksum: trust it only if it verifies against the
 * pseudo-header; otherwise fall back to CHECKSUM_NONE so the software
 * path below re-checks the packet.
 */
580 if (skb->ip_summed == CHECKSUM_HW) {
581 skb->ip_summed = CHECKSUM_UNNECESSARY;
582 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
584 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 hw checksum failed\n");
585 skb->ip_summed = CHECKSUM_NONE;
/* Software verification over the whole ICMPv6 message. */
588 if (skb->ip_summed == CHECKSUM_NONE) {
589 if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
590 skb_checksum(skb, 0, skb->len, 0))) {
591 LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
592 NIP6(*saddr), NIP6(*daddr));
/* Advance skb->data past the ICMPv6 header; the header stays reachable via skb->h.raw. */
597 if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
600 hdr = (struct icmp6hdr *) skb->h.raw;
602 type = hdr->icmp6_type;
/* Per-type input counters, indexed relative to the first type of each range. */
604 if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
605 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
606 else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
607 ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
610 case ICMPV6_ECHO_REQUEST:
611 icmpv6_echo_reply(skb);
614 case ICMPV6_ECHO_REPLY:
615 /* we couldn't care less */
618 case ICMPV6_PKT_TOOBIG:
619 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
620 standard destination cache. Seems, only "advanced"
621 destination cache will allow to solve this problem
624 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
626 hdr = (struct icmp6hdr *) skb->h.raw;
627 orig_hdr = (struct ipv6hdr *) (hdr + 1);
628 rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
629 ntohl(hdr->icmp6_mtu));
632 * Drop through to notify
635 case ICMPV6_DEST_UNREACH:
636 case ICMPV6_TIME_EXCEED:
637 case ICMPV6_PARAMPROB:
638 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
641 case NDISC_ROUTER_SOLICITATION:
642 case NDISC_ROUTER_ADVERTISEMENT:
643 case NDISC_NEIGHBOUR_SOLICITATION:
644 case NDISC_NEIGHBOUR_ADVERTISEMENT:
649 case ICMPV6_MGM_QUERY:
650 igmp6_event_query(skb);
653 case ICMPV6_MGM_REPORT:
654 igmp6_event_report(skb);
657 case ICMPV6_MGM_REDUCTION:
658 case ICMPV6_NI_QUERY:
659 case ICMPV6_NI_REPLY:
660 case ICMPV6_MLD2_REPORT:
661 case ICMPV6_DHAAD_REQUEST:
662 case ICMPV6_DHAAD_REPLY:
663 case ICMPV6_MOBILE_PREFIX_SOL:
664 case ICMPV6_MOBILE_PREFIX_ADV:
668 LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n");
671 if (type & ICMPV6_INFOMSG_MASK)
675 * error of unknown type.
676 * must pass to upper level
679 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
685 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
/*
 * Boot-time initialisation of ICMPv6.
 * @ops: address-family descriptor supplied by the caller; not referenced
 *       on any visible line
 *
 * For every possible CPU a raw kernel socket for IPPROTO_ICMPV6 is created
 * and stored in that CPU's __icmpv6_socket slot; each socket is switched
 * to atomic allocations and removed from the protocol's hash (unhash).
 * Finally the ICMPv6 receive handler is registered. On failure the
 * sockets created for CPUs below the failing index are released again.
 *
 * NOTE(review): the return statements, error-code assignments, the "fail"
 * label and the left-hand side of the buffer-size assignment are on lines
 * missing from this excerpt.
 */
690 int __init icmpv6_init(struct net_proto_family *ops)
695 for (i = 0; i < NR_CPUS; i++) {
696 if (!cpu_possible(i))
699 err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
700 &per_cpu(__icmpv6_socket, i));
703 "Failed to initialize the ICMP6 control socket "
709 sk = per_cpu(__icmpv6_socket, i)->sk;
710 sk->sk_allocation = GFP_ATOMIC;
712 /* Enough space for 2 64K ICMP packets, including
713 * sk_buff struct overhead.
716 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
718 sk->sk_prot->unhash(sk);
722 if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
723 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
731 for (j = 0; j < i; j++) {
732 if (!cpu_possible(j))
734 sock_release(per_cpu(__icmpv6_socket, j));
/*
 * Undo icmpv6_init(): release the control socket of every possible CPU,
 * then unregister the ICMPv6 receive handler.
 * NOTE(review): the sockets are released before the protocol handler is
 * removed - confirm no receive path can still reach them in between.
 */
740 void icmpv6_cleanup(void)
744 for (i = 0; i < NR_CPUS; i++) {
745 if (!cpu_possible(i))
747 sock_release(per_cpu(__icmpv6_socket, i));
749 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/*
 * Table of per-code entries for ICMPv6 error translation.
 * NOTE(review): the member list, the initialiser values and the array
 * declarator are on lines missing from this excerpt; presumably this is
 * the tab_unreach[] array that icmpv6_err_convert() indexes by code and
 * reads .err and .fatal from - confirm.
 */
752 static struct icmp6_err {
760 { /* ADM_PROHIBITED */
764 { /* Was NOT_NEIGHBOUR, now reserved */
/*
 * Translate a received ICMPv6 error into an errno value.
 * @type: ICMPv6 type of the error
 * @code: ICMPv6 code of the error
 * @err:  output; receives the errno chosen for this error
 *
 * For Destination Unreachable with a code up to ICMPV6_PORT_UNREACH both
 * the errno and the "fatal" flag are taken from tab_unreach[code]. Packet
 * Too Big, Parameter Problem and Time Exceeded have their own cases.
 *
 * NOTE(review): the bodies of the other cases, the default values and the
 * return statement are on lines missing from this excerpt. @code is a
 * signed int and only its upper bound is checked on the visible lines, so
 * callers are presumably expected to pass a non-negative code - confirm.
 */
778 int icmpv6_err_convert(int type, int code, int *err)
785 case ICMPV6_DEST_UNREACH:
787 if (code <= ICMPV6_PORT_UNREACH) {
788 *err = tab_unreach[code].err;
789 fatal = tab_unreach[code].fatal;
793 case ICMPV6_PKT_TOOBIG:
797 case ICMPV6_PARAMPROB:
802 case ICMPV6_TIME_EXCEED:
/*
 * Sysctl table for ICMPv6. The visible entry exposes sysctl_icmpv6_time
 * under the name "ratelimit" as a plain int handled by proc_dointvec.
 * NOTE(review): the variable is initialised in jiffies (1*HZ) and
 * proc_dointvec performs no unit conversion, so the value seen by
 * user space is presumably in jiffies - confirm. The rest of the entry
 * and the table terminator are not visible in this excerpt.
 */
811 ctl_table ipv6_icmp_table[] = {
813 .ctl_name = NET_IPV6_ICMP_RATELIMIT,
814 .procname = "ratelimit",
815 .data = &sysctl_icmpv6_time,
816 .maxlen = sizeof(int),
818 .proc_handler = &proc_dointvec