2 * Linux NET3: IP/IP protocol decoder.
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
10 * Alan Cox : Merged and made usable non-modular (it's so tiny it's silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
29 /* tunnel.c: an IP tunnel driver
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
58 /* Things I wish I had known when writing the tunnel driver:
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
78 find out how much more space you can allocate by calling "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
90 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
92 For comments look at net/ipv4/ip_gre.c --ANK
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
112 #include <net/sock.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
// HASH() reduces an address to a bucket index in 0..15: the value is XORed
// with itself shifted right by four bits and then masked with 0xF.
// NOTE(review): addr is not parenthesized inside the expansion; the callers
// visible in this file only pass plain identifiers.
120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
// Forward declarations for callbacks that are referenced before they are defined.
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124 static void ipip_tunnel_setup(struct net_device *dev);
// Device allocated and registered by ipip_init(); uninit and ioctl compare
// against it to special-case that device.
126 static struct net_device *ipip_fb_tunnel_dev;
// Tunnel chains, one table per matching class. ipip_tunnel_lookup() compares
// remote and local for tunnels_r_l, remote only for tunnels_r, local only for
// tunnels_l, and neither for the single tunnels_wc slot.
128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_wc[1];
// Table of tables, indexed as tunnels[prio][h] by __ipip_bucket();
// order is wc, l, r, r_l.
132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
// Taken for writing around the chain updates visible in link, unlink and
// uninit, and for reading in ipip_err() and ipip_rcv().
134 static DEFINE_RWLOCK(ipip_lock);
// Look up the tunnel for an outer (remote, local) address pair.
// Chains are searched from most to least specific: remote and local, then
// remote only, then local only, then the single wildcard slot. A candidate
// is only considered when its device has IFF_UP set. The statements executed
// on a match and the final return are not part of this excerpt.
// Both call sites visible in this file (ipip_err, ipip_rcv) hold ipip_lock
// for reading around the call.
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
// Bucket indices for the remote and the local address.
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
// Fully specified tunnels are bucketed by the XOR of both indices.
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
// Tunnels matched on the remote address only.
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
// Tunnels matched on the local address only.
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
// Last resort: the wildcard slot, if occupied and up.
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
// Return the address of the chain head that a tunnel with these parameters
// belongs to, as &tunnels[prio][h]. The lines that compute prio and h are
// missing from this excerpt; presumably they are derived from remote and
// local via HASH() -- TODO confirm against the full file.
160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
175 return &tunnels[prio][h];
// Convenience wrapper: find the chain head for an existing tunnel from its
// stored parameters.
178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
180 return __ipip_bucket(&t->parms);
// Remove tunnel t from its hash chain.
// The loop walks the chain through a pointer to each link field, starting at
// the bucket head returned by ipip_bucket(). Only the write-locked section
// is visible here; the comparison against t and the statement executed under
// the lock are on lines missing from this excerpt (presumably the splice
// that drops t from the chain -- TODO confirm).
183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
185 struct ip_tunnel **tp;
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
189 write_lock_bh(&ipip_lock);
191 write_unlock_bh(&ipip_lock);
// Insert tunnel t into the hash chain selected by ipip_bucket(t).
// The chain update itself happens under the write side of ipip_lock; the
// statements inside and before the locked section are not part of this
// excerpt.
197 static void ipip_tunnel_link(struct ip_tunnel *t)
199 struct ip_tunnel **tp = ipip_bucket(t);
202 write_lock_bh(&ipip_lock);
204 write_unlock_bh(&ipip_lock);
// Find the tunnel whose stored saddr/daddr equal those in parms and, on the
// visible creation path, allocate, register and link a new tunnel device.
// Callers in this file pass create as 0 or as (cmd == SIOCADDTUNNEL). The
// test of create, the returns and the error paths are on lines missing from
// this excerpt.
207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
// Exact match on both endpoints within the bucket for these parameters.
215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
// Device name: either the caller-supplied name (bounded copy) or the
// template below. The doubled percent sign makes sprintf emit a literal
// percent, so name becomes the pattern tunl%d. The condition selecting
// between the two is not visible.
223 strlcpy(name, parms->name, IFNAMSIZ);
225 sprintf(name, "tunl%%d");
// Private area sized for one struct ip_tunnel; ipip_tunnel_setup fills in
// the device callbacks.
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
// A percent sign in the name means it is a pattern that still has to be
// resolved to a concrete device name.
231 if (strchr(name, '%')) {
232 if (dev_alloc_name(dev, name) < 0)
236 nt = netdev_priv(dev);
237 dev->init = ipip_tunnel_init;
240 if (register_netdevice(dev) < 0)
// Make the new tunnel visible to lookups.
244 ipip_tunnel_link(nt);
// Device uninit callback (installed as dev->uninit by ipip_tunnel_setup).
// For the fallback device the wildcard slot is cleared under the write lock.
// The unlink call below is presumably the else branch for every other
// device; the else line itself is not visible in this excerpt.
252 static void ipip_tunnel_uninit(struct net_device *dev)
254 if (dev == ipip_fb_tunnel_dev) {
255 write_lock_bh(&ipip_lock);
256 tunnels_wc[0] = NULL;
257 write_unlock_bh(&ipip_lock);
259 ipip_tunnel_unlink(netdev_priv(dev));
// ICMP error handler for IPIP; installed as .err_handler of ipip_handler.
// Two variants are visible back to back: the one directly under
// #ifndef I_WISH_WORLD_WERE_PERFECT, and a second one that declares iph,
// type and code again and relays the error through icmp_send(). The
// #else / #endif lines separating them are not part of this excerpt, and
// many other lines (switch headers, returns, closing braces) are missing.
263 static int ipip_err(struct sk_buff *skb, u32 info)
265 #ifndef I_WISH_WORLD_WERE_PERFECT
// First variant: classify the ICMP type and code, then only update the
// per-tunnel error bookkeeping.
267 /* It is not :-( All the routers (except for Linux) return only
268 8 bytes of packet payload. It means, that precise relaying of
269 ICMP in the real Internet is absolutely infeasible.
271 struct iphdr *iph = (struct iphdr*)skb->data;
272 const int type = icmp_hdr(skb)->type;
273 const int code = icmp_hdr(skb)->code;
279 case ICMP_PARAMETERPROB:
282 case ICMP_DEST_UNREACH:
285 case ICMP_PORT_UNREACH:
286 /* Impossible event. */
288 case ICMP_FRAG_NEEDED:
289 /* Soft state for pmtu is maintained by IP core. */
292 /* All others are translated to HOST_UNREACH.
293 rfc2003 contains "deep thoughts" about NET_UNREACH,
294 I believe they are just ether pollution. --ANK
299 case ICMP_TIME_EXCEEDED:
300 if (code != ICMP_EXC_TTL)
// Lookup takes (remote, local); the header at skb->data supplies daddr as
// remote and saddr as local, the reverse of the order used in ipip_rcv().
307 read_lock(&ipip_lock);
308 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
309 if (t == NULL || t->parms.iph.daddr == 0)
313 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
// Errors arriving within IPTUNNEL_ERR_TIMEO of the previous one are treated
// differently from the first; the statements in both branches are missing.
316 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
// NOTE(review): this store happens while only the read side of ipip_lock
// is held.
320 t->err_time = jiffies;
322 read_unlock(&ipip_lock);
// Second variant: translate the ICMP error into one aimed at the sender of
// the encapsulated packet. dp and len are declared on lines not visible here.
325 struct iphdr *iph = (struct iphdr*)dp;
326 int hlen = iph->ihl<<2;
328 const int type = icmp_hdr(skb)->type;
329 const int code = icmp_hdr(skb)->code;
334 struct sk_buff *skb2;
// Require the outer header plus a complete inner IP header.
338 if (len < hlen + sizeof(struct iphdr))
// eiph: the header located hlen bytes after dp, i.e. the encapsulated one.
340 eiph = (struct iphdr*)(dp + hlen);
345 case ICMP_PARAMETERPROB:
// Top byte of the ICMP header word is taken as the problem offset.
346 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
350 /* So... This guy found something strange INSIDE encapsulated
351 packet. Well, he is fool, but what can we do ?
// Rebase the offset by the outer header length before relaying it.
353 rel_type = ICMP_PARAMETERPROB;
354 rel_info = htonl((n - hlen) << 24);
357 case ICMP_DEST_UNREACH:
360 case ICMP_PORT_UNREACH:
361 /* Impossible event. */
363 case ICMP_FRAG_NEEDED:
364 /* And it is the only really necessary thing :-) */
365 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
369 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
370 if (n > ntohs(eiph->tot_len))
375 /* All others are translated to HOST_UNREACH.
376 rfc2003 contains "deep thoughts" about NET_UNREACH,
377 I believe, it is just ether pollution. --ANK
379 rel_type = ICMP_DEST_UNREACH;
380 rel_code = ICMP_HOST_UNREACH;
384 case ICMP_TIME_EXCEEDED:
385 if (code != ICMP_EXC_TTL)
390 /* Prepare fake skb to feed it to icmp_send */
391 skb2 = skb_clone(skb, GFP_ATOMIC);
394 dst_release(skb2->dst);
// NOTE(review): eiph is dp + hlen; if dp equals skb->data (declaration not
// visible) this difference is negative, and the operands look reversed --
// confirm against the full file.
396 skb_pull(skb2, skb->data - (u8*)eiph);
397 skb_reset_network_header(skb2);
399 /* Try to guess incoming interface */
400 memset(&fl, 0, sizeof(fl));
401 fl.fl4_daddr = eiph->saddr;
402 fl.fl4_tos = RT_TOS(eiph->tos);
403 fl.proto = IPPROTO_IPIP;
// NOTE(review): the flow populated just above is fl, but the call passes
// &key, which is not declared on any visible line; the later lookup in this
// function passes &fl. Likely a latent bug in this variant -- confirm.
404 if (ip_route_output_key(&init_net, &rt, &key)) {
408 skb2->dev = rt->u.dst.dev;
410 /* route "incoming" packet */
411 if (rt->rt_flags&RTCF_LOCAL) {
414 fl.fl4_daddr = eiph->daddr;
415 fl.fl4_src = eiph->saddr;
416 fl.fl4_tos = eiph->tos;
// Only continue when the inner packet would leave through a tunnel device.
417 if (ip_route_output_key(&init_net, &rt, &fl) ||
418 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
425 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
426 skb2->dst->dev->type != ARPHRD_TUNNEL) {
432 /* change mtu on this route */
433 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
434 if (n > dst_mtu(skb2->dst)) {
438 skb2->dst->ops->update_pmtu(skb2->dst, n);
439 } else if (type == ICMP_TIME_EXCEEDED) {
// A tunnel with a fixed TTL reports TTL expiry as host unreachable.
440 struct ip_tunnel *t = netdev_priv(skb2->dev);
441 if (t->parms.iph.ttl) {
442 rel_type = ICMP_DEST_UNREACH;
443 rel_code = ICMP_HOST_UNREACH;
447 icmp_send(skb2, rel_type, rel_code, rel_info);
// On decapsulation, carry an ECN CE mark over from the outer header: when
// the outer TOS tests as CE, CE is set on the header that ip_hdr(skb)
// returns. The second parameter (skb) is declared on a line missing from
// this excerpt.
453 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
456 struct iphdr *inner_iph = ip_hdr(skb);
458 if (INET_ECN_is_ce(outer_iph->tos))
459 IP_ECN_set_ce(inner_iph);
// Receive path for encapsulated packets.
// Looks up the tunnel for the outer (saddr as remote, daddr as local) pair
// under the read side of ipip_lock; on a hit the skb is re-labelled as a
// plain IPv4 packet received on the tunnel device. The hand-off of the skb,
// the return values and the handling after the final unlock are on lines
// missing from this excerpt.
462 static int ipip_rcv(struct sk_buff *skb)
464 struct ip_tunnel *tunnel;
// Outer header; kept for the ECN propagation below.
465 const struct iphdr *iph = ip_hdr(skb);
467 read_lock(&ipip_lock);
468 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
// Policy check failed: the lock is dropped on this early-exit path.
469 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
470 read_unlock(&ipip_lock);
// The former network header offset becomes the mac header offset, and the
// network header is reset to the current data pointer.
477 skb->mac_header = skb->network_header;
478 skb_reset_network_header(skb);
479 skb->protocol = htons(ETH_P_IP);
480 skb->pkt_type = PACKET_HOST;
// Per-tunnel receive accounting.
482 tunnel->stat.rx_packets++;
483 tunnel->stat.rx_bytes += skb->len;
484 skb->dev = tunnel->dev;
// Drop the route attached to the outer packet.
485 dst_release(skb->dst);
488 ipip_ecn_decapsulate(iph, skb);
490 read_unlock(&ipip_lock);
// Unlock on the path where no tunnel matched (the enclosing brace lines are
// not visible).
493 read_unlock(&ipip_lock);
// Transmit callback (installed as dev->hard_start_xmit by ipip_tunnel_setup).
// Visible steps: recursion guard, IPv4-only check, destination and route
// selection, path MTU handling, error back-off, headroom check, and pushing
// a new outer IPv4 header with protocol IPPROTO_IPIP. Error exits, labels and
// the final send are on lines missing from this excerpt.
499 * This function assumes it is being called from dev_queue_xmit()
500 * and that skb is filled properly by that function.
503 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
505 struct ip_tunnel *tunnel = netdev_priv(dev);
506 struct net_device_stats *stats = &tunnel->stat;
// Template outer header configured for this tunnel.
507 struct iphdr *tiph = &tunnel->parms.iph;
508 u8 tos = tunnel->parms.iph.tos;
509 __be16 df = tiph->frag_off;
510 struct rtable *rt; /* Route to the other host */
511 struct net_device *tdev; /* Device to other host */
// Header of the packet being encapsulated.
512 struct iphdr *old_iph = ip_hdr(skb);
513 struct iphdr *iph; /* Our new IP header */
514 unsigned int max_headroom; /* The extra header space needed */
515 __be32 dst = tiph->daddr;
// A non-zero counter on entry is counted as a collision.
518 if (tunnel->recursion++) {
519 tunnel->stat.collisions++;
// Only IPv4 payloads are handled.
523 if (skb->protocol != htons(ETH_P_IP))
// Destination taken from the route already attached to the skb; the
// condition guarding this section is not visible (dst starts out as the
// configured daddr above).
531 if ((rt = (struct rtable*)skb->dst) == NULL) {
532 tunnel->stat.tx_fifo_errors++;
535 if ((dst = rt->rt_gateway) == 0)
// Route lookup for the outer packet.
540 struct flowi fl = { .oif = tunnel->parms.link,
543 .saddr = tiph->saddr,
544 .tos = RT_TOS(tos) } },
545 .proto = IPPROTO_IPIP };
546 if (ip_route_output_key(&init_net, &rt, &fl)) {
547 tunnel->stat.tx_carrier_errors++;
551 tdev = rt->u.dst.dev;
555 tunnel->stat.collisions++;
// Two alternative MTU sources: the outer route minus one IP header, or the
// MTU of the route attached to the skb (falling back to the device MTU).
// The condition choosing between them is not visible.
560 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
562 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
565 tunnel->stat.collisions++;
570 skb->dst->ops->update_pmtu(skb->dst, mtu);
// Inherit DF from the inner packet in addition to the configured value.
572 df |= (old_iph->frag_off&htons(IP_DF));
// Inner packet has DF set and does not fit: report FRAG_NEEDED with the MTU.
574 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
575 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
// While errors were recorded recently, signal link failure for this skb;
// err_count is reset below on a path whose condition is not visible.
580 if (tunnel->err_count > 0) {
581 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
583 dst_link_failure(skb);
585 tunnel->err_count = 0;
589 * Okay, now see if we can stuff it in the buffer as-is.
591 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
// Reallocate when there is not enough headroom or the skb is shared or
// cloned without being writable.
593 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
594 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
595 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
604 skb_set_owner_w(new_skb, skb->sk);
// Re-read the inner header pointer after the possible reallocation.
607 old_iph = ip_hdr(skb);
// The inner IP header becomes the transport header; space for the outer
// header is pushed in front and becomes the network header.
610 skb->transport_header = skb->network_header;
611 skb_push(skb, sizeof(struct iphdr));
612 skb_reset_network_header(skb);
613 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
614 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
// Swap the attached route for the outer route found above.
616 dst_release(skb->dst);
617 skb->dst = &rt->u.dst;
620 * Push down and install the IPIP header.
625 iph->ihl = sizeof(struct iphdr)>>2;
627 iph->protocol = IPPROTO_IPIP;
628 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
629 iph->daddr = rt->rt_dst;
630 iph->saddr = rt->rt_src;
// A configured TTL of zero means: copy the TTL of the inner packet.
632 if ((iph->ttl = tiph->ttl) == 0)
633 iph->ttl = old_iph->ttl;
642 dst_link_failure(skb);
// Derive link-dependent settings of the tunnel device (hard_header_len, mtu,
// iflink, IFF_POINTOPOINT) from the device that would carry the outer
// packets. Called from ipip_tunnel_init() and from the ioctl path when the
// link parameter changes.
650 static void ipip_tunnel_bind_dev(struct net_device *dev)
652 struct net_device *tdev = NULL;
653 struct ip_tunnel *tunnel;
656 tunnel = netdev_priv(dev);
657 iph = &tunnel->parms.iph;
// Route lookup toward the configured remote address; the condition guarding
// this section is not visible.
660 struct flowi fl = { .oif = tunnel->parms.link,
662 { .daddr = iph->daddr,
664 .tos = RT_TOS(iph->tos) } },
665 .proto = IPPROTO_IPIP };
667 if (!ip_route_output_key(&init_net, &rt, &fl)) {
668 tdev = rt->u.dst.dev;
671 dev->flags |= IFF_POINTOPOINT;
// No route result: fall back to the explicitly configured link index.
674 if (!tdev && tunnel->parms.link)
675 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
// NOTE(review): tdev can still be NULL at this point; the guard around the
// two dereferences below is not visible in this excerpt -- confirm it exists.
678 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
679 dev->mtu = tdev->mtu - sizeof(struct iphdr);
681 dev->iflink = tunnel->parms.link;
// Tunnel ioctl handler (installed as dev->do_ioctl by ipip_tunnel_setup).
// Three sections are visible: one that copies tunnel parameters out to user
// space, one that adds or changes a tunnel, and one that unregisters a
// device. The switch statement, its case labels, the return type line and
// the error assignments are on lines missing from this excerpt; only
// SIOCADDTUNNEL and SIOCCHGTUNNEL appear by name.
685 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
688 struct ip_tunnel_parm p;
// Query section. On the fallback device the caller supplies parameters that
// select which tunnel to report (locate without create).
694 if (dev == ipip_fb_tunnel_dev) {
695 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
699 t = ipip_tunnel_locate(&p, 0);
702 t = netdev_priv(dev);
703 memcpy(&p, &t->parms, sizeof(p));
704 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
// Add / change section: requires CAP_NET_ADMIN.
711 if (!capable(CAP_NET_ADMIN))
715 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
// Accept only a plain IPv4 IPIP header template: version 4, protocol IPIP,
// ihl 5, and no frag_off bits other than DF.
719 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
720 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
723 p.iph.frag_off |= htons(IP_DF);
// Creation is only requested for SIOCADDTUNNEL.
725 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
727 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
// A change must not flip the point-to-point nature of the device: a
// point-to-point device needs a remote address, any other must not have one.
734 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
735 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
// Re-key the tunnel: take it off its chain before the addresses that select
// the chain are changed. The matching re-link call is not visible here.
739 t = netdev_priv(dev);
740 ipip_tunnel_unlink(t);
741 t->parms.iph.saddr = p.iph.saddr;
742 t->parms.iph.daddr = p.iph.daddr;
// Device and broadcast addresses mirror the 4-byte tunnel endpoints.
743 memcpy(dev->dev_addr, &p.iph.saddr, 4);
744 memcpy(dev->broadcast, &p.iph.daddr, 4);
746 netdev_state_change(dev);
// For a change request also take over ttl, tos and frag_off, and re-bind
// the device when the link index changed.
752 if (cmd == SIOCCHGTUNNEL) {
753 t->parms.iph.ttl = p.iph.ttl;
754 t->parms.iph.tos = p.iph.tos;
755 t->parms.iph.frag_off = p.iph.frag_off;
756 if (t->parms.link != p.link) {
757 t->parms.link = p.link;
758 ipip_tunnel_bind_dev(dev);
759 netdev_state_change(dev);
// Report the resulting parameters back to the caller.
762 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
// Failure code depends on the request: ENOBUFS for add, ENOENT otherwise.
765 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
// Delete section: requires CAP_NET_ADMIN.
770 if (!capable(CAP_NET_ADMIN))
// Via the fallback device the tunnel to delete is selected by the supplied
// parameters; a result that is the fallback device itself is special-cased
// (the statement taken is not visible).
773 if (dev == ipip_fb_tunnel_dev) {
775 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
778 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
781 if (t->dev == ipip_fb_tunnel_dev)
785 unregister_netdevice(dev);
// get_stats callback: return the statistics block embedded in the private
// struct ip_tunnel of the device.
797 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
799 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
// change_mtu callback: range-check the requested MTU. Values below 68 or
// above 0xFFF8 minus the size of one IP header take the branch below; the
// statement in that branch and the accepting path are on lines missing from
// this excerpt.
// NOTE(review): the upper-bound comparison involves sizeof and is therefore
// done in an unsigned type; negative values are already caught by the first
// clause.
802 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
804 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
// Setup callback handed to alloc_netdev() by ipip_tunnel_locate(): installs
// the device callbacks and the initial defaults of a tunnel device.
810 static void ipip_tunnel_setup(struct net_device *dev)
812 dev->uninit = ipip_tunnel_uninit;
813 dev->hard_start_xmit = ipip_tunnel_xmit;
814 dev->get_stats = ipip_tunnel_get_stats;
815 dev->do_ioctl = ipip_tunnel_ioctl;
816 dev->change_mtu = ipip_tunnel_change_mtu;
817 dev->destructor = free_netdev;
// Defaults: room for the largest link-layer header plus one IP header, and
// an MTU of one Ethernet payload minus the outer IP header. Both are
// overwritten by ipip_tunnel_bind_dev() from the underlying device.
819 dev->type = ARPHRD_TUNNEL;
820 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
821 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
822 dev->flags = IFF_NOARP;
// Device init callback (assigned to dev->init in ipip_tunnel_locate):
// records the device name in the tunnel parameters, mirrors the 4-byte
// endpoint addresses into the device address fields, and binds to the
// underlying device. The return statement is not part of this excerpt.
827 static int ipip_tunnel_init(struct net_device *dev)
829 struct ip_tunnel *tunnel;
831 tunnel = netdev_priv(dev);
834 strcpy(tunnel->parms.name, dev->name);
836 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
837 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
839 ipip_tunnel_bind_dev(dev);
// Init callback of the fallback device (assigned to its dev->init in
// ipip_init): names the tunnel after the device, marks the header template
// as IPIP and installs the tunnel in the wildcard slot. Other header fields
// and the return statement are on lines missing from this excerpt.
// NOTE(review): the store to tunnels_wc[0] is not bracketed by ipip_lock on
// any visible line, unlike the clearing store in ipip_tunnel_uninit().
844 static int __init ipip_fb_tunnel_init(struct net_device *dev)
846 struct ip_tunnel *tunnel = netdev_priv(dev);
847 struct iphdr *iph = &tunnel->parms.iph;
850 strcpy(tunnel->parms.name, dev->name);
853 iph->protocol = IPPROTO_IPIP;
857 tunnels_wc[0] = tunnel;
// Handler descriptor registered with xfrm4_tunnel_register() in ipip_init()
// and removed again in ipip_fini(). Only the error handler member is visible
// in this excerpt; the remaining initializers are on missing lines.
861 static struct xfrm_tunnel ipip_handler = {
863 .err_handler = ipip_err,
// Log message identifying the driver, placed in init data. The statement
// that prints it is not part of this excerpt.
867 static char banner[] __initdata =
868 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
// Module init: register the tunnel handler, then allocate and register the
// fallback tunnel device. The two calls at the end undo these steps in
// reverse order; the labels, gotos and returns that route to them are on
// lines missing from this excerpt.
870 static int __init ipip_init(void)
876 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
877 printk(KERN_INFO "ipip init: can't register tunnel\n");
// The remaining alloc_netdev() arguments are on lines not visible here.
881 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
884 if (!ipip_fb_tunnel_dev) {
889 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
891 if ((err = register_netdev(ipip_fb_tunnel_dev)))
// Unwind: release the device, then deregister the handler.
896 free_netdev(ipip_fb_tunnel_dev);
898 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
// Unregister every tunnel device still linked in the l, r and r_l tables.
// prio starts at 1, so the wildcard table (index 0 of tunnels) is skipped
// here; ipip_fini() unregisters the fallback device separately.
902 static void __exit ipip_destroy_tunnels(void)
906 for (prio = 1; prio < 4; prio++) {
908 for (h = 0; h < HASH_SIZE; h++) {
// The loop re-reads the chain head each time, so it only terminates if
// unregistering removes the tunnel from its chain -- presumably through
// dev->uninit, which is ipip_tunnel_uninit and calls ipip_tunnel_unlink;
// TODO confirm that unregister_netdevice invokes it before returning.
910 while ((t = tunnels[prio][h]) != NULL)
911 unregister_netdevice(t->dev);
// Module exit: deregister the tunnel handler (logging on failure), tear down
// all remaining tunnels, then unregister the fallback device.
// NOTE(review): unregister_netdevice is normally called with the RTNL lock
// held; any lock and unlock calls around the last two statements are on
// lines missing from this excerpt -- confirm against the full file.
916 static void __exit ipip_fini(void)
918 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
919 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
922 ipip_destroy_tunnels();
923 unregister_netdevice(ipip_fb_tunnel_dev);
// Module entry and exit points, and the license declaration.
927 module_init(ipip_init);
928 module_exit(ipip_fini);
929 MODULE_LICENSE("GPL");