2 * Linux NET3: IP/IP protocol decoder.
4 * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
7 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
10 * Alan Cox : Merged and made usable non-modular (it's so tiny it's silly as
11 * a module taking up 2 pages).
12 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13 * to keep ip_forward happy.
14 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
16 * David Woodhouse : Perform some basic ICMP handling.
17 * IPIP Routing without decapsulation.
18 * Carlos Picoto : GRE over IP support
19 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20 * I do not want to merge them together.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
29 /* tunnel.c: an IP tunnel driver
31 The purpose of this driver is to provide an IP tunnel through
32 which you can tunnel network traffic transparently across subnets.
34 This was written by looking at Nick Holloway's dummy driver
35 Thanks for the great code!
37 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
40 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 dev->hard_header/hard_header_len changed to use no headers.
42 Comments/bracketing tweaked.
43 Made the tunnels use dev->name not tunnel: when error reporting.
46 -Alan Cox (Alan.Cox@linux.org) 21 March 95
49 Changed to tunnel to destination gateway in addition to the
50 tunnel's pointopoint address
51 Almost completely rewritten
52 Note: There is currently no firewall or ICMP handling done.
54 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
58 /* Things I wish I had known when writing the tunnel driver:
60 When the tunnel_xmit() function is called, the skb contains the
61 packet to be sent (plus a great deal of extra info), and dev
62 contains the tunnel device that _we_ are.
64 When we are passed a packet, we are expected to fill in the
65 source address with our source IP address.
67 What is the proper way to allocate, copy and free a buffer?
68 After you allocate it, it is a "0 length" chunk of memory
69 starting at zero. If you want to add headers to the buffer
70 later, you'll have to call "skb_reserve(skb, amount)" with
71 the amount of memory you want reserved. Then, you call
72 "skb_put(skb, amount)" with the amount of space you want in
73 the buffer. skb_put() returns a pointer to the top (#0) of
74 that buffer. skb->len is set to the amount of space you have
75 "allocated" with skb_put(). You can then write up to skb->len
76 bytes to that buffer. If you need more, you can call skb_put()
77 again with the additional amount of space you need. You can
78 find out how much more space you can allocate by calling "skb_tailroom(skb)".
80 Now, to add header space, call "skb_push(skb, header_len)".
81 This creates space at the beginning of the buffer and returns
82 a pointer to this new space. If later you need to strip a
83 header from a buffer, call "skb_pull(skb, header_len)".
84 skb_headroom() will return how much space is left at the top
85 of the buffer (before the main data). Remember, this headroom
86 space must be reserved before the skb_put() function is called.
90 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
92 For comments look at net/ipv4/ip_gre.c --ANK
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
112 #include <net/sock.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
/* Fold an address into a bucket index; the result is masked with 0xF. */
120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
/* Forward declarations of device callbacks defined further down the file. */
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124 static void ipip_tunnel_setup(struct net_device *dev);
/* The fallback tunnel device; its tunnel is stored in tunnels_wc[0]. */
126 static struct net_device *ipip_fb_tunnel_dev;
/*
 * Tunnel tables: remote+local (r_l), remote only (r), local only (l) and a
 * single wildcard slot (wc). tunnels[] indexes them in the order
 * wc, l, r, r_l.
 */
128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_wc[1];
132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
/* Reader/writer lock taken around table lookups and table updates below. */
134 static DEFINE_RWLOCK(ipip_lock);
/*
 * Find a tunnel for the given remote/local address pair.
 *
 * The visible lines scan, in this order: tunnels_r_l (both addresses must
 * match), tunnels_r (remote only), tunnels_l (local only), and finally the
 * wildcard slot. Every test also requires IFF_UP on the tunnel's device.
 *
 * NOTE(review): the declaration of `t` and the return statements are not
 * visible in this listing.
 */
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
138 unsigned h0 = HASH(remote);
139 unsigned h1 = HASH(local);
/* Fully specified tunnels are chained under the XOR of both hashes. */
142 for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 if (local == t->parms.iph.saddr &&
144 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
147 for (t = tunnels_r[h0]; t; t = t->next) {
148 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 for (t = tunnels_l[h1]; t; t = t->next) {
152 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
/* Last resort: the single wildcard tunnel, if present and up. */
155 if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
/*
 * Return the address of the chain head in tunnels[][] where a tunnel with
 * these parameters belongs.
 *
 * NOTE(review): the computation of `prio` and `h` is not visible in this
 * listing; presumably both are derived from remote/local - confirm.
 */
160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
162 __be32 remote = parms->iph.daddr;
163 __be32 local = parms->iph.saddr;
175 return &tunnels[prio][h];
/* Convenience wrapper: the bucket for an existing tunnel, from its parms. */
178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
180 return __ipip_bucket(&t->parms);
/*
 * Remove tunnel `t` from its hash chain.
 *
 * Walks the chain that starts at ipip_bucket(t). The write lock is taken
 * with bottom halves disabled around the update; the match test and the
 * pointer update themselves are not visible in this listing.
 */
183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
185 struct ip_tunnel **tp;
187 for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
189 write_lock_bh(&ipip_lock);
191 write_unlock_bh(&ipip_lock);
/*
 * Insert tunnel `t` into the chain selected by ipip_bucket(t).
 *
 * The write lock is held (bottom halves disabled) around the insertion;
 * the insertion statements themselves are not visible in this listing.
 */
197 static void ipip_tunnel_link(struct ip_tunnel *t)
199 struct ip_tunnel **tp = ipip_bucket(t);
202 write_lock_bh(&ipip_lock);
204 write_unlock_bh(&ipip_lock);
/*
 * Find the tunnel whose saddr/daddr equal those in `parms`.
 *
 * The visible creation path chooses a device name, allocates a netdev with
 * an ip_tunnel as private data, registers it and links the new tunnel into
 * the hash tables.
 *
 * NOTE(review): how `create` gates that path, and the return statements,
 * are not visible in this listing - confirm against the full source.
 */
207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
209 __be32 remote = parms->iph.daddr;
210 __be32 local = parms->iph.saddr;
211 struct ip_tunnel *t, **tp, *nt;
212 struct net_device *dev;
/* Exact match on both endpoints within the bucket for these parameters. */
215 for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
216 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
/* Use the caller-supplied name, bounded to IFNAMSIZ. */
223 strlcpy(name, parms->name, IFNAMSIZ);
/* "%%" yields a literal '%', so `name` becomes the template "tunl%d". */
225 sprintf(name, "tunl%%d");
227 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
231 nt = netdev_priv(dev);
232 dev->init = ipip_tunnel_init;
235 if (register_netdevice(dev) < 0) {
/* Make the new tunnel reachable through the lookup tables. */
241 ipip_tunnel_link(nt);
/*
 * Device uninit callback (installed by ipip_tunnel_setup).
 *
 * For the fallback device the wildcard slot is cleared under the write
 * lock. The unlink call below presumably sits in the else-branch for all
 * other tunnels; the `else` itself is not visible in this listing.
 */
248 static void ipip_tunnel_uninit(struct net_device *dev)
250 if (dev == ipip_fb_tunnel_dev) {
251 write_lock_bh(&ipip_lock);
252 tunnels_wc[0] = NULL;
253 write_unlock_bh(&ipip_lock);
255 ipip_tunnel_unlink(netdev_priv(dev));
/*
 * ICMP error handler for the tunnel (installed as .err_handler of
 * ipip_handler).
 *
 * Two variants are selected by I_WISH_WORLD_WERE_PERFECT:
 *  - when it is NOT defined, the handler looks up the tunnel the failed
 *    packet belonged to and records the time of the error in t->err_time;
 *  - the second variant (presumably the #else branch, which is not visible
 *    in this listing) translates the ICMP error and relays it to the
 *    sender of the encapsulated packet through icmp_send().
 *
 * NOTE(review): this listing is missing lines (braces, returns, some
 * declarations); only the visible statements are documented.
 */
259 static int ipip_err(struct sk_buff *skb, u32 info)
261 #ifndef I_WISH_WORLD_WERE_PERFECT
263 /* It is not :-( All the routers (except for Linux) return only
264 8 bytes of packet payload. It means, that precise relaying of
265 ICMP in the real Internet is absolutely infeasible. */
/* skb->data is read as the IP header quoted inside the ICMP message. */
267 struct iphdr *iph = (struct iphdr*)skb->data;
268 const int type = icmp_hdr(skb)->type;
269 const int code = icmp_hdr(skb)->code;
275 case ICMP_PARAMETERPROB:
278 case ICMP_DEST_UNREACH:
281 case ICMP_PORT_UNREACH:
282 /* Impossible event. */
284 case ICMP_FRAG_NEEDED:
285 /* Soft state for pmtu is maintained by IP core. */
288 /* All others are translated to HOST_UNREACH.
289 rfc2003 contains "deep thoughts" about NET_UNREACH,
290 I believe they are just ether pollution. --ANK */
295 case ICMP_TIME_EXCEEDED:
296 if (code != ICMP_EXC_TTL)
303 read_lock(&ipip_lock);
/*
 * Arguments are swapped relative to ipip_rcv(): daddr is passed as the
 * remote and saddr as the local address - presumably because the quoted
 * header is that of a packet this host sent.
 */
304 t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
305 if (t == NULL || t->parms.iph.daddr == 0)
309 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Compare against the time of the previous error on this tunnel. */
312 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
316 t->err_time = jiffies;
318 read_unlock(&ipip_lock);
/* Second variant: relay a translated ICMP error to the inner sender. */
321 struct iphdr *iph = (struct iphdr*)dp;
322 int hlen = iph->ihl<<2;
324 const int type = icmp_hdr(skb)->type;
325 const int code = icmp_hdr(skb)->code;
330 struct sk_buff *skb2;
/* The quoted data must hold the outer header plus a full inner header. */
334 if (len < hlen + sizeof(struct iphdr))
/* The encapsulated (inner) header follows the outer header. */
336 eiph = (struct iphdr*)(dp + hlen);
341 case ICMP_PARAMETERPROB:
/* The problem pointer is taken from the top byte of the ICMP word. */
342 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
346 /* So... This guy found something strange INSIDE encapsulated
347 packet. Well, he is fool, but what can we do ? */
349 rel_type = ICMP_PARAMETERPROB;
/* Rebase the pointer so that it is relative to the inner header. */
350 rel_info = htonl((n - hlen) << 24);
353 case ICMP_DEST_UNREACH:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
359 case ICMP_FRAG_NEEDED:
360 /* And it is the only really necessary thing :-) */
361 n = ntohs(icmp_hdr(skb)->un.frag.mtu);
365 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
366 if (n > ntohs(eiph->tot_len))
371 /* All others are translated to HOST_UNREACH.
372 rfc2003 contains "deep thoughts" about NET_UNREACH,
373 I believe, it is just ether pollution. --ANK */
375 rel_type = ICMP_DEST_UNREACH;
376 rel_code = ICMP_HOST_UNREACH;
380 case ICMP_TIME_EXCEEDED:
381 if (code != ICMP_EXC_TTL)
386 /* Prepare fake skb to feed it to icmp_send */
387 skb2 = skb_clone(skb, GFP_ATOMIC);
390 dst_release(skb2->dst);
392 skb_pull(skb2, skb->data - (u8*)eiph);
393 skb_reset_network_header(skb2);
395 /* Try to guess incoming interface */
396 memset(&fl, 0, sizeof(fl));
397 fl.fl4_daddr = eiph->saddr;
398 fl.fl4_tos = RT_TOS(eiph->tos);
399 fl.proto = IPPROTO_IPIP;
/* Look up the route with the flow key `fl` that was filled in just above. */
400 if (ip_route_output_key(&init_net, &rt, &fl)) {
404 skb2->dev = rt->u.dst.dev;
406 /* route "incoming" packet */
407 if (rt->rt_flags&RTCF_LOCAL) {
410 fl.fl4_daddr = eiph->daddr;
411 fl.fl4_src = eiph->saddr;
412 fl.fl4_tos = eiph->tos;
/* The inner packet must route out through a tunnel device. */
413 if (ip_route_output_key(&init_net, &rt, &fl) ||
414 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
421 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
422 skb2->dst->dev->type != ARPHRD_TUNNEL) {
428 /* change mtu on this route */
429 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
430 if (n > dst_mtu(skb2->dst)) {
434 skb2->dst->ops->update_pmtu(skb2->dst, n);
435 } else if (type == ICMP_TIME_EXCEEDED) {
436 struct ip_tunnel *t = netdev_priv(skb2->dev);
/* With a fixed tunnel TTL, report HOST_UNREACH to the inner sender. */
437 if (t->parms.iph.ttl) {
438 rel_type = ICMP_DEST_UNREACH;
439 rel_code = ICMP_HOST_UNREACH;
443 icmp_send(skb2, rel_type, rel_code, rel_info);
/*
 * Propagate congestion marking on decapsulation: when the outer header's
 * TOS carries the CE mark, set CE on the inner header of `skb`.
 * (The line declaring the second parameter is not visible in this listing.)
 */
449 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
452 struct iphdr *inner_iph = ip_hdr(skb);
454 if (INET_ECN_is_ce(outer_iph->tos))
455 IP_ECN_set_ce(inner_iph);
/*
 * Receive handler for encapsulated packets.
 *
 * Under the read lock, looks up the tunnel by the outer source/destination
 * addresses. On a hit the skb is retargeted at the tunnel device: headers
 * are reset, rx counters are updated, the old dst is released and the ECN
 * mark is carried over from the outer header.
 *
 * NOTE(review): the hand-off to the stack and the return values are not
 * visible in this listing.
 */
458 static int ipip_rcv(struct sk_buff *skb)
460 struct ip_tunnel *tunnel;
461 const struct iphdr *iph = ip_hdr(skb);
463 read_lock(&ipip_lock);
464 if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
/* Inbound xfrm policy check failed: release the lock on this path. */
465 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
466 read_unlock(&ipip_lock);
/* The previous network header position is kept as the MAC header. */
473 skb->mac_header = skb->network_header;
474 skb_reset_network_header(skb);
475 skb->protocol = htons(ETH_P_IP);
476 skb->pkt_type = PACKET_HOST;
478 tunnel->stat.rx_packets++;
479 tunnel->stat.rx_bytes += skb->len;
480 skb->dev = tunnel->dev;
481 dst_release(skb->dst);
/* `iph` still points at the outer header captured on entry. */
484 ipip_ecn_decapsulate(iph, skb);
486 read_unlock(&ipip_lock);
489 read_unlock(&ipip_lock);
495 * This function assumes it is being called from dev_queue_xmit()
496 * and that skb is filled properly by that function.
/*
 * Transmit callback (installed as hard_start_xmit by ipip_tunnel_setup).
 *
 * Visible steps: guard against re-entry, accept only IPv4 payloads, resolve
 * a route to the tunnel endpoint, compute the MTU left for the inner packet
 * and update the path MTU, make sure there is headroom for the outer
 * header, then push and fill in a new IPv4 header with protocol IPIP.
 *
 * NOTE(review): error exits, the final send and the return values are not
 * visible in this listing.
 */
499 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
501 struct ip_tunnel *tunnel = netdev_priv(dev);
502 struct net_device_stats *stats = &tunnel->stat;
503 struct iphdr *tiph = &tunnel->parms.iph;
504 u8 tos = tunnel->parms.iph.tos;
505 __be16 df = tiph->frag_off;
506 struct rtable *rt; /* Route to the other host */
507 struct net_device *tdev; /* Device to other host */
508 struct iphdr *old_iph = ip_hdr(skb);
509 struct iphdr *iph; /* Our new IP header */
510 unsigned int max_headroom; /* The extra header space needed */
511 __be32 dst = tiph->daddr;
/* Re-entry guard: a non-zero recursion counter is counted as a collision. */
514 if (tunnel->recursion++) {
515 tunnel->stat.collisions++;
/* Only IPv4 payloads are handled. */
519 if (skb->protocol != htons(ETH_P_IP))
/*
 * Take the destination from the skb's own route (its gateway); the
 * condition enclosing these lines is not visible in this listing.
 */
527 if ((rt = (struct rtable*)skb->dst) == NULL) {
528 tunnel->stat.tx_fifo_errors++;
531 if ((dst = rt->rt_gateway) == 0)
536 struct flowi fl = { .oif = tunnel->parms.link,
539 .saddr = tiph->saddr,
540 .tos = RT_TOS(tos) } },
541 .proto = IPPROTO_IPIP };
542 if (ip_route_output_key(&init_net, &rt, &fl)) {
543 tunnel->stat.tx_carrier_errors++;
547 tdev = rt->u.dst.dev;
551 tunnel->stat.collisions++;
/* MTU left for the inner packet: route MTU minus the outer IPv4 header. */
556 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
558 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
561 tunnel->stat.collisions++;
566 skb->dst->ops->update_pmtu(skb->dst, mtu);
/* Carry the inner packet's DF bit over to the outer header. */
568 df |= (old_iph->frag_off&htons(IP_DF));
/* Inner packet has DF set and exceeds the MTU: send FRAG_NEEDED back. */
570 if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
571 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
/* Errors were recorded for this tunnel; check how recent the last one is. */
576 if (tunnel->err_count > 0) {
577 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
579 dst_link_failure(skb);
581 tunnel->err_count = 0;
585 * Okay, now see if we can stuff it in the buffer as-is.
/* Headroom needed: link-layer reserve of tdev plus the outer IPv4 header. */
587 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
589 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
590 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
591 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
600 skb_set_owner_w(new_skb, skb->sk);
/* Re-read the inner header pointer after the possible reallocation. */
603 old_iph = ip_hdr(skb);
/* The inner IP header becomes the transport header of the new packet. */
606 skb->transport_header = skb->network_header;
607 skb_push(skb, sizeof(struct iphdr));
608 skb_reset_network_header(skb);
609 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
610 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
/* Switch the skb over to the route towards the tunnel endpoint. */
612 dst_release(skb->dst);
613 skb->dst = &rt->u.dst;
616 * Push down and install the IPIP header.
621 iph->ihl = sizeof(struct iphdr)>>2;
623 iph->protocol = IPPROTO_IPIP;
/* Outer TOS is derived from the tunnel TOS and the inner header's TOS. */
624 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
625 iph->daddr = rt->rt_dst;
626 iph->saddr = rt->rt_src;
/* A template TTL of 0 means: inherit the TTL of the inner packet. */
628 if ((iph->ttl = tiph->ttl) == 0)
629 iph->ttl = old_iph->ttl;
638 dst_link_failure(skb);
/*
 * Bind the tunnel device to an underlying device and derive its
 * hard_header_len and MTU from it.
 *
 * The underlying device is taken from a route lookup towards the tunnel's
 * daddr or, failing that, from the configured link index.
 *
 * NOTE(review): the conditions guarding the route lookup and the final
 * assignments are not visible in this listing.
 */
646 static void ipip_tunnel_bind_dev(struct net_device *dev)
648 struct net_device *tdev = NULL;
649 struct ip_tunnel *tunnel;
652 tunnel = netdev_priv(dev);
653 iph = &tunnel->parms.iph;
656 struct flowi fl = { .oif = tunnel->parms.link,
658 { .daddr = iph->daddr,
660 .tos = RT_TOS(iph->tos) } },
661 .proto = IPPROTO_IPIP };
/* ip_route_output_key() returning 0 is treated as a successful lookup. */
663 if (!ip_route_output_key(&init_net, &rt, &fl)) {
664 tdev = rt->u.dst.dev;
667 dev->flags |= IFF_POINTOPOINT;
/* No route-derived device: fall back to the configured link index. */
670 if (!tdev && tunnel->parms.link)
671 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
/* Leave room for the outer IPv4 header on top of the lower device. */
674 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
675 dev->mtu = tdev->mtu - sizeof(struct iphdr);
677 dev->iflink = tunnel->parms.link;
/*
 * Device ioctl callback (installed as do_ioctl by ipip_tunnel_setup).
 *
 * The visible lines cover three operations on tunnel parameters: reading
 * them back to user space, adding/changing a tunnel (SIOCADDTUNNEL /
 * SIOCCHGTUNNEL, which require CAP_NET_ADMIN), and deleting a tunnel
 * (also CAP_NET_ADMIN).
 *
 * NOTE(review): the return-type line, the switch/case labels and the error
 * exits are not visible in this listing.
 */
681 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
684 struct ip_tunnel_parm p;
/* On the fallback device the tunnel is selected by the user's parameters. */
690 if (dev == ipip_fb_tunnel_dev) {
691 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
695 t = ipip_tunnel_locate(&p, 0);
698 t = netdev_priv(dev);
699 memcpy(&p, &t->parms, sizeof(p));
700 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
707 if (!capable(CAP_NET_ADMIN))
711 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/*
 * Accept only an IPv4/IPIP template with a 5-word header and no
 * frag_off bits other than DF.
 */
715 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
716 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
/* Sets DF in the template (any guarding condition is not visible here). */
719 p.iph.frag_off |= htons(IP_DF);
/* Creation is allowed only for SIOCADDTUNNEL. */
721 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
723 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
/* The point-to-point flag must agree with whether a daddr is given. */
730 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
731 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
/* Endpoints change, so the tunnel is removed from its current bucket. */
735 t = netdev_priv(dev);
736 ipip_tunnel_unlink(t);
737 t->parms.iph.saddr = p.iph.saddr;
738 t->parms.iph.daddr = p.iph.daddr;
739 memcpy(dev->dev_addr, &p.iph.saddr, 4);
740 memcpy(dev->broadcast, &p.iph.daddr, 4);
742 netdev_state_change(dev);
748 if (cmd == SIOCCHGTUNNEL) {
749 t->parms.iph.ttl = p.iph.ttl;
750 t->parms.iph.tos = p.iph.tos;
751 t->parms.iph.frag_off = p.iph.frag_off;
/* A changed link index requires re-binding to the underlying device. */
752 if (t->parms.link != p.link) {
753 t->parms.link = p.link;
754 ipip_tunnel_bind_dev(dev);
755 netdev_state_change(dev);
758 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
761 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
766 if (!capable(CAP_NET_ADMIN))
769 if (dev == ipip_fb_tunnel_dev) {
771 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
774 if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
/* Special-cases a lookup that resolves to the fallback device itself. */
777 if (t->dev == ipip_fb_tunnel_dev)
781 unregister_netdevice(dev);
/* get_stats callback: return the counters embedded in the tunnel's private data. */
793 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
795 return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
/*
 * change_mtu callback: the visible check tests for an MTU below 68 or above
 * 0xFFF8 minus the IPv4 header size.
 * NOTE(review): the statements following the check are not visible here.
 */
798 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
800 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
/*
 * Setup routine passed to alloc_netdev(): installs the tunnel's device
 * callbacks and default link parameters.
 */
806 static void ipip_tunnel_setup(struct net_device *dev)
808 dev->uninit = ipip_tunnel_uninit;
809 dev->hard_start_xmit = ipip_tunnel_xmit;
810 dev->get_stats = ipip_tunnel_get_stats;
811 dev->do_ioctl = ipip_tunnel_ioctl;
812 dev->change_mtu = ipip_tunnel_change_mtu;
813 dev->destructor = free_netdev;
815 dev->type = ARPHRD_TUNNEL;
/* Defaults leave room for one outer IPv4 header. */
816 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
817 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
818 dev->flags = IFF_NOARP;
/*
 * Init callback for ordinary tunnel devices (set by ipip_tunnel_locate).
 * Copies the device name into the tunnel parameters, uses the tunnel's
 * saddr/daddr as device and broadcast address, and binds the device.
 */
823 static int ipip_tunnel_init(struct net_device *dev)
825 struct ip_tunnel *tunnel;
827 tunnel = netdev_priv(dev);
830 strcpy(tunnel->parms.name, dev->name);
/* The 4-byte IPv4 endpoints double as device address and broadcast. */
832 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
833 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
835 ipip_tunnel_bind_dev(dev);
/*
 * Init callback for the fallback device (set by ipip_init).
 * Records the device name, marks the template header as IPIP and installs
 * the tunnel in the wildcard slot.
 */
840 static int __init ipip_fb_tunnel_init(struct net_device *dev)
842 struct ip_tunnel *tunnel = netdev_priv(dev);
843 struct iphdr *iph = &tunnel->parms.iph;
846 strcpy(tunnel->parms.name, dev->name);
849 iph->protocol = IPPROTO_IPIP;
/* ipip_tunnel_lookup() consults this slot as its last step. */
853 tunnels_wc[0] = tunnel;
/*
 * Handler record registered with xfrm4_tunnel_register() in ipip_init();
 * ICMP errors are routed to ipip_err. Other members are not visible in
 * this listing.
 */
857 static struct xfrm_tunnel ipip_handler = {
859 .err_handler = ipip_err,
/* Driver banner text, placed in init data. */
863 static char banner[] __initdata =
864 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
/*
 * Module init: register the tunnel handler for AF_INET, then allocate and
 * register the fallback tunnel device. The visible cleanup lines free the
 * device and deregister the handler.
 *
 * NOTE(review): labels, gotos and the return statements are not visible in
 * this listing.
 */
866 static int __init ipip_init(void)
872 if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
873 printk(KERN_INFO "ipip init: can't register tunnel\n");
877 ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
880 if (!ipip_fb_tunnel_dev) {
885 ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
887 if ((err = register_netdev(ipip_fb_tunnel_dev)))
/* Cleanup lines: release the device, then undo the handler registration. */
892 free_netdev(ipip_fb_tunnel_dev);
894 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
/*
 * Unregister every tunnel device found in tables 1..3 of tunnels[]
 * (l, r and r_l). Index 0, the wildcard table, is not walked here.
 */
898 static void __exit ipip_destroy_tunnels(void)
902 for (prio = 1; prio < 4; prio++) {
904 for (h = 0; h < HASH_SIZE; h++) {
/*
 * Re-reads the chain head on every pass, so the loop relies on each
 * unregister removing the tunnel from the chain.
 */
906 while ((t = tunnels[prio][h]) != NULL)
907 unregister_netdevice(t->dev);
/*
 * Module exit: deregister the tunnel handler (logging on failure), destroy
 * all configured tunnels, then unregister the fallback device.
 * NOTE(review): any locking around the unregister calls is not visible in
 * this listing.
 */
912 static void __exit ipip_fini(void)
914 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
915 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
918 ipip_destroy_tunnels();
919 unregister_netdevice(ipip_fb_tunnel_dev);
/* Module entry/exit points and license declaration. */
923 module_init(ipip_init);
924 module_exit(ipip_fini);
925 MODULE_LICENSE("GPL");