git.oblomov.eu Git - linux-2.6/blob - net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/sched.h>
 100 #include <linux/kernel.h>
 101 #include <asm/uaccess.h>
 102 #include <linux/skbuff.h>
 103 #include <linux/netdevice.h>
 104 #include <linux/in.h>
 105 #include <linux/tcp.h>
 106 #include <linux/udp.h>
 107 #include <linux/if_arp.h>
 108 #include <linux/mroute.h>
 109 #include <linux/init.h>
 110 #include <linux/netfilter_ipv4.h>
 111 #include <linux/if_ether.h>
 112
 113 #include <net/sock.h>
 114 #include <net/ip.h>
 115 #include <net/icmp.h>
 116 #include <net/ipip.h>
 117 #include <net/inet_ecn.h>
 118 #include <net/xfrm.h>
 119
 120 #define HASH_SIZE  16
 121 #define HASH(addr) ((addr^(addr>>4))&0xF)
 122
 123 static int ipip_fb_tunnel_init(struct net_device *dev);
 124 static int ipip_tunnel_init(struct net_device *dev);
 125 static void ipip_tunnel_setup(struct net_device *dev);
 126
 127 static struct net_device *ipip_fb_tunnel_dev;
 128
 129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 132 static struct ip_tunnel *tunnels_wc[1];
 133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 134
 135 static DEFINE_RWLOCK(ipip_lock);
 136
 137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
 138 {
 139         unsigned h0 = HASH(remote);
 140         unsigned h1 = HASH(local);
 141         struct ip_tunnel *t;
 142
 143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 144                 if (local == t->parms.iph.saddr &&
 145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 146                         return t;
 147         }
 148         for (t = tunnels_r[h0]; t; t = t->next) {
 149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 150                         return t;
 151         }
 152         for (t = tunnels_l[h1]; t; t = t->next) {
 153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 154                         return t;
 155         }
 156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 157                 return t;
 158         return NULL;
 159 }
 160
 161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 162 {
 163         u32 remote = t->parms.iph.daddr;
 164         u32 local = t->parms.iph.saddr;
 165         unsigned h = 0;
 166         int prio = 0;
 167
 168         if (remote) {
 169                 prio |= 2;
 170                 h ^= HASH(remote);
 171         }
 172         if (local) {
 173                 prio |= 1;
 174                 h ^= HASH(local);
 175         }
 176         return &tunnels[prio][h];
 177 }
 178
 179
 180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 181 {
 182         struct ip_tunnel **tp;
 183
 184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 185                 if (t == *tp) {
 186                         write_lock_bh(&ipip_lock);
 187                         *tp = t->next;
 188                         write_unlock_bh(&ipip_lock);
 189                         break;
 190                 }
 191         }
 192 }
 193
 194 static void ipip_tunnel_link(struct ip_tunnel *t)
 195 {
 196         struct ip_tunnel **tp = ipip_bucket(t);
 197
 198         t->next = *tp;
 199         write_lock_bh(&ipip_lock);
 200         *tp = t;
 201         write_unlock_bh(&ipip_lock);
 202 }
 203
 204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 205 {
 206         u32 remote = parms->iph.daddr;
 207         u32 local = parms->iph.saddr;
 208         struct ip_tunnel *t, **tp, *nt;
 209         struct net_device *dev;
 210         unsigned h = 0;
 211         int prio = 0;
 212         char name[IFNAMSIZ];
 213
 214         if (remote) {
 215                 prio |= 2;
 216                 h ^= HASH(remote);
 217         }
 218         if (local) {
 219                 prio |= 1;
 220                 h ^= HASH(local);
 221         }
 222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 224                         return t;
 225         }
 226         if (!create)
 227                 return NULL;
 228
 229         if (parms->name[0])
 230                 strlcpy(name, parms->name, IFNAMSIZ);
 231         else {
 232                 int i;
 233                 for (i=1; i<100; i++) {
 234                         sprintf(name, "tunl%d", i);
 235                         if (__dev_get_by_name(name) == NULL)
 236                                 break;
 237                 }
 238                 if (i==100)
 239                         goto failed;
 240         }
 241
 242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 243         if (dev == NULL)
 244                 return NULL;
 245
 246         nt = netdev_priv(dev);
 247         SET_MODULE_OWNER(dev);
 248         dev->init = ipip_tunnel_init;
 249         nt->parms = *parms;
 250
 251         if (register_netdevice(dev) < 0) {
 252                 free_netdev(dev);
 253                 goto failed;
 254         }
 255
 256         dev_hold(dev);
 257         ipip_tunnel_link(nt);
 258         return nt;
 259
 260 failed:
 261         return NULL;
 262 }
 263
 264 static void ipip_tunnel_uninit(struct net_device *dev)
 265 {
 266         if (dev == ipip_fb_tunnel_dev) {
 267                 write_lock_bh(&ipip_lock);
 268                 tunnels_wc[0] = NULL;
 269                 write_unlock_bh(&ipip_lock);
 270         } else
 271                 ipip_tunnel_unlink(netdev_priv(dev));
 272         dev_put(dev);
 273 }
 274
 275 static int ipip_err(struct sk_buff *skb, u32 info)
 276 {
 277 #ifndef I_WISH_WORLD_WERE_PERFECT
 278
 279 /* It is not :-( All the routers (except for Linux) return only
 280    8 bytes of packet payload. It means, that precise relaying of
 281    ICMP in the real Internet is absolutely infeasible.
 282  */
 283         struct iphdr *iph = (struct iphdr*)skb->data;
 284         int type = skb->h.icmph->type;
 285         int code = skb->h.icmph->code;
 286         struct ip_tunnel *t;
 287         int err;
 288
 289         switch (type) {
 290         default:
 291         case ICMP_PARAMETERPROB:
 292                 return 0;
 293
 294         case ICMP_DEST_UNREACH:
 295                 switch (code) {
 296                 case ICMP_SR_FAILED:
 297                 case ICMP_PORT_UNREACH:
 298                         /* Impossible event. */
 299                         return 0;
 300                 case ICMP_FRAG_NEEDED:
 301                         /* Soft state for pmtu is maintained by IP core. */
 302                         return 0;
 303                 default:
 304                         /* All others are translated to HOST_UNREACH.
 305                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 306                            I believe they are just ether pollution. --ANK
 307                          */
 308                         break;
 309                 }
 310                 break;
 311         case ICMP_TIME_EXCEEDED:
 312                 if (code != ICMP_EXC_TTL)
 313                         return 0;
 314                 break;
 315         }
 316
 317         err = -ENOENT;
 318
 319         read_lock(&ipip_lock);
 320         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 321         if (t == NULL || t->parms.iph.daddr == 0)
 322                 goto out;
 323
 324         err = 0;
 325         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 326                 goto out;
 327
 328         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 329                 t->err_count++;
 330         else
 331                 t->err_count = 1;
 332         t->err_time = jiffies;
 333 out:
 334         read_unlock(&ipip_lock);
 335         return err;
 336 #else
 337         struct iphdr *iph = (struct iphdr*)dp;
 338         int hlen = iph->ihl<<2;
 339         struct iphdr *eiph;
 340         int type = skb->h.icmph->type;
 341         int code = skb->h.icmph->code;
 342         int rel_type = 0;
 343         int rel_code = 0;
 344         int rel_info = 0;
 345         struct sk_buff *skb2;
 346         struct flowi fl;
 347         struct rtable *rt;
 348
 349         if (len < hlen + sizeof(struct iphdr))
 350                 return 0;
 351         eiph = (struct iphdr*)(dp + hlen);
 352
 353         switch (type) {
 354         default:
 355                 return 0;
 356         case ICMP_PARAMETERPROB:
 357                 if (skb->h.icmph->un.gateway < hlen)
 358                         return 0;
 359
 360                 /* So... This guy found something strange INSIDE encapsulated
 361                    packet. Well, he is fool, but what can we do ?
 362                  */
 363                 rel_type = ICMP_PARAMETERPROB;
 364                 rel_info = skb->h.icmph->un.gateway - hlen;
 365                 break;
 366
 367         case ICMP_DEST_UNREACH:
 368                 switch (code) {
 369                 case ICMP_SR_FAILED:
 370                 case ICMP_PORT_UNREACH:
 371                         /* Impossible event. */
 372                         return 0;
 373                 case ICMP_FRAG_NEEDED:
 374                         /* And it is the only really necessary thing :-) */
 375                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
 376                         if (rel_info < hlen+68)
 377                                 return 0;
 378                         rel_info -= hlen;
 379                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 380                         if (rel_info > ntohs(eiph->tot_len))
 381                                 return 0;
 382                         break;
 383                 default:
 384                         /* All others are translated to HOST_UNREACH.
 385                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 386                            I believe, it is just ether pollution. --ANK
 387                          */
 388                         rel_type = ICMP_DEST_UNREACH;
 389                         rel_code = ICMP_HOST_UNREACH;
 390                         break;
 391                 }
 392                 break;
 393         case ICMP_TIME_EXCEEDED:
 394                 if (code != ICMP_EXC_TTL)
 395                         return 0;
 396                 break;
 397         }
 398
 399         /* Prepare fake skb to feed it to icmp_send */
 400         skb2 = skb_clone(skb, GFP_ATOMIC);
 401         if (skb2 == NULL)
 402                 return 0;
 403         dst_release(skb2->dst);
 404         skb2->dst = NULL;
 405         skb_pull(skb2, skb->data - (u8*)eiph);
 406         skb2->nh.raw = skb2->data;
 407
 408         /* Try to guess incoming interface */
 409         memset(&fl, 0, sizeof(fl));
 410         fl.fl4_daddr = eiph->saddr;
 411         fl.fl4_tos = RT_TOS(eiph->tos);
 412         fl.proto = IPPROTO_IPIP;
 413         if (ip_route_output_key(&rt, &key)) {
 414                 kfree_skb(skb2);
 415                 return 0;
 416         }
 417         skb2->dev = rt->u.dst.dev;
 418
 419         /* route "incoming" packet */
 420         if (rt->rt_flags&RTCF_LOCAL) {
 421                 ip_rt_put(rt);
 422                 rt = NULL;
 423                 fl.fl4_daddr = eiph->daddr;
 424                 fl.fl4_src = eiph->saddr;
 425                 fl.fl4_tos = eiph->tos;
 426                 if (ip_route_output_key(&rt, &fl) ||
 427                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 428                         ip_rt_put(rt);
 429                         kfree_skb(skb2);
 430                         return 0;
 431                 }
 432         } else {
 433                 ip_rt_put(rt);
 434                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 435                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 436                         kfree_skb(skb2);
 437                         return 0;
 438                 }
 439         }
 440
 441         /* change mtu on this route */
 442         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 443                 if (rel_info > dst_mtu(skb2->dst)) {
 444                         kfree_skb(skb2);
 445                         return 0;
 446                 }
 447                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
 448                 rel_info = htonl(rel_info);
 449         } else if (type == ICMP_TIME_EXCEEDED) {
 450                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 451                 if (t->parms.iph.ttl) {
 452                         rel_type = ICMP_DEST_UNREACH;
 453                         rel_code = ICMP_HOST_UNREACH;
 454                 }
 455         }
 456
 457         icmp_send(skb2, rel_type, rel_code, rel_info);
 458         kfree_skb(skb2);
 459         return 0;
 460 #endif
 461 }
 462
 463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
 464 {
 465         struct iphdr *inner_iph = skb->nh.iph;
 466
 467         if (INET_ECN_is_ce(outer_iph->tos))
 468                 IP_ECN_set_ce(inner_iph);
 469 }
 470
 471 static int ipip_rcv(struct sk_buff *skb)
 472 {
 473         struct iphdr *iph;
 474         struct ip_tunnel *tunnel;
 475
 476         iph = skb->nh.iph;
 477
 478         read_lock(&ipip_lock);
 479         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 480                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 481                         read_unlock(&ipip_lock);
 482                         kfree_skb(skb);
 483                         return 0;
 484                 }
 485
 486                 secpath_reset(skb);
 487
 488                 skb->mac.raw = skb->nh.raw;
 489                 skb->nh.raw = skb->data;
 490                 skb->protocol = htons(ETH_P_IP);
 491                 skb->pkt_type = PACKET_HOST;
 492
 493                 tunnel->stat.rx_packets++;
 494                 tunnel->stat.rx_bytes += skb->len;
 495                 skb->dev = tunnel->dev;
 496                 dst_release(skb->dst);
 497                 skb->dst = NULL;
 498                 nf_reset(skb);
 499                 ipip_ecn_decapsulate(iph, skb);
 500                 netif_rx(skb);
 501                 read_unlock(&ipip_lock);
 502                 return 0;
 503         }
 504         read_unlock(&ipip_lock);
 505
 506         return -1;
 507 }
 508
 509 /*
 510  *      This function assumes it is being called from dev_queue_xmit()
 511  *      and that skb is filled properly by that function.
 512  */
 513
 514 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 515 {
 516         struct ip_tunnel *tunnel = netdev_priv(dev);
 517         struct net_device_stats *stats = &tunnel->stat;
 518         struct iphdr  *tiph = &tunnel->parms.iph;
 519         u8     tos = tunnel->parms.iph.tos;
 520         u16    df = tiph->frag_off;
 521         struct rtable *rt;                      /* Route to the other host */
 522         struct net_device *tdev;                        /* Device to other host */
 523         struct iphdr  *old_iph = skb->nh.iph;
 524         struct iphdr  *iph;                     /* Our new IP header */
 525         int    max_headroom;                    /* The extra header space needed */
 526         u32    dst = tiph->daddr;
 527         int    mtu;
 528
 529         if (tunnel->recursion++) {
 530                 tunnel->stat.collisions++;
 531                 goto tx_error;
 532         }
 533
 534         if (skb->protocol != htons(ETH_P_IP))
 535                 goto tx_error;
 536
 537         if (tos&1)
 538                 tos = old_iph->tos;
 539
 540         if (!dst) {
 541                 /* NBMA tunnel */
 542                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 543                         tunnel->stat.tx_fifo_errors++;
 544                         goto tx_error;
 545                 }
 546                 if ((dst = rt->rt_gateway) == 0)
 547                         goto tx_error_icmp;
 548         }
 549
 550         {
 551                 struct flowi fl = { .oif = tunnel->parms.link,
 552                                     .nl_u = { .ip4_u =
 553                                               { .daddr = dst,
 554                                                 .saddr = tiph->saddr,
 555                                                 .tos = RT_TOS(tos) } },
 556                                     .proto = IPPROTO_IPIP };
 557                 if (ip_route_output_key(&rt, &fl)) {
 558                         tunnel->stat.tx_carrier_errors++;
 559                         goto tx_error_icmp;
 560                 }
 561         }
 562         tdev = rt->u.dst.dev;
 563
 564         if (tdev == dev) {
 565                 ip_rt_put(rt);
 566                 tunnel->stat.collisions++;
 567                 goto tx_error;
 568         }
 569
 570         if (tiph->frag_off)
 571                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 572         else
 573                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 574
 575         if (mtu < 68) {
 576                 tunnel->stat.collisions++;
 577                 ip_rt_put(rt);
 578                 goto tx_error;
 579         }
 580         if (skb->dst)
 581                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 582
 583         df |= (old_iph->frag_off&htons(IP_DF));
 584
 585         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 586                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 587                 ip_rt_put(rt);
 588                 goto tx_error;
 589         }
 590
 591         if (tunnel->err_count > 0) {
 592                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 593                         tunnel->err_count--;
 594                         dst_link_failure(skb);
 595                 } else
 596                         tunnel->err_count = 0;
 597         }
 598
 599         /*
 600          * Okay, now see if we can stuff it in the buffer as-is.
 601          */
 602         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 603
 604         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 605                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 606                 if (!new_skb) {
 607                         ip_rt_put(rt);
 608                         stats->tx_dropped++;
 609                         dev_kfree_skb(skb);
 610                         tunnel->recursion--;
 611                         return 0;
 612                 }
 613                 if (skb->sk)
 614                         skb_set_owner_w(new_skb, skb->sk);
 615                 dev_kfree_skb(skb);
 616                 skb = new_skb;
 617                 old_iph = skb->nh.iph;
 618         }
 619
 620         skb->h.raw = skb->nh.raw;
 621         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
 622         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 623         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 624                               IPSKB_REROUTED);
 625         dst_release(skb->dst);
 626         skb->dst = &rt->u.dst;
 627
 628         /*
 629          *      Push down and install the IPIP header.
 630          */
 631
 632         iph                     =       skb->nh.iph;
 633         iph->version            =       4;
 634         iph->ihl                =       sizeof(struct iphdr)>>2;
 635         iph->frag_off           =       df;
 636         iph->protocol           =       IPPROTO_IPIP;
 637         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 638         iph->daddr              =       rt->rt_dst;
 639         iph->saddr              =       rt->rt_src;
 640
 641         if ((iph->ttl = tiph->ttl) == 0)
 642                 iph->ttl        =       old_iph->ttl;
 643
 644         nf_reset(skb);
 645
 646         IPTUNNEL_XMIT();
 647         tunnel->recursion--;
 648         return 0;
 649
 650 tx_error_icmp:
 651         dst_link_failure(skb);
 652 tx_error:
 653         stats->tx_errors++;
 654         dev_kfree_skb(skb);
 655         tunnel->recursion--;
 656         return 0;
 657 }
 658
 659 static int
 660 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 661 {
 662         int err = 0;
 663         struct ip_tunnel_parm p;
 664         struct ip_tunnel *t;
 665
 666         switch (cmd) {
 667         case SIOCGETTUNNEL:
 668                 t = NULL;
 669                 if (dev == ipip_fb_tunnel_dev) {
 670                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 671                                 err = -EFAULT;
 672                                 break;
 673                         }
 674                         t = ipip_tunnel_locate(&p, 0);
 675                 }
 676                 if (t == NULL)
 677                         t = netdev_priv(dev);
 678                 memcpy(&p, &t->parms, sizeof(p));
 679                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 680                         err = -EFAULT;
 681                 break;
 682
 683         case SIOCADDTUNNEL:
 684         case SIOCCHGTUNNEL:
 685                 err = -EPERM;
 686                 if (!capable(CAP_NET_ADMIN))
 687                         goto done;
 688
 689                 err = -EFAULT;
 690                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 691                         goto done;
 692
 693                 err = -EINVAL;
 694                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 695                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 696                         goto done;
 697                 if (p.iph.ttl)
 698                         p.iph.frag_off |= htons(IP_DF);
 699
 700                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 701
 702                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 703                         if (t != NULL) {
 704                                 if (t->dev != dev) {
 705                                         err = -EEXIST;
 706                                         break;
 707                                 }
 708                         } else {
 709                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 710                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 711                                         err = -EINVAL;
 712                                         break;
 713                                 }
 714                                 t = netdev_priv(dev);
 715                                 ipip_tunnel_unlink(t);
 716                                 t->parms.iph.saddr = p.iph.saddr;
 717                                 t->parms.iph.daddr = p.iph.daddr;
 718                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 719                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 720                                 ipip_tunnel_link(t);
 721                                 netdev_state_change(dev);
 722                         }
 723                 }
 724
 725                 if (t) {
 726                         err = 0;
 727                         if (cmd == SIOCCHGTUNNEL) {
 728                                 t->parms.iph.ttl = p.iph.ttl;
 729                                 t->parms.iph.tos = p.iph.tos;
 730                                 t->parms.iph.frag_off = p.iph.frag_off;
 731                         }
 732                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 733                                 err = -EFAULT;
 734                 } else
 735                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 736                 break;
 737
 738         case SIOCDELTUNNEL:
 739                 err = -EPERM;
 740                 if (!capable(CAP_NET_ADMIN))
 741                         goto done;
 742
 743                 if (dev == ipip_fb_tunnel_dev) {
 744                         err = -EFAULT;
 745                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 746                                 goto done;
 747                         err = -ENOENT;
 748                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 749                                 goto done;
 750                         err = -EPERM;
 751                         if (t->dev == ipip_fb_tunnel_dev)
 752                                 goto done;
 753                         dev = t->dev;
 754                 }
 755                 err = unregister_netdevice(dev);
 756                 break;
 757
 758         default:
 759                 err = -EINVAL;
 760         }
 761
 762 done:
 763         return err;
 764 }
 765
 766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 767 {
 768         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 769 }
 770
 771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 772 {
 773         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 774                 return -EINVAL;
 775         dev->mtu = new_mtu;
 776         return 0;
 777 }
 778
 779 static void ipip_tunnel_setup(struct net_device *dev)
 780 {
 781         SET_MODULE_OWNER(dev);
 782         dev->uninit             = ipip_tunnel_uninit;
 783         dev->hard_start_xmit    = ipip_tunnel_xmit;
 784         dev->get_stats          = ipip_tunnel_get_stats;
 785         dev->do_ioctl           = ipip_tunnel_ioctl;
 786         dev->change_mtu         = ipip_tunnel_change_mtu;
 787         dev->destructor         = free_netdev;
 788
 789         dev->type               = ARPHRD_TUNNEL;
 790         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 791         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 792         dev->flags              = IFF_NOARP;
 793         dev->iflink             = 0;
 794         dev->addr_len           = 4;
 795 }
 796
 797 static int ipip_tunnel_init(struct net_device *dev)
 798 {
 799         struct net_device *tdev = NULL;
 800         struct ip_tunnel *tunnel;
 801         struct iphdr *iph;
 802
 803         tunnel = netdev_priv(dev);
 804         iph = &tunnel->parms.iph;
 805
 806         tunnel->dev = dev;
 807         strcpy(tunnel->parms.name, dev->name);
 808
 809         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 810         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 811
 812         if (iph->daddr) {
 813                 struct flowi fl = { .oif = tunnel->parms.link,
 814                                     .nl_u = { .ip4_u =
 815                                               { .daddr = iph->daddr,
 816                                                 .saddr = iph->saddr,
 817                                                 .tos = RT_TOS(iph->tos) } },
 818                                     .proto = IPPROTO_IPIP };
 819                 struct rtable *rt;
 820                 if (!ip_route_output_key(&rt, &fl)) {
 821                         tdev = rt->u.dst.dev;
 822                         ip_rt_put(rt);
 823                 }
 824                 dev->flags |= IFF_POINTOPOINT;
 825         }
 826
 827         if (!tdev && tunnel->parms.link)
 828                 tdev = __dev_get_by_index(tunnel->parms.link);
 829
 830         if (tdev) {
 831                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 832                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 833         }
 834         dev->iflink = tunnel->parms.link;
 835
 836         return 0;
 837 }
 838
 839 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 840 {
 841         struct ip_tunnel *tunnel = netdev_priv(dev);
 842         struct iphdr *iph = &tunnel->parms.iph;
 843
 844         tunnel->dev = dev;
 845         strcpy(tunnel->parms.name, dev->name);
 846
 847         iph->version            = 4;
 848         iph->protocol           = IPPROTO_IPIP;
 849         iph->ihl                = 5;
 850
 851         dev_hold(dev);
 852         tunnels_wc[0]           = tunnel;
 853         return 0;
 854 }
 855
 856 static struct xfrm_tunnel ipip_handler = {
 857         .handler        =       ipip_rcv,
 858         .err_handler    =       ipip_err,
 859         .priority       =       1,
 860 };
 861
 862 static char banner[] __initdata =
 863         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 864
 865 static int __init ipip_init(void)
 866 {
 867         int err;
 868
 869         printk(banner);
 870
 871         if (xfrm4_tunnel_register(&ipip_handler)) {
 872                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 873                 return -EAGAIN;
 874         }
 875
 876         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 877                                            "tunl0",
 878                                            ipip_tunnel_setup);
 879         if (!ipip_fb_tunnel_dev) {
 880                 err = -ENOMEM;
 881                 goto err1;
 882         }
 883
 884         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 885
 886         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 887                 goto err2;
 888  out:
 889         return err;
 890  err2:
 891         free_netdev(ipip_fb_tunnel_dev);
 892  err1:
 893         xfrm4_tunnel_deregister(&ipip_handler);
 894         goto out;
 895 }
 896
 897 static void __exit ipip_destroy_tunnels(void)
 898 {
 899         int prio;
 900
 901         for (prio = 1; prio < 4; prio++) {
 902                 int h;
 903                 for (h = 0; h < HASH_SIZE; h++) {
 904                         struct ip_tunnel *t;
 905                         while ((t = tunnels[prio][h]) != NULL)
 906                                 unregister_netdevice(t->dev);
 907                 }
 908         }
 909 }
 910
 911 static void __exit ipip_fini(void)
 912 {
 913         if (xfrm4_tunnel_deregister(&ipip_handler))
 914                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 915
 916         rtnl_lock();
 917         ipip_destroy_tunnels();
 918         unregister_netdevice(ipip_fb_tunnel_dev);
 919         rtnl_unlock();
 920 }
 921
 922 module_init(ipip_init);
 923 module_exit(ipip_fini);
 924 MODULE_LICENSE("GPL");