git.oblomov.eu Git - linux-2.6/blob - net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
  89 /*
  90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  91
  92    For comments look at net/ipv4/ip_gre.c --ANK
  93  */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118
 119 #define HASH_SIZE  16
 120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 121
 122 static int ipip_fb_tunnel_init(struct net_device *dev);
 123 static int ipip_tunnel_init(struct net_device *dev);
 124 static void ipip_tunnel_setup(struct net_device *dev);
 125
     /*
      * Device that owns the wildcard slot tunnels_wc[0]; several functions in
      * this file compare a device against it to special-case that device.
      */
 126 static struct net_device *ipip_fb_tunnel_dev;
 127
     /*
      * Tunnel lists, one table per combination of configured endpoints:
      *   tunnels_r_l - remote and local both set, indexed by HASH(remote)^HASH(local)
      *   tunnels_r   - only remote set, indexed by HASH(remote)
      *   tunnels_l   - only local set, indexed by HASH(local)
      *   tunnels_wc  - neither set; a single slot
      * tunnels[] maps a priority value (bit 1 = remote set, bit 0 = local set,
      * as computed in __ipip_bucket()) to the corresponding table.
      */
 128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_wc[1];
 132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 133
     /*
      * Taken for writing around updates of the list pointers above and for
      * reading around lookups on the receive and ICMP error paths.
      */
 134 static DEFINE_RWLOCK(ipip_lock);
 135
 136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 137 {
 138         unsigned h0 = HASH(remote);
 139         unsigned h1 = HASH(local);
 140         struct ip_tunnel *t;
 141
 142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 143                 if (local == t->parms.iph.saddr &&
 144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 145                         return t;
 146         }
 147         for (t = tunnels_r[h0]; t; t = t->next) {
 148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 149                         return t;
 150         }
 151         for (t = tunnels_l[h1]; t; t = t->next) {
 152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 153                         return t;
 154         }
 155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 156                 return t;
 157         return NULL;
 158 }
 159
 160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 161 {
 162         __be32 remote = parms->iph.daddr;
 163         __be32 local = parms->iph.saddr;
 164         unsigned h = 0;
 165         int prio = 0;
 166
 167         if (remote) {
 168                 prio |= 2;
 169                 h ^= HASH(remote);
 170         }
 171         if (local) {
 172                 prio |= 1;
 173                 h ^= HASH(local);
 174         }
 175         return &tunnels[prio][h];
 176 }
 177
 178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 179 {
 180         return __ipip_bucket(&t->parms);
 181 }
 182
 183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 184 {
 185         struct ip_tunnel **tp;
 186
 187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 188                 if (t == *tp) {
 189                         write_lock_bh(&ipip_lock);
 190                         *tp = t->next;
 191                         write_unlock_bh(&ipip_lock);
 192                         break;
 193                 }
 194         }
 195 }
 196
 197 static void ipip_tunnel_link(struct ip_tunnel *t)
 198 {
 199         struct ip_tunnel **tp = ipip_bucket(t);
 200
 201         t->next = *tp;
 202         write_lock_bh(&ipip_lock);
 203         *tp = t;
 204         write_unlock_bh(&ipip_lock);
 205 }
 206
 207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 208 {
 209         __be32 remote = parms->iph.daddr;
 210         __be32 local = parms->iph.saddr;
 211         struct ip_tunnel *t, **tp, *nt;
 212         struct net_device *dev;
 213         char name[IFNAMSIZ];
 214
 215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 217                         return t;
 218         }
 219         if (!create)
 220                 return NULL;
 221
 222         if (parms->name[0])
 223                 strlcpy(name, parms->name, IFNAMSIZ);
 224         else
 225                 sprintf(name, "tunl%%d");
 226
 227         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 228         if (dev == NULL)
 229                 return NULL;
 230
 231         if (strchr(name, '%')) {
 232                 if (dev_alloc_name(dev, name) < 0)
 233                         goto failed_free;
 234         }
 235
 236         nt = netdev_priv(dev);
 237         dev->init = ipip_tunnel_init;
 238         nt->parms = *parms;
 239
 240         if (register_netdevice(dev) < 0)
 241                 goto failed_free;
 242
 243         dev_hold(dev);
 244         ipip_tunnel_link(nt);
 245         return nt;
 246
 247 failed_free:
 248         free_netdev(dev);
 249         return NULL;
 250 }
 251
 252 static void ipip_tunnel_uninit(struct net_device *dev)
 253 {
 254         if (dev == ipip_fb_tunnel_dev) {
 255                 write_lock_bh(&ipip_lock);
 256                 tunnels_wc[0] = NULL;
 257                 write_unlock_bh(&ipip_lock);
 258         } else
 259                 ipip_tunnel_unlink(netdev_priv(dev));
 260         dev_put(dev);
 261 }
 262
 263 static int ipip_err(struct sk_buff *skb, u32 info)
 264 {
             /*
              * ICMP error handler for IPIP tunnels.
              *
              * The #ifndef branch below is the one that is compiled unless
              * I_WISH_WORLD_WERE_PERFECT is defined.  It does not relay the
              * error; it filters on the ICMP type/code and, for the errors it
              * keeps, records them in the matching tunnel's err_count/err_time.
              *
              * Returns 0 for ignored or recorded errors and -ENOENT when no
              * tunnel matches or the matching tunnel has no remote address.
              * The info argument is not referenced anywhere in this function.
              */
 265 #ifndef I_WISH_WORLD_WERE_PERFECT
 266
 267 /* It is not :-( All the routers (except for Linux) return only
 268    8 bytes of packet payload. It means, that precise relaying of
 269    ICMP in the real Internet is absolutely infeasible.
 270  */
 271         struct iphdr *iph = (struct iphdr*)skb->data;
 272         const int type = icmp_hdr(skb)->type;
 273         const int code = icmp_hdr(skb)->code;
 274         struct ip_tunnel *t;
 275         int err;
 276
             /* Unknown types and parameter problems are ignored. */
 277         switch (type) {
 278         default:
 279         case ICMP_PARAMETERPROB:
 280                 return 0;
 281
 282         case ICMP_DEST_UNREACH:
 283                 switch (code) {
 284                 case ICMP_SR_FAILED:
 285                 case ICMP_PORT_UNREACH:
 286                         /* Impossible event. */
 287                         return 0;
 288                 case ICMP_FRAG_NEEDED:
 289                         /* Soft state for pmtu is maintained by IP core. */
 290                         return 0;
 291                 default:
 292                         /* All others are translated to HOST_UNREACH.
 293                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 294                            I believe they are just ether pollution. --ANK
 295                          */
 296                         break;
 297                 }
 298                 break;
 299         case ICMP_TIME_EXCEEDED:
 300                 if (code != ICMP_EXC_TTL)
 301                         return 0;
 302                 break;
 303         }
 304
 305         err = -ENOENT;
 306
             /*
              * ipip_tunnel_lookup() takes (remote, local); the header at
              * skb->data supplies them as (daddr, saddr).
              */
 307         read_lock(&ipip_lock);
 308         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 309         if (t == NULL || t->parms.iph.daddr == 0)
 310                 goto out;
 311
 312         err = 0;
             /* A tunnel with ttl 0 does not record TIME_EXCEEDED errors. */
 313         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 314                 goto out;
 315
             /*
              * Count errors that arrive within IPTUNNEL_ERR_TIMEO jiffies of the
              * previous one; otherwise restart the count at 1.
              */
 316         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 317                 t->err_count++;
 318         else
 319                 t->err_count = 1;
 320         t->err_time = jiffies;
 321 out:
 322         read_unlock(&ipip_lock);
 323         return err;
 324 #else
             /*
              * NOTE(review): this alternative branch uses dp, len and key, none
              * of which are declared in this function or passed as parameters,
              * so it presumably does not build if the macro is ever defined --
              * confirm before enabling it.
              */
 325         struct iphdr *iph = (struct iphdr*)dp;
 326         int hlen = iph->ihl<<2;
 327         struct iphdr *eiph;
 328         const int type = icmp_hdr(skb)->type;
 329         const int code = icmp_hdr(skb)->code;
 330         int rel_type = 0;
 331         int rel_code = 0;
 332         __be32 rel_info = 0;
 333         __u32 n = 0;
 334         struct sk_buff *skb2;
 335         struct flowi fl;
 336         struct rtable *rt;
 337
 338         if (len < hlen + sizeof(struct iphdr))
 339                 return 0;
 340         eiph = (struct iphdr*)(dp + hlen);
 341
 342         switch (type) {
 343         default:
 344                 return 0;
 345         case ICMP_PARAMETERPROB:
 346                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 347                 if (n < hlen)
 348                         return 0;
 349
 350                 /* So... This guy found something strange INSIDE encapsulated
 351                    packet. Well, he is fool, but what can we do ?
 352                  */
 353                 rel_type = ICMP_PARAMETERPROB;
 354                 rel_info = htonl((n - hlen) << 24);
 355                 break;
 356
 357         case ICMP_DEST_UNREACH:
 358                 switch (code) {
 359                 case ICMP_SR_FAILED:
 360                 case ICMP_PORT_UNREACH:
 361                         /* Impossible event. */
 362                         return 0;
 363                 case ICMP_FRAG_NEEDED:
 364                         /* And it is the only really necessary thing :-) */
 365                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 366                         if (n < hlen+68)
 367                                 return 0;
 368                         n -= hlen;
 369                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 370                         if (n > ntohs(eiph->tot_len))
 371                                 return 0;
 372                         rel_info = htonl(n);
 373                         break;
 374                 default:
 375                         /* All others are translated to HOST_UNREACH.
 376                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 377                            I believe, it is just ether pollution. --ANK
 378                          */
 379                         rel_type = ICMP_DEST_UNREACH;
 380                         rel_code = ICMP_HOST_UNREACH;
 381                         break;
 382                 }
 383                 break;
 384         case ICMP_TIME_EXCEEDED:
 385                 if (code != ICMP_EXC_TTL)
 386                         return 0;
 387                 break;
 388         }
 389
 390         /* Prepare fake skb to feed it to icmp_send */
 391         skb2 = skb_clone(skb, GFP_ATOMIC);
 392         if (skb2 == NULL)
 393                 return 0;
 394         dst_release(skb2->dst);
 395         skb2->dst = NULL;
 396         skb_pull(skb2, skb->data - (u8*)eiph);
 397         skb_reset_network_header(skb2);
 398
 399         /* Try to guess incoming interface */
 400         memset(&fl, 0, sizeof(fl));
 401         fl.fl4_daddr = eiph->saddr;
 402         fl.fl4_tos = RT_TOS(eiph->tos);
 403         fl.proto = IPPROTO_IPIP;
 404         if (ip_route_output_key(&init_net, &rt, &key)) {
 405                 kfree_skb(skb2);
 406                 return 0;
 407         }
 408         skb2->dev = rt->u.dst.dev;
 409
 410         /* route "incoming" packet */
 411         if (rt->rt_flags&RTCF_LOCAL) {
 412                 ip_rt_put(rt);
 413                 rt = NULL;
 414                 fl.fl4_daddr = eiph->daddr;
 415                 fl.fl4_src = eiph->saddr;
 416                 fl.fl4_tos = eiph->tos;
 417                 if (ip_route_output_key(&init_net, &rt, &fl) ||
 418                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 419                         ip_rt_put(rt);
 420                         kfree_skb(skb2);
 421                         return 0;
 422                 }
 423         } else {
 424                 ip_rt_put(rt);
 425                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 426                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 427                         kfree_skb(skb2);
 428                         return 0;
 429                 }
 430         }
 431
 432         /* change mtu on this route */
 433         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 434                 if (n > dst_mtu(skb2->dst)) {
 435                         kfree_skb(skb2);
 436                         return 0;
 437                 }
 438                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 439         } else if (type == ICMP_TIME_EXCEEDED) {
 440                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 441                 if (t->parms.iph.ttl) {
 442                         rel_type = ICMP_DEST_UNREACH;
 443                         rel_code = ICMP_HOST_UNREACH;
 444                 }
 445         }
 446
 447         icmp_send(skb2, rel_type, rel_code, rel_info);
 448         kfree_skb(skb2);
 449         return 0;
 450 #endif
 451 }
 452
 453 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 454                                         struct sk_buff *skb)
 455 {
 456         struct iphdr *inner_iph = ip_hdr(skb);
 457
 458         if (INET_ECN_is_ce(outer_iph->tos))
 459                 IP_ECN_set_ce(inner_iph);
 460 }
 461
 462 static int ipip_rcv(struct sk_buff *skb)
 463 {
 464         struct ip_tunnel *tunnel;
 465         const struct iphdr *iph = ip_hdr(skb);
 466
 467         read_lock(&ipip_lock);
 468         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 469                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 470                         read_unlock(&ipip_lock);
 471                         kfree_skb(skb);
 472                         return 0;
 473                 }
 474
 475                 secpath_reset(skb);
 476
 477                 skb->mac_header = skb->network_header;
 478                 skb_reset_network_header(skb);
 479                 skb->protocol = htons(ETH_P_IP);
 480                 skb->pkt_type = PACKET_HOST;
 481
 482                 tunnel->stat.rx_packets++;
 483                 tunnel->stat.rx_bytes += skb->len;
 484                 skb->dev = tunnel->dev;
 485                 dst_release(skb->dst);
 486                 skb->dst = NULL;
 487                 nf_reset(skb);
 488                 ipip_ecn_decapsulate(iph, skb);
 489                 netif_rx(skb);
 490                 read_unlock(&ipip_lock);
 491                 return 0;
 492         }
 493         read_unlock(&ipip_lock);
 494
 495         return -1;
 496 }
 497
 498 /*
 499  *      This function assumes it is being called from dev_queue_xmit()
 500  *      and that skb is filled properly by that function.
 501  */
 502
 503 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 504 {
             /*
              * Transmit handler, installed as dev->hard_start_xmit by
              * ipip_tunnel_setup().  Prepends an IPv4 header with protocol
              * IPPROTO_IPIP to an IPv4 packet and hands it to IPTUNNEL_XMIT().
              *
              * Every path returns 0.  On the error paths the skb is freed here
              * and tx_errors (tx_dropped for a failed reallocation) is bumped.
              * tunnel->recursion is incremented on entry and decremented on
              * every exit; a non-zero value on entry is counted as a collision.
              */
 505         struct ip_tunnel *tunnel = netdev_priv(dev);
 506         struct net_device_stats *stats = &tunnel->stat;
 507         struct iphdr  *tiph = &tunnel->parms.iph;
 508         u8     tos = tunnel->parms.iph.tos;
 509         __be16 df = tiph->frag_off;
 510         struct rtable *rt;                      /* Route to the other host */
 511         struct net_device *tdev;                        /* Device to other host */
 512         struct iphdr  *old_iph = ip_hdr(skb);
 513         struct iphdr  *iph;                     /* Our new IP header */
 514         unsigned int max_headroom;              /* The extra header space needed */
 515         __be32 dst = tiph->daddr;
 516         int    mtu;
 517
 518         if (tunnel->recursion++) {
 519                 tunnel->stat.collisions++;
 520                 goto tx_error;
 521         }
 522
             /* Only IPv4 payloads are encapsulated. */
 523         if (skb->protocol != htons(ETH_P_IP))
 524                 goto tx_error;
 525
             /* Low bit of the configured TOS set: take the TOS of the inner header. */
 526         if (tos&1)
 527                 tos = old_iph->tos;
 528
             /*
              * No remote address configured: use the gateway of the route that is
              * already attached to the skb as the outer destination.
              */
 529         if (!dst) {
 530                 /* NBMA tunnel */
 531                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 532                         tunnel->stat.tx_fifo_errors++;
 533                         goto tx_error;
 534                 }
 535                 if ((dst = rt->rt_gateway) == 0)
 536                         goto tx_error_icmp;
 537         }
 538
             /* Route the outer packet; rt is overwritten with the result. */
 539         {
 540                 struct flowi fl = { .oif = tunnel->parms.link,
 541                                     .nl_u = { .ip4_u =
 542                                               { .daddr = dst,
 543                                                 .saddr = tiph->saddr,
 544                                                 .tos = RT_TOS(tos) } },
 545                                     .proto = IPPROTO_IPIP };
 546                 if (ip_route_output_key(&init_net, &rt, &fl)) {
 547                         tunnel->stat.tx_carrier_errors++;
 548                         goto tx_error_icmp;
 549                 }
 550         }
 551         tdev = rt->u.dst.dev;
 552
             /* The outer route leads back to this very tunnel device. */
 553         if (tdev == dev) {
 554                 ip_rt_put(rt);
 555                 tunnel->stat.collisions++;
 556                 goto tx_error;
 557         }
 558
             /*
              * With frag_off set in the tunnel's header template the MTU is the
              * outer route's MTU minus the added header; otherwise the MTU of
              * the skb's current route (or of this device) is used as is.
              */
 559         if (tiph->frag_off)
 560                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 561         else
 562                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 563
 564         if (mtu < 68) {
 565                 tunnel->stat.collisions++;
 566                 ip_rt_put(rt);
 567                 goto tx_error;
 568         }
 569         if (skb->dst)
 570                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 571
             /* The outer header inherits DF from the inner header. */
 572         df |= (old_iph->frag_off&htons(IP_DF));
 573
             /* Inner packet has DF set and does not fit: report FRAG_NEEDED. */
 574         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 575                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 576                 ip_rt_put(rt);
 577                 goto tx_error;
 578         }
 579
             /*
              * err_count/err_time are written by ipip_err().  While the last
              * recorded error is recent, consume one count per transmitted
              * packet and signal a link failure for it; the packet is still
              * sent.  Stale counts are discarded.
              */
 580         if (tunnel->err_count > 0) {
 581                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 582                         tunnel->err_count--;
 583                         dst_link_failure(skb);
 584                 } else
 585                         tunnel->err_count = 0;
 586         }
 587
 588         /*
 589          * Okay, now see if we can stuff it in the buffer as-is.
 590          */
 591         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 592
             /*
              * Reallocate when there is too little headroom or the buffer may
              * not be written to (shared, or cloned and not clone-writable).
              */
 593         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 594             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 595                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 596                 if (!new_skb) {
 597                         ip_rt_put(rt);
 598                         stats->tx_dropped++;
 599                         dev_kfree_skb(skb);
 600                         tunnel->recursion--;
 601                         return 0;
 602                 }
 603                 if (skb->sk)
 604                         skb_set_owner_w(new_skb, skb->sk);
 605                 dev_kfree_skb(skb);
 606                 skb = new_skb;
                     /* old_iph pointed into the buffer that was just released. */
 607                 old_iph = ip_hdr(skb);
 608         }
 609
             /* The inner IP header becomes the transport header of the new packet. */
 610         skb->transport_header = skb->network_header;
 611         skb_push(skb, sizeof(struct iphdr));
 612         skb_reset_network_header(skb);
 613         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 614         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 615                               IPSKB_REROUTED);
             /* The skb takes over the route reference obtained above. */
 616         dst_release(skb->dst);
 617         skb->dst = &rt->u.dst;
 618
 619         /*
 620          *      Push down and install the IPIP header.
 621          */
 622
 623         iph                     =       ip_hdr(skb);
 624         iph->version            =       4;
 625         iph->ihl                =       sizeof(struct iphdr)>>2;
 626         iph->frag_off           =       df;
 627         iph->protocol           =       IPPROTO_IPIP;
 628         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 629         iph->daddr              =       rt->rt_dst;
 630         iph->saddr              =       rt->rt_src;
 631
             /* A configured TTL of 0 means: copy the TTL of the inner header. */
 632         if ((iph->ttl = tiph->ttl) == 0)
 633                 iph->ttl        =       old_iph->ttl;
 634
 635         nf_reset(skb);
 636
             /*
              * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file
              * and takes no arguments, so it presumably refers to locals of this
              * function (such as skb, iph, rt, stats) by name -- confirm before
              * renaming any of them.
              */
 637         IPTUNNEL_XMIT();
 638         tunnel->recursion--;
 639         return 0;
 640
 641 tx_error_icmp:
 642         dst_link_failure(skb);
 643 tx_error:
 644         stats->tx_errors++;
 645         dev_kfree_skb(skb);
 646         tunnel->recursion--;
 647         return 0;
 648 }
 649
 650 static void ipip_tunnel_bind_dev(struct net_device *dev)
 651 {
 652         struct net_device *tdev = NULL;
 653         struct ip_tunnel *tunnel;
 654         struct iphdr *iph;
 655
 656         tunnel = netdev_priv(dev);
 657         iph = &tunnel->parms.iph;
 658
 659         if (iph->daddr) {
 660                 struct flowi fl = { .oif = tunnel->parms.link,
 661                                     .nl_u = { .ip4_u =
 662                                               { .daddr = iph->daddr,
 663                                                 .saddr = iph->saddr,
 664                                                 .tos = RT_TOS(iph->tos) } },
 665                                     .proto = IPPROTO_IPIP };
 666                 struct rtable *rt;
 667                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
 668                         tdev = rt->u.dst.dev;
 669                         ip_rt_put(rt);
 670                 }
 671                 dev->flags |= IFF_POINTOPOINT;
 672         }
 673
 674         if (!tdev && tunnel->parms.link)
 675                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
 676
 677         if (tdev) {
 678                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 679                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 680         }
 681         dev->iflink = tunnel->parms.link;
 682 }
 683
 684 static int
 685 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 686 {
             /*
              * Tunnel configuration ioctl, installed as dev->do_ioctl by
              * ipip_tunnel_setup().  Parameters are exchanged with user space as
              * a struct ip_tunnel_parm at ifr->ifr_ifru.ifru_data.
              *
              *   SIOCGETTUNNEL - copy a tunnel's parameters to user space
              *   SIOCADDTUNNEL - find or create the tunnel described by the caller
              *   SIOCCHGTUNNEL - change an existing tunnel
              *   SIOCDELTUNNEL - unregister a tunnel device
              *
              * All commands except SIOCGETTUNNEL require CAP_NET_ADMIN.
              * Returns 0 on success or a negative errno; unknown commands yield
              * -EINVAL.
              */
 687         int err = 0;
 688         struct ip_tunnel_parm p;
 689         struct ip_tunnel *t;
 690
 691         switch (cmd) {
 692         case SIOCGETTUNNEL:
                     /*
                      * Issued on ipip_fb_tunnel_dev: look the tunnel up by the
                      * caller's parameters.  In every other case, or when nothing
                      * matches, report the parameters of dev itself.
                      */
 693                 t = NULL;
 694                 if (dev == ipip_fb_tunnel_dev) {
 695                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 696                                 err = -EFAULT;
 697                                 break;
 698                         }
 699                         t = ipip_tunnel_locate(&p, 0);
 700                 }
 701                 if (t == NULL)
 702                         t = netdev_priv(dev);
 703                 memcpy(&p, &t->parms, sizeof(p));
 704                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 705                         err = -EFAULT;
 706                 break;
 707
 708         case SIOCADDTUNNEL:
 709         case SIOCCHGTUNNEL:
 710                 err = -EPERM;
 711                 if (!capable(CAP_NET_ADMIN))
 712                         goto done;
 713
 714                 err = -EFAULT;
 715                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 716                         goto done;
 717
                     /*
                      * The header template must be plain IPv4/IPIP without
                      * options, and DF is the only bit allowed in frag_off.
                      */
 718                 err = -EINVAL;
 719                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 720                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 721                         goto done;
                     /* A fixed (non-zero) TTL forces DF on the outer header. */
 722                 if (p.iph.ttl)
 723                         p.iph.frag_off |= htons(IP_DF);
 724
                     /* Only SIOCADDTUNNEL may create a missing tunnel. */
 725                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 726
 727                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 728                         if (t != NULL) {
                                     /* The new address pair belongs to another device. */
 729                                 if (t->dev != dev) {
 730                                         err = -EEXIST;
 731                                         break;
 732                                 }
 733                         } else {
                                     /*
                                      * The change must not turn a point-to-point
                                      * tunnel into one without a remote address,
                                      * or the other way round.
                                      */
 734                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 735                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 736                                         err = -EINVAL;
 737                                         break;
 738                                 }
                                     /*
                                      * Re-key this device's tunnel: take it off its
                                      * current list, update the addresses, then link
                                      * it into the bucket for the new pair.
                                      */
 739                                 t = netdev_priv(dev);
 740                                 ipip_tunnel_unlink(t);
 741                                 t->parms.iph.saddr = p.iph.saddr;
 742                                 t->parms.iph.daddr = p.iph.daddr;
 743                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 744                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 745                                 ipip_tunnel_link(t);
 746                                 netdev_state_change(dev);
 747                         }
 748                 }
 749
 750                 if (t) {
 751                         err = 0;
 752                         if (cmd == SIOCCHGTUNNEL) {
 753                                 t->parms.iph.ttl = p.iph.ttl;
 754                                 t->parms.iph.tos = p.iph.tos;
 755                                 t->parms.iph.frag_off = p.iph.frag_off;
 756                                 if (t->parms.link != p.link) {
 757                                         t->parms.link = p.link;
                                             /*
                                              * NOTE(review): when the ioctl is issued
                                              * on ipip_fb_tunnel_dev, t may belong to
                                              * a different device than dev, yet dev
                                              * is what gets re-bound and notified
                                              * here -- confirm this is intended.
                                              */
 758                                         ipip_tunnel_bind_dev(dev);
 759                                         netdev_state_change(dev);
 760                                 }
 761                         }
                             /* Report the resulting parameters back to the caller. */
 762                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 763                                 err = -EFAULT;
 764                 } else
 765                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 766                 break;
 767
 768         case SIOCDELTUNNEL:
 769                 err = -EPERM;
 770                 if (!capable(CAP_NET_ADMIN))
 771                         goto done;
 772
                     /*
                      * Issued on ipip_fb_tunnel_dev: the tunnel to delete is named
                      * by the caller's parameters, and ipip_fb_tunnel_dev itself
                      * may not be deleted this way.  Otherwise dev is deleted.
                      */
 773                 if (dev == ipip_fb_tunnel_dev) {
 774                         err = -EFAULT;
 775                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 776                                 goto done;
 777                         err = -ENOENT;
 778                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 779                                 goto done;
 780                         err = -EPERM;
 781                         if (t->dev == ipip_fb_tunnel_dev)
 782                                 goto done;
 783                         dev = t->dev;
 784                 }
 785                 unregister_netdevice(dev);
 786                 err = 0;
 787                 break;
 788
 789         default:
 790                 err = -EINVAL;
 791         }
 792
 793 done:
 794         return err;
 795 }
 796
 797 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 798 {
 799         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 800 }
 801
 802 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 803 {
 804         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 805                 return -EINVAL;
 806         dev->mtu = new_mtu;
 807         return 0;
 808 }
 809
 810 static void ipip_tunnel_setup(struct net_device *dev)
 811 {
 812         dev->uninit             = ipip_tunnel_uninit;
 813         dev->hard_start_xmit    = ipip_tunnel_xmit;
 814         dev->get_stats          = ipip_tunnel_get_stats;
 815         dev->do_ioctl           = ipip_tunnel_ioctl;
 816         dev->change_mtu         = ipip_tunnel_change_mtu;
 817         dev->destructor         = free_netdev;
 818
 819         dev->type               = ARPHRD_TUNNEL;
 820         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 821         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 822         dev->flags              = IFF_NOARP;
 823         dev->iflink             = 0;
 824         dev->addr_len           = 4;
 825 }
 826
 827 static int ipip_tunnel_init(struct net_device *dev)
 828 {
 829         struct ip_tunnel *tunnel;
 830
 831         tunnel = netdev_priv(dev);
 832
 833         tunnel->dev = dev;
 834         strcpy(tunnel->parms.name, dev->name);
 835
 836         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 837         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 838
 839         ipip_tunnel_bind_dev(dev);
 840
 841         return 0;
 842 }
 843
 844 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 845 {
 846         struct ip_tunnel *tunnel = netdev_priv(dev);
 847         struct iphdr *iph = &tunnel->parms.iph;
 848
 849         tunnel->dev = dev;
 850         strcpy(tunnel->parms.name, dev->name);
 851
 852         iph->version            = 4;
 853         iph->protocol           = IPPROTO_IPIP;
 854         iph->ihl                = 5;
 855
 856         dev_hold(dev);
 857         tunnels_wc[0]           = tunnel;
 858         return 0;
 859 }
 860
 861 static struct xfrm_tunnel ipip_handler = {
 862         .handler        =       ipip_rcv,
 863         .err_handler    =       ipip_err,
 864         .priority       =       1,
 865 };
 866
 867 static char banner[] __initdata =
 868         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 869
 870 static int __init ipip_init(void)
 871 {
 872         int err;
 873
 874         printk(banner);
 875
 876         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 877                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 878                 return -EAGAIN;
 879         }
 880
 881         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 882                                            "tunl0",
 883                                            ipip_tunnel_setup);
 884         if (!ipip_fb_tunnel_dev) {
 885                 err = -ENOMEM;
 886                 goto err1;
 887         }
 888
 889         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 890
 891         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 892                 goto err2;
 893  out:
 894         return err;
 895  err2:
 896         free_netdev(ipip_fb_tunnel_dev);
 897  err1:
 898         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 899         goto out;
 900 }
 901
 902 static void __exit ipip_destroy_tunnels(void)
 903 {
 904         int prio;
 905
 906         for (prio = 1; prio < 4; prio++) {
 907                 int h;
 908                 for (h = 0; h < HASH_SIZE; h++) {
 909                         struct ip_tunnel *t;
 910                         while ((t = tunnels[prio][h]) != NULL)
 911                                 unregister_netdevice(t->dev);
 912                 }
 913         }
 914 }
 915
 916 static void __exit ipip_fini(void)
 917 {
 918         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 919                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 920
 921         rtnl_lock();
 922         ipip_destroy_tunnels();
 923         unregister_netdevice(ipip_fb_tunnel_dev);
 924         rtnl_unlock();
 925 }
 926
 927 module_init(ipip_init);
 928 module_exit(ipip_fini);
 929 MODULE_LICENSE("GPL");