git.oblomov.eu Git - linux-2.6/blob - net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
   5  *
   6  *      Authors:
   7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   8  *
   9  *      Fixes:
  10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
  11  *                                      a module taking up 2 pages).
  12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  13  *                                      to keep ip_forward happy.
  14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  16  *              David Woodhouse :       Perform some basic ICMP handling.
  17  *                                      IPIP Routing without decapsulation.
  18  *              Carlos Picoto   :       GRE over IP support
  19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  20  *                                      I do not want to merge them together.
  21  *
  22  *      This program is free software; you can redistribute it and/or
  23  *      modify it under the terms of the GNU General Public License
  24  *      as published by the Free Software Foundation; either version
  25  *      2 of the License, or (at your option) any later version.
  26  *
  27  */
  28
  29 /* tunnel.c: an IP tunnel driver
  30
  31         The purpose of this driver is to provide an IP tunnel through
  32         which you can tunnel network traffic transparently across subnets.
  33
  34         This was written by looking at Nick Holloway's dummy driver
  35         Thanks for the great code!
  36
  37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  38
  39         Minor tweaks:
  40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  41                 dev->hard_header/hard_header_len changed to use no headers.
  42                 Comments/bracketing tweaked.
  43                 Made the tunnels use dev->name not tunnel: when error reporting.
  44                 Added tx_dropped stat
  45
  46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
  47
  48         Reworked:
  49                 Changed to tunnel to destination gateway in addition to the
  50                         tunnel's pointopoint address
  51                 Almost completely rewritten
  52                 Note:  There is currently no firewall or ICMP handling done.
  53
  54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  55
  56 */
  57
  58 /* Things I wish I had known when writing the tunnel driver:
  59
  60         When the tunnel_xmit() function is called, the skb contains the
  61         packet to be sent (plus a great deal of extra info), and dev
  62         contains the tunnel device that _we_ are.
  63
  64         When we are passed a packet, we are expected to fill in the
  65         source address with our source IP address.
  66
  67         What is the proper way to allocate, copy and free a buffer?
  68         After you allocate it, it is a "0 length" chunk of memory
  69         starting at zero.  If you want to add headers to the buffer
  70         later, you'll have to call "skb_reserve(skb, amount)" with
  71         the amount of memory you want reserved.  Then, you call
  72         "skb_put(skb, amount)" with the amount of space you want in
  73         the buffer.  skb_put() returns a pointer to the top (#0) of
  74         that buffer.  skb->len is set to the amount of space you have
  75         "allocated" with skb_put().  You can then write up to skb->len
  76         bytes to that buffer.  If you need more, you can call skb_put()
  77         again with the additional amount of space you need.  You can
  78         find out how much more space you can allocate by calling
  79         "skb_tailroom(skb)".
  80         Now, to add header space, call "skb_push(skb, header_len)".
  81         This creates space at the beginning of the buffer and returns
  82         a pointer to this new space.  If later you need to strip a
  83         header from a buffer, call "skb_pull(skb, header_len)".
  84         skb_headroom() will return how much space is left at the top
  85         of the buffer (before the main data).  Remember, this headroom
  86         space must be reserved before the skb_put() function is called.
  87         */
  88
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
  94
  95
  96 #include <linux/capability.h>
  97 #include <linux/module.h>
  98 #include <linux/types.h>
  99 #include <linux/kernel.h>
 100 #include <asm/uaccess.h>
 101 #include <linux/skbuff.h>
 102 #include <linux/netdevice.h>
 103 #include <linux/in.h>
 104 #include <linux/tcp.h>
 105 #include <linux/udp.h>
 106 #include <linux/if_arp.h>
 107 #include <linux/mroute.h>
 108 #include <linux/init.h>
 109 #include <linux/netfilter_ipv4.h>
 110 #include <linux/if_ether.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ipip.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118
 119 #define HASH_SIZE  16
 120 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 121
 122 static int ipip_fb_tunnel_init(struct net_device *dev);
 123 static int ipip_tunnel_init(struct net_device *dev);
 124 static void ipip_tunnel_setup(struct net_device *dev);
 125
 126 static struct net_device *ipip_fb_tunnel_dev;
 127
 128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
 129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
 130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
 131 static struct ip_tunnel *tunnels_wc[1];
 132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
 133
 134 static DEFINE_RWLOCK(ipip_lock);
 135
 136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 137 {
 138         unsigned h0 = HASH(remote);
 139         unsigned h1 = HASH(local);
 140         struct ip_tunnel *t;
 141
 142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 143                 if (local == t->parms.iph.saddr &&
 144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 145                         return t;
 146         }
 147         for (t = tunnels_r[h0]; t; t = t->next) {
 148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 149                         return t;
 150         }
 151         for (t = tunnels_l[h1]; t; t = t->next) {
 152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 153                         return t;
 154         }
 155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
 156                 return t;
 157         return NULL;
 158 }
 159
 160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 161 {
 162         __be32 remote = parms->iph.daddr;
 163         __be32 local = parms->iph.saddr;
 164         unsigned h = 0;
 165         int prio = 0;
 166
 167         if (remote) {
 168                 prio |= 2;
 169                 h ^= HASH(remote);
 170         }
 171         if (local) {
 172                 prio |= 1;
 173                 h ^= HASH(local);
 174         }
 175         return &tunnels[prio][h];
 176 }
 177
 178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 179 {
 180         return __ipip_bucket(&t->parms);
 181 }
 182
 183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 184 {
 185         struct ip_tunnel **tp;
 186
 187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
 188                 if (t == *tp) {
 189                         write_lock_bh(&ipip_lock);
 190                         *tp = t->next;
 191                         write_unlock_bh(&ipip_lock);
 192                         break;
 193                 }
 194         }
 195 }
 196
 197 static void ipip_tunnel_link(struct ip_tunnel *t)
 198 {
 199         struct ip_tunnel **tp = ipip_bucket(t);
 200
 201         t->next = *tp;
 202         write_lock_bh(&ipip_lock);
 203         *tp = t;
 204         write_unlock_bh(&ipip_lock);
 205 }
 206
 207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 208 {
 209         __be32 remote = parms->iph.daddr;
 210         __be32 local = parms->iph.saddr;
 211         struct ip_tunnel *t, **tp, *nt;
 212         struct net_device *dev;
 213         char name[IFNAMSIZ];
 214
 215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 217                         return t;
 218         }
 219         if (!create)
 220                 return NULL;
 221
 222         if (parms->name[0])
 223                 strlcpy(name, parms->name, IFNAMSIZ);
 224         else
 225                 sprintf(name, "tunl%%d");
 226
 227         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 228         if (dev == NULL)
 229                 return NULL;
 230
 231         nt = netdev_priv(dev);
 232         dev->init = ipip_tunnel_init;
 233         nt->parms = *parms;
 234
 235         if (register_netdevice(dev) < 0) {
 236                 free_netdev(dev);
 237                 goto failed;
 238         }
 239
 240         dev_hold(dev);
 241         ipip_tunnel_link(nt);
 242         return nt;
 243
 244 failed:
 245         return NULL;
 246 }
 247
 248 static void ipip_tunnel_uninit(struct net_device *dev)
 249 {
 250         if (dev == ipip_fb_tunnel_dev) {
 251                 write_lock_bh(&ipip_lock);
 252                 tunnels_wc[0] = NULL;
 253                 write_unlock_bh(&ipip_lock);
 254         } else
 255                 ipip_tunnel_unlink(netdev_priv(dev));
 256         dev_put(dev);
 257 }
 258
 259 static int ipip_err(struct sk_buff *skb, u32 info)
 260 {
 261 #ifndef I_WISH_WORLD_WERE_PERFECT
 262
 263 /* It is not :-( All the routers (except for Linux) return only
 264    8 bytes of packet payload. It means, that precise relaying of
 265    ICMP in the real Internet is absolutely infeasible.
 266  */
 267         struct iphdr *iph = (struct iphdr*)skb->data;
 268         const int type = icmp_hdr(skb)->type;
 269         const int code = icmp_hdr(skb)->code;
 270         struct ip_tunnel *t;
 271         int err;
 272
 273         switch (type) {
 274         default:
 275         case ICMP_PARAMETERPROB:
 276                 return 0;
 277
 278         case ICMP_DEST_UNREACH:
 279                 switch (code) {
 280                 case ICMP_SR_FAILED:
 281                 case ICMP_PORT_UNREACH:
 282                         /* Impossible event. */
 283                         return 0;
 284                 case ICMP_FRAG_NEEDED:
 285                         /* Soft state for pmtu is maintained by IP core. */
 286                         return 0;
 287                 default:
 288                         /* All others are translated to HOST_UNREACH.
 289                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 290                            I believe they are just ether pollution. --ANK
 291                          */
 292                         break;
 293                 }
 294                 break;
 295         case ICMP_TIME_EXCEEDED:
 296                 if (code != ICMP_EXC_TTL)
 297                         return 0;
 298                 break;
 299         }
 300
 301         err = -ENOENT;
 302
 303         read_lock(&ipip_lock);
 304         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
 305         if (t == NULL || t->parms.iph.daddr == 0)
 306                 goto out;
 307
 308         err = 0;
 309         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 310                 goto out;
 311
 312         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 313                 t->err_count++;
 314         else
 315                 t->err_count = 1;
 316         t->err_time = jiffies;
 317 out:
 318         read_unlock(&ipip_lock);
 319         return err;
 320 #else
 321         struct iphdr *iph = (struct iphdr*)dp;
 322         int hlen = iph->ihl<<2;
 323         struct iphdr *eiph;
 324         const int type = icmp_hdr(skb)->type;
 325         const int code = icmp_hdr(skb)->code;
 326         int rel_type = 0;
 327         int rel_code = 0;
 328         __be32 rel_info = 0;
 329         __u32 n = 0;
 330         struct sk_buff *skb2;
 331         struct flowi fl;
 332         struct rtable *rt;
 333
 334         if (len < hlen + sizeof(struct iphdr))
 335                 return 0;
 336         eiph = (struct iphdr*)(dp + hlen);
 337
 338         switch (type) {
 339         default:
 340                 return 0;
 341         case ICMP_PARAMETERPROB:
 342                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 343                 if (n < hlen)
 344                         return 0;
 345
 346                 /* So... This guy found something strange INSIDE encapsulated
 347                    packet. Well, he is fool, but what can we do ?
 348                  */
 349                 rel_type = ICMP_PARAMETERPROB;
 350                 rel_info = htonl((n - hlen) << 24);
 351                 break;
 352
 353         case ICMP_DEST_UNREACH:
 354                 switch (code) {
 355                 case ICMP_SR_FAILED:
 356                 case ICMP_PORT_UNREACH:
 357                         /* Impossible event. */
 358                         return 0;
 359                 case ICMP_FRAG_NEEDED:
 360                         /* And it is the only really necessary thing :-) */
 361                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 362                         if (n < hlen+68)
 363                                 return 0;
 364                         n -= hlen;
 365                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 366                         if (n > ntohs(eiph->tot_len))
 367                                 return 0;
 368                         rel_info = htonl(n);
 369                         break;
 370                 default:
 371                         /* All others are translated to HOST_UNREACH.
 372                            rfc2003 contains "deep thoughts" about NET_UNREACH,
 373                            I believe, it is just ether pollution. --ANK
 374                          */
 375                         rel_type = ICMP_DEST_UNREACH;
 376                         rel_code = ICMP_HOST_UNREACH;
 377                         break;
 378                 }
 379                 break;
 380         case ICMP_TIME_EXCEEDED:
 381                 if (code != ICMP_EXC_TTL)
 382                         return 0;
 383                 break;
 384         }
 385
 386         /* Prepare fake skb to feed it to icmp_send */
 387         skb2 = skb_clone(skb, GFP_ATOMIC);
 388         if (skb2 == NULL)
 389                 return 0;
 390         dst_release(skb2->dst);
 391         skb2->dst = NULL;
 392         skb_pull(skb2, skb->data - (u8*)eiph);
 393         skb_reset_network_header(skb2);
 394
 395         /* Try to guess incoming interface */
 396         memset(&fl, 0, sizeof(fl));
 397         fl.fl4_daddr = eiph->saddr;
 398         fl.fl4_tos = RT_TOS(eiph->tos);
 399         fl.proto = IPPROTO_IPIP;
 400         if (ip_route_output_key(&init_net, &rt, &key)) {
 401                 kfree_skb(skb2);
 402                 return 0;
 403         }
 404         skb2->dev = rt->u.dst.dev;
 405
 406         /* route "incoming" packet */
 407         if (rt->rt_flags&RTCF_LOCAL) {
 408                 ip_rt_put(rt);
 409                 rt = NULL;
 410                 fl.fl4_daddr = eiph->daddr;
 411                 fl.fl4_src = eiph->saddr;
 412                 fl.fl4_tos = eiph->tos;
 413                 if (ip_route_output_key(&init_net, &rt, &fl) ||
 414                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 415                         ip_rt_put(rt);
 416                         kfree_skb(skb2);
 417                         return 0;
 418                 }
 419         } else {
 420                 ip_rt_put(rt);
 421                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 422                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
 423                         kfree_skb(skb2);
 424                         return 0;
 425                 }
 426         }
 427
 428         /* change mtu on this route */
 429         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 430                 if (n > dst_mtu(skb2->dst)) {
 431                         kfree_skb(skb2);
 432                         return 0;
 433                 }
 434                 skb2->dst->ops->update_pmtu(skb2->dst, n);
 435         } else if (type == ICMP_TIME_EXCEEDED) {
 436                 struct ip_tunnel *t = netdev_priv(skb2->dev);
 437                 if (t->parms.iph.ttl) {
 438                         rel_type = ICMP_DEST_UNREACH;
 439                         rel_code = ICMP_HOST_UNREACH;
 440                 }
 441         }
 442
 443         icmp_send(skb2, rel_type, rel_code, rel_info);
 444         kfree_skb(skb2);
 445         return 0;
 446 #endif
 447 }
 448
 449 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 450                                         struct sk_buff *skb)
 451 {
 452         struct iphdr *inner_iph = ip_hdr(skb);
 453
 454         if (INET_ECN_is_ce(outer_iph->tos))
 455                 IP_ECN_set_ce(inner_iph);
 456 }
 457
 458 static int ipip_rcv(struct sk_buff *skb)
 459 {
 460         struct ip_tunnel *tunnel;
 461         const struct iphdr *iph = ip_hdr(skb);
 462
 463         read_lock(&ipip_lock);
 464         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 465                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 466                         read_unlock(&ipip_lock);
 467                         kfree_skb(skb);
 468                         return 0;
 469                 }
 470
 471                 secpath_reset(skb);
 472
 473                 skb->mac_header = skb->network_header;
 474                 skb_reset_network_header(skb);
 475                 skb->protocol = htons(ETH_P_IP);
 476                 skb->pkt_type = PACKET_HOST;
 477
 478                 tunnel->stat.rx_packets++;
 479                 tunnel->stat.rx_bytes += skb->len;
 480                 skb->dev = tunnel->dev;
 481                 dst_release(skb->dst);
 482                 skb->dst = NULL;
 483                 nf_reset(skb);
 484                 ipip_ecn_decapsulate(iph, skb);
 485                 netif_rx(skb);
 486                 read_unlock(&ipip_lock);
 487                 return 0;
 488         }
 489         read_unlock(&ipip_lock);
 490
 491         return -1;
 492 }
 493
 494 /*
 495  *      This function assumes it is being called from dev_queue_xmit()
 496  *      and that skb is filled properly by that function.
 497  */
 498
 499 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 500 {
 501         struct ip_tunnel *tunnel = netdev_priv(dev);
 502         struct net_device_stats *stats = &tunnel->stat;
 503         struct iphdr  *tiph = &tunnel->parms.iph;
 504         u8     tos = tunnel->parms.iph.tos;
 505         __be16 df = tiph->frag_off;
 506         struct rtable *rt;                      /* Route to the other host */
 507         struct net_device *tdev;                        /* Device to other host */
 508         struct iphdr  *old_iph = ip_hdr(skb);
 509         struct iphdr  *iph;                     /* Our new IP header */
 510         unsigned int max_headroom;              /* The extra header space needed */
 511         __be32 dst = tiph->daddr;
 512         int    mtu;
 513
 514         if (tunnel->recursion++) {
 515                 tunnel->stat.collisions++;
 516                 goto tx_error;
 517         }
 518
 519         if (skb->protocol != htons(ETH_P_IP))
 520                 goto tx_error;
 521
 522         if (tos&1)
 523                 tos = old_iph->tos;
 524
 525         if (!dst) {
 526                 /* NBMA tunnel */
 527                 if ((rt = (struct rtable*)skb->dst) == NULL) {
 528                         tunnel->stat.tx_fifo_errors++;
 529                         goto tx_error;
 530                 }
 531                 if ((dst = rt->rt_gateway) == 0)
 532                         goto tx_error_icmp;
 533         }
 534
 535         {
 536                 struct flowi fl = { .oif = tunnel->parms.link,
 537                                     .nl_u = { .ip4_u =
 538                                               { .daddr = dst,
 539                                                 .saddr = tiph->saddr,
 540                                                 .tos = RT_TOS(tos) } },
 541                                     .proto = IPPROTO_IPIP };
 542                 if (ip_route_output_key(&init_net, &rt, &fl)) {
 543                         tunnel->stat.tx_carrier_errors++;
 544                         goto tx_error_icmp;
 545                 }
 546         }
 547         tdev = rt->u.dst.dev;
 548
 549         if (tdev == dev) {
 550                 ip_rt_put(rt);
 551                 tunnel->stat.collisions++;
 552                 goto tx_error;
 553         }
 554
 555         if (tiph->frag_off)
 556                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
 557         else
 558                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
 559
 560         if (mtu < 68) {
 561                 tunnel->stat.collisions++;
 562                 ip_rt_put(rt);
 563                 goto tx_error;
 564         }
 565         if (skb->dst)
 566                 skb->dst->ops->update_pmtu(skb->dst, mtu);
 567
 568         df |= (old_iph->frag_off&htons(IP_DF));
 569
 570         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
 571                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 572                 ip_rt_put(rt);
 573                 goto tx_error;
 574         }
 575
 576         if (tunnel->err_count > 0) {
 577                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 578                         tunnel->err_count--;
 579                         dst_link_failure(skb);
 580                 } else
 581                         tunnel->err_count = 0;
 582         }
 583
 584         /*
 585          * Okay, now see if we can stuff it in the buffer as-is.
 586          */
 587         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 588
 589         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 590             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 591                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 592                 if (!new_skb) {
 593                         ip_rt_put(rt);
 594                         stats->tx_dropped++;
 595                         dev_kfree_skb(skb);
 596                         tunnel->recursion--;
 597                         return 0;
 598                 }
 599                 if (skb->sk)
 600                         skb_set_owner_w(new_skb, skb->sk);
 601                 dev_kfree_skb(skb);
 602                 skb = new_skb;
 603                 old_iph = ip_hdr(skb);
 604         }
 605
 606         skb->transport_header = skb->network_header;
 607         skb_push(skb, sizeof(struct iphdr));
 608         skb_reset_network_header(skb);
 609         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 610         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 611                               IPSKB_REROUTED);
 612         dst_release(skb->dst);
 613         skb->dst = &rt->u.dst;
 614
 615         /*
 616          *      Push down and install the IPIP header.
 617          */
 618
 619         iph                     =       ip_hdr(skb);
 620         iph->version            =       4;
 621         iph->ihl                =       sizeof(struct iphdr)>>2;
 622         iph->frag_off           =       df;
 623         iph->protocol           =       IPPROTO_IPIP;
 624         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 625         iph->daddr              =       rt->rt_dst;
 626         iph->saddr              =       rt->rt_src;
 627
 628         if ((iph->ttl = tiph->ttl) == 0)
 629                 iph->ttl        =       old_iph->ttl;
 630
 631         nf_reset(skb);
 632
 633         IPTUNNEL_XMIT();
 634         tunnel->recursion--;
 635         return 0;
 636
 637 tx_error_icmp:
 638         dst_link_failure(skb);
 639 tx_error:
 640         stats->tx_errors++;
 641         dev_kfree_skb(skb);
 642         tunnel->recursion--;
 643         return 0;
 644 }
 645
 646 static void ipip_tunnel_bind_dev(struct net_device *dev)
 647 {
 648         struct net_device *tdev = NULL;
 649         struct ip_tunnel *tunnel;
 650         struct iphdr *iph;
 651
 652         tunnel = netdev_priv(dev);
 653         iph = &tunnel->parms.iph;
 654
 655         if (iph->daddr) {
 656                 struct flowi fl = { .oif = tunnel->parms.link,
 657                                     .nl_u = { .ip4_u =
 658                                               { .daddr = iph->daddr,
 659                                                 .saddr = iph->saddr,
 660                                                 .tos = RT_TOS(iph->tos) } },
 661                                     .proto = IPPROTO_IPIP };
 662                 struct rtable *rt;
 663                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
 664                         tdev = rt->u.dst.dev;
 665                         ip_rt_put(rt);
 666                 }
 667                 dev->flags |= IFF_POINTOPOINT;
 668         }
 669
 670         if (!tdev && tunnel->parms.link)
 671                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
 672
 673         if (tdev) {
 674                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 675                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
 676         }
 677         dev->iflink = tunnel->parms.link;
 678 }
 679
 680 static int
 681 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 682 {
 683         int err = 0;
 684         struct ip_tunnel_parm p;
 685         struct ip_tunnel *t;
 686
 687         switch (cmd) {
 688         case SIOCGETTUNNEL:
 689                 t = NULL;
 690                 if (dev == ipip_fb_tunnel_dev) {
 691                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 692                                 err = -EFAULT;
 693                                 break;
 694                         }
 695                         t = ipip_tunnel_locate(&p, 0);
 696                 }
 697                 if (t == NULL)
 698                         t = netdev_priv(dev);
 699                 memcpy(&p, &t->parms, sizeof(p));
 700                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 701                         err = -EFAULT;
 702                 break;
 703
 704         case SIOCADDTUNNEL:
 705         case SIOCCHGTUNNEL:
 706                 err = -EPERM;
 707                 if (!capable(CAP_NET_ADMIN))
 708                         goto done;
 709
 710                 err = -EFAULT;
 711                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 712                         goto done;
 713
 714                 err = -EINVAL;
 715                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 716                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 717                         goto done;
 718                 if (p.iph.ttl)
 719                         p.iph.frag_off |= htons(IP_DF);
 720
 721                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 722
 723                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 724                         if (t != NULL) {
 725                                 if (t->dev != dev) {
 726                                         err = -EEXIST;
 727                                         break;
 728                                 }
 729                         } else {
 730                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 731                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 732                                         err = -EINVAL;
 733                                         break;
 734                                 }
 735                                 t = netdev_priv(dev);
 736                                 ipip_tunnel_unlink(t);
 737                                 t->parms.iph.saddr = p.iph.saddr;
 738                                 t->parms.iph.daddr = p.iph.daddr;
 739                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
 740                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
 741                                 ipip_tunnel_link(t);
 742                                 netdev_state_change(dev);
 743                         }
 744                 }
 745
 746                 if (t) {
 747                         err = 0;
 748                         if (cmd == SIOCCHGTUNNEL) {
 749                                 t->parms.iph.ttl = p.iph.ttl;
 750                                 t->parms.iph.tos = p.iph.tos;
 751                                 t->parms.iph.frag_off = p.iph.frag_off;
 752                                 if (t->parms.link != p.link) {
 753                                         t->parms.link = p.link;
 754                                         ipip_tunnel_bind_dev(dev);
 755                                         netdev_state_change(dev);
 756                                 }
 757                         }
 758                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 759                                 err = -EFAULT;
 760                 } else
 761                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 762                 break;
 763
 764         case SIOCDELTUNNEL:
 765                 err = -EPERM;
 766                 if (!capable(CAP_NET_ADMIN))
 767                         goto done;
 768
 769                 if (dev == ipip_fb_tunnel_dev) {
 770                         err = -EFAULT;
 771                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 772                                 goto done;
 773                         err = -ENOENT;
 774                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
 775                                 goto done;
 776                         err = -EPERM;
 777                         if (t->dev == ipip_fb_tunnel_dev)
 778                                 goto done;
 779                         dev = t->dev;
 780                 }
 781                 unregister_netdevice(dev);
 782                 err = 0;
 783                 break;
 784
 785         default:
 786                 err = -EINVAL;
 787         }
 788
 789 done:
 790         return err;
 791 }
 792
 793 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
 794 {
 795         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
 796 }
 797
 798 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 799 {
 800         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 801                 return -EINVAL;
 802         dev->mtu = new_mtu;
 803         return 0;
 804 }
 805
 806 static void ipip_tunnel_setup(struct net_device *dev)
 807 {
 808         dev->uninit             = ipip_tunnel_uninit;
 809         dev->hard_start_xmit    = ipip_tunnel_xmit;
 810         dev->get_stats          = ipip_tunnel_get_stats;
 811         dev->do_ioctl           = ipip_tunnel_ioctl;
 812         dev->change_mtu         = ipip_tunnel_change_mtu;
 813         dev->destructor         = free_netdev;
 814
 815         dev->type               = ARPHRD_TUNNEL;
 816         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 817         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 818         dev->flags              = IFF_NOARP;
 819         dev->iflink             = 0;
 820         dev->addr_len           = 4;
 821 }
 822
 823 static int ipip_tunnel_init(struct net_device *dev)
 824 {
 825         struct ip_tunnel *tunnel;
 826
 827         tunnel = netdev_priv(dev);
 828
 829         tunnel->dev = dev;
 830         strcpy(tunnel->parms.name, dev->name);
 831
 832         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 833         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 834
 835         ipip_tunnel_bind_dev(dev);
 836
 837         return 0;
 838 }
 839
 840 static int __init ipip_fb_tunnel_init(struct net_device *dev)
 841 {
 842         struct ip_tunnel *tunnel = netdev_priv(dev);
 843         struct iphdr *iph = &tunnel->parms.iph;
 844
 845         tunnel->dev = dev;
 846         strcpy(tunnel->parms.name, dev->name);
 847
 848         iph->version            = 4;
 849         iph->protocol           = IPPROTO_IPIP;
 850         iph->ihl                = 5;
 851
 852         dev_hold(dev);
 853         tunnels_wc[0]           = tunnel;
 854         return 0;
 855 }
 856
 857 static struct xfrm_tunnel ipip_handler = {
 858         .handler        =       ipip_rcv,
 859         .err_handler    =       ipip_err,
 860         .priority       =       1,
 861 };
 862
 863 static char banner[] __initdata =
 864         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 865
 866 static int __init ipip_init(void)
 867 {
 868         int err;
 869
 870         printk(banner);
 871
 872         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
 873                 printk(KERN_INFO "ipip init: can't register tunnel\n");
 874                 return -EAGAIN;
 875         }
 876
 877         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 878                                            "tunl0",
 879                                            ipip_tunnel_setup);
 880         if (!ipip_fb_tunnel_dev) {
 881                 err = -ENOMEM;
 882                 goto err1;
 883         }
 884
 885         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
 886
 887         if ((err = register_netdev(ipip_fb_tunnel_dev)))
 888                 goto err2;
 889  out:
 890         return err;
 891  err2:
 892         free_netdev(ipip_fb_tunnel_dev);
 893  err1:
 894         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 895         goto out;
 896 }
 897
 898 static void __exit ipip_destroy_tunnels(void)
 899 {
 900         int prio;
 901
 902         for (prio = 1; prio < 4; prio++) {
 903                 int h;
 904                 for (h = 0; h < HASH_SIZE; h++) {
 905                         struct ip_tunnel *t;
 906                         while ((t = tunnels[prio][h]) != NULL)
 907                                 unregister_netdevice(t->dev);
 908                 }
 909         }
 910 }
 911
 912 static void __exit ipip_fini(void)
 913 {
 914         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 915                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 916
 917         rtnl_lock();
 918         ipip_destroy_tunnels();
 919         unregister_netdevice(ipip_fb_tunnel_dev);
 920         rtnl_unlock();
 921 }
 922
 923 module_init(ipip_init);
 924 module_exit(ipip_fini);
 925 MODULE_LICENSE("GPL");