/*
 *      Linux NET3:     IP/IP protocol decoder.
 *
 *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
 *
 *      Authors:
 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *      Fixes:
 *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
 *                                      a module taking up 2 pages).
 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *                                      to keep ip_forward happy.
 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
 *                                      I do not want to merge them together.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

        The purpose of this driver is to provide an IP tunnel through
        which you can tunnel network traffic transparently across subnets.

        This was written by looking at Nick Holloway's dummy driver
        Thanks for the great code!

                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95

        Minor tweaks:
                Cleaned up the code a little and added some pre-1.3.0 tweaks.
                dev->hard_header/hard_header_len changed to use no headers.
                Comments/bracketing tweaked.
                Made the tunnels use dev->name not tunnel: when error reporting.
                Added tx_dropped stat

                -Alan Cox       (Alan.Cox@linux.org) 21 March 95

        Reworked:
                Changed to tunnel to destination gateway in addition to the
                        tunnel's pointopoint address
                Almost completely rewritten
                Note:  There is currently no firewall or ICMP handling done.

                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

        When the tunnel_xmit() function is called, the skb contains the
        packet to be sent (plus a great deal of extra info), and dev
        contains the tunnel device that _we_ are.

        When we are passed a packet, we are expected to fill in the
        source address with our source IP address.

        What is the proper way to allocate, copy and free a buffer?
        After you allocate it, it is a "0 length" chunk of memory
        starting at zero.  If you want to add headers to the buffer
        later, you'll have to call "skb_reserve(skb, amount)" with
        the amount of memory you want reserved.  Then, you call
        "skb_put(skb, amount)" with the amount of space you want in
        the buffer.  skb_put() returns a pointer to the top (#0) of
        that buffer.  skb->len is set to the amount of space you have
        "allocated" with skb_put().  You can then write up to skb->len
        bytes to that buffer.  If you need more, you can call skb_put()
        again with the additional amount of space you need.  You can
        find out how much more space you can allocate by calling
        "skb_tailroom(skb)".
        Now, to add header space, call "skb_push(skb, header_len)".
        This creates space at the beginning of the buffer and returns
        a pointer to this new space.  If later you need to strip a
        header from a buffer, call "skb_pull(skb, header_len)".
        skb_headroom() will return how much space is left at the top
        of the buffer (before the main data).  Remember, this headroom
        space must be reserved before the skb_put() function is called.
        */
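
/* For illustration only, the pattern described above looks roughly like
   the following sketch (not code used by this driver; "payload" and
   "payload_len" are placeholders):

        struct sk_buff *skb = alloc_skb(LL_MAX_HEADER + payload_len, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;
        skb_reserve(skb, LL_MAX_HEADER);            reserve headroom first
        memcpy(skb_put(skb, payload_len),           append payload_len bytes;
               payload, payload_len);               skb->len is now payload_len
        skb_push(skb, sizeof(struct iphdr));        later, prepend a header into
                                                    the reserved headroom
        kfree_skb(skb);                             free when finished
*/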

/*
   This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static int ipip_net_id;
struct ipip_net {
        struct ip_tunnel *tunnels_r_l[HASH_SIZE];
        struct ip_tunnel *tunnels_r[HASH_SIZE];
        struct ip_tunnel *tunnels_l[HASH_SIZE];
        struct ip_tunnel *tunnels_wc[1];
        struct ip_tunnel **tunnels[4];

        struct net_device *fb_tunnel_dev;
};

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

static DEFINE_RWLOCK(ipip_lock);

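/* Look up the tunnel matching an outer IP header: first by exact
   (remote, local) address pair, then by remote address only, then by
   local address only, and finally the wildcard tunnel.  Only devices
   that are up are considered.  Callers hold ipip_lock. */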
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
                __be32 remote, __be32 local)
{
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(local);
        struct ip_tunnel *t;
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = ipn->tunnels_r[h0]; t; t = t->next) {
                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        for (t = ipn->tunnels_l[h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
                        return t;
        }
        if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
                return t;
        return NULL;
}

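/* Pick the hash bucket for a set of tunnel parameters.  The table index
   encodes which addresses are set (bit 1 = remote, bit 0 = local), so
   3 = both, 2 = remote only, 1 = local only, 0 = the single wildcard
   slot; the bucket within the table is the XOR of the address hashes. */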
static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
                struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        unsigned h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
                struct ip_tunnel *t)
{
        return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel **tp;

        for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
                        write_lock_bh(&ipip_lock);
                        *tp = t->next;
                        write_unlock_bh(&ipip_lock);
                        break;
                }
        }
}

static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel **tp = ipip_bucket(ipn, t);

        t->next = *tp;
        write_lock_bh(&ipip_lock);
        *tp = t;
        write_unlock_bh(&ipip_lock);
}

static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
                struct ip_tunnel_parm *parms, int create)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        struct ip_tunnel *t, **tp, *nt;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
                        return t;
        }
        if (!create)
                return NULL;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                sprintf(name, "tunl%%d");

        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (strchr(name, '%')) {
                if (dev_alloc_name(dev, name) < 0)
                        goto failed_free;
        }

        nt = netdev_priv(dev);
        dev->init = ipip_tunnel_init;
        nt->parms = *parms;

        if (register_netdevice(dev) < 0)
                goto failed_free;

        dev_hold(dev);
        ipip_tunnel_link(ipn, nt);
        return nt;

failed_free:
        free_netdev(dev);
        return NULL;
}

static void ipip_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        if (dev == ipn->fb_tunnel_dev) {
                write_lock_bh(&ipip_lock);
                ipn->tunnels_wc[0] = NULL;
                write_unlock_bh(&ipip_lock);
        } else
                ipip_tunnel_unlink(ipn, netdev_priv(dev));
        dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        struct iphdr *iph = (struct iphdr*)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        int err;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        err = -ENOENT;

        read_lock(&ipip_lock);
        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;

        err = 0;
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return err;
}

static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
                                        struct sk_buff *skb)
{
        struct iphdr *inner_iph = ip_hdr(skb);

        if (INET_ECN_is_ce(outer_iph->tos))
                IP_ECN_set_ce(inner_iph);
}

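/* Receive path: find the tunnel matching the outer addresses, reset the
   header pointers so the inner packet becomes the network header, update
   rx statistics and feed the packet back to the stack via netif_rx(). */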
static int ipip_rcv(struct sk_buff *skb)
{
        struct ip_tunnel *tunnel;
        const struct iphdr *iph = ip_hdr(skb);

        read_lock(&ipip_lock);
        if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
                                        iph->saddr, iph->daddr)) != NULL) {
                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                        read_unlock(&ipip_lock);
                        kfree_skb(skb);
                        return 0;
                }

                secpath_reset(skb);

                skb->mac_header = skb->network_header;
                skb_reset_network_header(skb);
                skb->protocol = htons(ETH_P_IP);
                skb->pkt_type = PACKET_HOST;

                tunnel->stat.rx_packets++;
                tunnel->stat.rx_bytes += skb->len;
                skb->dev = tunnel->dev;
                dst_release(skb->dst);
                skb->dst = NULL;
                nf_reset(skb);
                ipip_ecn_decapsulate(iph, skb);
                netif_rx(skb);
                read_unlock(&ipip_lock);
                return 0;
        }
        read_unlock(&ipip_lock);

        return -1;
}

/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                        /* Device to other host */
        struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        int    mtu;

        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                if ((rt = skb->rtable) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        tunnel->stat.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        tdev = rt->u.dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        df |= (old_iph->frag_off&htons(IP_DF));

        if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                old_iph = ip_hdr(skb);
        }

        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);

        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}

static void ipip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        if (iph->daddr) {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .saddr = iph->saddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                struct rtable *rt;
                if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        tdev = rt->u.dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

        if (tdev) {
                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
                dev->mtu = tdev->mtu - sizeof(struct iphdr);
        }
        dev->iflink = tunnel->parms.link;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(ipn, t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(ipn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        ipip_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}

static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
{
        return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

static void ipip_tunnel_setup(struct net_device *dev)
{
        dev->uninit             = ipip_tunnel_uninit;
        dev->hard_start_xmit    = ipip_tunnel_xmit;
        dev->get_stats          = ipip_tunnel_get_stats;
        dev->do_ioctl           = ipip_tunnel_ioctl;
        dev->change_mtu         = ipip_tunnel_change_mtu;
        dev->destructor         = free_netdev;

        dev->type               = ARPHRD_TUNNEL;
        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

static int ipip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;

        tunnel = netdev_priv(dev);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        ipip_tunnel_bind_dev(dev);

        return 0;
}

static int ipip_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        iph->version            = 4;
        iph->protocol           = IPPROTO_IPIP;
        iph->ihl                = 5;

        dev_hold(dev);
        ipn->tunnels_wc[0]      = tunnel;
        return 0;
}

static struct xfrm_tunnel ipip_handler = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};

static char banner[] __initdata =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static void ipip_destroy_tunnels(struct ipip_net *ipn)
{
        int prio;

        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;
                        while ((t = ipn->tunnels[prio][h]) != NULL)
                                unregister_netdevice(t->dev);
                }
        }
}

static int ipip_init_net(struct net *net)
{
        int err;
        struct ipip_net *ipn;

        err = -ENOMEM;
        ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
        if (ipn == NULL)
                goto err_alloc;

        err = net_assign_generic(net, ipip_net_id, ipn);
        if (err < 0)
                goto err_assign;

        ipn->tunnels[0] = ipn->tunnels_wc;
        ipn->tunnels[1] = ipn->tunnels_l;
        ipn->tunnels[2] = ipn->tunnels_r;
        ipn->tunnels[3] = ipn->tunnels_r_l;

        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
                                           "tunl0",
                                           ipip_tunnel_setup);
        if (!ipn->fb_tunnel_dev) {
                err = -ENOMEM;
                goto err_alloc_dev;
        }

        ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
        dev_net_set(ipn->fb_tunnel_dev, net);

        if ((err = register_netdev(ipn->fb_tunnel_dev)))
                goto err_reg_dev;

        return 0;

err_reg_dev:
        free_netdev(ipn->fb_tunnel_dev);
err_alloc_dev:
        /* nothing */
err_assign:
        kfree(ipn);
err_alloc:
        return err;
}

static void ipip_exit_net(struct net *net)
{
        struct ipip_net *ipn;

        ipn = net_generic(net, ipip_net_id);
        rtnl_lock();
        ipip_destroy_tunnels(ipn);
        unregister_netdevice(ipn->fb_tunnel_dev);
        rtnl_unlock();
        kfree(ipn);
}

static struct pernet_operations ipip_net_ops = {
        .init = ipip_init_net,
        .exit = ipip_exit_net,
};

static int __init ipip_init(void)
{
        int err;

        printk(banner);

        if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
                printk(KERN_INFO "ipip init: can't register tunnel\n");
                return -EAGAIN;
        }

        err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
        if (err)
                xfrm4_tunnel_deregister(&ipip_handler, AF_INET);

        return err;
}

static void __exit ipip_fini(void)
{
        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
                printk(KERN_INFO "ipip close: can't deregister tunnel\n");

        unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");