1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non-modular (it's so tiny it's silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name rather than "tunnel:" when reporting errors.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to the destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
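/* A minimal sketch of the reserve/put/push sequence described above.  It is
   illustrative only and is not used by this driver; "TUNNEL_HLEN", "payload"
   and "payload_len" are made-up names for the example:

        struct sk_buff *skb;
        u8 *data;

        skb = alloc_skb(TUNNEL_HLEN + payload_len, GFP_ATOMIC);
        if (skb == NULL)
                return -ENOMEM;
        skb_reserve(skb, TUNNEL_HLEN);           // keep headroom for headers added later
        data = skb_put(skb, payload_len);        // extend the data area at the tail
        memcpy(data, payload, payload_len);      // fill in the payload
        ...
        skb_push(skb, sizeof(struct iphdr));     // prepend a header into the reserved headroom
 */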
88
89 /*
90    This version of net/ipv4/ipip.c was cloned from net/ipv4/ip_gre.c.
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95  
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h>
112
113 #include <net/sock.h>
114 #include <net/ip.h>
115 #include <net/icmp.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119
120 #define HASH_SIZE  16
121 #define HASH(addr) ((addr^(addr>>4))&0xF)
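/* e.g. HASH(0x0A000001) = (0x0A000001 ^ 0x00A00000) & 0xF = 1; only the
   low-order byte of the 32-bit value influences which of the 16 buckets
   is used. */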
122
123 static int ipip_fb_tunnel_init(struct net_device *dev);
124 static int ipip_tunnel_init(struct net_device *dev);
125 static void ipip_tunnel_setup(struct net_device *dev);
126
127 static struct net_device *ipip_fb_tunnel_dev;
128
129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
132 static struct ip_tunnel *tunnels_wc[1];
133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
134
135 static DEFINE_RWLOCK(ipip_lock);
136
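/* Find a tunnel matching the outer addresses, preferring the most specific
   match: (remote, local), then remote only, then local only, and finally
   the wildcard (fallback) tunnel. */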
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139         unsigned h0 = HASH(remote);
140         unsigned h1 = HASH(local);
141         struct ip_tunnel *t;
142
143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144                 if (local == t->parms.iph.saddr &&
145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146                         return t;
147         }
148         for (t = tunnels_r[h0]; t; t = t->next) {
149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = tunnels_l[h1]; t; t = t->next) {
153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157                 return t;
158         return NULL;
159 }
160
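/* Select the hash table and bucket for a tunnel: bit 1 of prio is set when
   the remote address is known, bit 0 when the local address is known,
   matching the order of the tunnels[] array above. */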
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163         u32 remote = t->parms.iph.daddr;
164         u32 local = t->parms.iph.saddr;
165         unsigned h = 0;
166         int prio = 0;
167
168         if (remote) {
169                 prio |= 2;
170                 h ^= HASH(remote);
171         }
172         if (local) {
173                 prio |= 1;
174                 h ^= HASH(local);
175         }
176         return &tunnels[prio][h];
177 }
178
179
180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
181 {
182         struct ip_tunnel **tp;
183
184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
185                 if (t == *tp) {
186                         write_lock_bh(&ipip_lock);
187                         *tp = t->next;
188                         write_unlock_bh(&ipip_lock);
189                         break;
190                 }
191         }
192 }
193
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196         struct ip_tunnel **tp = ipip_bucket(t);
197
198         t->next = *tp;
199         write_lock_bh(&ipip_lock);
200         *tp = t;
201         write_unlock_bh(&ipip_lock);
202 }
203
204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
205 {
206         u32 remote = parms->iph.daddr;
207         u32 local = parms->iph.saddr;
208         struct ip_tunnel *t, **tp, *nt;
209         struct net_device *dev;
210         unsigned h = 0;
211         int prio = 0;
212         char name[IFNAMSIZ];
213
214         if (remote) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218         if (local) {
219                 prio |= 1;
220                 h ^= HASH(local);
221         }
222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
224                         return t;
225         }
226         if (!create)
227                 return NULL;
228
229         if (parms->name[0])
230                 strlcpy(name, parms->name, IFNAMSIZ);
231         else {
232                 int i;
233                 for (i=1; i<100; i++) {
234                         sprintf(name, "tunl%d", i);
235                         if (__dev_get_by_name(name) == NULL)
236                                 break;
237                 }
238                 if (i==100)
239                         goto failed;
240         }
241
242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243         if (dev == NULL)
244                 return NULL;
245
246         nt = netdev_priv(dev);
247         SET_MODULE_OWNER(dev);
248         dev->init = ipip_tunnel_init;
249         nt->parms = *parms;
250
251         if (register_netdevice(dev) < 0) {
252                 free_netdev(dev);
253                 goto failed;
254         }
255
256         dev_hold(dev);
257         ipip_tunnel_link(nt);
258         return nt;
259
260 failed:
261         return NULL;
262 }
263
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266         if (dev == ipip_fb_tunnel_dev) {
267                 write_lock_bh(&ipip_lock);
268                 tunnels_wc[0] = NULL;
269                 write_unlock_bh(&ipip_lock);
270         } else
271                 ipip_tunnel_unlink(netdev_priv(dev));
272         dev_put(dev);
273 }
274
275 static int ipip_err(struct sk_buff *skb, u32 info)
276 {
277 #ifndef I_WISH_WORLD_WERE_PERFECT
278
279 /* It is not :-( All the routers (except for Linux) return only
280    8 bytes of packet payload. It means, that precise relaying of
281    ICMP in the real Internet is absolutely infeasible.
282  */
283         struct iphdr *iph = (struct iphdr*)skb->data;
284         int type = skb->h.icmph->type;
285         int code = skb->h.icmph->code;
286         struct ip_tunnel *t;
287         int err;
288
289         switch (type) {
290         default:
291         case ICMP_PARAMETERPROB:
292                 return 0;
293
294         case ICMP_DEST_UNREACH:
295                 switch (code) {
296                 case ICMP_SR_FAILED:
297                 case ICMP_PORT_UNREACH:
298                         /* Impossible event. */
299                         return 0;
300                 case ICMP_FRAG_NEEDED:
301                         /* Soft state for pmtu is maintained by IP core. */
302                         return 0;
303                 default:
304                         /* All others are translated to HOST_UNREACH.
305                            rfc2003 contains "deep thoughts" about NET_UNREACH,
306                            I believe they are just ether pollution. --ANK
307                          */
308                         break;
309                 }
310                 break;
311         case ICMP_TIME_EXCEEDED:
312                 if (code != ICMP_EXC_TTL)
313                         return 0;
314                 break;
315         }
316
317         err = -ENOENT;
318
319         read_lock(&ipip_lock);
320         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
321         if (t == NULL || t->parms.iph.daddr == 0)
322                 goto out;
323
324         err = 0;
325         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
326                 goto out;
327
328         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
329                 t->err_count++;
330         else
331                 t->err_count = 1;
332         t->err_time = jiffies;
333 out:
334         read_unlock(&ipip_lock);
335         return err;
336 #else
337         struct iphdr *iph = (struct iphdr*)skb->data;
338         int hlen = iph->ihl<<2;
339         struct iphdr *eiph;
340         int type = skb->h.icmph->type;
341         int code = skb->h.icmph->code;
342         int rel_type = 0;
343         int rel_code = 0;
344         int rel_info = 0;
345         struct sk_buff *skb2;
346         struct flowi fl;
347         struct rtable *rt;
348
349         if (skb->len < hlen + sizeof(struct iphdr))
350                 return 0;
351         eiph = (struct iphdr*)(skb->data + hlen);
352
353         switch (type) {
354         default:
355                 return 0;
356         case ICMP_PARAMETERPROB:
357                 if (skb->h.icmph->un.gateway < hlen)
358                         return 0;
359
360                 /* So... This guy found something strange INSIDE the encapsulated
361                    packet. Well, he is a fool, but what can we do?
362                  */
363                 rel_type = ICMP_PARAMETERPROB;
364                 rel_info = skb->h.icmph->un.gateway - hlen;
365                 break;
366
367         case ICMP_DEST_UNREACH:
368                 switch (code) {
369                 case ICMP_SR_FAILED:
370                 case ICMP_PORT_UNREACH:
371                         /* Impossible event. */
372                         return 0;
373                 case ICMP_FRAG_NEEDED:
374                         /* And it is the only really necessary thing :-) */
375                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
376                         if (rel_info < hlen+68)
377                                 return 0;
378                         rel_info -= hlen;
379                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
380                         if (rel_info > ntohs(eiph->tot_len))
381                                 return 0;
382                         break;
383                 default:
384                         /* All others are translated to HOST_UNREACH.
385                            rfc2003 contains "deep thoughts" about NET_UNREACH,
386                            I believe, it is just ether pollution. --ANK
387                          */
388                         rel_type = ICMP_DEST_UNREACH;
389                         rel_code = ICMP_HOST_UNREACH;
390                         break;
391                 }
392                 break;
393         case ICMP_TIME_EXCEEDED:
394                 if (code != ICMP_EXC_TTL)
395                         return 0;
396                 break;
397         }
398
399         /* Prepare fake skb to feed it to icmp_send */
400         skb2 = skb_clone(skb, GFP_ATOMIC);
401         if (skb2 == NULL)
402                 return 0;
403         dst_release(skb2->dst);
404         skb2->dst = NULL;
405         skb_pull(skb2, skb->data - (u8*)eiph);
406         skb2->nh.raw = skb2->data;
407
408         /* Try to guess incoming interface */
409         memset(&fl, 0, sizeof(fl));
410         fl.fl4_daddr = eiph->saddr;
411         fl.fl4_tos = RT_TOS(eiph->tos);
412         fl.proto = IPPROTO_IPIP;
413         if (ip_route_output_key(&rt, &fl)) {
414                 kfree_skb(skb2);
415                 return 0;
416         }
417         skb2->dev = rt->u.dst.dev;
418
419         /* route "incoming" packet */
420         if (rt->rt_flags&RTCF_LOCAL) {
421                 ip_rt_put(rt);
422                 rt = NULL;
423                 fl.fl4_daddr = eiph->daddr;
424                 fl.fl4_src = eiph->saddr;
425                 fl.fl4_tos = eiph->tos;
426                 if (ip_route_output_key(&rt, &fl) ||
427                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
428                         ip_rt_put(rt);
429                         kfree_skb(skb2);
430                         return 0;
431                 }
432         } else {
433                 ip_rt_put(rt);
434                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
435                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
436                         kfree_skb(skb2);
437                         return 0;
438                 }
439         }
440
441         /* change mtu on this route */
442         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
443                 if (rel_info > dst_mtu(skb2->dst)) {
444                         kfree_skb(skb2);
445                         return 0;
446                 }
447                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
448                 rel_info = htonl(rel_info);
449         } else if (type == ICMP_TIME_EXCEEDED) {
450                 struct ip_tunnel *t = netdev_priv(skb2->dev);
451                 if (t->parms.iph.ttl) {
452                         rel_type = ICMP_DEST_UNREACH;
453                         rel_code = ICMP_HOST_UNREACH;
454                 }
455         }
456
457         icmp_send(skb2, rel_type, rel_code, rel_info);
458         kfree_skb(skb2);
459         return 0;
460 #endif
461 }
462
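/* If the outer header arrived marked Congestion Experienced, propagate the
   CE mark to the inner header before it is handed back to the stack. */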
463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
464 {
465         struct iphdr *inner_iph = skb->nh.iph;
466
467         if (INET_ECN_is_ce(outer_iph->tos))
468                 IP_ECN_set_ce(inner_iph);
469 }
470
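/* Receive path: the outer IPv4 header has already been parsed by the IP
   layer; re-point the header pointers at the inner packet and feed it back
   into the stack as if it had arrived on the tunnel device. */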
471 static int ipip_rcv(struct sk_buff *skb)
472 {
473         struct iphdr *iph;
474         struct ip_tunnel *tunnel;
475
476         iph = skb->nh.iph;
477
478         read_lock(&ipip_lock);
479         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
480                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
481                         read_unlock(&ipip_lock);
482                         kfree_skb(skb);
483                         return 0;
484                 }
485
486                 secpath_reset(skb);
487
488                 skb->mac.raw = skb->nh.raw;
489                 skb->nh.raw = skb->data;
490                 skb->protocol = htons(ETH_P_IP);
491                 skb->pkt_type = PACKET_HOST;
492
493                 tunnel->stat.rx_packets++;
494                 tunnel->stat.rx_bytes += skb->len;
495                 skb->dev = tunnel->dev;
496                 dst_release(skb->dst);
497                 skb->dst = NULL;
498                 nf_reset(skb);
499                 ipip_ecn_decapsulate(iph, skb);
500                 netif_rx(skb);
501                 read_unlock(&ipip_lock);
502                 return 0;
503         }
504         read_unlock(&ipip_lock);
505
506         return -1;
507 }
508
509 /*
510  *      This function assumes it is being called from dev_queue_xmit()
511  *      and that skb is filled properly by that function.
512  */
513
514 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
515 {
516         struct ip_tunnel *tunnel = netdev_priv(dev);
517         struct net_device_stats *stats = &tunnel->stat;
518         struct iphdr  *tiph = &tunnel->parms.iph;
519         u8     tos = tunnel->parms.iph.tos;
520         u16    df = tiph->frag_off;
521         struct rtable *rt;                      /* Route to the other host */
522         struct net_device *tdev;                        /* Device to other host */
523         struct iphdr  *old_iph = skb->nh.iph;
524         struct iphdr  *iph;                     /* Our new IP header */
525         int    max_headroom;                    /* The extra header space needed */
526         u32    dst = tiph->daddr;
527         int    mtu;
528
529         if (tunnel->recursion++) {
530                 tunnel->stat.collisions++;
531                 goto tx_error;
532         }
533
534         if (skb->protocol != htons(ETH_P_IP))
535                 goto tx_error;
536
537         if (tos&1)
538                 tos = old_iph->tos;
539
540         if (!dst) {
541                 /* NBMA tunnel */
542                 if ((rt = (struct rtable*)skb->dst) == NULL) {
543                         tunnel->stat.tx_fifo_errors++;
544                         goto tx_error;
545                 }
546                 if ((dst = rt->rt_gateway) == 0)
547                         goto tx_error_icmp;
548         }
549
550         {
551                 struct flowi fl = { .oif = tunnel->parms.link,
552                                     .nl_u = { .ip4_u =
553                                               { .daddr = dst,
554                                                 .saddr = tiph->saddr,
555                                                 .tos = RT_TOS(tos) } },
556                                     .proto = IPPROTO_IPIP };
557                 if (ip_route_output_key(&rt, &fl)) {
558                         tunnel->stat.tx_carrier_errors++;
559                         goto tx_error_icmp;
560                 }
561         }
562         tdev = rt->u.dst.dev;
563
564         if (tdev == dev) {
565                 ip_rt_put(rt);
566                 tunnel->stat.collisions++;
567                 goto tx_error;
568         }
569
570         if (tiph->frag_off)
571                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
572         else
573                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
574
575         if (mtu < 68) {
576                 tunnel->stat.collisions++;
577                 ip_rt_put(rt);
578                 goto tx_error;
579         }
580         if (skb->dst)
581                 skb->dst->ops->update_pmtu(skb->dst, mtu);
582
583         df |= (old_iph->frag_off&htons(IP_DF));
584
585         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
586                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
587                 ip_rt_put(rt);
588                 goto tx_error;
589         }
590
591         if (tunnel->err_count > 0) {
592                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
593                         tunnel->err_count--;
594                         dst_link_failure(skb);
595                 } else
596                         tunnel->err_count = 0;
597         }
598
599         /*
600          * Okay, now see if we can stuff it in the buffer as-is.
601          */
602         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
603
604         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
605                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
606                 if (!new_skb) {
607                         ip_rt_put(rt);
608                         stats->tx_dropped++;
609                         dev_kfree_skb(skb);
610                         tunnel->recursion--;
611                         return 0;
612                 }
613                 if (skb->sk)
614                         skb_set_owner_w(new_skb, skb->sk);
615                 dev_kfree_skb(skb);
616                 skb = new_skb;
617                 old_iph = skb->nh.iph;
618         }
619
620         skb->h.raw = skb->nh.raw;
621         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
622         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
623         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
624                               IPSKB_REROUTED);
625         dst_release(skb->dst);
626         skb->dst = &rt->u.dst;
627
628         /*
629          *      Push down and install the IPIP header.
630          */
631
632         iph                     =       skb->nh.iph;
633         iph->version            =       4;
634         iph->ihl                =       sizeof(struct iphdr)>>2;
635         iph->frag_off           =       df;
636         iph->protocol           =       IPPROTO_IPIP;
637         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
638         iph->daddr              =       rt->rt_dst;
639         iph->saddr              =       rt->rt_src;
640
641         if ((iph->ttl = tiph->ttl) == 0)
642                 iph->ttl        =       old_iph->ttl;
643
644         nf_reset(skb);
645
646         IPTUNNEL_XMIT();
647         tunnel->recursion--;
648         return 0;
649
650 tx_error_icmp:
651         dst_link_failure(skb);
652 tx_error:
653         stats->tx_errors++;
654         dev_kfree_skb(skb);
655         tunnel->recursion--;
656         return 0;
657 }
658
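/* Userspace configuration entry point: SIOCGETTUNNEL reads a tunnel's
   parameters, SIOCADDTUNNEL/SIOCCHGTUNNEL create or change one (both need
   CAP_NET_ADMIN) and SIOCDELTUNNEL removes it.  A rough sketch of how a
   tunnel is typically added from userspace through this interface (error
   handling omitted, names and addresses are only placeholders):

        struct ip_tunnel_parm p;
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&p, 0, sizeof(p));
        strcpy(p.name, "tunl1");
        p.iph.version  = 4;
        p.iph.ihl      = 5;
        p.iph.protocol = IPPROTO_IPIP;
        p.iph.saddr    = inet_addr("192.0.2.1");
        p.iph.daddr    = inet_addr("192.0.2.2");

        strcpy(ifr.ifr_name, "tunl0");
        ifr.ifr_ifru.ifru_data = (void *)&p;
        ioctl(fd, SIOCADDTUNNEL, &ifr);
 */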
659 static int
660 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
661 {
662         int err = 0;
663         struct ip_tunnel_parm p;
664         struct ip_tunnel *t;
665
666         switch (cmd) {
667         case SIOCGETTUNNEL:
668                 t = NULL;
669                 if (dev == ipip_fb_tunnel_dev) {
670                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
671                                 err = -EFAULT;
672                                 break;
673                         }
674                         t = ipip_tunnel_locate(&p, 0);
675                 }
676                 if (t == NULL)
677                         t = netdev_priv(dev);
678                 memcpy(&p, &t->parms, sizeof(p));
679                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
680                         err = -EFAULT;
681                 break;
682
683         case SIOCADDTUNNEL:
684         case SIOCCHGTUNNEL:
685                 err = -EPERM;
686                 if (!capable(CAP_NET_ADMIN))
687                         goto done;
688
689                 err = -EFAULT;
690                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
691                         goto done;
692
693                 err = -EINVAL;
694                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
695                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
696                         goto done;
697                 if (p.iph.ttl)
698                         p.iph.frag_off |= htons(IP_DF);
699
700                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
701
702                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
703                         if (t != NULL) {
704                                 if (t->dev != dev) {
705                                         err = -EEXIST;
706                                         break;
707                                 }
708                         } else {
709                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
710                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
711                                         err = -EINVAL;
712                                         break;
713                                 }
714                                 t = netdev_priv(dev);
715                                 ipip_tunnel_unlink(t);
716                                 t->parms.iph.saddr = p.iph.saddr;
717                                 t->parms.iph.daddr = p.iph.daddr;
718                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
719                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
720                                 ipip_tunnel_link(t);
721                                 netdev_state_change(dev);
722                         }
723                 }
724
725                 if (t) {
726                         err = 0;
727                         if (cmd == SIOCCHGTUNNEL) {
728                                 t->parms.iph.ttl = p.iph.ttl;
729                                 t->parms.iph.tos = p.iph.tos;
730                                 t->parms.iph.frag_off = p.iph.frag_off;
731                         }
732                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
733                                 err = -EFAULT;
734                 } else
735                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
736                 break;
737
738         case SIOCDELTUNNEL:
739                 err = -EPERM;
740                 if (!capable(CAP_NET_ADMIN))
741                         goto done;
742
743                 if (dev == ipip_fb_tunnel_dev) {
744                         err = -EFAULT;
745                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
746                                 goto done;
747                         err = -ENOENT;
748                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
749                                 goto done;
750                         err = -EPERM;
751                         if (t->dev == ipip_fb_tunnel_dev)
752                                 goto done;
753                         dev = t->dev;
754                 }
755                 err = unregister_netdevice(dev);
756                 break;
757
758         default:
759                 err = -EINVAL;
760         }
761
762 done:
763         return err;
764 }
765
766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
767 {
768         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
769 }
770
771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
772 {
773         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
774                 return -EINVAL;
775         dev->mtu = new_mtu;
776         return 0;
777 }
778
779 static void ipip_tunnel_setup(struct net_device *dev)
780 {
781         SET_MODULE_OWNER(dev);
782         dev->uninit             = ipip_tunnel_uninit;
783         dev->hard_start_xmit    = ipip_tunnel_xmit;
784         dev->get_stats          = ipip_tunnel_get_stats;
785         dev->do_ioctl           = ipip_tunnel_ioctl;
786         dev->change_mtu         = ipip_tunnel_change_mtu;
787         dev->destructor         = free_netdev;
788
789         dev->type               = ARPHRD_TUNNEL;
790         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
791         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
792         dev->flags              = IFF_NOARP;
793         dev->iflink             = 0;
794         dev->addr_len           = 4;
795 }
796
797 static int ipip_tunnel_init(struct net_device *dev)
798 {
799         struct net_device *tdev = NULL;
800         struct ip_tunnel *tunnel;
801         struct iphdr *iph;
802
803         tunnel = netdev_priv(dev);
804         iph = &tunnel->parms.iph;
805
806         tunnel->dev = dev;
807         strcpy(tunnel->parms.name, dev->name);
808
809         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
810         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
811
812         if (iph->daddr) {
813                 struct flowi fl = { .oif = tunnel->parms.link,
814                                     .nl_u = { .ip4_u =
815                                               { .daddr = iph->daddr,
816                                                 .saddr = iph->saddr,
817                                                 .tos = RT_TOS(iph->tos) } },
818                                     .proto = IPPROTO_IPIP };
819                 struct rtable *rt;
820                 if (!ip_route_output_key(&rt, &fl)) {
821                         tdev = rt->u.dst.dev;
822                         ip_rt_put(rt);
823                 }
824                 dev->flags |= IFF_POINTOPOINT;
825         }
826
827         if (!tdev && tunnel->parms.link)
828                 tdev = __dev_get_by_index(tunnel->parms.link);
829
830         if (tdev) {
831                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
832                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
833         }
834         dev->iflink = tunnel->parms.link;
835
836         return 0;
837 }
838
839 static int __init ipip_fb_tunnel_init(struct net_device *dev)
840 {
841         struct ip_tunnel *tunnel = netdev_priv(dev);
842         struct iphdr *iph = &tunnel->parms.iph;
843
844         tunnel->dev = dev;
845         strcpy(tunnel->parms.name, dev->name);
846
847         iph->version            = 4;
848         iph->protocol           = IPPROTO_IPIP;
849         iph->ihl                = 5;
850
851         dev_hold(dev);
852         tunnels_wc[0]           = tunnel;
853         return 0;
854 }
855
856 static struct xfrm_tunnel ipip_handler = {
857         .handler        =       ipip_rcv,
858         .err_handler    =       ipip_err,
859         .priority       =       1,
860 };
861
862 static char banner[] __initdata =
863         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
864
865 static int __init ipip_init(void)
866 {
867         int err;
868
869         printk(banner);
870
871         if (xfrm4_tunnel_register(&ipip_handler)) {
872                 printk(KERN_INFO "ipip init: can't register tunnel\n");
873                 return -EAGAIN;
874         }
875
876         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
877                                            "tunl0",
878                                            ipip_tunnel_setup);
879         if (!ipip_fb_tunnel_dev) {
880                 err = -ENOMEM;
881                 goto err1;
882         }
883
884         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
885
886         if ((err = register_netdev(ipip_fb_tunnel_dev)))
887                 goto err2;
888  out:
889         return err;
890  err2:
891         free_netdev(ipip_fb_tunnel_dev);
892  err1:
893         xfrm4_tunnel_deregister(&ipip_handler);
894         goto out;
895 }
896
897 static void __exit ipip_destroy_tunnels(void)
898 {
899         int prio;
900
901         for (prio = 1; prio < 4; prio++) {
902                 int h;
903                 for (h = 0; h < HASH_SIZE; h++) {
904                         struct ip_tunnel *t;
905                         while ((t = tunnels[prio][h]) != NULL)
906                                 unregister_netdevice(t->dev);
907                 }
908         }
909 }
910
911 static void __exit ipip_fini(void)
912 {
913         if (xfrm4_tunnel_deregister(&ipip_handler))
914                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
915
916         rtnl_lock();
917         ipip_destroy_tunnels();
918         unregister_netdevice(ipip_fb_tunnel_dev);
919         rtnl_unlock();
920 }
921
922 module_init(ipip_init);
923 module_exit(ipip_fini);
924 MODULE_LICENSE("GPL");