[PATCH] Lost sockfd_put() in routing_ioctl()
[linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
89 /*
90    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95  
96 #include <linux/config.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/protocol.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119
/* Number of buckets in each of the address-keyed tunnel tables. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that expressions such as
 * HASH(a | b) hash the value of the whole expression.
 */
#define HASH(addr) ((((addr)) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
122
123 static int ipip_fb_tunnel_init(struct net_device *dev);
124 static int ipip_tunnel_init(struct net_device *dev);
125 static void ipip_tunnel_setup(struct net_device *dev);
126
127 static struct net_device *ipip_fb_tunnel_dev;
128
129 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
130 static struct ip_tunnel *tunnels_r[HASH_SIZE];
131 static struct ip_tunnel *tunnels_l[HASH_SIZE];
132 static struct ip_tunnel *tunnels_wc[1];
133 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
134
135 static DEFINE_RWLOCK(ipip_lock);
136
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139         unsigned h0 = HASH(remote);
140         unsigned h1 = HASH(local);
141         struct ip_tunnel *t;
142
143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144                 if (local == t->parms.iph.saddr &&
145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146                         return t;
147         }
148         for (t = tunnels_r[h0]; t; t = t->next) {
149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = tunnels_l[h1]; t; t = t->next) {
153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157                 return t;
158         return NULL;
159 }
160
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163         u32 remote = t->parms.iph.daddr;
164         u32 local = t->parms.iph.saddr;
165         unsigned h = 0;
166         int prio = 0;
167
168         if (remote) {
169                 prio |= 2;
170                 h ^= HASH(remote);
171         }
172         if (local) {
173                 prio |= 1;
174                 h ^= HASH(local);
175         }
176         return &tunnels[prio][h];
177 }
178
179
180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
181 {
182         struct ip_tunnel **tp;
183
184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
185                 if (t == *tp) {
186                         write_lock_bh(&ipip_lock);
187                         *tp = t->next;
188                         write_unlock_bh(&ipip_lock);
189                         break;
190                 }
191         }
192 }
193
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196         struct ip_tunnel **tp = ipip_bucket(t);
197
198         t->next = *tp;
199         write_lock_bh(&ipip_lock);
200         *tp = t;
201         write_unlock_bh(&ipip_lock);
202 }
203
204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
205 {
206         u32 remote = parms->iph.daddr;
207         u32 local = parms->iph.saddr;
208         struct ip_tunnel *t, **tp, *nt;
209         struct net_device *dev;
210         unsigned h = 0;
211         int prio = 0;
212         char name[IFNAMSIZ];
213
214         if (remote) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218         if (local) {
219                 prio |= 1;
220                 h ^= HASH(local);
221         }
222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
224                         return t;
225         }
226         if (!create)
227                 return NULL;
228
229         if (parms->name[0])
230                 strlcpy(name, parms->name, IFNAMSIZ);
231         else {
232                 int i;
233                 for (i=1; i<100; i++) {
234                         sprintf(name, "tunl%d", i);
235                         if (__dev_get_by_name(name) == NULL)
236                                 break;
237                 }
238                 if (i==100)
239                         goto failed;
240         }
241
242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243         if (dev == NULL)
244                 return NULL;
245
246         nt = dev->priv;
247         SET_MODULE_OWNER(dev);
248         dev->init = ipip_tunnel_init;
249         nt->parms = *parms;
250
251         if (register_netdevice(dev) < 0) {
252                 free_netdev(dev);
253                 goto failed;
254         }
255
256         dev_hold(dev);
257         ipip_tunnel_link(nt);
258         return nt;
259
260 failed:
261         return NULL;
262 }
263
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266         if (dev == ipip_fb_tunnel_dev) {
267                 write_lock_bh(&ipip_lock);
268                 tunnels_wc[0] = NULL;
269                 write_unlock_bh(&ipip_lock);
270         } else
271                 ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
272         dev_put(dev);
273 }
274
275 static void ipip_err(struct sk_buff *skb, u32 info)
276 {
277 #ifndef I_WISH_WORLD_WERE_PERFECT
278
279 /* It is not :-( All the routers (except for Linux) return only
280    8 bytes of packet payload. It means, that precise relaying of
281    ICMP in the real Internet is absolutely infeasible.
282  */
283         struct iphdr *iph = (struct iphdr*)skb->data;
284         int type = skb->h.icmph->type;
285         int code = skb->h.icmph->code;
286         struct ip_tunnel *t;
287
288         switch (type) {
289         default:
290         case ICMP_PARAMETERPROB:
291                 return;
292
293         case ICMP_DEST_UNREACH:
294                 switch (code) {
295                 case ICMP_SR_FAILED:
296                 case ICMP_PORT_UNREACH:
297                         /* Impossible event. */
298                         return;
299                 case ICMP_FRAG_NEEDED:
300                         /* Soft state for pmtu is maintained by IP core. */
301                         return;
302                 default:
303                         /* All others are translated to HOST_UNREACH.
304                            rfc2003 contains "deep thoughts" about NET_UNREACH,
305                            I believe they are just ether pollution. --ANK
306                          */
307                         break;
308                 }
309                 break;
310         case ICMP_TIME_EXCEEDED:
311                 if (code != ICMP_EXC_TTL)
312                         return;
313                 break;
314         }
315
316         read_lock(&ipip_lock);
317         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
318         if (t == NULL || t->parms.iph.daddr == 0)
319                 goto out;
320         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
321                 goto out;
322
323         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
324                 t->err_count++;
325         else
326                 t->err_count = 1;
327         t->err_time = jiffies;
328 out:
329         read_unlock(&ipip_lock);
330         return;
331 #else
332         struct iphdr *iph = (struct iphdr*)dp;
333         int hlen = iph->ihl<<2;
334         struct iphdr *eiph;
335         int type = skb->h.icmph->type;
336         int code = skb->h.icmph->code;
337         int rel_type = 0;
338         int rel_code = 0;
339         int rel_info = 0;
340         struct sk_buff *skb2;
341         struct flowi fl;
342         struct rtable *rt;
343
344         if (len < hlen + sizeof(struct iphdr))
345                 return;
346         eiph = (struct iphdr*)(dp + hlen);
347
348         switch (type) {
349         default:
350                 return;
351         case ICMP_PARAMETERPROB:
352                 if (skb->h.icmph->un.gateway < hlen)
353                         return;
354
355                 /* So... This guy found something strange INSIDE encapsulated
356                    packet. Well, he is fool, but what can we do ?
357                  */
358                 rel_type = ICMP_PARAMETERPROB;
359                 rel_info = skb->h.icmph->un.gateway - hlen;
360                 break;
361
362         case ICMP_DEST_UNREACH:
363                 switch (code) {
364                 case ICMP_SR_FAILED:
365                 case ICMP_PORT_UNREACH:
366                         /* Impossible event. */
367                         return;
368                 case ICMP_FRAG_NEEDED:
369                         /* And it is the only really necessary thing :-) */
370                         rel_info = ntohs(skb->h.icmph->un.frag.mtu);
371                         if (rel_info < hlen+68)
372                                 return;
373                         rel_info -= hlen;
374                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
375                         if (rel_info > ntohs(eiph->tot_len))
376                                 return;
377                         break;
378                 default:
379                         /* All others are translated to HOST_UNREACH.
380                            rfc2003 contains "deep thoughts" about NET_UNREACH,
381                            I believe, it is just ether pollution. --ANK
382                          */
383                         rel_type = ICMP_DEST_UNREACH;
384                         rel_code = ICMP_HOST_UNREACH;
385                         break;
386                 }
387                 break;
388         case ICMP_TIME_EXCEEDED:
389                 if (code != ICMP_EXC_TTL)
390                         return;
391                 break;
392         }
393
394         /* Prepare fake skb to feed it to icmp_send */
395         skb2 = skb_clone(skb, GFP_ATOMIC);
396         if (skb2 == NULL)
397                 return;
398         dst_release(skb2->dst);
399         skb2->dst = NULL;
400         skb_pull(skb2, skb->data - (u8*)eiph);
401         skb2->nh.raw = skb2->data;
402
403         /* Try to guess incoming interface */
404         memset(&fl, 0, sizeof(fl));
405         fl.fl4_daddr = eiph->saddr;
406         fl.fl4_tos = RT_TOS(eiph->tos);
407         fl.proto = IPPROTO_IPIP;
408         if (ip_route_output_key(&rt, &key)) {
409                 kfree_skb(skb2);
410                 return;
411         }
412         skb2->dev = rt->u.dst.dev;
413
414         /* route "incoming" packet */
415         if (rt->rt_flags&RTCF_LOCAL) {
416                 ip_rt_put(rt);
417                 rt = NULL;
418                 fl.fl4_daddr = eiph->daddr;
419                 fl.fl4_src = eiph->saddr;
420                 fl.fl4_tos = eiph->tos;
421                 if (ip_route_output_key(&rt, &fl) ||
422                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
423                         ip_rt_put(rt);
424                         kfree_skb(skb2);
425                         return;
426                 }
427         } else {
428                 ip_rt_put(rt);
429                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
430                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
431                         kfree_skb(skb2);
432                         return;
433                 }
434         }
435
436         /* change mtu on this route */
437         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
438                 if (rel_info > dst_mtu(skb2->dst)) {
439                         kfree_skb(skb2);
440                         return;
441                 }
442                 skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
443                 rel_info = htonl(rel_info);
444         } else if (type == ICMP_TIME_EXCEEDED) {
445                 struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
446                 if (t->parms.iph.ttl) {
447                         rel_type = ICMP_DEST_UNREACH;
448                         rel_code = ICMP_HOST_UNREACH;
449                 }
450         }
451
452         icmp_send(skb2, rel_type, rel_code, rel_info);
453         kfree_skb(skb2);
454         return;
455 #endif
456 }
457
458 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
459 {
460         struct iphdr *inner_iph = skb->nh.iph;
461
462         if (INET_ECN_is_ce(outer_iph->tos))
463                 IP_ECN_set_ce(inner_iph);
464 }
465
466 static int ipip_rcv(struct sk_buff *skb)
467 {
468         struct iphdr *iph;
469         struct ip_tunnel *tunnel;
470
471         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
472                 goto out;
473
474         iph = skb->nh.iph;
475
476         read_lock(&ipip_lock);
477         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
478                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
479                         read_unlock(&ipip_lock);
480                         kfree_skb(skb);
481                         return 0;
482                 }
483
484                 secpath_reset(skb);
485
486                 skb->mac.raw = skb->nh.raw;
487                 skb->nh.raw = skb->data;
488                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
489                 skb->protocol = htons(ETH_P_IP);
490                 skb->pkt_type = PACKET_HOST;
491
492                 tunnel->stat.rx_packets++;
493                 tunnel->stat.rx_bytes += skb->len;
494                 skb->dev = tunnel->dev;
495                 dst_release(skb->dst);
496                 skb->dst = NULL;
497                 nf_reset(skb);
498                 ipip_ecn_decapsulate(iph, skb);
499                 netif_rx(skb);
500                 read_unlock(&ipip_lock);
501                 return 0;
502         }
503         read_unlock(&ipip_lock);
504
505 out:
506         return -1;
507 }
508
509 /*
510  *      This function assumes it is being called from dev_queue_xmit()
511  *      and that skb is filled properly by that function.
512  */
513
514 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
515 {
516         struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
517         struct net_device_stats *stats = &tunnel->stat;
518         struct iphdr  *tiph = &tunnel->parms.iph;
519         u8     tos = tunnel->parms.iph.tos;
520         u16    df = tiph->frag_off;
521         struct rtable *rt;                      /* Route to the other host */
522         struct net_device *tdev;                        /* Device to other host */
523         struct iphdr  *old_iph = skb->nh.iph;
524         struct iphdr  *iph;                     /* Our new IP header */
525         int    max_headroom;                    /* The extra header space needed */
526         u32    dst = tiph->daddr;
527         int    mtu;
528
529         if (tunnel->recursion++) {
530                 tunnel->stat.collisions++;
531                 goto tx_error;
532         }
533
534         if (skb->protocol != htons(ETH_P_IP))
535                 goto tx_error;
536
537         if (tos&1)
538                 tos = old_iph->tos;
539
540         if (!dst) {
541                 /* NBMA tunnel */
542                 if ((rt = (struct rtable*)skb->dst) == NULL) {
543                         tunnel->stat.tx_fifo_errors++;
544                         goto tx_error;
545                 }
546                 if ((dst = rt->rt_gateway) == 0)
547                         goto tx_error_icmp;
548         }
549
550         {
551                 struct flowi fl = { .oif = tunnel->parms.link,
552                                     .nl_u = { .ip4_u =
553                                               { .daddr = dst,
554                                                 .saddr = tiph->saddr,
555                                                 .tos = RT_TOS(tos) } },
556                                     .proto = IPPROTO_IPIP };
557                 if (ip_route_output_key(&rt, &fl)) {
558                         tunnel->stat.tx_carrier_errors++;
559                         goto tx_error_icmp;
560                 }
561         }
562         tdev = rt->u.dst.dev;
563
564         if (tdev == dev) {
565                 ip_rt_put(rt);
566                 tunnel->stat.collisions++;
567                 goto tx_error;
568         }
569
570         if (tiph->frag_off)
571                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
572         else
573                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
574
575         if (mtu < 68) {
576                 tunnel->stat.collisions++;
577                 ip_rt_put(rt);
578                 goto tx_error;
579         }
580         if (skb->dst)
581                 skb->dst->ops->update_pmtu(skb->dst, mtu);
582
583         df |= (old_iph->frag_off&htons(IP_DF));
584
585         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
586                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
587                 ip_rt_put(rt);
588                 goto tx_error;
589         }
590
591         if (tunnel->err_count > 0) {
592                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
593                         tunnel->err_count--;
594                         dst_link_failure(skb);
595                 } else
596                         tunnel->err_count = 0;
597         }
598
599         /*
600          * Okay, now see if we can stuff it in the buffer as-is.
601          */
602         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
603
604         if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
605                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
606                 if (!new_skb) {
607                         ip_rt_put(rt);
608                         stats->tx_dropped++;
609                         dev_kfree_skb(skb);
610                         tunnel->recursion--;
611                         return 0;
612                 }
613                 if (skb->sk)
614                         skb_set_owner_w(new_skb, skb->sk);
615                 dev_kfree_skb(skb);
616                 skb = new_skb;
617                 old_iph = skb->nh.iph;
618         }
619
620         skb->h.raw = skb->nh.raw;
621         skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
622         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
623         dst_release(skb->dst);
624         skb->dst = &rt->u.dst;
625
626         /*
627          *      Push down and install the IPIP header.
628          */
629
630         iph                     =       skb->nh.iph;
631         iph->version            =       4;
632         iph->ihl                =       sizeof(struct iphdr)>>2;
633         iph->frag_off           =       df;
634         iph->protocol           =       IPPROTO_IPIP;
635         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
636         iph->daddr              =       rt->rt_dst;
637         iph->saddr              =       rt->rt_src;
638
639         if ((iph->ttl = tiph->ttl) == 0)
640                 iph->ttl        =       old_iph->ttl;
641
642         nf_reset(skb);
643
644         IPTUNNEL_XMIT();
645         tunnel->recursion--;
646         return 0;
647
648 tx_error_icmp:
649         dst_link_failure(skb);
650 tx_error:
651         stats->tx_errors++;
652         dev_kfree_skb(skb);
653         tunnel->recursion--;
654         return 0;
655 }
656
657 static int
658 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
659 {
660         int err = 0;
661         struct ip_tunnel_parm p;
662         struct ip_tunnel *t;
663
664         switch (cmd) {
665         case SIOCGETTUNNEL:
666                 t = NULL;
667                 if (dev == ipip_fb_tunnel_dev) {
668                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
669                                 err = -EFAULT;
670                                 break;
671                         }
672                         t = ipip_tunnel_locate(&p, 0);
673                 }
674                 if (t == NULL)
675                         t = (struct ip_tunnel*)dev->priv;
676                 memcpy(&p, &t->parms, sizeof(p));
677                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
678                         err = -EFAULT;
679                 break;
680
681         case SIOCADDTUNNEL:
682         case SIOCCHGTUNNEL:
683                 err = -EPERM;
684                 if (!capable(CAP_NET_ADMIN))
685                         goto done;
686
687                 err = -EFAULT;
688                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
689                         goto done;
690
691                 err = -EINVAL;
692                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
693                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
694                         goto done;
695                 if (p.iph.ttl)
696                         p.iph.frag_off |= htons(IP_DF);
697
698                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
699
700                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
701                         if (t != NULL) {
702                                 if (t->dev != dev) {
703                                         err = -EEXIST;
704                                         break;
705                                 }
706                         } else {
707                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
708                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
709                                         err = -EINVAL;
710                                         break;
711                                 }
712                                 t = (struct ip_tunnel*)dev->priv;
713                                 ipip_tunnel_unlink(t);
714                                 t->parms.iph.saddr = p.iph.saddr;
715                                 t->parms.iph.daddr = p.iph.daddr;
716                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
717                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
718                                 ipip_tunnel_link(t);
719                                 netdev_state_change(dev);
720                         }
721                 }
722
723                 if (t) {
724                         err = 0;
725                         if (cmd == SIOCCHGTUNNEL) {
726                                 t->parms.iph.ttl = p.iph.ttl;
727                                 t->parms.iph.tos = p.iph.tos;
728                                 t->parms.iph.frag_off = p.iph.frag_off;
729                         }
730                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
731                                 err = -EFAULT;
732                 } else
733                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
734                 break;
735
736         case SIOCDELTUNNEL:
737                 err = -EPERM;
738                 if (!capable(CAP_NET_ADMIN))
739                         goto done;
740
741                 if (dev == ipip_fb_tunnel_dev) {
742                         err = -EFAULT;
743                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
744                                 goto done;
745                         err = -ENOENT;
746                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
747                                 goto done;
748                         err = -EPERM;
749                         if (t->dev == ipip_fb_tunnel_dev)
750                                 goto done;
751                         dev = t->dev;
752                 }
753                 err = unregister_netdevice(dev);
754                 break;
755
756         default:
757                 err = -EINVAL;
758         }
759
760 done:
761         return err;
762 }
763
764 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
765 {
766         return &(((struct ip_tunnel*)dev->priv)->stat);
767 }
768
769 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
770 {
771         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
772                 return -EINVAL;
773         dev->mtu = new_mtu;
774         return 0;
775 }
776
777 static void ipip_tunnel_setup(struct net_device *dev)
778 {
779         SET_MODULE_OWNER(dev);
780         dev->uninit             = ipip_tunnel_uninit;
781         dev->hard_start_xmit    = ipip_tunnel_xmit;
782         dev->get_stats          = ipip_tunnel_get_stats;
783         dev->do_ioctl           = ipip_tunnel_ioctl;
784         dev->change_mtu         = ipip_tunnel_change_mtu;
785         dev->destructor         = free_netdev;
786
787         dev->type               = ARPHRD_TUNNEL;
788         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
789         dev->mtu                = 1500 - sizeof(struct iphdr);
790         dev->flags              = IFF_NOARP;
791         dev->iflink             = 0;
792         dev->addr_len           = 4;
793 }
794
795 static int ipip_tunnel_init(struct net_device *dev)
796 {
797         struct net_device *tdev = NULL;
798         struct ip_tunnel *tunnel;
799         struct iphdr *iph;
800
801         tunnel = (struct ip_tunnel*)dev->priv;
802         iph = &tunnel->parms.iph;
803
804         tunnel->dev = dev;
805         strcpy(tunnel->parms.name, dev->name);
806
807         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
808         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
809
810         if (iph->daddr) {
811                 struct flowi fl = { .oif = tunnel->parms.link,
812                                     .nl_u = { .ip4_u =
813                                               { .daddr = iph->daddr,
814                                                 .saddr = iph->saddr,
815                                                 .tos = RT_TOS(iph->tos) } },
816                                     .proto = IPPROTO_IPIP };
817                 struct rtable *rt;
818                 if (!ip_route_output_key(&rt, &fl)) {
819                         tdev = rt->u.dst.dev;
820                         ip_rt_put(rt);
821                 }
822                 dev->flags |= IFF_POINTOPOINT;
823         }
824
825         if (!tdev && tunnel->parms.link)
826                 tdev = __dev_get_by_index(tunnel->parms.link);
827
828         if (tdev) {
829                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
830                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
831         }
832         dev->iflink = tunnel->parms.link;
833
834         return 0;
835 }
836
837 static int __init ipip_fb_tunnel_init(struct net_device *dev)
838 {
839         struct ip_tunnel *tunnel = dev->priv;
840         struct iphdr *iph = &tunnel->parms.iph;
841
842         tunnel->dev = dev;
843         strcpy(tunnel->parms.name, dev->name);
844
845         iph->version            = 4;
846         iph->protocol           = IPPROTO_IPIP;
847         iph->ihl                = 5;
848
849         dev_hold(dev);
850         tunnels_wc[0]           = tunnel;
851         return 0;
852 }
853
854 #ifdef CONFIG_INET_TUNNEL
855 static struct xfrm_tunnel ipip_handler = {
856         .handler        =       ipip_rcv,
857         .err_handler    =       ipip_err,
858 };
859
860 static inline int ipip_register(void)
861 {
862         return xfrm4_tunnel_register(&ipip_handler);
863 }
864
865 static inline int ipip_unregister(void)
866 {
867         return xfrm4_tunnel_deregister(&ipip_handler);
868 }
869 #else
870 static struct net_protocol ipip_protocol = {
871         .handler        =       ipip_rcv,
872         .err_handler    =       ipip_err,
873         .no_policy      =       1,
874 };
875
876 static inline int ipip_register(void)
877 {
878         return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
879 }
880
881 static inline int ipip_unregister(void)
882 {
883         return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
884 }
885 #endif
886
887 static char banner[] __initdata =
888         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
889
890 static int __init ipip_init(void)
891 {
892         int err;
893
894         printk(banner);
895
896         if (ipip_register() < 0) {
897                 printk(KERN_INFO "ipip init: can't register tunnel\n");
898                 return -EAGAIN;
899         }
900
901         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
902                                            "tunl0",
903                                            ipip_tunnel_setup);
904         if (!ipip_fb_tunnel_dev) {
905                 err = -ENOMEM;
906                 goto err1;
907         }
908
909         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
910
911         if ((err = register_netdev(ipip_fb_tunnel_dev)))
912                 goto err2;
913  out:
914         return err;
915  err2:
916         free_netdev(ipip_fb_tunnel_dev);
917  err1:
918         ipip_unregister();
919         goto out;
920 }
921
922 static void __exit ipip_destroy_tunnels(void)
923 {
924         int prio;
925
926         for (prio = 1; prio < 4; prio++) {
927                 int h;
928                 for (h = 0; h < HASH_SIZE; h++) {
929                         struct ip_tunnel *t;
930                         while ((t = tunnels[prio][h]) != NULL)
931                                 unregister_netdevice(t->dev);
932                 }
933         }
934 }
935
936 static void __exit ipip_fini(void)
937 {
938         if (ipip_unregister() < 0)
939                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
940
941         rtnl_lock();
942         ipip_destroy_tunnels();
943         unregister_netdevice(ipip_fb_tunnel_dev);
944         rtnl_unlock();
945 }
946
947 module_init(ipip_init);
948 module_exit(ipip_fini);
949 MODULE_LICENSE("GPL");