Merge branch 'master' of ../linux-2.6/
[linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
89 /*
90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94
95
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118
/*
 * Number of buckets in each per-address tunnel table.  Must remain a power
 * of two because HASH() reduces with a bit mask.
 */
#define HASH_SIZE  16

/*
 * Fold an address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that an expression argument such
 * as HASH(a ^ b) hashes the whole expression rather than only its first
 * operand.  addr is still evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
121
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124 static void ipip_tunnel_setup(struct net_device *dev);
125
126 static struct net_device *ipip_fb_tunnel_dev;
127
128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_wc[1];
132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133
134 static DEFINE_RWLOCK(ipip_lock);
135
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
137 {
138         unsigned h0 = HASH(remote);
139         unsigned h1 = HASH(local);
140         struct ip_tunnel *t;
141
142         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143                 if (local == t->parms.iph.saddr &&
144                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145                         return t;
146         }
147         for (t = tunnels_r[h0]; t; t = t->next) {
148                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149                         return t;
150         }
151         for (t = tunnels_l[h1]; t; t = t->next) {
152                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153                         return t;
154         }
155         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156                 return t;
157         return NULL;
158 }
159
160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
161 {
162         __be32 remote = parms->iph.daddr;
163         __be32 local = parms->iph.saddr;
164         unsigned h = 0;
165         int prio = 0;
166
167         if (remote) {
168                 prio |= 2;
169                 h ^= HASH(remote);
170         }
171         if (local) {
172                 prio |= 1;
173                 h ^= HASH(local);
174         }
175         return &tunnels[prio][h];
176 }
177
178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179 {
180         return __ipip_bucket(&t->parms);
181 }
182
183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
184 {
185         struct ip_tunnel **tp;
186
187         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188                 if (t == *tp) {
189                         write_lock_bh(&ipip_lock);
190                         *tp = t->next;
191                         write_unlock_bh(&ipip_lock);
192                         break;
193                 }
194         }
195 }
196
197 static void ipip_tunnel_link(struct ip_tunnel *t)
198 {
199         struct ip_tunnel **tp = ipip_bucket(t);
200
201         t->next = *tp;
202         write_lock_bh(&ipip_lock);
203         *tp = t;
204         write_unlock_bh(&ipip_lock);
205 }
206
207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208 {
209         __be32 remote = parms->iph.daddr;
210         __be32 local = parms->iph.saddr;
211         struct ip_tunnel *t, **tp, *nt;
212         struct net_device *dev;
213         char name[IFNAMSIZ];
214
215         for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
216                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217                         return t;
218         }
219         if (!create)
220                 return NULL;
221
222         if (parms->name[0])
223                 strlcpy(name, parms->name, IFNAMSIZ);
224         else
225                 sprintf(name, "tunl%%d");
226
227         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
228         if (dev == NULL)
229                 return NULL;
230
231         nt = netdev_priv(dev);
232         dev->init = ipip_tunnel_init;
233         nt->parms = *parms;
234
235         if (register_netdevice(dev) < 0) {
236                 free_netdev(dev);
237                 goto failed;
238         }
239
240         dev_hold(dev);
241         ipip_tunnel_link(nt);
242         return nt;
243
244 failed:
245         return NULL;
246 }
247
248 static void ipip_tunnel_uninit(struct net_device *dev)
249 {
250         if (dev == ipip_fb_tunnel_dev) {
251                 write_lock_bh(&ipip_lock);
252                 tunnels_wc[0] = NULL;
253                 write_unlock_bh(&ipip_lock);
254         } else
255                 ipip_tunnel_unlink(netdev_priv(dev));
256         dev_put(dev);
257 }
258
/*
 * ICMP error handler for the tunnel (installed as ipip_handler.err_handler).
 *
 * In the default build (I_WISH_WORLD_WERE_PERFECT not defined) the error is
 * not relayed to the sender of the inner packet.  The function filters on
 * ICMP type/code, looks up the tunnel the error refers to and records the
 * event in the tunnel's err_count / err_time fields.
 *
 * Returns 0 when the error is ignored or recorded, and -ENOENT when no
 * tunnel matches or the matching tunnel has no remote address configured.
 * The info argument is not used by the default branch.
 */
259 static int ipip_err(struct sk_buff *skb, u32 info)
260 {
261 #ifndef I_WISH_WORLD_WERE_PERFECT
262
263 /* It is not :-( All the routers (except for Linux) return only
264    8 bytes of packet payload. It means, that precise relaying of
265    ICMP in the real Internet is absolutely infeasible.
266  */
267         struct iphdr *iph = (struct iphdr*)skb->data;
268         const int type = icmp_hdr(skb)->type;
269         const int code = icmp_hdr(skb)->code;
270         struct ip_tunnel *t;
271         int err;
272
        /* Decide which ICMP errors are worth recording at all. */
273         switch (type) {
274         default:
275         case ICMP_PARAMETERPROB:
276                 return 0;
277
278         case ICMP_DEST_UNREACH:
279                 switch (code) {
280                 case ICMP_SR_FAILED:
281                 case ICMP_PORT_UNREACH:
282                         /* Impossible event. */
283                         return 0;
284                 case ICMP_FRAG_NEEDED:
285                         /* Soft state for pmtu is maintained by IP core. */
286                         return 0;
287                 default:
288                         /* All others are translated to HOST_UNREACH.
289                            rfc2003 contains "deep thoughts" about NET_UNREACH,
290                            I believe they are just ether pollution. --ANK
291                          */
292                         break;
293                 }
294                 break;
295         case ICMP_TIME_EXCEEDED:
296                 if (code != ICMP_EXC_TTL)
297                         return 0;
298                 break;
299         }
300
301         err = -ENOENT;
302
        /*
         * iph is read from skb->data; its daddr is passed as the tunnel's
         * remote address and its saddr as the local one.  Tunnels without a
         * configured remote address are treated as "not found".
         */
303         read_lock(&ipip_lock);
304         t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
305         if (t == NULL || t->parms.iph.daddr == 0)
306                 goto out;
307
308         err = 0;
        /* With no fixed TTL configured, TIME_EXCEEDED is not counted. */
309         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
310                 goto out;
311
        /*
         * Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
         * one; otherwise restart the count at 1.  ipip_tunnel_xmit() reads
         * err_count / err_time.
         */
312         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
313                 t->err_count++;
314         else
315                 t->err_count = 1;
316         t->err_time = jiffies;
317 out:
318         read_unlock(&ipip_lock);
319         return err;
320 #else
        /*
         * NOTE(review): this branch is compiled only when
         * I_WISH_WORLD_WERE_PERFECT is defined.  It uses dp, len and key,
         * none of which are declared in this function, so it presumably
         * would not build if enabled -- confirm before relying on it.
         */
321         struct iphdr *iph = (struct iphdr*)dp;
322         int hlen = iph->ihl<<2;
323         struct iphdr *eiph;
324         const int type = icmp_hdr(skb)->type;
325         const int code = icmp_hdr(skb)->code;
326         int rel_type = 0;
327         int rel_code = 0;
328         __be32 rel_info = 0;
329         __u32 n = 0;
330         struct sk_buff *skb2;
331         struct flowi fl;
332         struct rtable *rt;
333
334         if (len < hlen + sizeof(struct iphdr))
335                 return 0;
336         eiph = (struct iphdr*)(dp + hlen);
337
338         switch (type) {
339         default:
340                 return 0;
341         case ICMP_PARAMETERPROB:
342                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
343                 if (n < hlen)
344                         return 0;
345
346                 /* So... This guy found something strange INSIDE encapsulated
347                    packet. Well, he is fool, but what can we do ?
348                  */
349                 rel_type = ICMP_PARAMETERPROB;
350                 rel_info = htonl((n - hlen) << 24);
351                 break;
352
353         case ICMP_DEST_UNREACH:
354                 switch (code) {
355                 case ICMP_SR_FAILED:
356                 case ICMP_PORT_UNREACH:
357                         /* Impossible event. */
358                         return 0;
359                 case ICMP_FRAG_NEEDED:
360                         /* And it is the only really necessary thing :-) */
361                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
362                         if (n < hlen+68)
363                                 return 0;
364                         n -= hlen;
365                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
366                         if (n > ntohs(eiph->tot_len))
367                                 return 0;
368                         rel_info = htonl(n);
369                         break;
370                 default:
371                         /* All others are translated to HOST_UNREACH.
372                            rfc2003 contains "deep thoughts" about NET_UNREACH,
373                            I believe, it is just ether pollution. --ANK
374                          */
375                         rel_type = ICMP_DEST_UNREACH;
376                         rel_code = ICMP_HOST_UNREACH;
377                         break;
378                 }
379                 break;
380         case ICMP_TIME_EXCEEDED:
381                 if (code != ICMP_EXC_TTL)
382                         return 0;
383                 break;
384         }
385
386         /* Prepare fake skb to feed it to icmp_send */
387         skb2 = skb_clone(skb, GFP_ATOMIC);
388         if (skb2 == NULL)
389                 return 0;
390         dst_release(skb2->dst);
391         skb2->dst = NULL;
392         skb_pull(skb2, skb->data - (u8*)eiph);
393         skb_reset_network_header(skb2);
394
395         /* Try to guess incoming interface */
396         memset(&fl, 0, sizeof(fl));
397         fl.fl4_daddr = eiph->saddr;
398         fl.fl4_tos = RT_TOS(eiph->tos);
399         fl.proto = IPPROTO_IPIP;
400         if (ip_route_output_key(&init_net, &rt, &key)) {
401                 kfree_skb(skb2);
402                 return 0;
403         }
404         skb2->dev = rt->u.dst.dev;
405
406         /* route "incoming" packet */
407         if (rt->rt_flags&RTCF_LOCAL) {
408                 ip_rt_put(rt);
409                 rt = NULL;
410                 fl.fl4_daddr = eiph->daddr;
411                 fl.fl4_src = eiph->saddr;
412                 fl.fl4_tos = eiph->tos;
413                 if (ip_route_output_key(&init_net, &rt, &fl) ||
414                     rt->u.dst.dev->type != ARPHRD_TUNNEL) {
415                         ip_rt_put(rt);
416                         kfree_skb(skb2);
417                         return 0;
418                 }
419         } else {
420                 ip_rt_put(rt);
421                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
422                     skb2->dst->dev->type != ARPHRD_TUNNEL) {
423                         kfree_skb(skb2);
424                         return 0;
425                 }
426         }
427
428         /* change mtu on this route */
429         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
430                 if (n > dst_mtu(skb2->dst)) {
431                         kfree_skb(skb2);
432                         return 0;
433                 }
434                 skb2->dst->ops->update_pmtu(skb2->dst, n);
435         } else if (type == ICMP_TIME_EXCEEDED) {
436                 struct ip_tunnel *t = netdev_priv(skb2->dev);
437                 if (t->parms.iph.ttl) {
438                         rel_type = ICMP_DEST_UNREACH;
439                         rel_code = ICMP_HOST_UNREACH;
440                 }
441         }
442
443         icmp_send(skb2, rel_type, rel_code, rel_info);
444         kfree_skb(skb2);
445         return 0;
446 #endif
447 }
448
449 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
450                                         struct sk_buff *skb)
451 {
452         struct iphdr *inner_iph = ip_hdr(skb);
453
454         if (INET_ECN_is_ce(outer_iph->tos))
455                 IP_ECN_set_ce(inner_iph);
456 }
457
458 static int ipip_rcv(struct sk_buff *skb)
459 {
460         struct ip_tunnel *tunnel;
461         const struct iphdr *iph = ip_hdr(skb);
462
463         read_lock(&ipip_lock);
464         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
465                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
466                         read_unlock(&ipip_lock);
467                         kfree_skb(skb);
468                         return 0;
469                 }
470
471                 secpath_reset(skb);
472
473                 skb->mac_header = skb->network_header;
474                 skb_reset_network_header(skb);
475                 skb->protocol = htons(ETH_P_IP);
476                 skb->pkt_type = PACKET_HOST;
477
478                 tunnel->stat.rx_packets++;
479                 tunnel->stat.rx_bytes += skb->len;
480                 skb->dev = tunnel->dev;
481                 dst_release(skb->dst);
482                 skb->dst = NULL;
483                 nf_reset(skb);
484                 ipip_ecn_decapsulate(iph, skb);
485                 netif_rx(skb);
486                 read_unlock(&ipip_lock);
487                 return 0;
488         }
489         read_unlock(&ipip_lock);
490
491         return -1;
492 }
493
494 /*
495  *      This function assumes it is being called from dev_queue_xmit()
496  *      and that skb is filled properly by that function.
497  */
498
/*
 * Transmit handler (dev->hard_start_xmit): prepend an outer IPv4 header with
 * protocol IPPROTO_IPIP to the packet and send it along the route to the
 * tunnel's remote endpoint.
 *
 * Always returns 0.  On every error path the skb is freed; most of them
 * also increment tx_errors, except the headroom reallocation failure,
 * which counts tx_dropped instead.
 */
499 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
500 {
501         struct ip_tunnel *tunnel = netdev_priv(dev);
502         struct net_device_stats *stats = &tunnel->stat;
503         struct iphdr  *tiph = &tunnel->parms.iph;
504         u8     tos = tunnel->parms.iph.tos;
505         __be16 df = tiph->frag_off;
506         struct rtable *rt;                      /* Route to the other host */
507         struct net_device *tdev;                        /* Device to other host */
508         struct iphdr  *old_iph = ip_hdr(skb);
509         struct iphdr  *iph;                     /* Our new IP header */
510         unsigned int max_headroom;              /* The extra header space needed */
511         __be32 dst = tiph->daddr;
512         int    mtu;
513
        /*
         * tunnel->recursion is non-zero while a transmit on this tunnel is
         * already in progress; re-entry is counted as a collision and dropped.
         */
514         if (tunnel->recursion++) {
515                 tunnel->stat.collisions++;
516                 goto tx_error;
517         }
518
        /* Only IPv4 payloads are encapsulated. */
519         if (skb->protocol != htons(ETH_P_IP))
520                 goto tx_error;
521
        /* Low bit of the configured TOS set: take the TOS from the inner packet. */
522         if (tos&1)
523                 tos = old_iph->tos;
524
        /*
         * No remote address configured: use the gateway of the route already
         * attached to the skb as the outer destination.
         */
525         if (!dst) {
526                 /* NBMA tunnel */
527                 if ((rt = (struct rtable*)skb->dst) == NULL) {
528                         tunnel->stat.tx_fifo_errors++;
529                         goto tx_error;
530                 }
531                 if ((dst = rt->rt_gateway) == 0)
532                         goto tx_error_icmp;
533         }
534
        /* Route the outer packet; on success rt holds a route reference. */
535         {
536                 struct flowi fl = { .oif = tunnel->parms.link,
537                                     .nl_u = { .ip4_u =
538                                               { .daddr = dst,
539                                                 .saddr = tiph->saddr,
540                                                 .tos = RT_TOS(tos) } },
541                                     .proto = IPPROTO_IPIP };
542                 if (ip_route_output_key(&init_net, &rt, &fl)) {
543                         tunnel->stat.tx_carrier_errors++;
544                         goto tx_error_icmp;
545                 }
546         }
547         tdev = rt->u.dst.dev;
548
        /* The route leads back into this tunnel device: drop. */
549         if (tdev == dev) {
550                 ip_rt_put(rt);
551                 tunnel->stat.collisions++;
552                 goto tx_error;
553         }
554
        /*
         * With a non-zero configured frag_off the inner MTU is the outer
         * route's MTU minus the outer header; otherwise the MTU of the skb's
         * own route (or of the device) is used.
         */
555         if (tiph->frag_off)
556                 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
557         else
558                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
559
560         if (mtu < 68) {
561                 tunnel->stat.collisions++;
562                 ip_rt_put(rt);
563                 goto tx_error;
564         }
565         if (skb->dst)
566                 skb->dst->ops->update_pmtu(skb->dst, mtu);
567
        /* The outer header inherits DF from the inner header. */
568         df |= (old_iph->frag_off&htons(IP_DF));
569
        /* Inner packet has DF set and does not fit: tell the sender the MTU. */
570         if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
571                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
572                 ip_rt_put(rt);
573                 goto tx_error;
574         }
575
        /*
         * Errors recorded by ipip_err() are reported back through
         * dst_link_failure(), one per transmitted packet, while they are
         * newer than IPTUNNEL_ERR_TIMEO; older ones are discarded.
         */
576         if (tunnel->err_count > 0) {
577                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
578                         tunnel->err_count--;
579                         dst_link_failure(skb);
580                 } else
581                         tunnel->err_count = 0;
582         }
583
584         /*
585          * Okay, now see if we can stuff it in the buffer as-is.
586          */
587         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
588
        /*
         * Reallocate when there is too little headroom or the buffer cannot
         * be written in place (shared, or cloned and not writable).
         */
589         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
590             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
591                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
592                 if (!new_skb) {
593                         ip_rt_put(rt);
594                         stats->tx_dropped++;
595                         dev_kfree_skb(skb);
596                         tunnel->recursion--;
597                         return 0;
598                 }
599                 if (skb->sk)
600                         skb_set_owner_w(new_skb, skb->sk);
601                 dev_kfree_skb(skb);
602                 skb = new_skb;
                /* old_iph pointed into the freed buffer; refresh it. */
603                 old_iph = ip_hdr(skb);
604         }
605
        /* The inner IP header becomes the transport header of the outer packet. */
606         skb->transport_header = skb->network_header;
607         skb_push(skb, sizeof(struct iphdr));
608         skb_reset_network_header(skb);
609         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
610         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
611                               IPSKB_REROUTED);
        /* The skb takes over the route reference obtained above. */
612         dst_release(skb->dst);
613         skb->dst = &rt->u.dst;
614
615         /*
616          *      Push down and install the IPIP header.
617          */
618
619         iph                     =       ip_hdr(skb);
620         iph->version            =       4;
621         iph->ihl                =       sizeof(struct iphdr)>>2;
622         iph->frag_off           =       df;
623         iph->protocol           =       IPPROTO_IPIP;
624         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
625         iph->daddr              =       rt->rt_dst;
626         iph->saddr              =       rt->rt_src;
627
        /* A configured TTL of 0 means: copy the TTL of the inner packet. */
628         if ((iph->ttl = tiph->ttl) == 0)
629                 iph->ttl        =       old_iph->ttl;
630
631         nf_reset(skb);
632
        /*
         * NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file;
         * it presumably relies on the local names skb, iph, rt and stats --
         * confirm in its definition before renaming any of them.
         */
633         IPTUNNEL_XMIT();
634         tunnel->recursion--;
635         return 0;
636
637 tx_error_icmp:
638         dst_link_failure(skb);
639 tx_error:
640         stats->tx_errors++;
641         dev_kfree_skb(skb);
642         tunnel->recursion--;
643         return 0;
644 }
645
646 static void ipip_tunnel_bind_dev(struct net_device *dev)
647 {
648         struct net_device *tdev = NULL;
649         struct ip_tunnel *tunnel;
650         struct iphdr *iph;
651
652         tunnel = netdev_priv(dev);
653         iph = &tunnel->parms.iph;
654
655         if (iph->daddr) {
656                 struct flowi fl = { .oif = tunnel->parms.link,
657                                     .nl_u = { .ip4_u =
658                                               { .daddr = iph->daddr,
659                                                 .saddr = iph->saddr,
660                                                 .tos = RT_TOS(iph->tos) } },
661                                     .proto = IPPROTO_IPIP };
662                 struct rtable *rt;
663                 if (!ip_route_output_key(&init_net, &rt, &fl)) {
664                         tdev = rt->u.dst.dev;
665                         ip_rt_put(rt);
666                 }
667                 dev->flags |= IFF_POINTOPOINT;
668         }
669
670         if (!tdev && tunnel->parms.link)
671                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
672
673         if (tdev) {
674                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
675                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
676         }
677         dev->iflink = tunnel->parms.link;
678 }
679
/*
 * dev->do_ioctl handler for tunnel management.
 *
 * Handles SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL;
 * the struct ip_tunnel_parm argument is exchanged with user space through
 * ifr->ifr_ifru.ifru_data.  All commands except SIOCGETTUNNEL require
 * CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EFAULT, -EINVAL,
 * -EEXIST, -ENOBUFS or -ENOENT, as set on the individual paths below.
 */
680 static int
681 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
682 {
683         int err = 0;
684         struct ip_tunnel_parm p;
685         struct ip_tunnel *t;
686
687         switch (cmd) {
688         case SIOCGETTUNNEL:
                /*
                 * On the fallback device the caller's parameters select which
                 * tunnel to report; otherwise, or when nothing matches, the
                 * parameters of dev itself are returned.
                 */
689                 t = NULL;
690                 if (dev == ipip_fb_tunnel_dev) {
691                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
692                                 err = -EFAULT;
693                                 break;
694                         }
695                         t = ipip_tunnel_locate(&p, 0);
696                 }
697                 if (t == NULL)
698                         t = netdev_priv(dev);
699                 memcpy(&p, &t->parms, sizeof(p));
700                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
701                         err = -EFAULT;
702                 break;
703
704         case SIOCADDTUNNEL:
705         case SIOCCHGTUNNEL:
706                 err = -EPERM;
707                 if (!capable(CAP_NET_ADMIN))
708                         goto done;
709
710                 err = -EFAULT;
711                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
712                         goto done;
713
                /*
                 * The template header must be IPv4, protocol IPIP, without
                 * options (ihl == 5), and may carry no frag_off bits other
                 * than DF.
                 */
714                 err = -EINVAL;
715                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
716                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
717                         goto done;
                /* A fixed TTL forces DF on in the template. */
718                 if (p.iph.ttl)
719                         p.iph.frag_off |= htons(IP_DF);
720
                /* Creates the tunnel only for SIOCADDTUNNEL. */
721                 t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
722
723                 if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
724                         if (t != NULL) {
                                /* The requested endpoints already belong to another device. */
725                                 if (t->dev != dev) {
726                                         err = -EEXIST;
727                                         break;
728                                 }
729                         } else {
                                /*
                                 * A change may not add a remote address to a
                                 * tunnel without IFF_POINTOPOINT, nor remove
                                 * it from one that has the flag.
                                 */
730                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
731                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
732                                         err = -EINVAL;
733                                         break;
734                                 }
                                /*
                                 * The endpoints select the hash chain, so
                                 * unlink, update them, and link again.
                                 */
735                                 t = netdev_priv(dev);
736                                 ipip_tunnel_unlink(t);
737                                 t->parms.iph.saddr = p.iph.saddr;
738                                 t->parms.iph.daddr = p.iph.daddr;
739                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
740                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
741                                 ipip_tunnel_link(t);
742                                 netdev_state_change(dev);
743                         }
744                 }
745
746                 if (t) {
747                         err = 0;
748                         if (cmd == SIOCCHGTUNNEL) {
749                                 t->parms.iph.ttl = p.iph.ttl;
750                                 t->parms.iph.tos = p.iph.tos;
751                                 t->parms.iph.frag_off = p.iph.frag_off;
752                                 if (t->parms.link != p.link) {
753                                         t->parms.link = p.link;
754                                         ipip_tunnel_bind_dev(dev);
755                                         netdev_state_change(dev);
756                                 }
757                         }
                        /* Report the resulting parameters back to the caller. */
758                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
759                                 err = -EFAULT;
760                 } else
761                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
762                 break;
763
764         case SIOCDELTUNNEL:
765                 err = -EPERM;
766                 if (!capable(CAP_NET_ADMIN))
767                         goto done;
768
                /*
                 * On the fallback device the caller's parameters select the
                 * tunnel to delete; the fallback device itself may not be
                 * deleted this way.
                 */
769                 if (dev == ipip_fb_tunnel_dev) {
770                         err = -EFAULT;
771                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
772                                 goto done;
773                         err = -ENOENT;
774                         if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
775                                 goto done;
776                         err = -EPERM;
777                         if (t->dev == ipip_fb_tunnel_dev)
778                                 goto done;
779                         dev = t->dev;
780                 }
781                 unregister_netdevice(dev);
782                 err = 0;
783                 break;
784
785         default:
786                 err = -EINVAL;
787         }
788
789 done:
790         return err;
791 }
792
793 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
794 {
795         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
796 }
797
798 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
799 {
800         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
801                 return -EINVAL;
802         dev->mtu = new_mtu;
803         return 0;
804 }
805
806 static void ipip_tunnel_setup(struct net_device *dev)
807 {
808         dev->uninit             = ipip_tunnel_uninit;
809         dev->hard_start_xmit    = ipip_tunnel_xmit;
810         dev->get_stats          = ipip_tunnel_get_stats;
811         dev->do_ioctl           = ipip_tunnel_ioctl;
812         dev->change_mtu         = ipip_tunnel_change_mtu;
813         dev->destructor         = free_netdev;
814
815         dev->type               = ARPHRD_TUNNEL;
816         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
817         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
818         dev->flags              = IFF_NOARP;
819         dev->iflink             = 0;
820         dev->addr_len           = 4;
821 }
822
823 static int ipip_tunnel_init(struct net_device *dev)
824 {
825         struct ip_tunnel *tunnel;
826
827         tunnel = netdev_priv(dev);
828
829         tunnel->dev = dev;
830         strcpy(tunnel->parms.name, dev->name);
831
832         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
833         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
834
835         ipip_tunnel_bind_dev(dev);
836
837         return 0;
838 }
839
840 static int __init ipip_fb_tunnel_init(struct net_device *dev)
841 {
842         struct ip_tunnel *tunnel = netdev_priv(dev);
843         struct iphdr *iph = &tunnel->parms.iph;
844
845         tunnel->dev = dev;
846         strcpy(tunnel->parms.name, dev->name);
847
848         iph->version            = 4;
849         iph->protocol           = IPPROTO_IPIP;
850         iph->ihl                = 5;
851
852         dev_hold(dev);
853         tunnels_wc[0]           = tunnel;
854         return 0;
855 }
856
857 static struct xfrm_tunnel ipip_handler = {
858         .handler        =       ipip_rcv,
859         .err_handler    =       ipip_err,
860         .priority       =       1,
861 };
862
863 static char banner[] __initdata =
864         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865
866 static int __init ipip_init(void)
867 {
868         int err;
869
870         printk(banner);
871
872         if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
873                 printk(KERN_INFO "ipip init: can't register tunnel\n");
874                 return -EAGAIN;
875         }
876
877         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
878                                            "tunl0",
879                                            ipip_tunnel_setup);
880         if (!ipip_fb_tunnel_dev) {
881                 err = -ENOMEM;
882                 goto err1;
883         }
884
885         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
886
887         if ((err = register_netdev(ipip_fb_tunnel_dev)))
888                 goto err2;
889  out:
890         return err;
891  err2:
892         free_netdev(ipip_fb_tunnel_dev);
893  err1:
894         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
895         goto out;
896 }
897
898 static void __exit ipip_destroy_tunnels(void)
899 {
900         int prio;
901
902         for (prio = 1; prio < 4; prio++) {
903                 int h;
904                 for (h = 0; h < HASH_SIZE; h++) {
905                         struct ip_tunnel *t;
906                         while ((t = tunnels[prio][h]) != NULL)
907                                 unregister_netdevice(t->dev);
908                 }
909         }
910 }
911
912 static void __exit ipip_fini(void)
913 {
914         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
915                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
916
917         rtnl_lock();
918         ipip_destroy_tunnels();
919         unregister_netdevice(ipip_fb_tunnel_dev);
920         rtnl_unlock();
921 }
922
/* Module entry point, exit point and license declaration. */
923 module_init(ipip_init);
924 module_exit(ipip_fini);
925 MODULE_LICENSE("GPL");