Merge commit master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6 of HEAD
[linux-2.6] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder. 
3  *
4  *      Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *      Authors:
7  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *      Fixes:
10  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
11  *                                      a module taking up 2 pages).
12  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *                                      to keep ip_forward happy.
14  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *                                      I do not want to merge them together.
21  *
22  *      This program is free software; you can redistribute it and/or
23  *      modify it under the terms of the GNU General Public License
24  *      as published by the Free Software Foundation; either version
25  *      2 of the License, or (at your option) any later version.
26  *
27  */
28
29 /* tunnel.c: an IP tunnel driver
30
31         The purpose of this driver is to provide an IP tunnel through
32         which you can tunnel network traffic transparently across subnets.
33
34         This was written by looking at Nick Holloway's dummy driver
35         Thanks for the great code!
36
37                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
38                 
39         Minor tweaks:
40                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
41                 dev->hard_header/hard_header_len changed to use no headers.
42                 Comments/bracketing tweaked.
43                 Made the tunnels use dev->name not tunnel: when error reporting.
44                 Added tx_dropped stat
45                 
46                 -Alan Cox       (Alan.Cox@linux.org) 21 March 95
47
48         Reworked:
49                 Changed to tunnel to destination gateway in addition to the
50                         tunnel's pointopoint address
51                 Almost completely rewritten
52                 Note:  There is currently no firewall or ICMP handling done.
53
54                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
55                 
56 */
57
58 /* Things I wish I had known when writing the tunnel driver:
59
60         When the tunnel_xmit() function is called, the skb contains the
61         packet to be sent (plus a great deal of extra info), and dev
62         contains the tunnel device that _we_ are.
63
64         When we are passed a packet, we are expected to fill in the
65         source address with our source IP address.
66
67         What is the proper way to allocate, copy and free a buffer?
68         After you allocate it, it is a "0 length" chunk of memory
69         starting at zero.  If you want to add headers to the buffer
70         later, you'll have to call "skb_reserve(skb, amount)" with
71         the amount of memory you want reserved.  Then, you call
72         "skb_put(skb, amount)" with the amount of space you want in
73         the buffer.  skb_put() returns a pointer to the top (#0) of
74         that buffer.  skb->len is set to the amount of space you have
75         "allocated" with skb_put().  You can then write up to skb->len
76         bytes to that buffer.  If you need more, you can call skb_put()
77         again with the additional amount of space you need.  You can
78         find out how much more space you can allocate by calling 
79         "skb_tailroom(skb)".
80         Now, to add header space, call "skb_push(skb, header_len)".
81         This creates space at the beginning of the buffer and returns
82         a pointer to this new space.  If later you need to strip a
83         header from a buffer, call "skb_pull(skb, header_len)".
84         skb_headroom() will return how much space is left at the top
85         of the buffer (before the main data).  Remember, this headroom
86         space must be reserved before the skb_put() function is called.
87         */
88
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c. --ANK
 */
94
95  
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h>
112
113 #include <net/sock.h>
114 #include <net/ip.h>
115 #include <net/icmp.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119
/*
 * Number of buckets in each address-keyed tunnel table.  It must stay a
 * power of two because HASH() reduces its result with HASH_SIZE - 1.
 */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 * The argument is fully parenthesized so that an expression argument such
 * as HASH(a | b) hashes the value of the whole expression.  Note that the
 * argument is evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((addr) ^ ((addr) >> 4))) & (HASH_SIZE - 1))
122
/* Forward declarations for callbacks that are referenced before their definitions. */
static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback device ("tunl0"), allocated and registered by ipip_init(). */
static struct net_device *ipip_fb_tunnel_dev;

/*
 * Tunnel tables.  Each bucket is a singly linked list chained through
 * ip_tunnel::next.
 *
 *   tunnels_r_l - tunnels with both a remote and a local address
 *   tunnels_r   - tunnels with only a remote address
 *   tunnels_l   - tunnels with only a local address
 *   tunnels_wc  - the single wildcard slot (neither address set)
 *
 * tunnels[] is indexed by the "prio" value computed in ipip_bucket() and
 * ipip_tunnel_locate(): bit 1 is set when a remote address is present,
 * bit 0 when a local address is present.
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/*
 * Taken for reading around ipip_tunnel_lookup() in ipip_rcv() and
 * ipip_err(), and for writing around every update of a list link in
 * ipip_tunnel_link(), ipip_tunnel_unlink() and ipip_tunnel_uninit().
 */
static DEFINE_RWLOCK(ipip_lock);
136
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139         unsigned h0 = HASH(remote);
140         unsigned h1 = HASH(local);
141         struct ip_tunnel *t;
142
143         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144                 if (local == t->parms.iph.saddr &&
145                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146                         return t;
147         }
148         for (t = tunnels_r[h0]; t; t = t->next) {
149                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150                         return t;
151         }
152         for (t = tunnels_l[h1]; t; t = t->next) {
153                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154                         return t;
155         }
156         if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157                 return t;
158         return NULL;
159 }
160
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163         u32 remote = t->parms.iph.daddr;
164         u32 local = t->parms.iph.saddr;
165         unsigned h = 0;
166         int prio = 0;
167
168         if (remote) {
169                 prio |= 2;
170                 h ^= HASH(remote);
171         }
172         if (local) {
173                 prio |= 1;
174                 h ^= HASH(local);
175         }
176         return &tunnels[prio][h];
177 }
178
179
180 static void ipip_tunnel_unlink(struct ip_tunnel *t)
181 {
182         struct ip_tunnel **tp;
183
184         for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
185                 if (t == *tp) {
186                         write_lock_bh(&ipip_lock);
187                         *tp = t->next;
188                         write_unlock_bh(&ipip_lock);
189                         break;
190                 }
191         }
192 }
193
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196         struct ip_tunnel **tp = ipip_bucket(t);
197
198         t->next = *tp;
199         write_lock_bh(&ipip_lock);
200         *tp = t;
201         write_unlock_bh(&ipip_lock);
202 }
203
204 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
205 {
206         u32 remote = parms->iph.daddr;
207         u32 local = parms->iph.saddr;
208         struct ip_tunnel *t, **tp, *nt;
209         struct net_device *dev;
210         unsigned h = 0;
211         int prio = 0;
212         char name[IFNAMSIZ];
213
214         if (remote) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218         if (local) {
219                 prio |= 1;
220                 h ^= HASH(local);
221         }
222         for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
223                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
224                         return t;
225         }
226         if (!create)
227                 return NULL;
228
229         if (parms->name[0])
230                 strlcpy(name, parms->name, IFNAMSIZ);
231         else {
232                 int i;
233                 for (i=1; i<100; i++) {
234                         sprintf(name, "tunl%d", i);
235                         if (__dev_get_by_name(name) == NULL)
236                                 break;
237                 }
238                 if (i==100)
239                         goto failed;
240         }
241
242         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
243         if (dev == NULL)
244                 return NULL;
245
246         nt = netdev_priv(dev);
247         SET_MODULE_OWNER(dev);
248         dev->init = ipip_tunnel_init;
249         nt->parms = *parms;
250
251         if (register_netdevice(dev) < 0) {
252                 free_netdev(dev);
253                 goto failed;
254         }
255
256         dev_hold(dev);
257         ipip_tunnel_link(nt);
258         return nt;
259
260 failed:
261         return NULL;
262 }
263
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266         if (dev == ipip_fb_tunnel_dev) {
267                 write_lock_bh(&ipip_lock);
268                 tunnels_wc[0] = NULL;
269                 write_unlock_bh(&ipip_lock);
270         } else
271                 ipip_tunnel_unlink(netdev_priv(dev));
272         dev_put(dev);
273 }
274
/*
 * ICMP error handler for the tunnel (installed as ipip_handler.err_handler).
 *
 * Active branch (I_WISH_WORLD_WERE_PERFECT not defined): the error is not
 * relayed to the original sender.  For the ICMP types that are not
 * filtered out below, the tunnel matching the IP header at skb->data is
 * looked up and its err_count/err_time are updated; ipip_tunnel_xmit()
 * later consumes these counters.  @info is not used in this branch.
 *
 * Returns 0 when the error was ignored or recorded, -ENOENT when no
 * matching tunnel with a configured remote address was found.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        /* NOTE(review): skb->data is presumably the header of the packet
           that triggered the ICMP error - confirm against the caller. */
        struct iphdr *iph = (struct iphdr*)skb->data;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        struct ip_tunnel *t;
        int err;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                /* Unknown types and parameter problems are ignored. */
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                /* Only "TTL exceeded in transit" is of interest. */
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        err = -ENOENT;

        read_lock(&ipip_lock);
        /* The embedded header's destination is the tunnel's remote end,
           hence daddr/saddr are passed as remote/local. */
        t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;

        err = 0;
        /* A zero configured ttl means the ttl is copied from the inner
           packet (see ipip_tunnel_xmit()); TTL expiry is then not counted
           as a tunnel error. */
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
           one; otherwise restart the count at 1. */
        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipip_lock);
        return err;
#else
        /*
         * Disabled variant that would relay the ICMP error to the sender
         * of the encapsulated packet.  As written it references "dp",
         * "len" and "key", none of which is declared in this function,
         * so it would need work before it could be enabled.
         */
        struct iphdr *iph = (struct iphdr*)dp;
        int hlen = iph->ihl<<2;
        struct iphdr *eiph;
        int type = skb->h.icmph->type;
        int code = skb->h.icmph->code;
        int rel_type = 0;
        int rel_code = 0;
        int rel_info = 0;
        struct sk_buff *skb2;
        struct flowi fl;
        struct rtable *rt;

        if (len < hlen + sizeof(struct iphdr))
                return 0;
        eiph = (struct iphdr*)(dp + hlen);

        switch (type) {
        default:
                return 0;
        case ICMP_PARAMETERPROB:
                if (skb->h.icmph->un.gateway < hlen)
                        return 0;

                /* So... This guy found something strange INSIDE encapsulated
                   packet. Well, he is fool, but what can we do ?
                 */
                rel_type = ICMP_PARAMETERPROB;
                rel_info = skb->h.icmph->un.gateway - hlen;
                break;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                case ICMP_FRAG_NEEDED:
                        /* And it is the only really necessary thing :-) */
                        rel_info = ntohs(skb->h.icmph->un.frag.mtu);
                        if (rel_info < hlen+68)
                                return 0;
                        rel_info -= hlen;
                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
                        if (rel_info > ntohs(eiph->tot_len))
                                return 0;
                        break;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe, it is just ether pollution. --ANK
                         */
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        /* Prepare fake skb to feed it to icmp_send */
        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL)
                return 0;
        dst_release(skb2->dst);
        skb2->dst = NULL;
        skb_pull(skb2, skb->data - (u8*)eiph);
        skb2->nh.raw = skb2->data;

        /* Try to guess incoming interface */
        memset(&fl, 0, sizeof(fl));
        fl.fl4_daddr = eiph->saddr;
        fl.fl4_tos = RT_TOS(eiph->tos);
        fl.proto = IPPROTO_IPIP;
        if (ip_route_output_key(&rt, &key)) {
                kfree_skb(skb2);
                return 0;
        }
        skb2->dev = rt->u.dst.dev;

        /* route "incoming" packet */
        if (rt->rt_flags&RTCF_LOCAL) {
                ip_rt_put(rt);
                rt = NULL;
                fl.fl4_daddr = eiph->daddr;
                fl.fl4_src = eiph->saddr;
                fl.fl4_tos = eiph->tos;
                if (ip_route_output_key(&rt, &fl) ||
                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
                        ip_rt_put(rt);
                        kfree_skb(skb2);
                        return 0;
                }
        } else {
                ip_rt_put(rt);
                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
                    skb2->dst->dev->type != ARPHRD_TUNNEL) {
                        kfree_skb(skb2);
                        return 0;
                }
        }

        /* change mtu on this route */
        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                if (rel_info > dst_mtu(skb2->dst)) {
                        kfree_skb(skb2);
                        return 0;
                }
                skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
                rel_info = htonl(rel_info);
        } else if (type == ICMP_TIME_EXCEEDED) {
                struct ip_tunnel *t = netdev_priv(skb2->dev);
                if (t->parms.iph.ttl) {
                        rel_type = ICMP_DEST_UNREACH;
                        rel_code = ICMP_HOST_UNREACH;
                }
        }

        icmp_send(skb2, rel_type, rel_code, rel_info);
        kfree_skb(skb2);
        return 0;
#endif
}
462
463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
464 {
465         struct iphdr *inner_iph = skb->nh.iph;
466
467         if (INET_ECN_is_ce(outer_iph->tos))
468                 IP_ECN_set_ce(inner_iph);
469 }
470
471 static int ipip_rcv(struct sk_buff *skb)
472 {
473         struct iphdr *iph;
474         struct ip_tunnel *tunnel;
475
476         iph = skb->nh.iph;
477
478         read_lock(&ipip_lock);
479         if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
480                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
481                         read_unlock(&ipip_lock);
482                         kfree_skb(skb);
483                         return 0;
484                 }
485
486                 secpath_reset(skb);
487
488                 skb->mac.raw = skb->nh.raw;
489                 skb->nh.raw = skb->data;
490                 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
491                 skb->protocol = htons(ETH_P_IP);
492                 skb->pkt_type = PACKET_HOST;
493
494                 tunnel->stat.rx_packets++;
495                 tunnel->stat.rx_bytes += skb->len;
496                 skb->dev = tunnel->dev;
497                 dst_release(skb->dst);
498                 skb->dst = NULL;
499                 nf_reset(skb);
500                 ipip_ecn_decapsulate(iph, skb);
501                 netif_rx(skb);
502                 read_unlock(&ipip_lock);
503                 return 0;
504         }
505         read_unlock(&ipip_lock);
506
507         return -1;
508 }
509
/*
 *      Transmit handler (installed as dev->hard_start_xmit).
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      Prepends an outer IPv4 header with protocol IPPROTO_IPIP to @skb
 *      and hands it to IPTUNNEL_XMIT().  Every path returns 0; on error
 *      the skb is freed here and tx_errors or tx_dropped is incremented.
 */

static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->stat;
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        u16    df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                        /* Device to other host */
        struct iphdr  *old_iph = skb->nh.iph;
        struct iphdr  *iph;                     /* Our new IP header */
        int    max_headroom;                    /* The extra header space needed */
        u32    dst = tiph->daddr;
        int    mtu;

        /* Non-zero recursion means this tunnel is already transmitting:
           count it as a collision and drop the packet. */
        if (tunnel->recursion++) {
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /* Only IPv4 payloads can be encapsulated. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Low bit set in the configured tos: take the tos from the inner
           packet instead. */
        if (tos&1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No fixed remote end: use the gateway of the packet's
                   own route as the outer destination. */
                if ((rt = (struct rtable*)skb->dst) == NULL) {
                        tunnel->stat.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        {
                /* Route the outer packet towards the tunnel endpoint. */
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl)) {
                        tunnel->stat.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
        }
        tdev = rt->u.dst.dev;

        /* The route leads back into this very tunnel device. */
        if (tdev == dev) {
                ip_rt_put(rt);
                tunnel->stat.collisions++;
                goto tx_error;
        }

        /* With a configured frag_off (DF) the usable mtu is the outer
           route's mtu minus the outer header; otherwise the inner route's
           mtu, or the device mtu when the skb has no route. */
        if (tiph->frag_off)
                mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (mtu < 68) {
                tunnel->stat.collisions++;
                ip_rt_put(rt);
                goto tx_error;
        }
        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        /* Carry the inner packet's DF bit over to the outer header. */
        df |= (old_iph->frag_off&htons(IP_DF));

        /* Inner packet has DF set and does not fit: report the mtu back
           to the sender and drop. */
        if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                goto tx_error;
        }

        /* Errors recorded by ipip_err(): while they are recent, signal one
           link failure per transmitted packet; once stale, forget them. */
        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The inner header now lives in the new buffer. */
                old_iph = skb->nh.iph;
        }

        /* The inner IP header becomes the transport header; the outer
           header is pushed in front of it. */
        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       skb->nh.iph;
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       rt->rt_dst;
        iph->saddr              =       rt->rt_src;

        /* A configured ttl of 0 means: copy the inner packet's ttl. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);

        /* NOTE(review): IPTUNNEL_XMIT() is defined outside this file and
           takes no arguments, so it presumably relies on the local
           variable names used here - check before renaming any of them. */
        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}
659
/*
 * Tunnel configuration ioctl (installed as dev->do_ioctl).
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct
 * ip_tunnel_parm.  Supported commands:
 *
 *   SIOCGETTUNNEL - copy a tunnel's parameters to user space.  On the
 *                   fallback device the tunnel is selected by the
 *                   addresses in the user buffer, falling back to the
 *                   device itself when none matches.
 *   SIOCADDTUNNEL - find or create the tunnel described by the buffer.
 *   SIOCCHGTUNNEL - change ttl/tos/frag_off of a tunnel and, when issued
 *                   on a non-fallback device with addresses no other
 *                   tunnel uses, its endpoints as well.
 *   SIOCDELTUNNEL - unregister a tunnel device; the fallback device
 *                   itself cannot be deleted.
 *
 * ADD, CHG and DEL require CAP_NET_ADMIN.  Returns 0 on success or a
 * negative errno (-EPERM, -EFAULT, -EINVAL, -EEXIST, -ENOBUFS, -ENOENT,
 * or whatever unregister_netdevice() returns).
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipip_fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(&p, 0);
                }
                /* No specific tunnel requested or found: report this device. */
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* Accept only an option-less IPv4 header template with
                   protocol IPIP and no frag_off bits other than DF. */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed ttl forces DF on the outer header. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* Only SIOCADDTUNNEL may create a new tunnel. */
                t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

                if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested endpoints already belong
                                   to a different device. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* The point-to-point flag must agree with
                                   whether a remote address is given. */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                /* Re-key this device: the bucket depends on
                                   the addresses, so unlink before changing
                                   them and link again afterwards. */
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                        }
                        /* Report the resulting parameters back to the caller. */
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ipip_fb_tunnel_dev) {
                        /* On the fallback device the victim is selected
                           by the addresses in the user buffer. */
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipip_fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                err = unregister_netdevice(dev);
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
766
767 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
768 {
769         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
770 }
771
772 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
773 {
774         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
775                 return -EINVAL;
776         dev->mtu = new_mtu;
777         return 0;
778 }
779
780 static void ipip_tunnel_setup(struct net_device *dev)
781 {
782         SET_MODULE_OWNER(dev);
783         dev->uninit             = ipip_tunnel_uninit;
784         dev->hard_start_xmit    = ipip_tunnel_xmit;
785         dev->get_stats          = ipip_tunnel_get_stats;
786         dev->do_ioctl           = ipip_tunnel_ioctl;
787         dev->change_mtu         = ipip_tunnel_change_mtu;
788         dev->destructor         = free_netdev;
789
790         dev->type               = ARPHRD_TUNNEL;
791         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
792         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
793         dev->flags              = IFF_NOARP;
794         dev->iflink             = 0;
795         dev->addr_len           = 4;
796 }
797
798 static int ipip_tunnel_init(struct net_device *dev)
799 {
800         struct net_device *tdev = NULL;
801         struct ip_tunnel *tunnel;
802         struct iphdr *iph;
803
804         tunnel = netdev_priv(dev);
805         iph = &tunnel->parms.iph;
806
807         tunnel->dev = dev;
808         strcpy(tunnel->parms.name, dev->name);
809
810         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
811         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
812
813         if (iph->daddr) {
814                 struct flowi fl = { .oif = tunnel->parms.link,
815                                     .nl_u = { .ip4_u =
816                                               { .daddr = iph->daddr,
817                                                 .saddr = iph->saddr,
818                                                 .tos = RT_TOS(iph->tos) } },
819                                     .proto = IPPROTO_IPIP };
820                 struct rtable *rt;
821                 if (!ip_route_output_key(&rt, &fl)) {
822                         tdev = rt->u.dst.dev;
823                         ip_rt_put(rt);
824                 }
825                 dev->flags |= IFF_POINTOPOINT;
826         }
827
828         if (!tdev && tunnel->parms.link)
829                 tdev = __dev_get_by_index(tunnel->parms.link);
830
831         if (tdev) {
832                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
833                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
834         }
835         dev->iflink = tunnel->parms.link;
836
837         return 0;
838 }
839
840 static int __init ipip_fb_tunnel_init(struct net_device *dev)
841 {
842         struct ip_tunnel *tunnel = netdev_priv(dev);
843         struct iphdr *iph = &tunnel->parms.iph;
844
845         tunnel->dev = dev;
846         strcpy(tunnel->parms.name, dev->name);
847
848         iph->version            = 4;
849         iph->protocol           = IPPROTO_IPIP;
850         iph->ihl                = 5;
851
852         dev_hold(dev);
853         tunnels_wc[0]           = tunnel;
854         return 0;
855 }
856
/*
 * Handler registered with xfrm4_tunnel_register() in ipip_init():
 * ipip_rcv() receives encapsulated packets and ipip_err() receives the
 * related ICMP errors.
 */
static struct xfrm_tunnel ipip_handler = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};

/* Message printed once by ipip_init(). */
static char banner[] __initdata =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865
866 static int __init ipip_init(void)
867 {
868         int err;
869
870         printk(banner);
871
872         if (xfrm4_tunnel_register(&ipip_handler)) {
873                 printk(KERN_INFO "ipip init: can't register tunnel\n");
874                 return -EAGAIN;
875         }
876
877         ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
878                                            "tunl0",
879                                            ipip_tunnel_setup);
880         if (!ipip_fb_tunnel_dev) {
881                 err = -ENOMEM;
882                 goto err1;
883         }
884
885         ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
886
887         if ((err = register_netdev(ipip_fb_tunnel_dev)))
888                 goto err2;
889  out:
890         return err;
891  err2:
892         free_netdev(ipip_fb_tunnel_dev);
893  err1:
894         xfrm4_tunnel_deregister(&ipip_handler);
895         goto out;
896 }
897
898 static void __exit ipip_destroy_tunnels(void)
899 {
900         int prio;
901
902         for (prio = 1; prio < 4; prio++) {
903                 int h;
904                 for (h = 0; h < HASH_SIZE; h++) {
905                         struct ip_tunnel *t;
906                         while ((t = tunnels[prio][h]) != NULL)
907                                 unregister_netdevice(t->dev);
908                 }
909         }
910 }
911
912 static void __exit ipip_fini(void)
913 {
914         if (xfrm4_tunnel_deregister(&ipip_handler))
915                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
916
917         rtnl_lock();
918         ipip_destroy_tunnels();
919         unregister_netdevice(ipip_fb_tunnel_dev);
920         rtnl_unlock();
921 }
922
/* Module entry and exit points, and the license declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");