[NETFILTER]: Remove some EXPERIMENTAL dependencies
[linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42
43 #ifdef CONFIG_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_fib.h>
46 #include <net/ip6_route.h>
47 #endif
48
49 /*
50    Problems & solutions
51    --------------------
52
53    1. The most important issue is detecting local dead loops.
54    They would cause complete host lockup in transmit, which
55    would be "resolved" by stack overflow or, if queueing is enabled,
56    with infinite looping in net_bh.
57
58    We cannot track such dead loops during route installation,
59    it is infeasible task. The most general solutions would be
60    to keep skb->encapsulation counter (sort of local ttl),
61    and silently drop packet when it expires. It is the best
62    solution, but it supposes maintaining a new variable in ALL
63    skb, even if no tunneling is used.
64
65    Current solution: t->recursion lock breaks dead loops. It looks
66    like dev->tbusy flag, but I preferred new variable, because
67    the semantics is different. One day, when hard_start_xmit
68    will be multithreaded we will have to use skb->encapsulation.
69
70
71
72    2. Networking dead loops would not kill routers, but would really
73    kill network. IP hop limit plays role of "t->recursion" in this case,
74    if we copy it from packet being encapsulated to upper header.
75    It is very good solution, but it introduces two problems:
76
77    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
78      do not work over tunnels.
79    - traceroute does not work. I planned to relay ICMP from tunnel,
80      so that this problem would be solved and traceroute output
81      would even more informative. This idea appeared to be wrong:
82      only Linux complies to rfc1812 now (yes, guys, Linux is the only
83      true router now :-)), all routers (at least, in neighbourhood of mine)
84      return only 8 bytes of payload. It is the end.
85
86    Hence, if we want that OSPF worked or traceroute said something reasonable,
87    we should search for another solution.
88
89    One of them is to parse packet trying to detect inner encapsulation
90    made by our node. It is difficult or even impossible, especially,
91    taking into account fragmentation. To be short, it is not a solution at all.
92
93    Current solution: The solution was UNEXPECTEDLY SIMPLE.
94    We force DF flag on tunnels with preconfigured hop limit,
95    that is ALL. :-) Well, it does not remove the problem completely,
96    but exponential growth of network traffic is changed to linear
97    (branches, that exceed pmtu are pruned) and tunnel mtu
98    quickly degrades to value <68, where looping stops.
99    Yes, it is not good if there exists a router in the loop,
100    which does not force DF, even when encapsulating packets have DF set.
101    But it is not our problem! Nobody could accuse us, we made
102    all that we could make. Even if it is your gated who injected
103    fatal route to network, even if it were you who configured
104    fatal static route: you are innocent. :-)
105
106
107
108    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
109    practically identical code. It would be good to glue them
110    together, but it is not very evident, how to make them modular.
111    sit is integral part of IPv6, ipip and gre are naturally modular.
112    We could extract common parts (hash table, ioctl etc)
113    to a separate module (ip_tunnel.c).
114
115    Alexey Kuznetsov.
116  */
117
/*
 * Forward declarations.  ipgre_tunnel_locate() installs ipgre_tunnel_init
 * as dev->init and passes ipgre_tunnel_setup to alloc_netdev().
 */
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/*
 * Device of the fallback tunnel.  ipgre_tunnel_lookup() returns its private
 * data when no configured tunnel matches and this device is IFF_UP.
 */
static struct net_device *ipgre_fb_tunnel_dev;
126
127 /* Tunnel hash table */
128
129 /*
130    4 hash tables:
131
132    3: (remote,local)
133    2: (remote,*)
134    1: (*,local)
135    0: (*,*)
136
137    We require exact key match i.e. if a key is present in packet
138    it will match only tunnel with the same key; if it is not present,
139    it will match only keyless tunnel.
140
141    All keyless packets, if not matched by configured keyless tunnels,
142    will match fallback tunnel.
143  */
144
/* Number of buckets per table; must stay a power of two (see HASH). */
#define HASH_SIZE  16

/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that any expression may be
 * passed, and the mask is derived from HASH_SIZE instead of a literal.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* tunnels[prio][hash]: bit 0 of prio = local set, bit 1 = unicast remote set */
static struct ip_tunnel *tunnels[4][HASH_SIZE];

#define tunnels_r_l     (tunnels[3])
#define tunnels_r       (tunnels[2])
#define tunnels_l       (tunnels[1])
#define tunnels_wc      (tunnels[0])

/* Taken for writing when a bucket head/link is modified, for reading on lookup. */
static DEFINE_RWLOCK(ipgre_lock);
156
157 /* Given src, dst and key, find appropriate for input tunnel. */
158
159 static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be32 key)
160 {
161         unsigned h0 = HASH(remote);
162         unsigned h1 = HASH(key);
163         struct ip_tunnel *t;
164
165         for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
166                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
167                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
168                                 return t;
169                 }
170         }
171         for (t = tunnels_r[h0^h1]; t; t = t->next) {
172                 if (remote == t->parms.iph.daddr) {
173                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
174                                 return t;
175                 }
176         }
177         for (t = tunnels_l[h1]; t; t = t->next) {
178                 if (local == t->parms.iph.saddr ||
179                      (local == t->parms.iph.daddr &&
180                       ipv4_is_multicast(local))) {
181                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182                                 return t;
183                 }
184         }
185         for (t = tunnels_wc[h1]; t; t = t->next) {
186                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
187                         return t;
188         }
189
190         if (ipgre_fb_tunnel_dev->flags&IFF_UP)
191                 return netdev_priv(ipgre_fb_tunnel_dev);
192         return NULL;
193 }
194
195 static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
196 {
197         __be32 remote = parms->iph.daddr;
198         __be32 local = parms->iph.saddr;
199         __be32 key = parms->i_key;
200         unsigned h = HASH(key);
201         int prio = 0;
202
203         if (local)
204                 prio |= 1;
205         if (remote && !ipv4_is_multicast(remote)) {
206                 prio |= 2;
207                 h ^= HASH(remote);
208         }
209
210         return &tunnels[prio][h];
211 }
212
213 static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
214 {
215         return __ipgre_bucket(&t->parms);
216 }
217
218 static void ipgre_tunnel_link(struct ip_tunnel *t)
219 {
220         struct ip_tunnel **tp = ipgre_bucket(t);
221
222         t->next = *tp;
223         write_lock_bh(&ipgre_lock);
224         *tp = t;
225         write_unlock_bh(&ipgre_lock);
226 }
227
228 static void ipgre_tunnel_unlink(struct ip_tunnel *t)
229 {
230         struct ip_tunnel **tp;
231
232         for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
233                 if (t == *tp) {
234                         write_lock_bh(&ipgre_lock);
235                         *tp = t->next;
236                         write_unlock_bh(&ipgre_lock);
237                         break;
238                 }
239         }
240 }
241
242 static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
243 {
244         __be32 remote = parms->iph.daddr;
245         __be32 local = parms->iph.saddr;
246         __be32 key = parms->i_key;
247         struct ip_tunnel *t, **tp, *nt;
248         struct net_device *dev;
249         char name[IFNAMSIZ];
250
251         for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
252                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
253                         if (key == t->parms.i_key)
254                                 return t;
255                 }
256         }
257         if (!create)
258                 return NULL;
259
260         if (parms->name[0])
261                 strlcpy(name, parms->name, IFNAMSIZ);
262         else {
263                 int i;
264                 for (i=1; i<100; i++) {
265                         sprintf(name, "gre%d", i);
266                         if (__dev_get_by_name(&init_net, name) == NULL)
267                                 break;
268                 }
269                 if (i==100)
270                         goto failed;
271         }
272
273         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
274         if (!dev)
275           return NULL;
276
277         dev->init = ipgre_tunnel_init;
278         nt = netdev_priv(dev);
279         nt->parms = *parms;
280
281         if (register_netdevice(dev) < 0) {
282                 free_netdev(dev);
283                 goto failed;
284         }
285
286         dev_hold(dev);
287         ipgre_tunnel_link(nt);
288         return nt;
289
290 failed:
291         return NULL;
292 }
293
/* Unhash the tunnel behind @dev and drop one reference on the device. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	ipgre_tunnel_unlink(t);
	dev_put(dev);
}
299
300
301 static void ipgre_err(struct sk_buff *skb, u32 info)
302 {
303 #ifndef I_WISH_WORLD_WERE_PERFECT
304
305 /* It is not :-( All the routers (except for Linux) return only
306    8 bytes of packet payload. It means, that precise relaying of
307    ICMP in the real Internet is absolutely infeasible.
308
309    Moreover, Cisco "wise men" put GRE key to the third word
310    in GRE header. It makes impossible maintaining even soft state for keyed
311    GRE tunnels with enabled checksum. Tell them "thank you".
312
313    Well, I wonder, rfc1812 was written by Cisco employee,
314    what the hell these idiots break standrads established
315    by themself???
316  */
317
318         struct iphdr *iph = (struct iphdr*)skb->data;
319         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
320         int grehlen = (iph->ihl<<2) + 4;
321         const int type = icmp_hdr(skb)->type;
322         const int code = icmp_hdr(skb)->code;
323         struct ip_tunnel *t;
324         __be16 flags;
325
326         flags = p[0];
327         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
328                 if (flags&(GRE_VERSION|GRE_ROUTING))
329                         return;
330                 if (flags&GRE_KEY) {
331                         grehlen += 4;
332                         if (flags&GRE_CSUM)
333                                 grehlen += 4;
334                 }
335         }
336
337         /* If only 8 bytes returned, keyed message will be dropped here */
338         if (skb_headlen(skb) < grehlen)
339                 return;
340
341         switch (type) {
342         default:
343         case ICMP_PARAMETERPROB:
344                 return;
345
346         case ICMP_DEST_UNREACH:
347                 switch (code) {
348                 case ICMP_SR_FAILED:
349                 case ICMP_PORT_UNREACH:
350                         /* Impossible event. */
351                         return;
352                 case ICMP_FRAG_NEEDED:
353                         /* Soft state for pmtu is maintained by IP core. */
354                         return;
355                 default:
356                         /* All others are translated to HOST_UNREACH.
357                            rfc2003 contains "deep thoughts" about NET_UNREACH,
358                            I believe they are just ether pollution. --ANK
359                          */
360                         break;
361                 }
362                 break;
363         case ICMP_TIME_EXCEEDED:
364                 if (code != ICMP_EXC_TTL)
365                         return;
366                 break;
367         }
368
369         read_lock(&ipgre_lock);
370         t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((__be32*)p) + (grehlen>>2) - 1) : 0);
371         if (t == NULL || t->parms.iph.daddr == 0 ||
372             ipv4_is_multicast(t->parms.iph.daddr))
373                 goto out;
374
375         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
376                 goto out;
377
378         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
379                 t->err_count++;
380         else
381                 t->err_count = 1;
382         t->err_time = jiffies;
383 out:
384         read_unlock(&ipgre_lock);
385         return;
386 #else
387         struct iphdr *iph = (struct iphdr*)dp;
388         struct iphdr *eiph;
389         __be16       *p = (__be16*)(dp+(iph->ihl<<2));
390         const int type = icmp_hdr(skb)->type;
391         const int code = icmp_hdr(skb)->code;
392         int rel_type = 0;
393         int rel_code = 0;
394         __be32 rel_info = 0;
395         __u32 n = 0;
396         __be16 flags;
397         int grehlen = (iph->ihl<<2) + 4;
398         struct sk_buff *skb2;
399         struct flowi fl;
400         struct rtable *rt;
401
402         if (p[1] != htons(ETH_P_IP))
403                 return;
404
405         flags = p[0];
406         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
407                 if (flags&(GRE_VERSION|GRE_ROUTING))
408                         return;
409                 if (flags&GRE_CSUM)
410                         grehlen += 4;
411                 if (flags&GRE_KEY)
412                         grehlen += 4;
413                 if (flags&GRE_SEQ)
414                         grehlen += 4;
415         }
416         if (len < grehlen + sizeof(struct iphdr))
417                 return;
418         eiph = (struct iphdr*)(dp + grehlen);
419
420         switch (type) {
421         default:
422                 return;
423         case ICMP_PARAMETERPROB:
424                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
425                 if (n < (iph->ihl<<2))
426                         return;
427
428                 /* So... This guy found something strange INSIDE encapsulated
429                    packet. Well, he is fool, but what can we do ?
430                  */
431                 rel_type = ICMP_PARAMETERPROB;
432                 n -= grehlen;
433                 rel_info = htonl(n << 24);
434                 break;
435
436         case ICMP_DEST_UNREACH:
437                 switch (code) {
438                 case ICMP_SR_FAILED:
439                 case ICMP_PORT_UNREACH:
440                         /* Impossible event. */
441                         return;
442                 case ICMP_FRAG_NEEDED:
443                         /* And it is the only really necessary thing :-) */
444                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
445                         if (n < grehlen+68)
446                                 return;
447                         n -= grehlen;
448                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
449                         if (n > ntohs(eiph->tot_len))
450                                 return;
451                         rel_info = htonl(n);
452                         break;
453                 default:
454                         /* All others are translated to HOST_UNREACH.
455                            rfc2003 contains "deep thoughts" about NET_UNREACH,
456                            I believe, it is just ether pollution. --ANK
457                          */
458                         rel_type = ICMP_DEST_UNREACH;
459                         rel_code = ICMP_HOST_UNREACH;
460                         break;
461                 }
462                 break;
463         case ICMP_TIME_EXCEEDED:
464                 if (code != ICMP_EXC_TTL)
465                         return;
466                 break;
467         }
468
469         /* Prepare fake skb to feed it to icmp_send */
470         skb2 = skb_clone(skb, GFP_ATOMIC);
471         if (skb2 == NULL)
472                 return;
473         dst_release(skb2->dst);
474         skb2->dst = NULL;
475         skb_pull(skb2, skb->data - (u8*)eiph);
476         skb_reset_network_header(skb2);
477
478         /* Try to guess incoming interface */
479         memset(&fl, 0, sizeof(fl));
480         fl.fl4_dst = eiph->saddr;
481         fl.fl4_tos = RT_TOS(eiph->tos);
482         fl.proto = IPPROTO_GRE;
483         if (ip_route_output_key(&rt, &fl)) {
484                 kfree_skb(skb2);
485                 return;
486         }
487         skb2->dev = rt->u.dst.dev;
488
489         /* route "incoming" packet */
490         if (rt->rt_flags&RTCF_LOCAL) {
491                 ip_rt_put(rt);
492                 rt = NULL;
493                 fl.fl4_dst = eiph->daddr;
494                 fl.fl4_src = eiph->saddr;
495                 fl.fl4_tos = eiph->tos;
496                 if (ip_route_output_key(&rt, &fl) ||
497                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
498                         ip_rt_put(rt);
499                         kfree_skb(skb2);
500                         return;
501                 }
502         } else {
503                 ip_rt_put(rt);
504                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
505                     skb2->dst->dev->type != ARPHRD_IPGRE) {
506                         kfree_skb(skb2);
507                         return;
508                 }
509         }
510
511         /* change mtu on this route */
512         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
513                 if (n > dst_mtu(skb2->dst)) {
514                         kfree_skb(skb2);
515                         return;
516                 }
517                 skb2->dst->ops->update_pmtu(skb2->dst, n);
518         } else if (type == ICMP_TIME_EXCEEDED) {
519                 struct ip_tunnel *t = netdev_priv(skb2->dev);
520                 if (t->parms.iph.ttl) {
521                         rel_type = ICMP_DEST_UNREACH;
522                         rel_code = ICMP_HOST_UNREACH;
523                 }
524         }
525
526         icmp_send(skb2, rel_type, rel_code, rel_info);
527         kfree_skb(skb2);
528 #endif
529 }
530
531 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
532 {
533         if (INET_ECN_is_ce(iph->tos)) {
534                 if (skb->protocol == htons(ETH_P_IP)) {
535                         IP_ECN_set_ce(ip_hdr(skb));
536                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
537                         IP6_ECN_set_ce(ipv6_hdr(skb));
538                 }
539         }
540 }
541
542 static inline u8
543 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
544 {
545         u8 inner = 0;
546         if (skb->protocol == htons(ETH_P_IP))
547                 inner = old_iph->tos;
548         else if (skb->protocol == htons(ETH_P_IPV6))
549                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
550         return INET_ECN_encapsulate(tos, inner);
551 }
552
553 static int ipgre_rcv(struct sk_buff *skb)
554 {
555         struct iphdr *iph;
556         u8     *h;
557         __be16    flags;
558         __sum16   csum = 0;
559         __be32 key = 0;
560         u32    seqno = 0;
561         struct ip_tunnel *tunnel;
562         int    offset = 4;
563
564         if (!pskb_may_pull(skb, 16))
565                 goto drop_nolock;
566
567         iph = ip_hdr(skb);
568         h = skb->data;
569         flags = *(__be16*)h;
570
571         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
572                 /* - Version must be 0.
573                    - We do not support routing headers.
574                  */
575                 if (flags&(GRE_VERSION|GRE_ROUTING))
576                         goto drop_nolock;
577
578                 if (flags&GRE_CSUM) {
579                         switch (skb->ip_summed) {
580                         case CHECKSUM_COMPLETE:
581                                 csum = csum_fold(skb->csum);
582                                 if (!csum)
583                                         break;
584                                 /* fall through */
585                         case CHECKSUM_NONE:
586                                 skb->csum = 0;
587                                 csum = __skb_checksum_complete(skb);
588                                 skb->ip_summed = CHECKSUM_COMPLETE;
589                         }
590                         offset += 4;
591                 }
592                 if (flags&GRE_KEY) {
593                         key = *(__be32*)(h + offset);
594                         offset += 4;
595                 }
596                 if (flags&GRE_SEQ) {
597                         seqno = ntohl(*(__be32*)(h + offset));
598                         offset += 4;
599                 }
600         }
601
602         read_lock(&ipgre_lock);
603         if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
604                 secpath_reset(skb);
605
606                 skb->protocol = *(__be16*)(h + 2);
607                 /* WCCP version 1 and 2 protocol decoding.
608                  * - Change protocol to IP
609                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
610                  */
611                 if (flags == 0 &&
612                     skb->protocol == htons(ETH_P_WCCP)) {
613                         skb->protocol = htons(ETH_P_IP);
614                         if ((*(h + offset) & 0xF0) != 0x40)
615                                 offset += 4;
616                 }
617
618                 skb->mac_header = skb->network_header;
619                 __pskb_pull(skb, offset);
620                 skb_reset_network_header(skb);
621                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
622                 skb->pkt_type = PACKET_HOST;
623 #ifdef CONFIG_NET_IPGRE_BROADCAST
624                 if (ipv4_is_multicast(iph->daddr)) {
625                         /* Looped back packet, drop it! */
626                         if (((struct rtable*)skb->dst)->fl.iif == 0)
627                                 goto drop;
628                         tunnel->stat.multicast++;
629                         skb->pkt_type = PACKET_BROADCAST;
630                 }
631 #endif
632
633                 if (((flags&GRE_CSUM) && csum) ||
634                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
635                         tunnel->stat.rx_crc_errors++;
636                         tunnel->stat.rx_errors++;
637                         goto drop;
638                 }
639                 if (tunnel->parms.i_flags&GRE_SEQ) {
640                         if (!(flags&GRE_SEQ) ||
641                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
642                                 tunnel->stat.rx_fifo_errors++;
643                                 tunnel->stat.rx_errors++;
644                                 goto drop;
645                         }
646                         tunnel->i_seqno = seqno + 1;
647                 }
648                 tunnel->stat.rx_packets++;
649                 tunnel->stat.rx_bytes += skb->len;
650                 skb->dev = tunnel->dev;
651                 dst_release(skb->dst);
652                 skb->dst = NULL;
653                 nf_reset(skb);
654                 ipgre_ecn_decapsulate(iph, skb);
655                 netif_rx(skb);
656                 read_unlock(&ipgre_lock);
657                 return(0);
658         }
659         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
660
661 drop:
662         read_unlock(&ipgre_lock);
663 drop_nolock:
664         kfree_skb(skb);
665         return(0);
666 }
667
668 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
669 {
670         struct ip_tunnel *tunnel = netdev_priv(dev);
671         struct net_device_stats *stats = &tunnel->stat;
672         struct iphdr  *old_iph = ip_hdr(skb);
673         struct iphdr  *tiph;
674         u8     tos;
675         __be16 df;
676         struct rtable *rt;                      /* Route to the other host */
677         struct net_device *tdev;                        /* Device to other host */
678         struct iphdr  *iph;                     /* Our new IP header */
679         unsigned int max_headroom;              /* The extra header space needed */
680         int    gre_hlen;
681         __be32 dst;
682         int    mtu;
683
684         if (tunnel->recursion++) {
685                 tunnel->stat.collisions++;
686                 goto tx_error;
687         }
688
689         if (dev->header_ops) {
690                 gre_hlen = 0;
691                 tiph = (struct iphdr*)skb->data;
692         } else {
693                 gre_hlen = tunnel->hlen;
694                 tiph = &tunnel->parms.iph;
695         }
696
697         if ((dst = tiph->daddr) == 0) {
698                 /* NBMA tunnel */
699
700                 if (skb->dst == NULL) {
701                         tunnel->stat.tx_fifo_errors++;
702                         goto tx_error;
703                 }
704
705                 if (skb->protocol == htons(ETH_P_IP)) {
706                         rt = (struct rtable*)skb->dst;
707                         if ((dst = rt->rt_gateway) == 0)
708                                 goto tx_error_icmp;
709                 }
710 #ifdef CONFIG_IPV6
711                 else if (skb->protocol == htons(ETH_P_IPV6)) {
712                         struct in6_addr *addr6;
713                         int addr_type;
714                         struct neighbour *neigh = skb->dst->neighbour;
715
716                         if (neigh == NULL)
717                                 goto tx_error;
718
719                         addr6 = (struct in6_addr*)&neigh->primary_key;
720                         addr_type = ipv6_addr_type(addr6);
721
722                         if (addr_type == IPV6_ADDR_ANY) {
723                                 addr6 = &ipv6_hdr(skb)->daddr;
724                                 addr_type = ipv6_addr_type(addr6);
725                         }
726
727                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
728                                 goto tx_error_icmp;
729
730                         dst = addr6->s6_addr32[3];
731                 }
732 #endif
733                 else
734                         goto tx_error;
735         }
736
737         tos = tiph->tos;
738         if (tos&1) {
739                 if (skb->protocol == htons(ETH_P_IP))
740                         tos = old_iph->tos;
741                 tos &= ~1;
742         }
743
744         {
745                 struct flowi fl = { .oif = tunnel->parms.link,
746                                     .nl_u = { .ip4_u =
747                                               { .daddr = dst,
748                                                 .saddr = tiph->saddr,
749                                                 .tos = RT_TOS(tos) } },
750                                     .proto = IPPROTO_GRE };
751                 if (ip_route_output_key(&rt, &fl)) {
752                         tunnel->stat.tx_carrier_errors++;
753                         goto tx_error;
754                 }
755         }
756         tdev = rt->u.dst.dev;
757
758         if (tdev == dev) {
759                 ip_rt_put(rt);
760                 tunnel->stat.collisions++;
761                 goto tx_error;
762         }
763
764         df = tiph->frag_off;
765         if (df)
766                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
767         else
768                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
769
770         if (skb->dst)
771                 skb->dst->ops->update_pmtu(skb->dst, mtu);
772
773         if (skb->protocol == htons(ETH_P_IP)) {
774                 df |= (old_iph->frag_off&htons(IP_DF));
775
776                 if ((old_iph->frag_off&htons(IP_DF)) &&
777                     mtu < ntohs(old_iph->tot_len)) {
778                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
779                         ip_rt_put(rt);
780                         goto tx_error;
781                 }
782         }
783 #ifdef CONFIG_IPV6
784         else if (skb->protocol == htons(ETH_P_IPV6)) {
785                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
786
787                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
788                         if ((tunnel->parms.iph.daddr &&
789                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
790                             rt6->rt6i_dst.plen == 128) {
791                                 rt6->rt6i_flags |= RTF_MODIFIED;
792                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
793                         }
794                 }
795
796                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
797                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
798                         ip_rt_put(rt);
799                         goto tx_error;
800                 }
801         }
802 #endif
803
804         if (tunnel->err_count > 0) {
805                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
806                         tunnel->err_count--;
807
808                         dst_link_failure(skb);
809                 } else
810                         tunnel->err_count = 0;
811         }
812
813         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
814
815         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
817                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
818                 if (!new_skb) {
819                         ip_rt_put(rt);
820                         stats->tx_dropped++;
821                         dev_kfree_skb(skb);
822                         tunnel->recursion--;
823                         return 0;
824                 }
825                 if (skb->sk)
826                         skb_set_owner_w(new_skb, skb->sk);
827                 dev_kfree_skb(skb);
828                 skb = new_skb;
829                 old_iph = ip_hdr(skb);
830         }
831
832         skb->transport_header = skb->network_header;
833         skb_push(skb, gre_hlen);
834         skb_reset_network_header(skb);
835         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
836         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
837                               IPSKB_REROUTED);
838         dst_release(skb->dst);
839         skb->dst = &rt->u.dst;
840
841         /*
842          *      Push down and install the IPIP header.
843          */
844
845         iph                     =       ip_hdr(skb);
846         iph->version            =       4;
847         iph->ihl                =       sizeof(struct iphdr) >> 2;
848         iph->frag_off           =       df;
849         iph->protocol           =       IPPROTO_GRE;
850         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
851         iph->daddr              =       rt->rt_dst;
852         iph->saddr              =       rt->rt_src;
853
854         if ((iph->ttl = tiph->ttl) == 0) {
855                 if (skb->protocol == htons(ETH_P_IP))
856                         iph->ttl = old_iph->ttl;
857 #ifdef CONFIG_IPV6
858                 else if (skb->protocol == htons(ETH_P_IPV6))
859                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
860 #endif
861                 else
862                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
863         }
864
865         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
866         ((__be16*)(iph+1))[1] = skb->protocol;
867
868         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
869                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
870
871                 if (tunnel->parms.o_flags&GRE_SEQ) {
872                         ++tunnel->o_seqno;
873                         *ptr = htonl(tunnel->o_seqno);
874                         ptr--;
875                 }
876                 if (tunnel->parms.o_flags&GRE_KEY) {
877                         *ptr = tunnel->parms.o_key;
878                         ptr--;
879                 }
880                 if (tunnel->parms.o_flags&GRE_CSUM) {
881                         *ptr = 0;
882                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
883                 }
884         }
885
886         nf_reset(skb);
887
888         IPTUNNEL_XMIT();
889         tunnel->recursion--;
890         return 0;
891
892 tx_error_icmp:
893         dst_link_failure(skb);
894
895 tx_error:
896         stats->tx_errors++;
897         dev_kfree_skb(skb);
898         tunnel->recursion--;
899         return 0;
900 }
901
902 static void ipgre_tunnel_bind_dev(struct net_device *dev)
903 {
904         struct net_device *tdev = NULL;
905         struct ip_tunnel *tunnel;
906         struct iphdr *iph;
907         int hlen = LL_MAX_HEADER;
908         int mtu = ETH_DATA_LEN;
909         int addend = sizeof(struct iphdr) + 4;
910
911         tunnel = netdev_priv(dev);
912         iph = &tunnel->parms.iph;
913
914         /* Guess output device to choose reasonable mtu and hard_header_len */
915
916         if (iph->daddr) {
917                 struct flowi fl = { .oif = tunnel->parms.link,
918                                     .nl_u = { .ip4_u =
919                                               { .daddr = iph->daddr,
920                                                 .saddr = iph->saddr,
921                                                 .tos = RT_TOS(iph->tos) } },
922                                     .proto = IPPROTO_GRE };
923                 struct rtable *rt;
924                 if (!ip_route_output_key(&rt, &fl)) {
925                         tdev = rt->u.dst.dev;
926                         ip_rt_put(rt);
927                 }
928                 dev->flags |= IFF_POINTOPOINT;
929         }
930
931         if (!tdev && tunnel->parms.link)
932                 tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
933
934         if (tdev) {
935                 hlen = tdev->hard_header_len;
936                 mtu = tdev->mtu;
937         }
938         dev->iflink = tunnel->parms.link;
939
940         /* Precalculate GRE options length */
941         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
942                 if (tunnel->parms.o_flags&GRE_CSUM)
943                         addend += 4;
944                 if (tunnel->parms.o_flags&GRE_KEY)
945                         addend += 4;
946                 if (tunnel->parms.o_flags&GRE_SEQ)
947                         addend += 4;
948         }
949         dev->hard_header_len = hlen + addend;
950         dev->mtu = mtu - addend;
951         tunnel->hlen = addend;
952
953 }
954
955 static int
956 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
957 {
958         int err = 0;
959         struct ip_tunnel_parm p;
960         struct ip_tunnel *t;
961
962         switch (cmd) {
963         case SIOCGETTUNNEL:
964                 t = NULL;
965                 if (dev == ipgre_fb_tunnel_dev) {
966                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967                                 err = -EFAULT;
968                                 break;
969                         }
970                         t = ipgre_tunnel_locate(&p, 0);
971                 }
972                 if (t == NULL)
973                         t = netdev_priv(dev);
974                 memcpy(&p, &t->parms, sizeof(p));
975                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976                         err = -EFAULT;
977                 break;
978
979         case SIOCADDTUNNEL:
980         case SIOCCHGTUNNEL:
981                 err = -EPERM;
982                 if (!capable(CAP_NET_ADMIN))
983                         goto done;
984
985                 err = -EFAULT;
986                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987                         goto done;
988
989                 err = -EINVAL;
990                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993                         goto done;
994                 if (p.iph.ttl)
995                         p.iph.frag_off |= htons(IP_DF);
996
997                 if (!(p.i_flags&GRE_KEY))
998                         p.i_key = 0;
999                 if (!(p.o_flags&GRE_KEY))
1000                         p.o_key = 0;
1001
1002                 t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
1003
1004                 if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1005                         if (t != NULL) {
1006                                 if (t->dev != dev) {
1007                                         err = -EEXIST;
1008                                         break;
1009                                 }
1010                         } else {
1011                                 unsigned nflags=0;
1012
1013                                 t = netdev_priv(dev);
1014
1015                                 if (ipv4_is_multicast(p.iph.daddr))
1016                                         nflags = IFF_BROADCAST;
1017                                 else if (p.iph.daddr)
1018                                         nflags = IFF_POINTOPOINT;
1019
1020                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021                                         err = -EINVAL;
1022                                         break;
1023                                 }
1024                                 ipgre_tunnel_unlink(t);
1025                                 t->parms.iph.saddr = p.iph.saddr;
1026                                 t->parms.iph.daddr = p.iph.daddr;
1027                                 t->parms.i_key = p.i_key;
1028                                 t->parms.o_key = p.o_key;
1029                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
1031                                 ipgre_tunnel_link(t);
1032                                 netdev_state_change(dev);
1033                         }
1034                 }
1035
1036                 if (t) {
1037                         err = 0;
1038                         if (cmd == SIOCCHGTUNNEL) {
1039                                 t->parms.iph.ttl = p.iph.ttl;
1040                                 t->parms.iph.tos = p.iph.tos;
1041                                 t->parms.iph.frag_off = p.iph.frag_off;
1042                                 if (t->parms.link != p.link) {
1043                                         t->parms.link = p.link;
1044                                         ipgre_tunnel_bind_dev(dev);
1045                                         netdev_state_change(dev);
1046                                 }
1047                         }
1048                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049                                 err = -EFAULT;
1050                 } else
1051                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052                 break;
1053
1054         case SIOCDELTUNNEL:
1055                 err = -EPERM;
1056                 if (!capable(CAP_NET_ADMIN))
1057                         goto done;
1058
1059                 if (dev == ipgre_fb_tunnel_dev) {
1060                         err = -EFAULT;
1061                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062                                 goto done;
1063                         err = -ENOENT;
1064                         if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
1065                                 goto done;
1066                         err = -EPERM;
1067                         if (t == netdev_priv(ipgre_fb_tunnel_dev))
1068                                 goto done;
1069                         dev = t->dev;
1070                 }
1071                 unregister_netdevice(dev);
1072                 err = 0;
1073                 break;
1074
1075         default:
1076                 err = -EINVAL;
1077         }
1078
1079 done:
1080         return err;
1081 }
1082
1083 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1084 {
1085         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1086 }
1087
1088 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1089 {
1090         struct ip_tunnel *tunnel = netdev_priv(dev);
1091         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1092                 return -EINVAL;
1093         dev->mtu = new_mtu;
1094         return 0;
1095 }
1096
1097 /* Nice toy. Unfortunately, useless in real life :-)
1098    It allows constructing a virtual multiprotocol broadcast "LAN"
1099    over the Internet, provided multicast routing is tuned.
1100
1101
1102    I have no idea whether this bicycle was invented before me,
1103    so I had to set ARPHRD_IPGRE to a random value.
1104    I have the impression that Cisco could do something similar,
1105    but this feature is apparently missing in IOS<=11.2(8).
1106
1107    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1108    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1109
1110    ping -t 255 224.66.66.66
1111
1112    If nobody answers, mbone does not work.
1113
1114    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1115    ip addr add 10.66.66.<somewhat>/24 dev Universe
1116    ifconfig Universe up
1117    ifconfig Universe add fe80::<Your_real_addr>/10
1118    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1119    ftp 10.66.66.66
1120    ...
1121    ftp fec0:6666:6666::193.233.7.65
1122    ...
1123
1124  */
1125
1126 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127                         unsigned short type,
1128                         const void *daddr, const void *saddr, unsigned len)
1129 {
1130         struct ip_tunnel *t = netdev_priv(dev);
1131         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1132         __be16 *p = (__be16*)(iph+1);
1133
1134         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1135         p[0]            = t->parms.o_flags;
1136         p[1]            = htons(type);
1137
1138         /*
1139          *      Set the source hardware address.
1140          */
1141
1142         if (saddr)
1143                 memcpy(&iph->saddr, saddr, 4);
1144
1145         if (daddr) {
1146                 memcpy(&iph->daddr, daddr, 4);
1147                 return t->hlen;
1148         }
1149         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1150                 return t->hlen;
1151
1152         return -t->hlen;
1153 }
1154
1155 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1156 {
1157         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1158         memcpy(haddr, &iph->saddr, 4);
1159         return 4;
1160 }
1161
1162 static const struct header_ops ipgre_header_ops = {
1163         .create = ipgre_header,
1164         .parse  = ipgre_header_parse,
1165 };
1166
1167 #ifdef CONFIG_NET_IPGRE_BROADCAST
1168 static int ipgre_open(struct net_device *dev)
1169 {
1170         struct ip_tunnel *t = netdev_priv(dev);
1171
1172         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1173                 struct flowi fl = { .oif = t->parms.link,
1174                                     .nl_u = { .ip4_u =
1175                                               { .daddr = t->parms.iph.daddr,
1176                                                 .saddr = t->parms.iph.saddr,
1177                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1178                                     .proto = IPPROTO_GRE };
1179                 struct rtable *rt;
1180                 if (ip_route_output_key(&rt, &fl))
1181                         return -EADDRNOTAVAIL;
1182                 dev = rt->u.dst.dev;
1183                 ip_rt_put(rt);
1184                 if (__in_dev_get_rtnl(dev) == NULL)
1185                         return -EADDRNOTAVAIL;
1186                 t->mlink = dev->ifindex;
1187                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1188         }
1189         return 0;
1190 }
1191
1192 static int ipgre_close(struct net_device *dev)
1193 {
1194         struct ip_tunnel *t = netdev_priv(dev);
1195         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1196                 struct in_device *in_dev = inetdev_by_index(t->mlink);
1197                 if (in_dev) {
1198                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1199                         in_dev_put(in_dev);
1200                 }
1201         }
1202         return 0;
1203 }
1204
1205 #endif
1206
1207 static void ipgre_tunnel_setup(struct net_device *dev)
1208 {
1209         dev->uninit             = ipgre_tunnel_uninit;
1210         dev->destructor         = free_netdev;
1211         dev->hard_start_xmit    = ipgre_tunnel_xmit;
1212         dev->get_stats          = ipgre_tunnel_get_stats;
1213         dev->do_ioctl           = ipgre_tunnel_ioctl;
1214         dev->change_mtu         = ipgre_tunnel_change_mtu;
1215
1216         dev->type               = ARPHRD_IPGRE;
1217         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1218         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1219         dev->flags              = IFF_NOARP;
1220         dev->iflink             = 0;
1221         dev->addr_len           = 4;
1222 }
1223
1224 static int ipgre_tunnel_init(struct net_device *dev)
1225 {
1226         struct ip_tunnel *tunnel;
1227         struct iphdr *iph;
1228
1229         tunnel = netdev_priv(dev);
1230         iph = &tunnel->parms.iph;
1231
1232         tunnel->dev = dev;
1233         strcpy(tunnel->parms.name, dev->name);
1234
1235         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1236         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1237
1238         ipgre_tunnel_bind_dev(dev);
1239
1240         if (iph->daddr) {
1241 #ifdef CONFIG_NET_IPGRE_BROADCAST
1242                 if (ipv4_is_multicast(iph->daddr)) {
1243                         if (!iph->saddr)
1244                                 return -EINVAL;
1245                         dev->flags = IFF_BROADCAST;
1246                         dev->header_ops = &ipgre_header_ops;
1247                         dev->open = ipgre_open;
1248                         dev->stop = ipgre_close;
1249                 }
1250 #endif
1251         } else
1252                 dev->header_ops = &ipgre_header_ops;
1253
1254         return 0;
1255 }
1256
1257 static int __init ipgre_fb_tunnel_init(struct net_device *dev)
1258 {
1259         struct ip_tunnel *tunnel = netdev_priv(dev);
1260         struct iphdr *iph = &tunnel->parms.iph;
1261
1262         tunnel->dev = dev;
1263         strcpy(tunnel->parms.name, dev->name);
1264
1265         iph->version            = 4;
1266         iph->protocol           = IPPROTO_GRE;
1267         iph->ihl                = 5;
1268         tunnel->hlen            = sizeof(struct iphdr) + 4;
1269
1270         dev_hold(dev);
1271         tunnels_wc[0]           = tunnel;
1272         return 0;
1273 }
1274
1275
1276 static struct net_protocol ipgre_protocol = {
1277         .handler        =       ipgre_rcv,
1278         .err_handler    =       ipgre_err,
1279 };
1280
1281
1282 /*
1283  *      And now the modules code and kernel interface.
1284  */
1285
1286 static int __init ipgre_init(void)
1287 {
1288         int err;
1289
1290         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1291
1292         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1293                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1294                 return -EAGAIN;
1295         }
1296
1297         ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1298                                            ipgre_tunnel_setup);
1299         if (!ipgre_fb_tunnel_dev) {
1300                 err = -ENOMEM;
1301                 goto err1;
1302         }
1303
1304         ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1305
1306         if ((err = register_netdev(ipgre_fb_tunnel_dev)))
1307                 goto err2;
1308 out:
1309         return err;
1310 err2:
1311         free_netdev(ipgre_fb_tunnel_dev);
1312 err1:
1313         inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1314         goto out;
1315 }
1316
1317 static void __exit ipgre_destroy_tunnels(void)
1318 {
1319         int prio;
1320
1321         for (prio = 0; prio < 4; prio++) {
1322                 int h;
1323                 for (h = 0; h < HASH_SIZE; h++) {
1324                         struct ip_tunnel *t;
1325                         while ((t = tunnels[prio][h]) != NULL)
1326                                 unregister_netdevice(t->dev);
1327                 }
1328         }
1329 }
1330
1331 static void __exit ipgre_fini(void)
1332 {
1333         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1334                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1335
1336         rtnl_lock();
1337         ipgre_destroy_tunnels();
1338         rtnl_unlock();
1339 }
1340
1341 module_init(ipgre_init);
1342 module_exit(ipgre_fini);
1343 MODULE_LICENSE("GPL");