Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6
[linux-2.6] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/if_ether.h>
31
32 #include <net/sock.h>
33 #include <net/ip.h>
34 #include <net/icmp.h>
35 #include <net/protocol.h>
36 #include <net/ipip.h>
37 #include <net/arp.h>
38 #include <net/checksum.h>
39 #include <net/dsfield.h>
40 #include <net/inet_ecn.h>
41 #include <net/xfrm.h>
42 #include <net/net_namespace.h>
43 #include <net/netns/generic.h>
44
45 #ifdef CONFIG_IPV6
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #endif
50
51 /*
52    Problems & solutions
53    --------------------
54
55    1. The most important issue is detecting local dead loops.
56    They would cause complete host lockup in transmit, which
57    would be "resolved" by stack overflow or, if queueing is enabled,
58    with infinite looping in net_bh.
59
60    We cannot track such dead loops during route installation,
61    it is infeasible task. The most general solutions would be
62    to keep skb->encapsulation counter (sort of local ttl),
63    and silently drop packet when it expires. It is the best
64    solution, but it supposes maintaining a new variable in ALL
65    skb, even if no tunneling is used.
66
67    Current solution: t->recursion lock breaks dead loops. It looks
68    like dev->tbusy flag, but I preferred new variable, because
69    the semantics is different. One day, when hard_start_xmit
70    will be multithreaded we will have to use skb->encapsulation.
71
72
73
74    2. Networking dead loops would not kill routers, but would really
75    kill network. IP hop limit plays role of "t->recursion" in this case,
76    if we copy it from packet being encapsulated to upper header.
77    It is very good solution, but it introduces two problems:
78
79    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
80      do not work over tunnels.
81    - traceroute does not work. I planned to relay ICMP from tunnel,
82      so that this problem would be solved and traceroute output
83      would even more informative. This idea appeared to be wrong:
84      only Linux complies to rfc1812 now (yes, guys, Linux is the only
85      true router now :-)), all routers (at least, in neighbourhood of mine)
86      return only 8 bytes of payload. It is the end.
87
88    Hence, if we want that OSPF worked or traceroute said something reasonable,
89    we should search for another solution.
90
91    One of them is to parse packet trying to detect inner encapsulation
92    made by our node. It is difficult or even impossible, especially,
93    taking into account fragmentation. To be short, it is not a solution at all.
94
95    Current solution: The solution was UNEXPECTEDLY SIMPLE.
96    We force DF flag on tunnels with preconfigured hop limit,
97    that is ALL. :-) Well, it does not remove the problem completely,
98    but exponential growth of network traffic is changed to linear
99    (branches, that exceed pmtu are pruned) and tunnel mtu
100    fastly degrades to value <68, where looping stops.
101    Yes, it is not good if there exists a router in the loop,
102    which does not force DF, even when encapsulating packets have DF set.
103    But it is not our problem! Nobody could accuse us, we made
104    all that we could make. Even if it is your gated who injected
105    fatal route to network, even if it were you who configured
106    fatal static route: you are innocent. :-)
107
108
109
110    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
111    practically identical code. It would be good to glue them
112    together, but it is not very evident, how to make them modular.
113    sit is integral part of IPv6, ipip and gre are naturally modular.
114    We could extract common parts (hash table, ioctl etc)
115    to a separate module (ip_tunnel.c).
116
117    Alexey Kuznetsov.
118  */
119
/*
 * Forward declarations.  ipgre_tunnel_setup is handed to alloc_netdev() and
 * ipgre_tunnel_init is installed as dev->init when a tunnel is created below.
 */
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_init(struct net_device *dev);

/* Init hook of the fallback tunnel: no source, no destination, no key, no options */
static int ipgre_fb_tunnel_init(struct net_device *dev);
126
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/* Id passed to net_generic() to fetch this module's per-namespace state. */
static int ipgre_net_id;

/* Per-network-namespace GRE state. */
struct ipgre_net {
        /* Four hash tables, selected by which tunnel endpoints are fixed. */
        struct ip_tunnel *tunnels[4][HASH_SIZE];

        /* Fallback device; the lookup returns its private data as last resort. */
        struct net_device *fb_tunnel_dev;
};
135
136 /* Tunnel hash table */
137
138 /*
139    4 hash tables:
140
141    3: (remote,local)
142    2: (remote,*)
143    1: (*,local)
144    0: (*,*)
145
146    We require exact key match i.e. if a key is present in packet
147    it will match only tunnel with the same key; if it is not present,
148    it will match only keyless tunnel.
149
150    All keyless packets, if not matched by configured keyless tunnels,
151    will match fallback tunnel.
152  */
153
/*
 * Fold a 32-bit value (an address or a GRE key) into a 4-bit bucket index.
 * The argument is fully parenthesized so that expression arguments such as
 * "a | b" expand with the intended precedence.  Note that the argument is
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
155
/* Shorthand for the four tables of struct ipgre_net, most specific first. */
#define tunnels_r_l     tunnels[3]      /* (remote,local) */
#define tunnels_r       tunnels[2]      /* (remote,*)     */
#define tunnels_l       tunnels[1]      /* (*,local)      */
#define tunnels_wc      tunnels[0]      /* (*,*)          */

/*
 * Guards the hash chains: link/unlink take it for writing (BH-safe),
 * the receive and ICMP error paths take it for reading around lookups.
 */
static DEFINE_RWLOCK(ipgre_lock);
162
163 /* Given src, dst and key, find appropriate for input tunnel. */
164
165 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
166                 __be32 remote, __be32 local, __be32 key)
167 {
168         unsigned h0 = HASH(remote);
169         unsigned h1 = HASH(key);
170         struct ip_tunnel *t;
171         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
172
173         for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
174                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
175                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
176                                 return t;
177                 }
178         }
179         for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
180                 if (remote == t->parms.iph.daddr) {
181                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
182                                 return t;
183                 }
184         }
185         for (t = ign->tunnels_l[h1]; t; t = t->next) {
186                 if (local == t->parms.iph.saddr ||
187                      (local == t->parms.iph.daddr &&
188                       ipv4_is_multicast(local))) {
189                         if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
190                                 return t;
191                 }
192         }
193         for (t = ign->tunnels_wc[h1]; t; t = t->next) {
194                 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
195                         return t;
196         }
197
198         if (ign->fb_tunnel_dev->flags&IFF_UP)
199                 return netdev_priv(ign->fb_tunnel_dev);
200         return NULL;
201 }
202
203 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
204                 struct ip_tunnel_parm *parms)
205 {
206         __be32 remote = parms->iph.daddr;
207         __be32 local = parms->iph.saddr;
208         __be32 key = parms->i_key;
209         unsigned h = HASH(key);
210         int prio = 0;
211
212         if (local)
213                 prio |= 1;
214         if (remote && !ipv4_is_multicast(remote)) {
215                 prio |= 2;
216                 h ^= HASH(remote);
217         }
218
219         return &ign->tunnels[prio][h];
220 }
221
222 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
223                 struct ip_tunnel *t)
224 {
225         return __ipgre_bucket(ign, &t->parms);
226 }
227
228 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
229 {
230         struct ip_tunnel **tp = ipgre_bucket(ign, t);
231
232         t->next = *tp;
233         write_lock_bh(&ipgre_lock);
234         *tp = t;
235         write_unlock_bh(&ipgre_lock);
236 }
237
238 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
239 {
240         struct ip_tunnel **tp;
241
242         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
243                 if (t == *tp) {
244                         write_lock_bh(&ipgre_lock);
245                         *tp = t->next;
246                         write_unlock_bh(&ipgre_lock);
247                         break;
248                 }
249         }
250 }
251
252 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
253                 struct ip_tunnel_parm *parms, int create)
254 {
255         __be32 remote = parms->iph.daddr;
256         __be32 local = parms->iph.saddr;
257         __be32 key = parms->i_key;
258         struct ip_tunnel *t, **tp, *nt;
259         struct net_device *dev;
260         char name[IFNAMSIZ];
261         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
262
263         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
264                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
265                         if (key == t->parms.i_key)
266                                 return t;
267                 }
268         }
269         if (!create)
270                 return NULL;
271
272         if (parms->name[0])
273                 strlcpy(name, parms->name, IFNAMSIZ);
274         else
275                 sprintf(name, "gre%%d");
276
277         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
278         if (!dev)
279           return NULL;
280
281         dev_net_set(dev, net);
282
283         if (strchr(name, '%')) {
284                 if (dev_alloc_name(dev, name) < 0)
285                         goto failed_free;
286         }
287
288         dev->init = ipgre_tunnel_init;
289         nt = netdev_priv(dev);
290         nt->parms = *parms;
291
292         if (register_netdevice(dev) < 0)
293                 goto failed_free;
294
295         dev_hold(dev);
296         ipgre_tunnel_link(ign, nt);
297         return nt;
298
299 failed_free:
300         free_netdev(dev);
301         return NULL;
302 }
303
304 static void ipgre_tunnel_uninit(struct net_device *dev)
305 {
306         struct net *net = dev_net(dev);
307         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308
309         ipgre_tunnel_unlink(ign, netdev_priv(dev));
310         dev_put(dev);
311 }
312
313
314 static void ipgre_err(struct sk_buff *skb, u32 info)
315 {
316 #ifndef I_WISH_WORLD_WERE_PERFECT
317
318 /* It is not :-( All the routers (except for Linux) return only
319    8 bytes of packet payload. It means, that precise relaying of
320    ICMP in the real Internet is absolutely infeasible.
321
322    Moreover, Cisco "wise men" put GRE key to the third word
323    in GRE header. It makes impossible maintaining even soft state for keyed
324    GRE tunnels with enabled checksum. Tell them "thank you".
325
326    Well, I wonder, rfc1812 was written by Cisco employee,
327    what the hell these idiots break standrads established
328    by themself???
329  */
330
331         struct iphdr *iph = (struct iphdr*)skb->data;
332         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
333         int grehlen = (iph->ihl<<2) + 4;
334         const int type = icmp_hdr(skb)->type;
335         const int code = icmp_hdr(skb)->code;
336         struct ip_tunnel *t;
337         __be16 flags;
338
339         flags = p[0];
340         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
341                 if (flags&(GRE_VERSION|GRE_ROUTING))
342                         return;
343                 if (flags&GRE_KEY) {
344                         grehlen += 4;
345                         if (flags&GRE_CSUM)
346                                 grehlen += 4;
347                 }
348         }
349
350         /* If only 8 bytes returned, keyed message will be dropped here */
351         if (skb_headlen(skb) < grehlen)
352                 return;
353
354         switch (type) {
355         default:
356         case ICMP_PARAMETERPROB:
357                 return;
358
359         case ICMP_DEST_UNREACH:
360                 switch (code) {
361                 case ICMP_SR_FAILED:
362                 case ICMP_PORT_UNREACH:
363                         /* Impossible event. */
364                         return;
365                 case ICMP_FRAG_NEEDED:
366                         /* Soft state for pmtu is maintained by IP core. */
367                         return;
368                 default:
369                         /* All others are translated to HOST_UNREACH.
370                            rfc2003 contains "deep thoughts" about NET_UNREACH,
371                            I believe they are just ether pollution. --ANK
372                          */
373                         break;
374                 }
375                 break;
376         case ICMP_TIME_EXCEEDED:
377                 if (code != ICMP_EXC_TTL)
378                         return;
379                 break;
380         }
381
382         read_lock(&ipgre_lock);
383         t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
384                         (flags&GRE_KEY) ?
385                         *(((__be32*)p) + (grehlen>>2) - 1) : 0);
386         if (t == NULL || t->parms.iph.daddr == 0 ||
387             ipv4_is_multicast(t->parms.iph.daddr))
388                 goto out;
389
390         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
391                 goto out;
392
393         if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
394                 t->err_count++;
395         else
396                 t->err_count = 1;
397         t->err_time = jiffies;
398 out:
399         read_unlock(&ipgre_lock);
400         return;
401 #else
402         struct iphdr *iph = (struct iphdr*)dp;
403         struct iphdr *eiph;
404         __be16       *p = (__be16*)(dp+(iph->ihl<<2));
405         const int type = icmp_hdr(skb)->type;
406         const int code = icmp_hdr(skb)->code;
407         int rel_type = 0;
408         int rel_code = 0;
409         __be32 rel_info = 0;
410         __u32 n = 0;
411         __be16 flags;
412         int grehlen = (iph->ihl<<2) + 4;
413         struct sk_buff *skb2;
414         struct flowi fl;
415         struct rtable *rt;
416
417         if (p[1] != htons(ETH_P_IP))
418                 return;
419
420         flags = p[0];
421         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
422                 if (flags&(GRE_VERSION|GRE_ROUTING))
423                         return;
424                 if (flags&GRE_CSUM)
425                         grehlen += 4;
426                 if (flags&GRE_KEY)
427                         grehlen += 4;
428                 if (flags&GRE_SEQ)
429                         grehlen += 4;
430         }
431         if (len < grehlen + sizeof(struct iphdr))
432                 return;
433         eiph = (struct iphdr*)(dp + grehlen);
434
435         switch (type) {
436         default:
437                 return;
438         case ICMP_PARAMETERPROB:
439                 n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
440                 if (n < (iph->ihl<<2))
441                         return;
442
443                 /* So... This guy found something strange INSIDE encapsulated
444                    packet. Well, he is fool, but what can we do ?
445                  */
446                 rel_type = ICMP_PARAMETERPROB;
447                 n -= grehlen;
448                 rel_info = htonl(n << 24);
449                 break;
450
451         case ICMP_DEST_UNREACH:
452                 switch (code) {
453                 case ICMP_SR_FAILED:
454                 case ICMP_PORT_UNREACH:
455                         /* Impossible event. */
456                         return;
457                 case ICMP_FRAG_NEEDED:
458                         /* And it is the only really necessary thing :-) */
459                         n = ntohs(icmp_hdr(skb)->un.frag.mtu);
460                         if (n < grehlen+68)
461                                 return;
462                         n -= grehlen;
463                         /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
464                         if (n > ntohs(eiph->tot_len))
465                                 return;
466                         rel_info = htonl(n);
467                         break;
468                 default:
469                         /* All others are translated to HOST_UNREACH.
470                            rfc2003 contains "deep thoughts" about NET_UNREACH,
471                            I believe, it is just ether pollution. --ANK
472                          */
473                         rel_type = ICMP_DEST_UNREACH;
474                         rel_code = ICMP_HOST_UNREACH;
475                         break;
476                 }
477                 break;
478         case ICMP_TIME_EXCEEDED:
479                 if (code != ICMP_EXC_TTL)
480                         return;
481                 break;
482         }
483
484         /* Prepare fake skb to feed it to icmp_send */
485         skb2 = skb_clone(skb, GFP_ATOMIC);
486         if (skb2 == NULL)
487                 return;
488         dst_release(skb2->dst);
489         skb2->dst = NULL;
490         skb_pull(skb2, skb->data - (u8*)eiph);
491         skb_reset_network_header(skb2);
492
493         /* Try to guess incoming interface */
494         memset(&fl, 0, sizeof(fl));
495         fl.fl4_dst = eiph->saddr;
496         fl.fl4_tos = RT_TOS(eiph->tos);
497         fl.proto = IPPROTO_GRE;
498         if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
499                 kfree_skb(skb2);
500                 return;
501         }
502         skb2->dev = rt->u.dst.dev;
503
504         /* route "incoming" packet */
505         if (rt->rt_flags&RTCF_LOCAL) {
506                 ip_rt_put(rt);
507                 rt = NULL;
508                 fl.fl4_dst = eiph->daddr;
509                 fl.fl4_src = eiph->saddr;
510                 fl.fl4_tos = eiph->tos;
511                 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
512                     rt->u.dst.dev->type != ARPHRD_IPGRE) {
513                         ip_rt_put(rt);
514                         kfree_skb(skb2);
515                         return;
516                 }
517         } else {
518                 ip_rt_put(rt);
519                 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
520                     skb2->dst->dev->type != ARPHRD_IPGRE) {
521                         kfree_skb(skb2);
522                         return;
523                 }
524         }
525
526         /* change mtu on this route */
527         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
528                 if (n > dst_mtu(skb2->dst)) {
529                         kfree_skb(skb2);
530                         return;
531                 }
532                 skb2->dst->ops->update_pmtu(skb2->dst, n);
533         } else if (type == ICMP_TIME_EXCEEDED) {
534                 struct ip_tunnel *t = netdev_priv(skb2->dev);
535                 if (t->parms.iph.ttl) {
536                         rel_type = ICMP_DEST_UNREACH;
537                         rel_code = ICMP_HOST_UNREACH;
538                 }
539         }
540
541         icmp_send(skb2, rel_type, rel_code, rel_info);
542         kfree_skb(skb2);
543 #endif
544 }
545
546 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
547 {
548         if (INET_ECN_is_ce(iph->tos)) {
549                 if (skb->protocol == htons(ETH_P_IP)) {
550                         IP_ECN_set_ce(ip_hdr(skb));
551                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
552                         IP6_ECN_set_ce(ipv6_hdr(skb));
553                 }
554         }
555 }
556
557 static inline u8
558 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
559 {
560         u8 inner = 0;
561         if (skb->protocol == htons(ETH_P_IP))
562                 inner = old_iph->tos;
563         else if (skb->protocol == htons(ETH_P_IPV6))
564                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
565         return INET_ECN_encapsulate(tos, inner);
566 }
567
568 static int ipgre_rcv(struct sk_buff *skb)
569 {
570         struct iphdr *iph;
571         u8     *h;
572         __be16    flags;
573         __sum16   csum = 0;
574         __be32 key = 0;
575         u32    seqno = 0;
576         struct ip_tunnel *tunnel;
577         int    offset = 4;
578
579         if (!pskb_may_pull(skb, 16))
580                 goto drop_nolock;
581
582         iph = ip_hdr(skb);
583         h = skb->data;
584         flags = *(__be16*)h;
585
586         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
587                 /* - Version must be 0.
588                    - We do not support routing headers.
589                  */
590                 if (flags&(GRE_VERSION|GRE_ROUTING))
591                         goto drop_nolock;
592
593                 if (flags&GRE_CSUM) {
594                         switch (skb->ip_summed) {
595                         case CHECKSUM_COMPLETE:
596                                 csum = csum_fold(skb->csum);
597                                 if (!csum)
598                                         break;
599                                 /* fall through */
600                         case CHECKSUM_NONE:
601                                 skb->csum = 0;
602                                 csum = __skb_checksum_complete(skb);
603                                 skb->ip_summed = CHECKSUM_COMPLETE;
604                         }
605                         offset += 4;
606                 }
607                 if (flags&GRE_KEY) {
608                         key = *(__be32*)(h + offset);
609                         offset += 4;
610                 }
611                 if (flags&GRE_SEQ) {
612                         seqno = ntohl(*(__be32*)(h + offset));
613                         offset += 4;
614                 }
615         }
616
617         read_lock(&ipgre_lock);
618         if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
619                                         iph->saddr, iph->daddr, key)) != NULL) {
620                 secpath_reset(skb);
621
622                 skb->protocol = *(__be16*)(h + 2);
623                 /* WCCP version 1 and 2 protocol decoding.
624                  * - Change protocol to IP
625                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
626                  */
627                 if (flags == 0 &&
628                     skb->protocol == htons(ETH_P_WCCP)) {
629                         skb->protocol = htons(ETH_P_IP);
630                         if ((*(h + offset) & 0xF0) != 0x40)
631                                 offset += 4;
632                 }
633
634                 skb->mac_header = skb->network_header;
635                 __pskb_pull(skb, offset);
636                 skb_reset_network_header(skb);
637                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
638                 skb->pkt_type = PACKET_HOST;
639 #ifdef CONFIG_NET_IPGRE_BROADCAST
640                 if (ipv4_is_multicast(iph->daddr)) {
641                         /* Looped back packet, drop it! */
642                         if (skb->rtable->fl.iif == 0)
643                                 goto drop;
644                         tunnel->stat.multicast++;
645                         skb->pkt_type = PACKET_BROADCAST;
646                 }
647 #endif
648
649                 if (((flags&GRE_CSUM) && csum) ||
650                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
651                         tunnel->stat.rx_crc_errors++;
652                         tunnel->stat.rx_errors++;
653                         goto drop;
654                 }
655                 if (tunnel->parms.i_flags&GRE_SEQ) {
656                         if (!(flags&GRE_SEQ) ||
657                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
658                                 tunnel->stat.rx_fifo_errors++;
659                                 tunnel->stat.rx_errors++;
660                                 goto drop;
661                         }
662                         tunnel->i_seqno = seqno + 1;
663                 }
664                 tunnel->stat.rx_packets++;
665                 tunnel->stat.rx_bytes += skb->len;
666                 skb->dev = tunnel->dev;
667                 dst_release(skb->dst);
668                 skb->dst = NULL;
669                 nf_reset(skb);
670                 ipgre_ecn_decapsulate(iph, skb);
671                 netif_rx(skb);
672                 read_unlock(&ipgre_lock);
673                 return(0);
674         }
675         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
676
677 drop:
678         read_unlock(&ipgre_lock);
679 drop_nolock:
680         kfree_skb(skb);
681         return(0);
682 }
683
684 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
685 {
686         struct ip_tunnel *tunnel = netdev_priv(dev);
687         struct net_device_stats *stats = &tunnel->stat;
688         struct iphdr  *old_iph = ip_hdr(skb);
689         struct iphdr  *tiph;
690         u8     tos;
691         __be16 df;
692         struct rtable *rt;                      /* Route to the other host */
693         struct net_device *tdev;                        /* Device to other host */
694         struct iphdr  *iph;                     /* Our new IP header */
695         unsigned int max_headroom;              /* The extra header space needed */
696         int    gre_hlen;
697         __be32 dst;
698         int    mtu;
699
700         if (tunnel->recursion++) {
701                 tunnel->stat.collisions++;
702                 goto tx_error;
703         }
704
705         if (dev->header_ops) {
706                 gre_hlen = 0;
707                 tiph = (struct iphdr*)skb->data;
708         } else {
709                 gre_hlen = tunnel->hlen;
710                 tiph = &tunnel->parms.iph;
711         }
712
713         if ((dst = tiph->daddr) == 0) {
714                 /* NBMA tunnel */
715
716                 if (skb->dst == NULL) {
717                         tunnel->stat.tx_fifo_errors++;
718                         goto tx_error;
719                 }
720
721                 if (skb->protocol == htons(ETH_P_IP)) {
722                         rt = skb->rtable;
723                         if ((dst = rt->rt_gateway) == 0)
724                                 goto tx_error_icmp;
725                 }
726 #ifdef CONFIG_IPV6
727                 else if (skb->protocol == htons(ETH_P_IPV6)) {
728                         struct in6_addr *addr6;
729                         int addr_type;
730                         struct neighbour *neigh = skb->dst->neighbour;
731
732                         if (neigh == NULL)
733                                 goto tx_error;
734
735                         addr6 = (struct in6_addr*)&neigh->primary_key;
736                         addr_type = ipv6_addr_type(addr6);
737
738                         if (addr_type == IPV6_ADDR_ANY) {
739                                 addr6 = &ipv6_hdr(skb)->daddr;
740                                 addr_type = ipv6_addr_type(addr6);
741                         }
742
743                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
744                                 goto tx_error_icmp;
745
746                         dst = addr6->s6_addr32[3];
747                 }
748 #endif
749                 else
750                         goto tx_error;
751         }
752
753         tos = tiph->tos;
754         if (tos&1) {
755                 if (skb->protocol == htons(ETH_P_IP))
756                         tos = old_iph->tos;
757                 tos &= ~1;
758         }
759
760         {
761                 struct flowi fl = { .oif = tunnel->parms.link,
762                                     .nl_u = { .ip4_u =
763                                               { .daddr = dst,
764                                                 .saddr = tiph->saddr,
765                                                 .tos = RT_TOS(tos) } },
766                                     .proto = IPPROTO_GRE };
767                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
768                         tunnel->stat.tx_carrier_errors++;
769                         goto tx_error;
770                 }
771         }
772         tdev = rt->u.dst.dev;
773
774         if (tdev == dev) {
775                 ip_rt_put(rt);
776                 tunnel->stat.collisions++;
777                 goto tx_error;
778         }
779
780         df = tiph->frag_off;
781         if (df)
782                 mtu = dst_mtu(&rt->u.dst) - tunnel->hlen;
783         else
784                 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
785
786         if (skb->dst)
787                 skb->dst->ops->update_pmtu(skb->dst, mtu);
788
789         if (skb->protocol == htons(ETH_P_IP)) {
790                 df |= (old_iph->frag_off&htons(IP_DF));
791
792                 if ((old_iph->frag_off&htons(IP_DF)) &&
793                     mtu < ntohs(old_iph->tot_len)) {
794                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
795                         ip_rt_put(rt);
796                         goto tx_error;
797                 }
798         }
799 #ifdef CONFIG_IPV6
800         else if (skb->protocol == htons(ETH_P_IPV6)) {
801                 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
802
803                 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
804                         if ((tunnel->parms.iph.daddr &&
805                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
806                             rt6->rt6i_dst.plen == 128) {
807                                 rt6->rt6i_flags |= RTF_MODIFIED;
808                                 skb->dst->metrics[RTAX_MTU-1] = mtu;
809                         }
810                 }
811
812                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
813                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
814                         ip_rt_put(rt);
815                         goto tx_error;
816                 }
817         }
818 #endif
819
820         if (tunnel->err_count > 0) {
821                 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
822                         tunnel->err_count--;
823
824                         dst_link_failure(skb);
825                 } else
826                         tunnel->err_count = 0;
827         }
828
829         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
830
831         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
832             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
833                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
834                 if (!new_skb) {
835                         ip_rt_put(rt);
836                         stats->tx_dropped++;
837                         dev_kfree_skb(skb);
838                         tunnel->recursion--;
839                         return 0;
840                 }
841                 if (skb->sk)
842                         skb_set_owner_w(new_skb, skb->sk);
843                 dev_kfree_skb(skb);
844                 skb = new_skb;
845                 old_iph = ip_hdr(skb);
846         }
847
848         skb->transport_header = skb->network_header;
849         skb_push(skb, gre_hlen);
850         skb_reset_network_header(skb);
851         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
852         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
853                               IPSKB_REROUTED);
854         dst_release(skb->dst);
855         skb->dst = &rt->u.dst;
856
857         /*
858          *      Push down and install the IPIP header.
859          */
860
861         iph                     =       ip_hdr(skb);
862         iph->version            =       4;
863         iph->ihl                =       sizeof(struct iphdr) >> 2;
864         iph->frag_off           =       df;
865         iph->protocol           =       IPPROTO_GRE;
866         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
867         iph->daddr              =       rt->rt_dst;
868         iph->saddr              =       rt->rt_src;
869
870         if ((iph->ttl = tiph->ttl) == 0) {
871                 if (skb->protocol == htons(ETH_P_IP))
872                         iph->ttl = old_iph->ttl;
873 #ifdef CONFIG_IPV6
874                 else if (skb->protocol == htons(ETH_P_IPV6))
875                         iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
876 #endif
877                 else
878                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
879         }
880
881         ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
882         ((__be16*)(iph+1))[1] = skb->protocol;
883
884         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
885                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
886
887                 if (tunnel->parms.o_flags&GRE_SEQ) {
888                         ++tunnel->o_seqno;
889                         *ptr = htonl(tunnel->o_seqno);
890                         ptr--;
891                 }
892                 if (tunnel->parms.o_flags&GRE_KEY) {
893                         *ptr = tunnel->parms.o_key;
894                         ptr--;
895                 }
896                 if (tunnel->parms.o_flags&GRE_CSUM) {
897                         *ptr = 0;
898                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
899                 }
900         }
901
902         nf_reset(skb);
903
904         IPTUNNEL_XMIT();
905         tunnel->recursion--;
906         return 0;
907
908 tx_error_icmp:
909         dst_link_failure(skb);
910
911 tx_error:
912         stats->tx_errors++;
913         dev_kfree_skb(skb);
914         tunnel->recursion--;
915         return 0;
916 }
917
918 static void ipgre_tunnel_bind_dev(struct net_device *dev)
919 {
920         struct net_device *tdev = NULL;
921         struct ip_tunnel *tunnel;
922         struct iphdr *iph;
923         int hlen = LL_MAX_HEADER;
924         int mtu = ETH_DATA_LEN;
925         int addend = sizeof(struct iphdr) + 4;
926
927         tunnel = netdev_priv(dev);
928         iph = &tunnel->parms.iph;
929
930         /* Guess output device to choose reasonable mtu and hard_header_len */
931
932         if (iph->daddr) {
933                 struct flowi fl = { .oif = tunnel->parms.link,
934                                     .nl_u = { .ip4_u =
935                                               { .daddr = iph->daddr,
936                                                 .saddr = iph->saddr,
937                                                 .tos = RT_TOS(iph->tos) } },
938                                     .proto = IPPROTO_GRE };
939                 struct rtable *rt;
940                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
941                         tdev = rt->u.dst.dev;
942                         ip_rt_put(rt);
943                 }
944                 dev->flags |= IFF_POINTOPOINT;
945         }
946
947         if (!tdev && tunnel->parms.link)
948                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
949
950         if (tdev) {
951                 hlen = tdev->hard_header_len;
952                 mtu = tdev->mtu;
953         }
954         dev->iflink = tunnel->parms.link;
955
956         /* Precalculate GRE options length */
957         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
958                 if (tunnel->parms.o_flags&GRE_CSUM)
959                         addend += 4;
960                 if (tunnel->parms.o_flags&GRE_KEY)
961                         addend += 4;
962                 if (tunnel->parms.o_flags&GRE_SEQ)
963                         addend += 4;
964         }
965         dev->hard_header_len = hlen + addend;
966         dev->mtu = mtu - addend;
967         tunnel->hlen = addend;
968
969 }
970
971 static int
972 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
973 {
974         int err = 0;
975         struct ip_tunnel_parm p;
976         struct ip_tunnel *t;
977         struct net *net = dev_net(dev);
978         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
979
980         switch (cmd) {
981         case SIOCGETTUNNEL:
982                 t = NULL;
983                 if (dev == ign->fb_tunnel_dev) {
984                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
985                                 err = -EFAULT;
986                                 break;
987                         }
988                         t = ipgre_tunnel_locate(net, &p, 0);
989                 }
990                 if (t == NULL)
991                         t = netdev_priv(dev);
992                 memcpy(&p, &t->parms, sizeof(p));
993                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
994                         err = -EFAULT;
995                 break;
996
997         case SIOCADDTUNNEL:
998         case SIOCCHGTUNNEL:
999                 err = -EPERM;
1000                 if (!capable(CAP_NET_ADMIN))
1001                         goto done;
1002
1003                 err = -EFAULT;
1004                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1005                         goto done;
1006
1007                 err = -EINVAL;
1008                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1009                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1010                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1011                         goto done;
1012                 if (p.iph.ttl)
1013                         p.iph.frag_off |= htons(IP_DF);
1014
1015                 if (!(p.i_flags&GRE_KEY))
1016                         p.i_key = 0;
1017                 if (!(p.o_flags&GRE_KEY))
1018                         p.o_key = 0;
1019
1020                 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1021
1022                 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1023                         if (t != NULL) {
1024                                 if (t->dev != dev) {
1025                                         err = -EEXIST;
1026                                         break;
1027                                 }
1028                         } else {
1029                                 unsigned nflags=0;
1030
1031                                 t = netdev_priv(dev);
1032
1033                                 if (ipv4_is_multicast(p.iph.daddr))
1034                                         nflags = IFF_BROADCAST;
1035                                 else if (p.iph.daddr)
1036                                         nflags = IFF_POINTOPOINT;
1037
1038                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1039                                         err = -EINVAL;
1040                                         break;
1041                                 }
1042                                 ipgre_tunnel_unlink(ign, t);
1043                                 t->parms.iph.saddr = p.iph.saddr;
1044                                 t->parms.iph.daddr = p.iph.daddr;
1045                                 t->parms.i_key = p.i_key;
1046                                 t->parms.o_key = p.o_key;
1047                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1048                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
1049                                 ipgre_tunnel_link(ign, t);
1050                                 netdev_state_change(dev);
1051                         }
1052                 }
1053
1054                 if (t) {
1055                         err = 0;
1056                         if (cmd == SIOCCHGTUNNEL) {
1057                                 t->parms.iph.ttl = p.iph.ttl;
1058                                 t->parms.iph.tos = p.iph.tos;
1059                                 t->parms.iph.frag_off = p.iph.frag_off;
1060                                 if (t->parms.link != p.link) {
1061                                         t->parms.link = p.link;
1062                                         ipgre_tunnel_bind_dev(dev);
1063                                         netdev_state_change(dev);
1064                                 }
1065                         }
1066                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1067                                 err = -EFAULT;
1068                 } else
1069                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1070                 break;
1071
1072         case SIOCDELTUNNEL:
1073                 err = -EPERM;
1074                 if (!capable(CAP_NET_ADMIN))
1075                         goto done;
1076
1077                 if (dev == ign->fb_tunnel_dev) {
1078                         err = -EFAULT;
1079                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1080                                 goto done;
1081                         err = -ENOENT;
1082                         if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1083                                 goto done;
1084                         err = -EPERM;
1085                         if (t == netdev_priv(ign->fb_tunnel_dev))
1086                                 goto done;
1087                         dev = t->dev;
1088                 }
1089                 unregister_netdevice(dev);
1090                 err = 0;
1091                 break;
1092
1093         default:
1094                 err = -EINVAL;
1095         }
1096
1097 done:
1098         return err;
1099 }
1100
1101 static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
1102 {
1103         return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
1104 }
1105
1106 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1107 {
1108         struct ip_tunnel *tunnel = netdev_priv(dev);
1109         if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
1110                 return -EINVAL;
1111         dev->mtu = new_mtu;
1112         return 0;
1113 }
1114
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
1124
1125    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1126    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1127
1128    ping -t 255 224.66.66.66
1129
1130    If nobody answers, mbone does not work.
1131
1132    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1133    ip addr add 10.66.66.<somewhat>/24 dev Universe
1134    ifconfig Universe up
1135    ifconfig Universe add fe80::<Your_real_addr>/10
1136    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1137    ftp 10.66.66.66
1138    ...
1139    ftp fec0:6666:6666::193.233.7.65
1140    ...
1141
1142  */
1143
1144 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1145                         unsigned short type,
1146                         const void *daddr, const void *saddr, unsigned len)
1147 {
1148         struct ip_tunnel *t = netdev_priv(dev);
1149         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1150         __be16 *p = (__be16*)(iph+1);
1151
1152         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1153         p[0]            = t->parms.o_flags;
1154         p[1]            = htons(type);
1155
1156         /*
1157          *      Set the source hardware address.
1158          */
1159
1160         if (saddr)
1161                 memcpy(&iph->saddr, saddr, 4);
1162
1163         if (daddr) {
1164                 memcpy(&iph->daddr, daddr, 4);
1165                 return t->hlen;
1166         }
1167         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1168                 return t->hlen;
1169
1170         return -t->hlen;
1171 }
1172
1173 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1174 {
1175         struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1176         memcpy(haddr, &iph->saddr, 4);
1177         return 4;
1178 }
1179
1180 static const struct header_ops ipgre_header_ops = {
1181         .create = ipgre_header,
1182         .parse  = ipgre_header_parse,
1183 };
1184
1185 #ifdef CONFIG_NET_IPGRE_BROADCAST
1186 static int ipgre_open(struct net_device *dev)
1187 {
1188         struct ip_tunnel *t = netdev_priv(dev);
1189
1190         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1191                 struct flowi fl = { .oif = t->parms.link,
1192                                     .nl_u = { .ip4_u =
1193                                               { .daddr = t->parms.iph.daddr,
1194                                                 .saddr = t->parms.iph.saddr,
1195                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1196                                     .proto = IPPROTO_GRE };
1197                 struct rtable *rt;
1198                 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1199                         return -EADDRNOTAVAIL;
1200                 dev = rt->u.dst.dev;
1201                 ip_rt_put(rt);
1202                 if (__in_dev_get_rtnl(dev) == NULL)
1203                         return -EADDRNOTAVAIL;
1204                 t->mlink = dev->ifindex;
1205                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1206         }
1207         return 0;
1208 }
1209
1210 static int ipgre_close(struct net_device *dev)
1211 {
1212         struct ip_tunnel *t = netdev_priv(dev);
1213         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1214                 struct in_device *in_dev;
1215                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1216                 if (in_dev) {
1217                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1218                         in_dev_put(in_dev);
1219                 }
1220         }
1221         return 0;
1222 }
1223
1224 #endif
1225
1226 static void ipgre_tunnel_setup(struct net_device *dev)
1227 {
1228         dev->uninit             = ipgre_tunnel_uninit;
1229         dev->destructor         = free_netdev;
1230         dev->hard_start_xmit    = ipgre_tunnel_xmit;
1231         dev->get_stats          = ipgre_tunnel_get_stats;
1232         dev->do_ioctl           = ipgre_tunnel_ioctl;
1233         dev->change_mtu         = ipgre_tunnel_change_mtu;
1234
1235         dev->type               = ARPHRD_IPGRE;
1236         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1237         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1238         dev->flags              = IFF_NOARP;
1239         dev->iflink             = 0;
1240         dev->addr_len           = 4;
1241         dev->features           |= NETIF_F_NETNS_LOCAL;
1242 }
1243
1244 static int ipgre_tunnel_init(struct net_device *dev)
1245 {
1246         struct ip_tunnel *tunnel;
1247         struct iphdr *iph;
1248
1249         tunnel = netdev_priv(dev);
1250         iph = &tunnel->parms.iph;
1251
1252         tunnel->dev = dev;
1253         strcpy(tunnel->parms.name, dev->name);
1254
1255         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1256         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1257
1258         ipgre_tunnel_bind_dev(dev);
1259
1260         if (iph->daddr) {
1261 #ifdef CONFIG_NET_IPGRE_BROADCAST
1262                 if (ipv4_is_multicast(iph->daddr)) {
1263                         if (!iph->saddr)
1264                                 return -EINVAL;
1265                         dev->flags = IFF_BROADCAST;
1266                         dev->header_ops = &ipgre_header_ops;
1267                         dev->open = ipgre_open;
1268                         dev->stop = ipgre_close;
1269                 }
1270 #endif
1271         } else
1272                 dev->header_ops = &ipgre_header_ops;
1273
1274         return 0;
1275 }
1276
1277 static int ipgre_fb_tunnel_init(struct net_device *dev)
1278 {
1279         struct ip_tunnel *tunnel = netdev_priv(dev);
1280         struct iphdr *iph = &tunnel->parms.iph;
1281         struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1282
1283         tunnel->dev = dev;
1284         strcpy(tunnel->parms.name, dev->name);
1285
1286         iph->version            = 4;
1287         iph->protocol           = IPPROTO_GRE;
1288         iph->ihl                = 5;
1289         tunnel->hlen            = sizeof(struct iphdr) + 4;
1290
1291         dev_hold(dev);
1292         ign->tunnels_wc[0]      = tunnel;
1293         return 0;
1294 }
1295
1296
1297 static struct net_protocol ipgre_protocol = {
1298         .handler        =       ipgre_rcv,
1299         .err_handler    =       ipgre_err,
1300         .netns_ok       =       1,
1301 };
1302
1303 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1304 {
1305         int prio;
1306
1307         for (prio = 0; prio < 4; prio++) {
1308                 int h;
1309                 for (h = 0; h < HASH_SIZE; h++) {
1310                         struct ip_tunnel *t;
1311                         while ((t = ign->tunnels[prio][h]) != NULL)
1312                                 unregister_netdevice(t->dev);
1313                 }
1314         }
1315 }
1316
1317 static int ipgre_init_net(struct net *net)
1318 {
1319         int err;
1320         struct ipgre_net *ign;
1321
1322         err = -ENOMEM;
1323         ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1324         if (ign == NULL)
1325                 goto err_alloc;
1326
1327         err = net_assign_generic(net, ipgre_net_id, ign);
1328         if (err < 0)
1329                 goto err_assign;
1330
1331         ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1332                                            ipgre_tunnel_setup);
1333         if (!ign->fb_tunnel_dev) {
1334                 err = -ENOMEM;
1335                 goto err_alloc_dev;
1336         }
1337
1338         ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1339         dev_net_set(ign->fb_tunnel_dev, net);
1340
1341         if ((err = register_netdev(ign->fb_tunnel_dev)))
1342                 goto err_reg_dev;
1343
1344         return 0;
1345
1346 err_reg_dev:
1347         free_netdev(ign->fb_tunnel_dev);
1348 err_alloc_dev:
1349         /* nothing */
1350 err_assign:
1351         kfree(ign);
1352 err_alloc:
1353         return err;
1354 }
1355
1356 static void ipgre_exit_net(struct net *net)
1357 {
1358         struct ipgre_net *ign;
1359
1360         ign = net_generic(net, ipgre_net_id);
1361         rtnl_lock();
1362         ipgre_destroy_tunnels(ign);
1363         rtnl_unlock();
1364         kfree(ign);
1365 }
1366
1367 static struct pernet_operations ipgre_net_ops = {
1368         .init = ipgre_init_net,
1369         .exit = ipgre_exit_net,
1370 };
1371
1372 /*
1373  *      And now the modules code and kernel interface.
1374  */
1375
1376 static int __init ipgre_init(void)
1377 {
1378         int err;
1379
1380         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1381
1382         if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1383                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1384                 return -EAGAIN;
1385         }
1386
1387         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1388         if (err < 0)
1389                 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1390
1391         return err;
1392 }
1393
1394 static void __exit ipgre_fini(void)
1395 {
1396         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1397                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1398
1399         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1400 }
1401
1402 module_init(ipgre_init);
1403 module_exit(ipgre_fini);
1404 MODULE_LICENSE("GPL");