Merge branch 'linus' into tracing/hw-branch-tracing
[linux-2.6] / net / ipv4 / ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <net/net_namespace.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/route.h>
55 #include <net/sock.h>
56 #include <net/icmp.h>
57 #include <net/udp.h>
58 #include <net/raw.h>
59 #include <linux/notifier.h>
60 #include <linux/if_arp.h>
61 #include <linux/netfilter_ipv4.h>
62 #include <net/ipip.h>
63 #include <net/checksum.h>
64 #include <net/netlink.h>
65
66 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
67 #define CONFIG_IP_PIMSM 1
68 #endif
69
70 /* Big lock protecting the vif table, the mrt cache and the mroute socket state.
71    Note that changes are additionally serialized via rtnl_lock.
72  */
73
74 static DEFINE_RWLOCK(mrt_lock);
75
76 /*
77  *      Multicast router control variables
78  */
79
80 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
81
82 static struct mfc_cache *mfc_unres_queue;               /* Head of the queue of unresolved entries, linked via ->next */
83
84 /* Special spinlock for the queue of unresolved entries */
85 static DEFINE_SPINLOCK(mfc_unres_lock);
86
87 /* We return to Alan's original scheme. The hash table of resolved
88    entries is changed only in process context and is protected
89    by the weak lock mrt_lock. The queue of unresolved entries is protected
90    by the strong spinlock mfc_unres_lock.
91
92    In this case the data path is entirely free of exclusive locks.
93  */
94
95 static struct kmem_cache *mrt_cachep __read_mostly;
96
97 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
98 static int ipmr_cache_report(struct net *net,
99                              struct sk_buff *pkt, vifi_t vifi, int assert);
100 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
101
102 #ifdef CONFIG_IP_PIMSM_V2
103 static struct net_protocol pim_protocol;
104 #endif
105
106 static struct timer_list ipmr_expire_timer;     /* (re)armed while unresolved entries are queued */
107
108 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
109
110 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
111 {
112         struct net *net = dev_net(dev);
113
114         dev_close(dev);
115
116         dev = __dev_get_by_name(net, "tunl0");
117         if (dev) {
118                 const struct net_device_ops *ops = dev->netdev_ops;
119                 struct ifreq ifr;
120                 struct ip_tunnel_parm p;
121
122                 memset(&p, 0, sizeof(p));
123                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
124                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
125                 p.iph.version = 4;
126                 p.iph.ihl = 5;
127                 p.iph.protocol = IPPROTO_IPIP;
128                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
129                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
130
131                 if (ops->ndo_do_ioctl) {
132                         mm_segment_t oldfs = get_fs();
133
134                         set_fs(KERNEL_DS);
135                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
136                         set_fs(oldfs);
137                 }
138         }
139 }
140
141 static
142 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
143 {
144         struct net_device  *dev;
145
146         dev = __dev_get_by_name(net, "tunl0");
147
148         if (dev) {
149                 const struct net_device_ops *ops = dev->netdev_ops;
150                 int err;
151                 struct ifreq ifr;
152                 struct ip_tunnel_parm p;
153                 struct in_device  *in_dev;
154
155                 memset(&p, 0, sizeof(p));
156                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
157                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
158                 p.iph.version = 4;
159                 p.iph.ihl = 5;
160                 p.iph.protocol = IPPROTO_IPIP;
161                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
162                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
163
164                 if (ops->ndo_do_ioctl) {
165                         mm_segment_t oldfs = get_fs();
166
167                         set_fs(KERNEL_DS);
168                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
169                         set_fs(oldfs);
170                 } else
171                         err = -EOPNOTSUPP;
172
173                 dev = NULL;
174
175                 if (err == 0 &&
176                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
177                         dev->flags |= IFF_MULTICAST;
178
179                         in_dev = __in_dev_get_rtnl(dev);
180                         if (in_dev == NULL)
181                                 goto failure;
182
183                         ipv4_devconf_setall(in_dev);
184                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
185
186                         if (dev_open(dev))
187                                 goto failure;
188                         dev_hold(dev);
189                 }
190         }
191         return dev;
192
193 failure:
194         /* allow the register to be completed before unregistering. */
195         rtnl_unlock();
196         rtnl_lock();
197
198         unregister_netdevice(dev);
199         return NULL;
200 }
201
202 #ifdef CONFIG_IP_PIMSM
203
204 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205 {
206         struct net *net = dev_net(dev);
207
208         read_lock(&mrt_lock);
209         dev->stats.tx_bytes += skb->len;
210         dev->stats.tx_packets++;
211         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212                           IGMPMSG_WHOLEPKT);
213         read_unlock(&mrt_lock);
214         kfree_skb(skb);
215         return 0;
216 }
217
218 static const struct net_device_ops reg_vif_netdev_ops = {
219         .ndo_start_xmit = reg_vif_xmit,
220 };
221
222 static void reg_vif_setup(struct net_device *dev)
223 {
224         dev->type               = ARPHRD_PIMREG;
225         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
226         dev->flags              = IFF_NOARP;
227         dev->netdev_ops         = &reg_vif_netdev_ops,
228         dev->destructor         = free_netdev;
229 }
230
231 static struct net_device *ipmr_reg_vif(void)
232 {
233         struct net_device *dev;
234         struct in_device *in_dev;
235
236         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
237
238         if (dev == NULL)
239                 return NULL;
240
241         if (register_netdevice(dev)) {
242                 free_netdev(dev);
243                 return NULL;
244         }
245         dev->iflink = 0;
246
247         rcu_read_lock();
248         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249                 rcu_read_unlock();
250                 goto failure;
251         }
252
253         ipv4_devconf_setall(in_dev);
254         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255         rcu_read_unlock();
256
257         if (dev_open(dev))
258                 goto failure;
259
260         dev_hold(dev);
261
262         return dev;
263
264 failure:
265         /* allow the register to be completed before unregistering. */
266         rtnl_unlock();
267         rtnl_lock();
268
269         unregister_netdevice(dev);
270         return NULL;
271 }
272 #endif
273
274 /*
275  *      Delete a VIF entry
276  *      @notify: Set to 1, if the caller is a notifier_call
277  */
278
279 static int vif_delete(struct net *net, int vifi, int notify)
280 {
281         struct vif_device *v;
282         struct net_device *dev;
283         struct in_device *in_dev;
284
285         if (vifi < 0 || vifi >= net->ipv4.maxvif)
286                 return -EADDRNOTAVAIL;
287
288         v = &net->ipv4.vif_table[vifi];
289
290         write_lock_bh(&mrt_lock);
291         dev = v->dev;
292         v->dev = NULL;
293
294         if (!dev) {
295                 write_unlock_bh(&mrt_lock);
296                 return -EADDRNOTAVAIL;
297         }
298
299 #ifdef CONFIG_IP_PIMSM
300         if (vifi == net->ipv4.mroute_reg_vif_num)
301                 net->ipv4.mroute_reg_vif_num = -1;
302 #endif
303
304         if (vifi+1 == net->ipv4.maxvif) {
305                 int tmp;
306                 for (tmp=vifi-1; tmp>=0; tmp--) {
307                         if (VIF_EXISTS(net, tmp))
308                                 break;
309                 }
310                 net->ipv4.maxvif = tmp+1;
311         }
312
313         write_unlock_bh(&mrt_lock);
314
315         dev_set_allmulti(dev, -1);
316
317         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
318                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
319                 ip_rt_multicast_event(in_dev);
320         }
321
322         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
323                 unregister_netdevice(dev);
324
325         dev_put(dev);
326         return 0;
327 }
328
329 static inline void ipmr_cache_free(struct mfc_cache *c)
330 {
331         release_net(mfc_net(c));
332         kmem_cache_free(mrt_cachep, c);
333 }
334
335 /* Destroy an unresolved cache entry, killing queued skbs
336    and reporting error to netlink readers.
337  */
338
339 static void ipmr_destroy_unres(struct mfc_cache *c)
340 {
341         struct sk_buff *skb;
342         struct nlmsgerr *e;
343         struct net *net = mfc_net(c);
344
345         atomic_dec(&net->ipv4.cache_resolve_queue_len);
346
347         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348                 if (ip_hdr(skb)->version == 0) {
349                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
350                         nlh->nlmsg_type = NLMSG_ERROR;
351                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
352                         skb_trim(skb, nlh->nlmsg_len);
353                         e = NLMSG_DATA(nlh);
354                         e->error = -ETIMEDOUT;
355                         memset(&e->msg, 0, sizeof(e->msg));
356
357                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358                 } else
359                         kfree_skb(skb);
360         }
361
362         ipmr_cache_free(c);
363 }
364
365
366 /* Single timer process for all the unresolved queue. */
367
368 static void ipmr_expire_process(unsigned long dummy)
369 {
370         unsigned long now;
371         unsigned long expires;
372         struct mfc_cache *c, **cp;
373
374         if (!spin_trylock(&mfc_unres_lock)) {
375                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
376                 return;
377         }
378
379         if (mfc_unres_queue == NULL)
380                 goto out;
381
382         now = jiffies;
383         expires = 10*HZ;
384         cp = &mfc_unres_queue;
385
386         while ((c=*cp) != NULL) {
387                 if (time_after(c->mfc_un.unres.expires, now)) {
388                         unsigned long interval = c->mfc_un.unres.expires - now;
389                         if (interval < expires)
390                                 expires = interval;
391                         cp = &c->next;
392                         continue;
393                 }
394
395                 *cp = c->next;
396
397                 ipmr_destroy_unres(c);
398         }
399
400         if (mfc_unres_queue != NULL)
401                 mod_timer(&ipmr_expire_timer, jiffies + expires);
402
403 out:
404         spin_unlock(&mfc_unres_lock);
405 }
406
407 /* Fill oifs list. It is called under write locked mrt_lock. */
408
409 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410 {
411         int vifi;
412         struct net *net = mfc_net(cache);
413
414         cache->mfc_un.res.minvif = MAXVIFS;
415         cache->mfc_un.res.maxvif = 0;
416         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
417
418         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
419                 if (VIF_EXISTS(net, vifi) &&
420                     ttls[vifi] && ttls[vifi] < 255) {
421                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
422                         if (cache->mfc_un.res.minvif > vifi)
423                                 cache->mfc_un.res.minvif = vifi;
424                         if (cache->mfc_un.res.maxvif <= vifi)
425                                 cache->mfc_un.res.maxvif = vifi + 1;
426                 }
427         }
428 }
429
430 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
431 {
432         int vifi = vifc->vifc_vifi;
433         struct vif_device *v = &net->ipv4.vif_table[vifi];
434         struct net_device *dev;
435         struct in_device *in_dev;
436         int err;
437
438         /* Is vif busy ? */
439         if (VIF_EXISTS(net, vifi))
440                 return -EADDRINUSE;
441
442         switch (vifc->vifc_flags) {
443 #ifdef CONFIG_IP_PIMSM
444         case VIFF_REGISTER:
445                 /*
446                  * Special Purpose VIF in PIM
447                  * All the packets will be sent to the daemon
448                  */
449                 if (net->ipv4.mroute_reg_vif_num >= 0)
450                         return -EADDRINUSE;
451                 dev = ipmr_reg_vif();
452                 if (!dev)
453                         return -ENOBUFS;
454                 err = dev_set_allmulti(dev, 1);
455                 if (err) {
456                         unregister_netdevice(dev);
457                         dev_put(dev);
458                         return err;
459                 }
460                 break;
461 #endif
462         case VIFF_TUNNEL:
463                 dev = ipmr_new_tunnel(net, vifc);
464                 if (!dev)
465                         return -ENOBUFS;
466                 err = dev_set_allmulti(dev, 1);
467                 if (err) {
468                         ipmr_del_tunnel(dev, vifc);
469                         dev_put(dev);
470                         return err;
471                 }
472                 break;
473         case 0:
474                 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
475                 if (!dev)
476                         return -EADDRNOTAVAIL;
477                 err = dev_set_allmulti(dev, 1);
478                 if (err) {
479                         dev_put(dev);
480                         return err;
481                 }
482                 break;
483         default:
484                 return -EINVAL;
485         }
486
487         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
488                 return -EADDRNOTAVAIL;
489         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
490         ip_rt_multicast_event(in_dev);
491
492         /*
493          *      Fill in the VIF structures
494          */
495         v->rate_limit = vifc->vifc_rate_limit;
496         v->local = vifc->vifc_lcl_addr.s_addr;
497         v->remote = vifc->vifc_rmt_addr.s_addr;
498         v->flags = vifc->vifc_flags;
499         if (!mrtsock)
500                 v->flags |= VIFF_STATIC;
501         v->threshold = vifc->vifc_threshold;
502         v->bytes_in = 0;
503         v->bytes_out = 0;
504         v->pkt_in = 0;
505         v->pkt_out = 0;
506         v->link = dev->ifindex;
507         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
508                 v->link = dev->iflink;
509
510         /* And finish update writing critical data */
511         write_lock_bh(&mrt_lock);
512         v->dev = dev;
513 #ifdef CONFIG_IP_PIMSM
514         if (v->flags&VIFF_REGISTER)
515                 net->ipv4.mroute_reg_vif_num = vifi;
516 #endif
517         if (vifi+1 > net->ipv4.maxvif)
518                 net->ipv4.maxvif = vifi+1;
519         write_unlock_bh(&mrt_lock);
520         return 0;
521 }
522
523 static struct mfc_cache *ipmr_cache_find(struct net *net,
524                                          __be32 origin,
525                                          __be32 mcastgrp)
526 {
527         int line = MFC_HASH(mcastgrp, origin);
528         struct mfc_cache *c;
529
530         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
531                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
532                         break;
533         }
534         return c;
535 }
536
537 /*
538  *      Allocate a multicast cache entry
539  */
540 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
541 {
542         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
543         if (c == NULL)
544                 return NULL;
545         c->mfc_un.res.minvif = MAXVIFS;
546         mfc_net_set(c, net);
547         return c;
548 }
549
550 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
551 {
552         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
553         if (c == NULL)
554                 return NULL;
555         skb_queue_head_init(&c->mfc_un.unres.unresolved);
556         c->mfc_un.unres.expires = jiffies + 10*HZ;
557         mfc_net_set(c, net);
558         return c;
559 }
560
561 /*
562  *      A cache entry has gone into a resolved state from queued
563  */
564
565 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
566 {
567         struct sk_buff *skb;
568         struct nlmsgerr *e;
569
570         /*
571          *      Play the pending entries through our router
572          */
573
574         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
575                 if (ip_hdr(skb)->version == 0) {
576                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
577
578                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
579                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
580                                                   (u8 *)nlh);
581                         } else {
582                                 nlh->nlmsg_type = NLMSG_ERROR;
583                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
584                                 skb_trim(skb, nlh->nlmsg_len);
585                                 e = NLMSG_DATA(nlh);
586                                 e->error = -EMSGSIZE;
587                                 memset(&e->msg, 0, sizeof(e->msg));
588                         }
589
590                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
591                 } else
592                         ip_mr_forward(skb, c, 0);
593         }
594 }
595
596 /*
597  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
598  *      expects the following bizarre scheme.
599  *
600  *      Called under mrt_lock.
601  */
602
603 static int ipmr_cache_report(struct net *net,
604                              struct sk_buff *pkt, vifi_t vifi, int assert)
605 {
606         struct sk_buff *skb;
607         const int ihl = ip_hdrlen(pkt);
608         struct igmphdr *igmp;
609         struct igmpmsg *msg;
610         int ret;
611
612 #ifdef CONFIG_IP_PIMSM
613         if (assert == IGMPMSG_WHOLEPKT)
614                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
615         else
616 #endif
617                 skb = alloc_skb(128, GFP_ATOMIC);
618
619         if (!skb)
620                 return -ENOBUFS;
621
622 #ifdef CONFIG_IP_PIMSM
623         if (assert == IGMPMSG_WHOLEPKT) {
624                 /* Ugly, but we have no choice with this interface.
625                    Duplicate old header, fix ihl, length etc.
626                    And all this only to mangle msg->im_msgtype and
627                    to set msg->im_mbz to "mbz" :-)
628                  */
629                 skb_push(skb, sizeof(struct iphdr));
630                 skb_reset_network_header(skb);
631                 skb_reset_transport_header(skb);
632                 msg = (struct igmpmsg *)skb_network_header(skb);
633                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
634                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
635                 msg->im_mbz = 0;
636                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
637                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
638                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
639                                              sizeof(struct iphdr));
640         } else
641 #endif
642         {
643
644         /*
645          *      Copy the IP header
646          */
647
648         skb->network_header = skb->tail;
649         skb_put(skb, ihl);
650         skb_copy_to_linear_data(skb, pkt->data, ihl);
651         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
652         msg = (struct igmpmsg *)skb_network_header(skb);
653         msg->im_vif = vifi;
654         skb->dst = dst_clone(pkt->dst);
655
656         /*
657          *      Add our header
658          */
659
660         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
661         igmp->type      =
662         msg->im_msgtype = assert;
663         igmp->code      =       0;
664         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
665         skb->transport_header = skb->network_header;
666         }
667
668         if (net->ipv4.mroute_sk == NULL) {
669                 kfree_skb(skb);
670                 return -EINVAL;
671         }
672
673         /*
674          *      Deliver to mrouted
675          */
676         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
677         if (ret < 0) {
678                 if (net_ratelimit())
679                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
680                 kfree_skb(skb);
681         }
682
683         return ret;
684 }
685
686 /*
687  *      Queue a packet for resolution. It gets locked cache entry!
688  */
689
690 static int
691 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
692 {
693         int err;
694         struct mfc_cache *c;
695         const struct iphdr *iph = ip_hdr(skb);
696
697         spin_lock_bh(&mfc_unres_lock);
698         for (c=mfc_unres_queue; c; c=c->next) {
699                 if (net_eq(mfc_net(c), net) &&
700                     c->mfc_mcastgrp == iph->daddr &&
701                     c->mfc_origin == iph->saddr)
702                         break;
703         }
704
705         if (c == NULL) {
706                 /*
707                  *      Create a new entry if allowable
708                  */
709
710                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
711                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
712                         spin_unlock_bh(&mfc_unres_lock);
713
714                         kfree_skb(skb);
715                         return -ENOBUFS;
716                 }
717
718                 /*
719                  *      Fill in the new cache entry
720                  */
721                 c->mfc_parent   = -1;
722                 c->mfc_origin   = iph->saddr;
723                 c->mfc_mcastgrp = iph->daddr;
724
725                 /*
726                  *      Reflect first query at mrouted.
727                  */
728                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729                 if (err < 0) {
730                         /* If the report failed throw the cache entry
731                            out - Brad Parker
732                          */
733                         spin_unlock_bh(&mfc_unres_lock);
734
735                         ipmr_cache_free(c);
736                         kfree_skb(skb);
737                         return err;
738                 }
739
740                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
741                 c->next = mfc_unres_queue;
742                 mfc_unres_queue = c;
743
744                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
745         }
746
747         /*
748          *      See if we can append the packet
749          */
750         if (c->mfc_un.unres.unresolved.qlen>3) {
751                 kfree_skb(skb);
752                 err = -ENOBUFS;
753         } else {
754                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
755                 err = 0;
756         }
757
758         spin_unlock_bh(&mfc_unres_lock);
759         return err;
760 }
761
762 /*
763  *      MFC cache manipulation by user space mroute daemon
764  */
765
766 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
767 {
768         int line;
769         struct mfc_cache *c, **cp;
770
771         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
772
773         for (cp = &net->ipv4.mfc_cache_array[line];
774              (c = *cp) != NULL; cp = &c->next) {
775                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
776                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
777                         write_lock_bh(&mrt_lock);
778                         *cp = c->next;
779                         write_unlock_bh(&mrt_lock);
780
781                         ipmr_cache_free(c);
782                         return 0;
783                 }
784         }
785         return -ENOENT;
786 }
787
788 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
789 {
790         int line;
791         struct mfc_cache *uc, *c, **cp;
792
793         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
794
795         for (cp = &net->ipv4.mfc_cache_array[line];
796              (c = *cp) != NULL; cp = &c->next) {
797                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
798                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
799                         break;
800         }
801
802         if (c != NULL) {
803                 write_lock_bh(&mrt_lock);
804                 c->mfc_parent = mfc->mfcc_parent;
805                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
806                 if (!mrtsock)
807                         c->mfc_flags |= MFC_STATIC;
808                 write_unlock_bh(&mrt_lock);
809                 return 0;
810         }
811
812         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
813                 return -EINVAL;
814
815         c = ipmr_cache_alloc(net);
816         if (c == NULL)
817                 return -ENOMEM;
818
819         c->mfc_origin = mfc->mfcc_origin.s_addr;
820         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
821         c->mfc_parent = mfc->mfcc_parent;
822         ipmr_update_thresholds(c, mfc->mfcc_ttls);
823         if (!mrtsock)
824                 c->mfc_flags |= MFC_STATIC;
825
826         write_lock_bh(&mrt_lock);
827         c->next = net->ipv4.mfc_cache_array[line];
828         net->ipv4.mfc_cache_array[line] = c;
829         write_unlock_bh(&mrt_lock);
830
831         /*
832          *      Check to see if we resolved a queued list. If so we
833          *      need to send on the frames and tidy up.
834          */
835         spin_lock_bh(&mfc_unres_lock);
836         for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
837              cp = &uc->next) {
838                 if (net_eq(mfc_net(uc), net) &&
839                     uc->mfc_origin == c->mfc_origin &&
840                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
841                         *cp = uc->next;
842                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
843                         break;
844                 }
845         }
846         if (mfc_unres_queue == NULL)
847                 del_timer(&ipmr_expire_timer);
848         spin_unlock_bh(&mfc_unres_lock);
849
850         if (uc) {
851                 ipmr_cache_resolve(uc, c);
852                 ipmr_cache_free(uc);
853         }
854         return 0;
855 }
856
857 /*
858  *      Close the multicast socket, and clear the vif tables etc
859  */
860
861 static void mroute_clean_tables(struct net *net)
862 {
863         int i;
864
865         /*
866          *      Shut down all active vif entries
867          */
868         for (i = 0; i < net->ipv4.maxvif; i++) {
869                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
870                         vif_delete(net, i, 0);
871         }
872
873         /*
874          *      Wipe the cache
875          */
876         for (i=0; i<MFC_LINES; i++) {
877                 struct mfc_cache *c, **cp;
878
879                 cp = &net->ipv4.mfc_cache_array[i];
880                 while ((c = *cp) != NULL) {
881                         if (c->mfc_flags&MFC_STATIC) {
882                                 cp = &c->next;
883                                 continue;
884                         }
885                         write_lock_bh(&mrt_lock);
886                         *cp = c->next;
887                         write_unlock_bh(&mrt_lock);
888
889                         ipmr_cache_free(c);
890                 }
891         }
892
893         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
894                 struct mfc_cache *c, **cp;
895
896                 spin_lock_bh(&mfc_unres_lock);
897                 cp = &mfc_unres_queue;
898                 while ((c = *cp) != NULL) {
899                         if (!net_eq(mfc_net(c), net)) {
900                                 cp = &c->next;
901                                 continue;
902                         }
903                         *cp = c->next;
904
905                         ipmr_destroy_unres(c);
906                 }
907                 spin_unlock_bh(&mfc_unres_lock);
908         }
909 }
910
911 static void mrtsock_destruct(struct sock *sk)
912 {
913         struct net *net = sock_net(sk);
914
915         rtnl_lock();
916         if (sk == net->ipv4.mroute_sk) {
917                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
918
919                 write_lock_bh(&mrt_lock);
920                 net->ipv4.mroute_sk = NULL;
921                 write_unlock_bh(&mrt_lock);
922
923                 mroute_clean_tables(net);
924         }
925         rtnl_unlock();
926 }
927
928 /*
929  *      Socket options and virtual interface manipulation. The whole
930  *      virtual interface system is a complete heap, but unfortunately
931  *      that's how BSD mrouted happens to think. Maybe one day with a proper
932  *      MOSPF/PIM router set up we can clean this up.
933  */
934
935 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
936 {
937         int ret;
938         struct vifctl vif;
939         struct mfcctl mfc;
940         struct net *net = sock_net(sk);
941
942         if (optname != MRT_INIT) {
943                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
944                         return -EACCES;
945         }
946
947         switch (optname) {
948         case MRT_INIT:
949                 if (sk->sk_type != SOCK_RAW ||
950                     inet_sk(sk)->num != IPPROTO_IGMP)
951                         return -EOPNOTSUPP;
952                 if (optlen != sizeof(int))
953                         return -ENOPROTOOPT;
954
955                 rtnl_lock();
956                 if (net->ipv4.mroute_sk) {
957                         rtnl_unlock();
958                         return -EADDRINUSE;
959                 }
960
961                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
962                 if (ret == 0) {
963                         write_lock_bh(&mrt_lock);
964                         net->ipv4.mroute_sk = sk;
965                         write_unlock_bh(&mrt_lock);
966
967                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
968                 }
969                 rtnl_unlock();
970                 return ret;
971         case MRT_DONE:
972                 if (sk != net->ipv4.mroute_sk)
973                         return -EACCES;
974                 return ip_ra_control(sk, 0, NULL);
975         case MRT_ADD_VIF:
976         case MRT_DEL_VIF:
977                 if (optlen != sizeof(vif))
978                         return -EINVAL;
979                 if (copy_from_user(&vif, optval, sizeof(vif)))
980                         return -EFAULT;
981                 if (vif.vifc_vifi >= MAXVIFS)
982                         return -ENFILE;
983                 rtnl_lock();
984                 if (optname == MRT_ADD_VIF) {
985                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
986                 } else {
987                         ret = vif_delete(net, vif.vifc_vifi, 0);
988                 }
989                 rtnl_unlock();
990                 return ret;
991
992                 /*
993                  *      Manipulate the forwarding caches. These live
994                  *      in a sort of kernel/user symbiosis.
995                  */
996         case MRT_ADD_MFC:
997         case MRT_DEL_MFC:
998                 if (optlen != sizeof(mfc))
999                         return -EINVAL;
1000                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1001                         return -EFAULT;
1002                 rtnl_lock();
1003                 if (optname == MRT_DEL_MFC)
1004                         ret = ipmr_mfc_delete(net, &mfc);
1005                 else
1006                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1007                 rtnl_unlock();
1008                 return ret;
1009                 /*
1010                  *      Control PIM assert.
1011                  */
1012         case MRT_ASSERT:
1013         {
1014                 int v;
1015                 if (get_user(v,(int __user *)optval))
1016                         return -EFAULT;
1017                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1018                 return 0;
1019         }
1020 #ifdef CONFIG_IP_PIMSM
1021         case MRT_PIM:
1022         {
1023                 int v;
1024
1025                 if (get_user(v,(int __user *)optval))
1026                         return -EFAULT;
1027                 v = (v) ? 1 : 0;
1028
1029                 rtnl_lock();
1030                 ret = 0;
1031                 if (v != net->ipv4.mroute_do_pim) {
1032                         net->ipv4.mroute_do_pim = v;
1033                         net->ipv4.mroute_do_assert = v;
1034 #ifdef CONFIG_IP_PIMSM_V2
1035                         if (net->ipv4.mroute_do_pim)
1036                                 ret = inet_add_protocol(&pim_protocol,
1037                                                         IPPROTO_PIM);
1038                         else
1039                                 ret = inet_del_protocol(&pim_protocol,
1040                                                         IPPROTO_PIM);
1041                         if (ret < 0)
1042                                 ret = -EAGAIN;
1043 #endif
1044                 }
1045                 rtnl_unlock();
1046                 return ret;
1047         }
1048 #endif
1049         /*
1050          *      Spurious command, or MRT_VERSION which you cannot
1051          *      set.
1052          */
1053         default:
1054                 return -ENOPROTOOPT;
1055         }
1056 }
1057
1058 /*
1059  *      Getsock opt support for the multicast routing system.
1060  */
1061
1062 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1063 {
1064         int olr;
1065         int val;
1066         struct net *net = sock_net(sk);
1067
1068         if (optname != MRT_VERSION &&
1069 #ifdef CONFIG_IP_PIMSM
1070            optname!=MRT_PIM &&
1071 #endif
1072            optname!=MRT_ASSERT)
1073                 return -ENOPROTOOPT;
1074
1075         if (get_user(olr, optlen))
1076                 return -EFAULT;
1077
1078         olr = min_t(unsigned int, olr, sizeof(int));
1079         if (olr < 0)
1080                 return -EINVAL;
1081
1082         if (put_user(olr, optlen))
1083                 return -EFAULT;
1084         if (optname == MRT_VERSION)
1085                 val = 0x0305;
1086 #ifdef CONFIG_IP_PIMSM
1087         else if (optname == MRT_PIM)
1088                 val = net->ipv4.mroute_do_pim;
1089 #endif
1090         else
1091                 val = net->ipv4.mroute_do_assert;
1092         if (copy_to_user(optval, &val, olr))
1093                 return -EFAULT;
1094         return 0;
1095 }
1096
1097 /*
1098  *      The IP multicast ioctl support routines.
1099  */
1100
1101 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1102 {
1103         struct sioc_sg_req sr;
1104         struct sioc_vif_req vr;
1105         struct vif_device *vif;
1106         struct mfc_cache *c;
1107         struct net *net = sock_net(sk);
1108
1109         switch (cmd) {
1110         case SIOCGETVIFCNT:
1111                 if (copy_from_user(&vr, arg, sizeof(vr)))
1112                         return -EFAULT;
1113                 if (vr.vifi >= net->ipv4.maxvif)
1114                         return -EINVAL;
1115                 read_lock(&mrt_lock);
1116                 vif = &net->ipv4.vif_table[vr.vifi];
1117                 if (VIF_EXISTS(net, vr.vifi)) {
1118                         vr.icount = vif->pkt_in;
1119                         vr.ocount = vif->pkt_out;
1120                         vr.ibytes = vif->bytes_in;
1121                         vr.obytes = vif->bytes_out;
1122                         read_unlock(&mrt_lock);
1123
1124                         if (copy_to_user(arg, &vr, sizeof(vr)))
1125                                 return -EFAULT;
1126                         return 0;
1127                 }
1128                 read_unlock(&mrt_lock);
1129                 return -EADDRNOTAVAIL;
1130         case SIOCGETSGCNT:
1131                 if (copy_from_user(&sr, arg, sizeof(sr)))
1132                         return -EFAULT;
1133
1134                 read_lock(&mrt_lock);
1135                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1136                 if (c) {
1137                         sr.pktcnt = c->mfc_un.res.pkt;
1138                         sr.bytecnt = c->mfc_un.res.bytes;
1139                         sr.wrong_if = c->mfc_un.res.wrong_if;
1140                         read_unlock(&mrt_lock);
1141
1142                         if (copy_to_user(arg, &sr, sizeof(sr)))
1143                                 return -EFAULT;
1144                         return 0;
1145                 }
1146                 read_unlock(&mrt_lock);
1147                 return -EADDRNOTAVAIL;
1148         default:
1149                 return -ENOIOCTLCMD;
1150         }
1151 }
1152
1153
1154 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1155 {
1156         struct net_device *dev = ptr;
1157         struct net *net = dev_net(dev);
1158         struct vif_device *v;
1159         int ct;
1160
1161         if (!net_eq(dev_net(dev), net))
1162                 return NOTIFY_DONE;
1163
1164         if (event != NETDEV_UNREGISTER)
1165                 return NOTIFY_DONE;
1166         v = &net->ipv4.vif_table[0];
1167         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1168                 if (v->dev == dev)
1169                         vif_delete(net, ct, 1);
1170         }
1171         return NOTIFY_DONE;
1172 }
1173
1174
1175 static struct notifier_block ip_mr_notifier = {
1176         .notifier_call = ipmr_device_event,
1177 };
1178
1179 /*
1180  *      Encapsulate a packet by attaching a valid IPIP header to it.
1181  *      This avoids tunnel drivers and other mess and gives us the speed so
1182  *      important for multicast video.
1183  */
1184
1185 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1186 {
1187         struct iphdr *iph;
1188         struct iphdr *old_iph = ip_hdr(skb);
1189
1190         skb_push(skb, sizeof(struct iphdr));
1191         skb->transport_header = skb->network_header;
1192         skb_reset_network_header(skb);
1193         iph = ip_hdr(skb);
1194
1195         iph->version    =       4;
1196         iph->tos        =       old_iph->tos;
1197         iph->ttl        =       old_iph->ttl;
1198         iph->frag_off   =       0;
1199         iph->daddr      =       daddr;
1200         iph->saddr      =       saddr;
1201         iph->protocol   =       IPPROTO_IPIP;
1202         iph->ihl        =       5;
1203         iph->tot_len    =       htons(skb->len);
1204         ip_select_ident(iph, skb->dst, NULL);
1205         ip_send_check(iph);
1206
1207         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1208         nf_reset(skb);
1209 }
1210
1211 static inline int ipmr_forward_finish(struct sk_buff *skb)
1212 {
1213         struct ip_options * opt = &(IPCB(skb)->opt);
1214
1215         IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1216
1217         if (unlikely(opt->optlen))
1218                 ip_forward_options(skb);
1219
1220         return dst_output(skb);
1221 }
1222
1223 /*
1224  *      Processing handlers for ipmr_forward
1225  */
1226
1227 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1228 {
1229         struct net *net = mfc_net(c);
1230         const struct iphdr *iph = ip_hdr(skb);
1231         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1232         struct net_device *dev;
1233         struct rtable *rt;
1234         int    encap = 0;
1235
1236         if (vif->dev == NULL)
1237                 goto out_free;
1238
1239 #ifdef CONFIG_IP_PIMSM
1240         if (vif->flags & VIFF_REGISTER) {
1241                 vif->pkt_out++;
1242                 vif->bytes_out += skb->len;
1243                 vif->dev->stats.tx_bytes += skb->len;
1244                 vif->dev->stats.tx_packets++;
1245                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1246                 goto out_free;
1247         }
1248 #endif
1249
1250         if (vif->flags&VIFF_TUNNEL) {
1251                 struct flowi fl = { .oif = vif->link,
1252                                     .nl_u = { .ip4_u =
1253                                               { .daddr = vif->remote,
1254                                                 .saddr = vif->local,
1255                                                 .tos = RT_TOS(iph->tos) } },
1256                                     .proto = IPPROTO_IPIP };
1257                 if (ip_route_output_key(net, &rt, &fl))
1258                         goto out_free;
1259                 encap = sizeof(struct iphdr);
1260         } else {
1261                 struct flowi fl = { .oif = vif->link,
1262                                     .nl_u = { .ip4_u =
1263                                               { .daddr = iph->daddr,
1264                                                 .tos = RT_TOS(iph->tos) } },
1265                                     .proto = IPPROTO_IPIP };
1266                 if (ip_route_output_key(net, &rt, &fl))
1267                         goto out_free;
1268         }
1269
1270         dev = rt->u.dst.dev;
1271
1272         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1273                 /* Do not fragment multicasts. Alas, IPv4 does not
1274                    allow to send ICMP, so that packets will disappear
1275                    to blackhole.
1276                  */
1277
1278                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1279                 ip_rt_put(rt);
1280                 goto out_free;
1281         }
1282
1283         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1284
1285         if (skb_cow(skb, encap)) {
1286                 ip_rt_put(rt);
1287                 goto out_free;
1288         }
1289
1290         vif->pkt_out++;
1291         vif->bytes_out += skb->len;
1292
1293         dst_release(skb->dst);
1294         skb->dst = &rt->u.dst;
1295         ip_decrease_ttl(ip_hdr(skb));
1296
1297         /* FIXME: forward and output firewalls used to be called here.
1298          * What do we do with netfilter? -- RR */
1299         if (vif->flags & VIFF_TUNNEL) {
1300                 ip_encap(skb, vif->local, vif->remote);
1301                 /* FIXME: extra output firewall step used to be here. --RR */
1302                 vif->dev->stats.tx_packets++;
1303                 vif->dev->stats.tx_bytes += skb->len;
1304         }
1305
1306         IPCB(skb)->flags |= IPSKB_FORWARDED;
1307
1308         /*
1309          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1310          * not only before forwarding, but after forwarding on all output
1311          * interfaces. It is clear, if mrouter runs a multicasting
1312          * program, it should receive packets not depending to what interface
1313          * program is joined.
1314          * If we will not make it, the program will have to join on all
1315          * interfaces. On the other hand, multihoming host (or router, but
1316          * not mrouter) cannot join to more than one interface - it will
1317          * result in receiving multiple packets.
1318          */
1319         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1320                 ipmr_forward_finish);
1321         return;
1322
1323 out_free:
1324         kfree_skb(skb);
1325         return;
1326 }
1327
1328 static int ipmr_find_vif(struct net_device *dev)
1329 {
1330         struct net *net = dev_net(dev);
1331         int ct;
1332         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1333                 if (net->ipv4.vif_table[ct].dev == dev)
1334                         break;
1335         }
1336         return ct;
1337 }
1338
1339 /* "local" means that we should preserve one skb (for local delivery) */
1340
1341 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1342 {
1343         int psend = -1;
1344         int vif, ct;
1345         struct net *net = mfc_net(cache);
1346
1347         vif = cache->mfc_parent;
1348         cache->mfc_un.res.pkt++;
1349         cache->mfc_un.res.bytes += skb->len;
1350
1351         /*
1352          * Wrong interface: drop packet and (maybe) send PIM assert.
1353          */
1354         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1355                 int true_vifi;
1356
1357                 if (skb->rtable->fl.iif == 0) {
1358                         /* It is our own packet, looped back.
1359                            Very complicated situation...
1360
1361                            The best workaround until routing daemons will be
1362                            fixed is not to redistribute packet, if it was
1363                            send through wrong interface. It means, that
1364                            multicast applications WILL NOT work for
1365                            (S,G), which have default multicast route pointing
1366                            to wrong oif. In any case, it is not a good
1367                            idea to use multicasting applications on router.
1368                          */
1369                         goto dont_forward;
1370                 }
1371
1372                 cache->mfc_un.res.wrong_if++;
1373                 true_vifi = ipmr_find_vif(skb->dev);
1374
1375                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1376                     /* pimsm uses asserts, when switching from RPT to SPT,
1377                        so that we cannot check that packet arrived on an oif.
1378                        It is bad, but otherwise we would need to move pretty
1379                        large chunk of pimd to kernel. Ough... --ANK
1380                      */
1381                     (net->ipv4.mroute_do_pim ||
1382                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1383                     time_after(jiffies,
1384                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1385                         cache->mfc_un.res.last_assert = jiffies;
1386                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1387                 }
1388                 goto dont_forward;
1389         }
1390
1391         net->ipv4.vif_table[vif].pkt_in++;
1392         net->ipv4.vif_table[vif].bytes_in += skb->len;
1393
1394         /*
1395          *      Forward the frame
1396          */
1397         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1398                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1399                         if (psend != -1) {
1400                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1401                                 if (skb2)
1402                                         ipmr_queue_xmit(skb2, cache, psend);
1403                         }
1404                         psend = ct;
1405                 }
1406         }
1407         if (psend != -1) {
1408                 if (local) {
1409                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1410                         if (skb2)
1411                                 ipmr_queue_xmit(skb2, cache, psend);
1412                 } else {
1413                         ipmr_queue_xmit(skb, cache, psend);
1414                         return 0;
1415                 }
1416         }
1417
1418 dont_forward:
1419         if (!local)
1420                 kfree_skb(skb);
1421         return 0;
1422 }
1423
1424
1425 /*
1426  *      Multicast packets for forwarding arrive here
1427  */
1428
1429 int ip_mr_input(struct sk_buff *skb)
1430 {
1431         struct mfc_cache *cache;
1432         struct net *net = dev_net(skb->dev);
1433         int local = skb->rtable->rt_flags&RTCF_LOCAL;
1434
1435         /* Packet is looped back after forward, it should not be
1436            forwarded second time, but still can be delivered locally.
1437          */
1438         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1439                 goto dont_forward;
1440
1441         if (!local) {
1442                     if (IPCB(skb)->opt.router_alert) {
1443                             if (ip_call_ra_chain(skb))
1444                                     return 0;
1445                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1446                             /* IGMPv1 (and broken IGMPv2 implementations sort of
1447                                Cisco IOS <= 11.2(8)) do not put router alert
1448                                option to IGMP packets destined to routable
1449                                groups. It is very bad, because it means
1450                                that we can forward NO IGMP messages.
1451                              */
1452                             read_lock(&mrt_lock);
1453                             if (net->ipv4.mroute_sk) {
1454                                     nf_reset(skb);
1455                                     raw_rcv(net->ipv4.mroute_sk, skb);
1456                                     read_unlock(&mrt_lock);
1457                                     return 0;
1458                             }
1459                             read_unlock(&mrt_lock);
1460                     }
1461         }
1462
1463         read_lock(&mrt_lock);
1464         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1465
1466         /*
1467          *      No usable cache entry
1468          */
1469         if (cache == NULL) {
1470                 int vif;
1471
1472                 if (local) {
1473                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1474                         ip_local_deliver(skb);
1475                         if (skb2 == NULL) {
1476                                 read_unlock(&mrt_lock);
1477                                 return -ENOBUFS;
1478                         }
1479                         skb = skb2;
1480                 }
1481
1482                 vif = ipmr_find_vif(skb->dev);
1483                 if (vif >= 0) {
1484                         int err = ipmr_cache_unresolved(net, vif, skb);
1485                         read_unlock(&mrt_lock);
1486
1487                         return err;
1488                 }
1489                 read_unlock(&mrt_lock);
1490                 kfree_skb(skb);
1491                 return -ENODEV;
1492         }
1493
1494         ip_mr_forward(skb, cache, local);
1495
1496         read_unlock(&mrt_lock);
1497
1498         if (local)
1499                 return ip_local_deliver(skb);
1500
1501         return 0;
1502
1503 dont_forward:
1504         if (local)
1505                 return ip_local_deliver(skb);
1506         kfree_skb(skb);
1507         return 0;
1508 }
1509
1510 #ifdef CONFIG_IP_PIMSM
1511 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1512 {
1513         struct net_device *reg_dev = NULL;
1514         struct iphdr *encap;
1515         struct net *net = dev_net(skb->dev);
1516
1517         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1518         /*
1519            Check that:
1520            a. packet is really destinted to a multicast group
1521            b. packet is not a NULL-REGISTER
1522            c. packet is not truncated
1523          */
1524         if (!ipv4_is_multicast(encap->daddr) ||
1525             encap->tot_len == 0 ||
1526             ntohs(encap->tot_len) + pimlen > skb->len)
1527                 return 1;
1528
1529         read_lock(&mrt_lock);
1530         if (net->ipv4.mroute_reg_vif_num >= 0)
1531                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1532         if (reg_dev)
1533                 dev_hold(reg_dev);
1534         read_unlock(&mrt_lock);
1535
1536         if (reg_dev == NULL)
1537                 return 1;
1538
1539         skb->mac_header = skb->network_header;
1540         skb_pull(skb, (u8*)encap - skb->data);
1541         skb_reset_network_header(skb);
1542         skb->dev = reg_dev;
1543         skb->protocol = htons(ETH_P_IP);
1544         skb->ip_summed = 0;
1545         skb->pkt_type = PACKET_HOST;
1546         dst_release(skb->dst);
1547         skb->dst = NULL;
1548         reg_dev->stats.rx_bytes += skb->len;
1549         reg_dev->stats.rx_packets++;
1550         nf_reset(skb);
1551         netif_rx(skb);
1552         dev_put(reg_dev);
1553
1554         return 0;
1555 }
1556 #endif
1557
1558 #ifdef CONFIG_IP_PIMSM_V1
1559 /*
1560  * Handle IGMP messages of PIMv1
1561  */
1562
1563 int pim_rcv_v1(struct sk_buff * skb)
1564 {
1565         struct igmphdr *pim;
1566         struct net *net = dev_net(skb->dev);
1567
1568         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1569                 goto drop;
1570
1571         pim = igmp_hdr(skb);
1572
1573         if (!net->ipv4.mroute_do_pim ||
1574             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1575                 goto drop;
1576
1577         if (__pim_rcv(skb, sizeof(*pim))) {
1578 drop:
1579                 kfree_skb(skb);
1580         }
1581         return 0;
1582 }
1583 #endif
1584
1585 #ifdef CONFIG_IP_PIMSM_V2
1586 static int pim_rcv(struct sk_buff * skb)
1587 {
1588         struct pimreghdr *pim;
1589
1590         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1591                 goto drop;
1592
1593         pim = (struct pimreghdr *)skb_transport_header(skb);
1594         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1595             (pim->flags&PIM_NULL_REGISTER) ||
1596             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1597              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1598                 goto drop;
1599
1600         if (__pim_rcv(skb, sizeof(*pim))) {
1601 drop:
1602                 kfree_skb(skb);
1603         }
1604         return 0;
1605 }
1606 #endif
1607
1608 static int
1609 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1610 {
1611         int ct;
1612         struct rtnexthop *nhp;
1613         struct net *net = mfc_net(c);
1614         struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1615         u8 *b = skb_tail_pointer(skb);
1616         struct rtattr *mp_head;
1617
1618         if (dev)
1619                 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1620
1621         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1622
1623         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1624                 if (c->mfc_un.res.ttls[ct] < 255) {
1625                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1626                                 goto rtattr_failure;
1627                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1628                         nhp->rtnh_flags = 0;
1629                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1630                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1631                         nhp->rtnh_len = sizeof(*nhp);
1632                 }
1633         }
1634         mp_head->rta_type = RTA_MULTIPATH;
1635         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1636         rtm->rtm_type = RTN_MULTICAST;
1637         return 1;
1638
1639 rtattr_failure:
1640         nlmsg_trim(skb, b);
1641         return -EMSGSIZE;
1642 }
1643
1644 int ipmr_get_route(struct net *net,
1645                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1646 {
1647         int err;
1648         struct mfc_cache *cache;
1649         struct rtable *rt = skb->rtable;
1650
1651         read_lock(&mrt_lock);
1652         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1653
1654         if (cache == NULL) {
1655                 struct sk_buff *skb2;
1656                 struct iphdr *iph;
1657                 struct net_device *dev;
1658                 int vif;
1659
1660                 if (nowait) {
1661                         read_unlock(&mrt_lock);
1662                         return -EAGAIN;
1663                 }
1664
1665                 dev = skb->dev;
1666                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1667                         read_unlock(&mrt_lock);
1668                         return -ENODEV;
1669                 }
1670                 skb2 = skb_clone(skb, GFP_ATOMIC);
1671                 if (!skb2) {
1672                         read_unlock(&mrt_lock);
1673                         return -ENOMEM;
1674                 }
1675
1676                 skb_push(skb2, sizeof(struct iphdr));
1677                 skb_reset_network_header(skb2);
1678                 iph = ip_hdr(skb2);
1679                 iph->ihl = sizeof(struct iphdr) >> 2;
1680                 iph->saddr = rt->rt_src;
1681                 iph->daddr = rt->rt_dst;
1682                 iph->version = 0;
1683                 err = ipmr_cache_unresolved(net, vif, skb2);
1684                 read_unlock(&mrt_lock);
1685                 return err;
1686         }
1687
1688         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1689                 cache->mfc_flags |= MFC_NOTIFY;
1690         err = ipmr_fill_mroute(skb, cache, rtm);
1691         read_unlock(&mrt_lock);
1692         return err;
1693 }
1694
1695 #ifdef CONFIG_PROC_FS
1696 /*
1697  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1698  */
1699 struct ipmr_vif_iter {
1700         struct seq_net_private p;
1701         int ct;
1702 };
1703
1704 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1705                                            struct ipmr_vif_iter *iter,
1706                                            loff_t pos)
1707 {
1708         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1709                 if (!VIF_EXISTS(net, iter->ct))
1710                         continue;
1711                 if (pos-- == 0)
1712                         return &net->ipv4.vif_table[iter->ct];
1713         }
1714         return NULL;
1715 }
1716
1717 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1718         __acquires(mrt_lock)
1719 {
1720         struct net *net = seq_file_net(seq);
1721
1722         read_lock(&mrt_lock);
1723         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1724                 : SEQ_START_TOKEN;
1725 }
1726
1727 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1728 {
1729         struct ipmr_vif_iter *iter = seq->private;
1730         struct net *net = seq_file_net(seq);
1731
1732         ++*pos;
1733         if (v == SEQ_START_TOKEN)
1734                 return ipmr_vif_seq_idx(net, iter, 0);
1735
1736         while (++iter->ct < net->ipv4.maxvif) {
1737                 if (!VIF_EXISTS(net, iter->ct))
1738                         continue;
1739                 return &net->ipv4.vif_table[iter->ct];
1740         }
1741         return NULL;
1742 }
1743
1744 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1745         __releases(mrt_lock)
1746 {
1747         read_unlock(&mrt_lock);
1748 }
1749
1750 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1751 {
1752         struct net *net = seq_file_net(seq);
1753
1754         if (v == SEQ_START_TOKEN) {
1755                 seq_puts(seq,
1756                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1757         } else {
1758                 const struct vif_device *vif = v;
1759                 const char *name =  vif->dev ? vif->dev->name : "none";
1760
1761                 seq_printf(seq,
1762                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1763                            vif - net->ipv4.vif_table,
1764                            name, vif->bytes_in, vif->pkt_in,
1765                            vif->bytes_out, vif->pkt_out,
1766                            vif->flags, vif->local, vif->remote);
1767         }
1768         return 0;
1769 }
1770
1771 static const struct seq_operations ipmr_vif_seq_ops = {
1772         .start = ipmr_vif_seq_start,
1773         .next  = ipmr_vif_seq_next,
1774         .stop  = ipmr_vif_seq_stop,
1775         .show  = ipmr_vif_seq_show,
1776 };
1777
1778 static int ipmr_vif_open(struct inode *inode, struct file *file)
1779 {
1780         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1781                             sizeof(struct ipmr_vif_iter));
1782 }
1783
1784 static const struct file_operations ipmr_vif_fops = {
1785         .owner   = THIS_MODULE,
1786         .open    = ipmr_vif_open,
1787         .read    = seq_read,
1788         .llseek  = seq_lseek,
1789         .release = seq_release_net,
1790 };
1791
1792 struct ipmr_mfc_iter {
1793         struct seq_net_private p;
1794         struct mfc_cache **cache;
1795         int ct;
1796 };
1797
1798
1799 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1800                                           struct ipmr_mfc_iter *it, loff_t pos)
1801 {
1802         struct mfc_cache *mfc;
1803
1804         it->cache = net->ipv4.mfc_cache_array;
1805         read_lock(&mrt_lock);
1806         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1807                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1808                      mfc; mfc = mfc->next)
1809                         if (pos-- == 0)
1810                                 return mfc;
1811         read_unlock(&mrt_lock);
1812
1813         it->cache = &mfc_unres_queue;
1814         spin_lock_bh(&mfc_unres_lock);
1815         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1816                 if (net_eq(mfc_net(mfc), net) &&
1817                     pos-- == 0)
1818                         return mfc;
1819         spin_unlock_bh(&mfc_unres_lock);
1820
1821         it->cache = NULL;
1822         return NULL;
1823 }
1824
1825
1826 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1827 {
1828         struct ipmr_mfc_iter *it = seq->private;
1829         struct net *net = seq_file_net(seq);
1830
1831         it->cache = NULL;
1832         it->ct = 0;
1833         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1834                 : SEQ_START_TOKEN;
1835 }
1836
1837 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1838 {
1839         struct mfc_cache *mfc = v;
1840         struct ipmr_mfc_iter *it = seq->private;
1841         struct net *net = seq_file_net(seq);
1842
1843         ++*pos;
1844
1845         if (v == SEQ_START_TOKEN)
1846                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1847
1848         if (mfc->next)
1849                 return mfc->next;
1850
1851         if (it->cache == &mfc_unres_queue)
1852                 goto end_of_list;
1853
1854         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1855
1856         while (++it->ct < MFC_LINES) {
1857                 mfc = net->ipv4.mfc_cache_array[it->ct];
1858                 if (mfc)
1859                         return mfc;
1860         }
1861
1862         /* exhausted cache_array, show unresolved */
1863         read_unlock(&mrt_lock);
1864         it->cache = &mfc_unres_queue;
1865         it->ct = 0;
1866
1867         spin_lock_bh(&mfc_unres_lock);
1868         mfc = mfc_unres_queue;
1869         while (mfc && !net_eq(mfc_net(mfc), net))
1870                 mfc = mfc->next;
1871         if (mfc)
1872                 return mfc;
1873
1874  end_of_list:
1875         spin_unlock_bh(&mfc_unres_lock);
1876         it->cache = NULL;
1877
1878         return NULL;
1879 }
1880
1881 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1882 {
1883         struct ipmr_mfc_iter *it = seq->private;
1884         struct net *net = seq_file_net(seq);
1885
1886         if (it->cache == &mfc_unres_queue)
1887                 spin_unlock_bh(&mfc_unres_lock);
1888         else if (it->cache == net->ipv4.mfc_cache_array)
1889                 read_unlock(&mrt_lock);
1890 }
1891
1892 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1893 {
1894         int n;
1895         struct net *net = seq_file_net(seq);
1896
1897         if (v == SEQ_START_TOKEN) {
1898                 seq_puts(seq,
1899                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1900         } else {
1901                 const struct mfc_cache *mfc = v;
1902                 const struct ipmr_mfc_iter *it = seq->private;
1903
1904                 seq_printf(seq, "%08lX %08lX %-3hd",
1905                            (unsigned long) mfc->mfc_mcastgrp,
1906                            (unsigned long) mfc->mfc_origin,
1907                            mfc->mfc_parent);
1908
1909                 if (it->cache != &mfc_unres_queue) {
1910                         seq_printf(seq, " %8lu %8lu %8lu",
1911                                    mfc->mfc_un.res.pkt,
1912                                    mfc->mfc_un.res.bytes,
1913                                    mfc->mfc_un.res.wrong_if);
1914                         for (n = mfc->mfc_un.res.minvif;
1915                              n < mfc->mfc_un.res.maxvif; n++ ) {
1916                                 if (VIF_EXISTS(net, n) &&
1917                                     mfc->mfc_un.res.ttls[n] < 255)
1918                                         seq_printf(seq,
1919                                            " %2d:%-3d",
1920                                            n, mfc->mfc_un.res.ttls[n]);
1921                         }
1922                 } else {
1923                         /* unresolved mfc_caches don't contain
1924                          * pkt, bytes and wrong_if values
1925                          */
1926                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1927                 }
1928                 seq_putc(seq, '\n');
1929         }
1930         return 0;
1931 }
1932
1933 static const struct seq_operations ipmr_mfc_seq_ops = {
1934         .start = ipmr_mfc_seq_start,
1935         .next  = ipmr_mfc_seq_next,
1936         .stop  = ipmr_mfc_seq_stop,
1937         .show  = ipmr_mfc_seq_show,
1938 };
1939
1940 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1941 {
1942         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1943                             sizeof(struct ipmr_mfc_iter));
1944 }
1945
1946 static const struct file_operations ipmr_mfc_fops = {
1947         .owner   = THIS_MODULE,
1948         .open    = ipmr_mfc_open,
1949         .read    = seq_read,
1950         .llseek  = seq_lseek,
1951         .release = seq_release_net,
1952 };
1953 #endif
1954
1955 #ifdef CONFIG_IP_PIMSM_V2
1956 static struct net_protocol pim_protocol = {
1957         .handler        =       pim_rcv,
1958 };
1959 #endif
1960
1961
1962 /*
1963  *      Setup for IP multicast routing
1964  */
1965 static int __net_init ipmr_net_init(struct net *net)
1966 {
1967         int err = 0;
1968
1969         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1970                                       GFP_KERNEL);
1971         if (!net->ipv4.vif_table) {
1972                 err = -ENOMEM;
1973                 goto fail;
1974         }
1975
1976         /* Forwarding cache */
1977         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1978                                             sizeof(struct mfc_cache *),
1979                                             GFP_KERNEL);
1980         if (!net->ipv4.mfc_cache_array) {
1981                 err = -ENOMEM;
1982                 goto fail_mfc_cache;
1983         }
1984
1985 #ifdef CONFIG_IP_PIMSM
1986         net->ipv4.mroute_reg_vif_num = -1;
1987 #endif
1988
1989 #ifdef CONFIG_PROC_FS
1990         err = -ENOMEM;
1991         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1992                 goto proc_vif_fail;
1993         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1994                 goto proc_cache_fail;
1995 #endif
1996         return 0;
1997
1998 #ifdef CONFIG_PROC_FS
1999 proc_cache_fail:
2000         proc_net_remove(net, "ip_mr_vif");
2001 proc_vif_fail:
2002         kfree(net->ipv4.mfc_cache_array);
2003 #endif
2004 fail_mfc_cache:
2005         kfree(net->ipv4.vif_table);
2006 fail:
2007         return err;
2008 }
2009
2010 static void __net_exit ipmr_net_exit(struct net *net)
2011 {
2012 #ifdef CONFIG_PROC_FS
2013         proc_net_remove(net, "ip_mr_cache");
2014         proc_net_remove(net, "ip_mr_vif");
2015 #endif
2016         kfree(net->ipv4.mfc_cache_array);
2017         kfree(net->ipv4.vif_table);
2018 }
2019
2020 static struct pernet_operations ipmr_net_ops = {
2021         .init = ipmr_net_init,
2022         .exit = ipmr_net_exit,
2023 };
2024
2025 int __init ip_mr_init(void)
2026 {
2027         int err;
2028
2029         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2030                                        sizeof(struct mfc_cache),
2031                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2032                                        NULL);
2033         if (!mrt_cachep)
2034                 return -ENOMEM;
2035
2036         err = register_pernet_subsys(&ipmr_net_ops);
2037         if (err)
2038                 goto reg_pernet_fail;
2039
2040         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2041         err = register_netdevice_notifier(&ip_mr_notifier);
2042         if (err)
2043                 goto reg_notif_fail;
2044         return 0;
2045
2046 reg_notif_fail:
2047         del_timer(&ipmr_expire_timer);
2048         unregister_pernet_subsys(&ipmr_net_ops);
2049 reg_pernet_fail:
2050         kmem_cache_destroy(mrt_cachep);
2051         return err;
2052 }