[IPV6]: ROUTE: Clean up rt6_select() code path in ip6_route_{input,output}().
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  At level 3 or above RDBG() and
 * RT6_TRACE() expand to printk() calls; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already have a nexthop (via rt6_alloc_clone()) instead of using them
 * directly.
 */
#define CLONE_OFFLINK_ROUTE 0

/* Strictness flags understood by rt6_select()/rt6_score_route(). */
#define RT6_SELECT_F_IFACE      0x1	/* reject routes not matching the interface */
#define RT6_SELECT_F_REACHABLE  0x2	/* reject routes whose neighbour is not NUD_VALID */
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254         if (rt6_check_neigh(rt))
255                 m |= 4;
256         else if (strict & RT6_SELECT_F_REACHABLE)
257                 return -1;
258         return m;
259 }
260
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262                                    int strict)
263 {
264         struct rt6_info *match = NULL, *last = NULL;
265         struct rt6_info *rt, *rt0 = *head;
266         u32 metric;
267         int mpri = -1;
268
269         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270                   __FUNCTION__, head, head ? *head : NULL, oif);
271
272         for (rt = rt0, metric = rt0->rt6i_metric;
273              rt && rt->rt6i_metric == metric;
274              rt = rt->u.next) {
275                 int m;
276
277                 if (rt6_check_expired(rt))
278                         continue;
279
280                 last = rt;
281
282                 m = rt6_score_route(rt, oif, strict);
283                 if (m < 0)
284                         continue;
285
286                 if (m > mpri) {
287                         match = rt;
288                         mpri = m;
289                 }
290         }
291
292         if (!match &&
293             (strict & RT6_SELECT_F_REACHABLE) &&
294             last && last != rt0) {
295                 /* no entries matched; do round-robin */
296                 *head = rt0->u.next;
297                 rt0->u.next = last->u.next;
298                 last->u.next = rt0;
299         }
300
301         RT6_TRACE("%s() => %p, score=%d\n",
302                   __FUNCTION__, match, mpri);
303
304         return (match ? match : &ip6_null_entry);
305 }
306
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308                             int oif, int strict)
309 {
310         struct fib6_node *fn;
311         struct rt6_info *rt;
312
313         read_lock_bh(&rt6_lock);
314         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315         rt = rt6_device_match(fn->leaf, oif, strict);
316         dst_hold(&rt->u.dst);
317         rt->u.dst.__use++;
318         read_unlock_bh(&rt6_lock);
319
320         rt->u.dst.lastuse = jiffies;
321         if (rt->u.dst.error == 0)
322                 return rt;
323         dst_release(&rt->u.dst);
324         return NULL;
325 }
326
327 /* ip6_ins_rt is called with FREE rt6_lock.
328    It takes new route entry, the addition fails by any reason the
329    route is freed. In any case, if caller does not hold it, it may
330    be destroyed.
331  */
332
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334                 void *_rtattr, struct netlink_skb_parms *req)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346                                       struct in6_addr *saddr)
347 {
348         struct rt6_info *rt;
349
350         /*
351          *      Clone the route.
352          */
353
354         rt = ip6_rt_copy(ort);
355
356         if (rt) {
357                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358                         if (rt->rt6i_dst.plen != 128 &&
359                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360                                 rt->rt6i_flags |= RTF_ANYCAST;
361                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
362                 }
363
364                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365                 rt->rt6i_dst.plen = 128;
366                 rt->rt6i_flags |= RTF_CACHE;
367                 rt->u.dst.flags |= DST_HOST;
368
369 #ifdef CONFIG_IPV6_SUBTREES
370                 if (rt->rt6i_src.plen && saddr) {
371                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372                         rt->rt6i_src.plen = 128;
373                 }
374 #endif
375
376                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
378         }
379
380         return rt;
381 }
382
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384 {
385         struct rt6_info *rt = ip6_rt_copy(ort);
386         if (rt) {
387                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388                 rt->rt6i_dst.plen = 128;
389                 rt->rt6i_flags |= RTF_CACHE;
390                 if (rt->rt6i_flags & RTF_REJECT)
391                         rt->u.dst.error = ort->u.dst.error;
392                 rt->u.dst.flags |= DST_HOST;
393                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394         }
395         return rt;
396 }
397
/*
 * BACKTRACK() - climb towards the root of the FIB after a failed strict
 * selection.
 *
 * Relies on the caller's locals `rt`, `fn` and `strict` and on the labels
 * `out` and `restart` in the enclosing function.  When the selection gave
 * &ip6_null_entry and `strict` is non-zero, walk up through fn->parent:
 * jump to `out` on reaching a node flagged RTN_ROOT, or to `restart` on
 * the first ancestor flagged RTN_RTINFO.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
408
409
410 void ip6_route_input(struct sk_buff *skb)
411 {
412         struct fib6_node *fn;
413         struct rt6_info *rt, *nrt;
414         int strict;
415         int attempts = 3;
416         int err;
417
418         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
419
420 relookup:
421         read_lock_bh(&rt6_lock);
422
423         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424                          &skb->nh.ipv6h->saddr);
425
426 restart:
427         rt = fn->leaf;
428
429         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
430         if (rt == &ip6_null_entry)
431                 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
432         BACKTRACK();
433         if ((rt->rt6i_flags & RTF_CACHE))
434                 goto out;
435
436         dst_hold(&rt->u.dst);
437         read_unlock_bh(&rt6_lock);
438
439         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
440                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
441         else {
442 #if CLONE_OFFLINK_ROUTE
443                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
444 #else
445                 goto out2;
446 #endif
447         }
448
449         dst_release(&rt->u.dst);
450         rt = nrt ? : &ip6_null_entry;
451
452         dst_hold(&rt->u.dst);
453         if (nrt) {
454                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
455                 if (!err)
456                         goto out2;
457         }
458
459         if (--attempts <= 0)
460                 goto out2;
461
462         /*
463          * Race condition! In the gap, when rt6_lock was
464          * released someone could insert this route.  Relookup.
465          */
466         dst_release(&rt->u.dst);
467         goto relookup;
468
469 out:
470         dst_hold(&rt->u.dst);
471         read_unlock_bh(&rt6_lock);
472 out2:
473         rt->u.dst.lastuse = jiffies;
474         rt->u.dst.__use++;
475         skb->dst = (struct dst_entry *) rt;
476         return;
477 }
478
479 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
480 {
481         struct fib6_node *fn;
482         struct rt6_info *rt, *nrt;
483         int strict;
484         int attempts = 3;
485         int err;
486
487         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
488
489 relookup:
490         read_lock_bh(&rt6_lock);
491
492         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
493
494 restart:
495         rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
496         if (rt == &ip6_null_entry)
497                 rt = rt6_select(&fn->leaf, fl->oif, strict);
498         BACKTRACK();
499         if ((rt->rt6i_flags & RTF_CACHE))
500                 goto out;
501
502         dst_hold(&rt->u.dst);
503         read_unlock_bh(&rt6_lock);
504
505         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
506                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
507         else {
508 #if CLONE_OFFLINK_ROUTE
509                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
510 #else
511                 goto out2;
512 #endif
513         }
514
515         dst_release(&rt->u.dst);
516         rt = nrt ? : &ip6_null_entry;
517
518         dst_hold(&rt->u.dst);
519         if (nrt) {
520                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
521                 if (!err)
522                         goto out2;
523         }
524
525         if (--attempts <= 0)
526                 goto out2;
527
528         /*
529          * Race condition! In the gap, when rt6_lock was
530          * released someone could insert this route.  Relookup.
531          */
532         dst_release(&rt->u.dst);
533         goto relookup;
534
535 out:
536         dst_hold(&rt->u.dst);
537         read_unlock_bh(&rt6_lock);
538 out2:
539         rt->u.dst.lastuse = jiffies;
540         rt->u.dst.__use++;
541         return &rt->u.dst;
542 }
543
544
545 /*
546  *      Destination cache support functions
547  */
548
549 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
550 {
551         struct rt6_info *rt;
552
553         rt = (struct rt6_info *) dst;
554
555         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
556                 return dst;
557
558         return NULL;
559 }
560
561 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
562 {
563         struct rt6_info *rt = (struct rt6_info *) dst;
564
565         if (rt) {
566                 if (rt->rt6i_flags & RTF_CACHE)
567                         ip6_del_rt(rt, NULL, NULL, NULL);
568                 else
569                         dst_release(dst);
570         }
571         return NULL;
572 }
573
574 static void ip6_link_failure(struct sk_buff *skb)
575 {
576         struct rt6_info *rt;
577
578         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
579
580         rt = (struct rt6_info *) skb->dst;
581         if (rt) {
582                 if (rt->rt6i_flags&RTF_CACHE) {
583                         dst_set_expires(&rt->u.dst, 0);
584                         rt->rt6i_flags |= RTF_EXPIRES;
585                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
586                         rt->rt6i_node->fn_sernum = -1;
587         }
588 }
589
590 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
591 {
592         struct rt6_info *rt6 = (struct rt6_info*)dst;
593
594         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
595                 rt6->rt6i_flags |= RTF_MODIFIED;
596                 if (mtu < IPV6_MIN_MTU) {
597                         mtu = IPV6_MIN_MTU;
598                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
599                 }
600                 dst->metrics[RTAX_MTU-1] = mtu;
601         }
602 }
603
604 /* Protected by rt6_lock.  */
605 static struct dst_entry *ndisc_dst_gc_list;
606 static int ipv6_get_mtu(struct net_device *dev);
607
608 static inline unsigned int ipv6_advmss(unsigned int mtu)
609 {
610         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
611
612         if (mtu < ip6_rt_min_advmss)
613                 mtu = ip6_rt_min_advmss;
614
615         /*
616          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
617          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
618          * IPV6_MAXPLEN is also valid and means: "any MSS, 
619          * rely only on pmtu discovery"
620          */
621         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
622                 mtu = IPV6_MAXPLEN;
623         return mtu;
624 }
625
626 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
627                                   struct neighbour *neigh,
628                                   struct in6_addr *addr,
629                                   int (*output)(struct sk_buff *))
630 {
631         struct rt6_info *rt;
632         struct inet6_dev *idev = in6_dev_get(dev);
633
634         if (unlikely(idev == NULL))
635                 return NULL;
636
637         rt = ip6_dst_alloc();
638         if (unlikely(rt == NULL)) {
639                 in6_dev_put(idev);
640                 goto out;
641         }
642
643         dev_hold(dev);
644         if (neigh)
645                 neigh_hold(neigh);
646         else
647                 neigh = ndisc_get_neigh(dev, addr);
648
649         rt->rt6i_dev      = dev;
650         rt->rt6i_idev     = idev;
651         rt->rt6i_nexthop  = neigh;
652         atomic_set(&rt->u.dst.__refcnt, 1);
653         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
654         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
655         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
656         rt->u.dst.output  = output;
657
658 #if 0   /* there's no chance to use these for ndisc */
659         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
660                                 ? DST_HOST 
661                                 : 0;
662         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
663         rt->rt6i_dst.plen = 128;
664 #endif
665
666         write_lock_bh(&rt6_lock);
667         rt->u.dst.next = ndisc_dst_gc_list;
668         ndisc_dst_gc_list = &rt->u.dst;
669         write_unlock_bh(&rt6_lock);
670
671         fib6_force_start_gc();
672
673 out:
674         return (struct dst_entry *)rt;
675 }
676
677 int ndisc_dst_gc(int *more)
678 {
679         struct dst_entry *dst, *next, **pprev;
680         int freed;
681
682         next = NULL;
683         pprev = &ndisc_dst_gc_list;
684         freed = 0;
685         while ((dst = *pprev) != NULL) {
686                 if (!atomic_read(&dst->__refcnt)) {
687                         *pprev = dst->next;
688                         dst_free(dst);
689                         freed++;
690                 } else {
691                         pprev = &dst->next;
692                         (*more)++;
693                 }
694         }
695
696         return freed;
697 }
698
699 static int ip6_dst_gc(void)
700 {
701         static unsigned expire = 30*HZ;
702         static unsigned long last_gc;
703         unsigned long now = jiffies;
704
705         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
706             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
707                 goto out;
708
709         expire++;
710         fib6_run_gc(expire);
711         last_gc = now;
712         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
713                 expire = ip6_rt_gc_timeout>>1;
714
715 out:
716         expire -= expire>>ip6_rt_gc_elasticity;
717         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
718 }
719
720 /* Clean host part of a prefix. Not necessary in radix tree,
721    but results in cleaner routing tables.
722
723    Remove it only when all the things will work!
724  */
725
726 static int ipv6_get_mtu(struct net_device *dev)
727 {
728         int mtu = IPV6_MIN_MTU;
729         struct inet6_dev *idev;
730
731         idev = in6_dev_get(dev);
732         if (idev) {
733                 mtu = idev->cnf.mtu6;
734                 in6_dev_put(idev);
735         }
736         return mtu;
737 }
738
739 int ipv6_get_hoplimit(struct net_device *dev)
740 {
741         int hoplimit = ipv6_devconf.hop_limit;
742         struct inet6_dev *idev;
743
744         idev = in6_dev_get(dev);
745         if (idev) {
746                 hoplimit = idev->cnf.hop_limit;
747                 in6_dev_put(idev);
748         }
749         return hoplimit;
750 }
751
752 /*
753  *
754  */
755
756 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
757                 void *_rtattr, struct netlink_skb_parms *req)
758 {
759         int err;
760         struct rtmsg *r;
761         struct rtattr **rta;
762         struct rt6_info *rt = NULL;
763         struct net_device *dev = NULL;
764         struct inet6_dev *idev = NULL;
765         int addr_type;
766
767         rta = (struct rtattr **) _rtattr;
768
769         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
770                 return -EINVAL;
771 #ifndef CONFIG_IPV6_SUBTREES
772         if (rtmsg->rtmsg_src_len)
773                 return -EINVAL;
774 #endif
775         if (rtmsg->rtmsg_ifindex) {
776                 err = -ENODEV;
777                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
778                 if (!dev)
779                         goto out;
780                 idev = in6_dev_get(dev);
781                 if (!idev)
782                         goto out;
783         }
784
785         if (rtmsg->rtmsg_metric == 0)
786                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
787
788         rt = ip6_dst_alloc();
789
790         if (rt == NULL) {
791                 err = -ENOMEM;
792                 goto out;
793         }
794
795         rt->u.dst.obsolete = -1;
796         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
797         if (nlh && (r = NLMSG_DATA(nlh))) {
798                 rt->rt6i_protocol = r->rtm_protocol;
799         } else {
800                 rt->rt6i_protocol = RTPROT_BOOT;
801         }
802
803         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
804
805         if (addr_type & IPV6_ADDR_MULTICAST)
806                 rt->u.dst.input = ip6_mc_input;
807         else
808                 rt->u.dst.input = ip6_forward;
809
810         rt->u.dst.output = ip6_output;
811
812         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
813                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
814         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
815         if (rt->rt6i_dst.plen == 128)
816                rt->u.dst.flags = DST_HOST;
817
818 #ifdef CONFIG_IPV6_SUBTREES
819         ipv6_addr_prefix(&rt->rt6i_src.addr, 
820                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
821         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
822 #endif
823
824         rt->rt6i_metric = rtmsg->rtmsg_metric;
825
826         /* We cannot add true routes via loopback here,
827            they would result in kernel looping; promote them to reject routes
828          */
829         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
830             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
831                 /* hold loopback dev/idev if we haven't done so. */
832                 if (dev != &loopback_dev) {
833                         if (dev) {
834                                 dev_put(dev);
835                                 in6_dev_put(idev);
836                         }
837                         dev = &loopback_dev;
838                         dev_hold(dev);
839                         idev = in6_dev_get(dev);
840                         if (!idev) {
841                                 err = -ENODEV;
842                                 goto out;
843                         }
844                 }
845                 rt->u.dst.output = ip6_pkt_discard_out;
846                 rt->u.dst.input = ip6_pkt_discard;
847                 rt->u.dst.error = -ENETUNREACH;
848                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
849                 goto install_route;
850         }
851
852         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
853                 struct in6_addr *gw_addr;
854                 int gwa_type;
855
856                 gw_addr = &rtmsg->rtmsg_gateway;
857                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
858                 gwa_type = ipv6_addr_type(gw_addr);
859
860                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
861                         struct rt6_info *grt;
862
863                         /* IPv6 strictly inhibits using not link-local
864                            addresses as nexthop address.
865                            Otherwise, router will not able to send redirects.
866                            It is very good, but in some (rare!) circumstances
867                            (SIT, PtP, NBMA NOARP links) it is handy to allow
868                            some exceptions. --ANK
869                          */
870                         err = -EINVAL;
871                         if (!(gwa_type&IPV6_ADDR_UNICAST))
872                                 goto out;
873
874                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
875
876                         err = -EHOSTUNREACH;
877                         if (grt == NULL)
878                                 goto out;
879                         if (dev) {
880                                 if (dev != grt->rt6i_dev) {
881                                         dst_release(&grt->u.dst);
882                                         goto out;
883                                 }
884                         } else {
885                                 dev = grt->rt6i_dev;
886                                 idev = grt->rt6i_idev;
887                                 dev_hold(dev);
888                                 in6_dev_hold(grt->rt6i_idev);
889                         }
890                         if (!(grt->rt6i_flags&RTF_GATEWAY))
891                                 err = 0;
892                         dst_release(&grt->u.dst);
893
894                         if (err)
895                                 goto out;
896                 }
897                 err = -EINVAL;
898                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
899                         goto out;
900         }
901
902         err = -ENODEV;
903         if (dev == NULL)
904                 goto out;
905
906         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
907                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
908                 if (IS_ERR(rt->rt6i_nexthop)) {
909                         err = PTR_ERR(rt->rt6i_nexthop);
910                         rt->rt6i_nexthop = NULL;
911                         goto out;
912                 }
913         }
914
915         rt->rt6i_flags = rtmsg->rtmsg_flags;
916
917 install_route:
918         if (rta && rta[RTA_METRICS-1]) {
919                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
920                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
921
922                 while (RTA_OK(attr, attrlen)) {
923                         unsigned flavor = attr->rta_type;
924                         if (flavor) {
925                                 if (flavor > RTAX_MAX) {
926                                         err = -EINVAL;
927                                         goto out;
928                                 }
929                                 rt->u.dst.metrics[flavor-1] =
930                                         *(u32 *)RTA_DATA(attr);
931                         }
932                         attr = RTA_NEXT(attr, attrlen);
933                 }
934         }
935
936         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
937                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
938         if (!rt->u.dst.metrics[RTAX_MTU-1])
939                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
940         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
941                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
942         rt->u.dst.dev = dev;
943         rt->rt6i_idev = idev;
944         return ip6_ins_rt(rt, nlh, _rtattr, req);
945
946 out:
947         if (dev)
948                 dev_put(dev);
949         if (idev)
950                 in6_dev_put(idev);
951         if (rt)
952                 dst_free((struct dst_entry *) rt);
953         return err;
954 }
955
956 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
957 {
958         int err;
959
960         write_lock_bh(&rt6_lock);
961
962         err = fib6_del(rt, nlh, _rtattr, req);
963         dst_release(&rt->u.dst);
964
965         write_unlock_bh(&rt6_lock);
966
967         return err;
968 }
969
970 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
971 {
972         struct fib6_node *fn;
973         struct rt6_info *rt;
974         int err = -ESRCH;
975
976         read_lock_bh(&rt6_lock);
977
978         fn = fib6_locate(&ip6_routing_table,
979                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
980                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
981         
982         if (fn) {
983                 for (rt = fn->leaf; rt; rt = rt->u.next) {
984                         if (rtmsg->rtmsg_ifindex &&
985                             (rt->rt6i_dev == NULL ||
986                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
987                                 continue;
988                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
989                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
990                                 continue;
991                         if (rtmsg->rtmsg_metric &&
992                             rtmsg->rtmsg_metric != rt->rt6i_metric)
993                                 continue;
994                         dst_hold(&rt->u.dst);
995                         read_unlock_bh(&rt6_lock);
996
997                         return ip6_del_rt(rt, nlh, _rtattr, req);
998                 }
999         }
1000         read_unlock_bh(&rt6_lock);
1001
1002         return err;
1003 }
1004
1005 /*
1006  *      Handle redirects
1007  */
1008 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1009                   struct neighbour *neigh, u8 *lladdr, int on_link)
1010 {
1011         struct rt6_info *rt, *nrt;
1012
1013         /* Locate old route to this destination. */
1014         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1015
1016         if (rt == NULL)
1017                 return;
1018
1019         if (neigh->dev != rt->rt6i_dev)
1020                 goto out;
1021
1022         /*
1023          * Current route is on-link; redirect is always invalid.
1024          * 
1025          * Seems, previous statement is not true. It could
1026          * be node, which looks for us as on-link (f.e. proxy ndisc)
1027          * But then router serving it might decide, that we should
1028          * know truth 8)8) --ANK (980726).
1029          */
1030         if (!(rt->rt6i_flags&RTF_GATEWAY))
1031                 goto out;
1032
1033         /*
1034          *      RFC 2461 specifies that redirects should only be
1035          *      accepted if they come from the nexthop to the target.
1036          *      Due to the way default routers are chosen, this notion
1037          *      is a bit fuzzy and one might need to check all default
1038          *      routers.
1039          */
1040         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1041                 if (rt->rt6i_flags & RTF_DEFAULT) {
1042                         struct rt6_info *rt1;
1043
1044                         read_lock(&rt6_lock);
1045                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1046                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1047                                         dst_hold(&rt1->u.dst);
1048                                         dst_release(&rt->u.dst);
1049                                         read_unlock(&rt6_lock);
1050                                         rt = rt1;
1051                                         goto source_ok;
1052                                 }
1053                         }
1054                         read_unlock(&rt6_lock);
1055                 }
1056                 if (net_ratelimit())
1057                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1058                                "for redirect target\n");
1059                 goto out;
1060         }
1061
1062 source_ok:
1063
1064         /*
1065          *      We have finally decided to accept it.
1066          */
1067
1068         neigh_update(neigh, lladdr, NUD_STALE, 
1069                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1070                      NEIGH_UPDATE_F_OVERRIDE|
1071                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1072                                      NEIGH_UPDATE_F_ISROUTER))
1073                      );
1074
1075         /*
1076          * Redirect received -> path was valid.
1077          * Look, redirects are sent only in response to data packets,
1078          * so that this nexthop apparently is reachable. --ANK
1079          */
1080         dst_confirm(&rt->u.dst);
1081
1082         /* Duplicate redirect: silently ignore. */
1083         if (neigh == rt->u.dst.neighbour)
1084                 goto out;
1085
1086         nrt = ip6_rt_copy(rt);
1087         if (nrt == NULL)
1088                 goto out;
1089
1090         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1091         if (on_link)
1092                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1093
1094         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1095         nrt->rt6i_dst.plen = 128;
1096         nrt->u.dst.flags |= DST_HOST;
1097
1098         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1099         nrt->rt6i_nexthop = neigh_clone(neigh);
1100         /* Reset pmtu, it may be better */
1101         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1102         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1103
1104         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1105                 goto out;
1106
1107         if (rt->rt6i_flags&RTF_CACHE) {
1108                 ip6_del_rt(rt, NULL, NULL, NULL);
1109                 return;
1110         }
1111
1112 out:
1113         dst_release(&rt->u.dst);
1114         return;
1115 }
1116
1117 /*
1118  *      Handle ICMP "packet too big" messages
1119  *      i.e. Path MTU discovery
1120  */
1121
1122 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1123                         struct net_device *dev, u32 pmtu)
1124 {
1125         struct rt6_info *rt, *nrt;
1126         int allfrag = 0;
1127
1128         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1129         if (rt == NULL)
1130                 return;
1131
1132         if (pmtu >= dst_mtu(&rt->u.dst))
1133                 goto out;
1134
1135         if (pmtu < IPV6_MIN_MTU) {
1136                 /*
1137                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1138                  * MTU (1280) and a fragment header should always be included
1139                  * after a node receiving Too Big message reporting PMTU is
1140                  * less than the IPv6 Minimum Link MTU.
1141                  */
1142                 pmtu = IPV6_MIN_MTU;
1143                 allfrag = 1;
1144         }
1145
1146         /* New mtu received -> path was valid.
1147            They are sent only in response to data packets,
1148            so that this nexthop apparently is reachable. --ANK
1149          */
1150         dst_confirm(&rt->u.dst);
1151
1152         /* Host route. If it is static, it would be better
1153            not to override it, but add new one, so that
1154            when cache entry will expire old pmtu
1155            would return automatically.
1156          */
1157         if (rt->rt6i_flags & RTF_CACHE) {
1158                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1159                 if (allfrag)
1160                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1161                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1162                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1163                 goto out;
1164         }
1165
1166         /* Network route.
1167            Two cases are possible:
1168            1. It is connected route. Action: COW
1169            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1170          */
1171         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1172                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1173         else
1174                 nrt = rt6_alloc_clone(rt, daddr);
1175
1176         if (nrt) {
1177                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1178                 if (allfrag)
1179                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1180
1181                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1182                  * happened within 5 mins, the recommended timer is 10 mins.
1183                  * Here this route expiration time is set to ip6_rt_mtu_expires
1184                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1185                  * and detecting PMTU increase will be automatically happened.
1186                  */
1187                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1188                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1189
1190                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1191         }
1192 out:
1193         dst_release(&rt->u.dst);
1194 }
1195
1196 /*
1197  *      Misc support functions
1198  */
1199
1200 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1201 {
1202         struct rt6_info *rt = ip6_dst_alloc();
1203
1204         if (rt) {
1205                 rt->u.dst.input = ort->u.dst.input;
1206                 rt->u.dst.output = ort->u.dst.output;
1207
1208                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1209                 rt->u.dst.dev = ort->u.dst.dev;
1210                 if (rt->u.dst.dev)
1211                         dev_hold(rt->u.dst.dev);
1212                 rt->rt6i_idev = ort->rt6i_idev;
1213                 if (rt->rt6i_idev)
1214                         in6_dev_hold(rt->rt6i_idev);
1215                 rt->u.dst.lastuse = jiffies;
1216                 rt->rt6i_expires = 0;
1217
1218                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1219                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1220                 rt->rt6i_metric = 0;
1221
1222                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223 #ifdef CONFIG_IPV6_SUBTREES
1224                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1225 #endif
1226         }
1227         return rt;
1228 }
1229
1230 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1231 {       
1232         struct rt6_info *rt;
1233         struct fib6_node *fn;
1234
1235         fn = &ip6_routing_table;
1236
1237         write_lock_bh(&rt6_lock);
1238         for (rt = fn->leaf; rt; rt=rt->u.next) {
1239                 if (dev == rt->rt6i_dev &&
1240                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1241                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1242                         break;
1243         }
1244         if (rt)
1245                 dst_hold(&rt->u.dst);
1246         write_unlock_bh(&rt6_lock);
1247         return rt;
1248 }
1249
1250 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1251                                      struct net_device *dev)
1252 {
1253         struct in6_rtmsg rtmsg;
1254
1255         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1256         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1257         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1258         rtmsg.rtmsg_metric = 1024;
1259         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1260
1261         rtmsg.rtmsg_ifindex = dev->ifindex;
1262
1263         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1264         return rt6_get_dflt_router(gwaddr, dev);
1265 }
1266
1267 void rt6_purge_dflt_routers(void)
1268 {
1269         struct rt6_info *rt;
1270
1271 restart:
1272         read_lock_bh(&rt6_lock);
1273         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1274                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1275                         dst_hold(&rt->u.dst);
1276
1277                         read_unlock_bh(&rt6_lock);
1278
1279                         ip6_del_rt(rt, NULL, NULL, NULL);
1280
1281                         goto restart;
1282                 }
1283         }
1284         read_unlock_bh(&rt6_lock);
1285 }
1286
1287 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1288 {
1289         struct in6_rtmsg rtmsg;
1290         int err;
1291
1292         switch(cmd) {
1293         case SIOCADDRT:         /* Add a route */
1294         case SIOCDELRT:         /* Delete a route */
1295                 if (!capable(CAP_NET_ADMIN))
1296                         return -EPERM;
1297                 err = copy_from_user(&rtmsg, arg,
1298                                      sizeof(struct in6_rtmsg));
1299                 if (err)
1300                         return -EFAULT;
1301                         
1302                 rtnl_lock();
1303                 switch (cmd) {
1304                 case SIOCADDRT:
1305                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1306                         break;
1307                 case SIOCDELRT:
1308                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1309                         break;
1310                 default:
1311                         err = -EINVAL;
1312                 }
1313                 rtnl_unlock();
1314
1315                 return err;
1316         };
1317
1318         return -EINVAL;
1319 }
1320
1321 /*
1322  *      Drop the packet on the floor
1323  */
1324
1325 static int ip6_pkt_discard(struct sk_buff *skb)
1326 {
1327         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1328         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1329         kfree_skb(skb);
1330         return 0;
1331 }
1332
1333 static int ip6_pkt_discard_out(struct sk_buff *skb)
1334 {
1335         skb->dev = skb->dst->dev;
1336         return ip6_pkt_discard(skb);
1337 }
1338
1339 /*
1340  *      Allocate a dst for local (unicast / anycast) address.
1341  */
1342
1343 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1344                                     const struct in6_addr *addr,
1345                                     int anycast)
1346 {
1347         struct rt6_info *rt = ip6_dst_alloc();
1348
1349         if (rt == NULL)
1350                 return ERR_PTR(-ENOMEM);
1351
1352         dev_hold(&loopback_dev);
1353         in6_dev_hold(idev);
1354
1355         rt->u.dst.flags = DST_HOST;
1356         rt->u.dst.input = ip6_input;
1357         rt->u.dst.output = ip6_output;
1358         rt->rt6i_dev = &loopback_dev;
1359         rt->rt6i_idev = idev;
1360         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1361         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1362         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1363         rt->u.dst.obsolete = -1;
1364
1365         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1366         if (anycast)
1367                 rt->rt6i_flags |= RTF_ANYCAST;
1368         else
1369                 rt->rt6i_flags |= RTF_LOCAL;
1370         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1371         if (rt->rt6i_nexthop == NULL) {
1372                 dst_free((struct dst_entry *) rt);
1373                 return ERR_PTR(-ENOMEM);
1374         }
1375
1376         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1377         rt->rt6i_dst.plen = 128;
1378
1379         atomic_set(&rt->u.dst.__refcnt, 1);
1380
1381         return rt;
1382 }
1383
1384 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1385 {
1386         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1387             rt != &ip6_null_entry) {
1388                 RT6_TRACE("deleted by ifdown %p\n", rt);
1389                 return -1;
1390         }
1391         return 0;
1392 }
1393
1394 void rt6_ifdown(struct net_device *dev)
1395 {
1396         write_lock_bh(&rt6_lock);
1397         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1398         write_unlock_bh(&rt6_lock);
1399 }
1400
/*
 *	Argument bundle handed from rt6_mtu_change() to
 *	rt6_mtu_change_route() through fib6_clean_tree().
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1406
1407 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1408 {
1409         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1410         struct inet6_dev *idev;
1411
1412         /* In IPv6 pmtu discovery is not optional,
1413            so that RTAX_MTU lock cannot disable it.
1414            We still use this lock to block changes
1415            caused by addrconf/ndisc.
1416         */
1417
1418         idev = __in6_dev_get(arg->dev);
1419         if (idev == NULL)
1420                 return 0;
1421
1422         /* For administrative MTU increase, there is no way to discover
1423            IPv6 PMTU increase, so PMTU increase should be updated here.
1424            Since RFC 1981 doesn't include administrative MTU increase
1425            update PMTU increase is a MUST. (i.e. jumbo frame)
1426          */
1427         /*
1428            If new MTU is less than route PMTU, this new MTU will be the
1429            lowest MTU in the path, update the route PMTU to reflect PMTU
1430            decreases; if new MTU is greater than route PMTU, and the
1431            old MTU is the lowest MTU in the path, update the route PMTU
1432            to reflect the increase. In this case if the other nodes' MTU
1433            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1434            PMTU discouvery.
1435          */
1436         if (rt->rt6i_dev == arg->dev &&
1437             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1438             (dst_mtu(&rt->u.dst) > arg->mtu ||
1439              (dst_mtu(&rt->u.dst) < arg->mtu &&
1440               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1441                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1442         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1443         return 0;
1444 }
1445
1446 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1447 {
1448         struct rt6_mtu_change_arg arg;
1449
1450         arg.dev = dev;
1451         arg.mtu = mtu;
1452         read_lock_bh(&rt6_lock);
1453         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1454         read_unlock_bh(&rt6_lock);
1455 }
1456
1457 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1458                               struct in6_rtmsg *rtmsg)
1459 {
1460         memset(rtmsg, 0, sizeof(*rtmsg));
1461
1462         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1463         rtmsg->rtmsg_src_len = r->rtm_src_len;
1464         rtmsg->rtmsg_flags = RTF_UP;
1465         if (r->rtm_type == RTN_UNREACHABLE)
1466                 rtmsg->rtmsg_flags |= RTF_REJECT;
1467
1468         if (rta[RTA_GATEWAY-1]) {
1469                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1470                         return -EINVAL;
1471                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1472                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1473         }
1474         if (rta[RTA_DST-1]) {
1475                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1476                         return -EINVAL;
1477                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1478         }
1479         if (rta[RTA_SRC-1]) {
1480                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1481                         return -EINVAL;
1482                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1483         }
1484         if (rta[RTA_OIF-1]) {
1485                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1486                         return -EINVAL;
1487                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1488         }
1489         if (rta[RTA_PRIORITY-1]) {
1490                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1491                         return -EINVAL;
1492                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1493         }
1494         return 0;
1495 }
1496
1497 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1498 {
1499         struct rtmsg *r = NLMSG_DATA(nlh);
1500         struct in6_rtmsg rtmsg;
1501
1502         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1503                 return -EINVAL;
1504         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1505 }
1506
1507 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1508 {
1509         struct rtmsg *r = NLMSG_DATA(nlh);
1510         struct in6_rtmsg rtmsg;
1511
1512         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1513                 return -EINVAL;
1514         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1515 }
1516
/*
 *	Per-call state for a netlink route dump, passed to
 *	rt6_dump_route() through the walker's args pointer.
 */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback state */
};
1522
1523 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1524                          struct in6_addr *dst, struct in6_addr *src,
1525                          int iif, int type, u32 pid, u32 seq,
1526                          int prefix, unsigned int flags)
1527 {
1528         struct rtmsg *rtm;
1529         struct nlmsghdr  *nlh;
1530         unsigned char    *b = skb->tail;
1531         struct rta_cacheinfo ci;
1532
1533         if (prefix) {   /* user wants prefix routes only */
1534                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1535                         /* success since this is not a prefix route */
1536                         return 1;
1537                 }
1538         }
1539
1540         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1541         rtm = NLMSG_DATA(nlh);
1542         rtm->rtm_family = AF_INET6;
1543         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1544         rtm->rtm_src_len = rt->rt6i_src.plen;
1545         rtm->rtm_tos = 0;
1546         rtm->rtm_table = RT_TABLE_MAIN;
1547         if (rt->rt6i_flags&RTF_REJECT)
1548                 rtm->rtm_type = RTN_UNREACHABLE;
1549         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1550                 rtm->rtm_type = RTN_LOCAL;
1551         else
1552                 rtm->rtm_type = RTN_UNICAST;
1553         rtm->rtm_flags = 0;
1554         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1555         rtm->rtm_protocol = rt->rt6i_protocol;
1556         if (rt->rt6i_flags&RTF_DYNAMIC)
1557                 rtm->rtm_protocol = RTPROT_REDIRECT;
1558         else if (rt->rt6i_flags & RTF_ADDRCONF)
1559                 rtm->rtm_protocol = RTPROT_KERNEL;
1560         else if (rt->rt6i_flags&RTF_DEFAULT)
1561                 rtm->rtm_protocol = RTPROT_RA;
1562
1563         if (rt->rt6i_flags&RTF_CACHE)
1564                 rtm->rtm_flags |= RTM_F_CLONED;
1565
1566         if (dst) {
1567                 RTA_PUT(skb, RTA_DST, 16, dst);
1568                 rtm->rtm_dst_len = 128;
1569         } else if (rtm->rtm_dst_len)
1570                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1571 #ifdef CONFIG_IPV6_SUBTREES
1572         if (src) {
1573                 RTA_PUT(skb, RTA_SRC, 16, src);
1574                 rtm->rtm_src_len = 128;
1575         } else if (rtm->rtm_src_len)
1576                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1577 #endif
1578         if (iif)
1579                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1580         else if (dst) {
1581                 struct in6_addr saddr_buf;
1582                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1583                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1584         }
1585         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1586                 goto rtattr_failure;
1587         if (rt->u.dst.neighbour)
1588                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1589         if (rt->u.dst.dev)
1590                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1591         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1592         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1593         if (rt->rt6i_expires)
1594                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1595         else
1596                 ci.rta_expires = 0;
1597         ci.rta_used = rt->u.dst.__use;
1598         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1599         ci.rta_error = rt->u.dst.error;
1600         ci.rta_id = 0;
1601         ci.rta_ts = 0;
1602         ci.rta_tsage = 0;
1603         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1604         nlh->nlmsg_len = skb->tail - b;
1605         return skb->len;
1606
1607 nlmsg_failure:
1608 rtattr_failure:
1609         skb_trim(skb, b - skb->data);
1610         return -1;
1611 }
1612
1613 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1614 {
1615         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1616         int prefix;
1617
1618         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1619                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1620                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1621         } else
1622                 prefix = 0;
1623
1624         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1625                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1626                      prefix, NLM_F_MULTI);
1627 }
1628
1629 static int fib6_dump_node(struct fib6_walker_t *w)
1630 {
1631         int res;
1632         struct rt6_info *rt;
1633
1634         for (rt = w->leaf; rt; rt = rt->u.next) {
1635                 res = rt6_dump_route(rt, w->args);
1636                 if (res < 0) {
1637                         /* Frame is full, suspend walking */
1638                         w->leaf = rt;
1639                         return 1;
1640                 }
1641                 BUG_TRAP(res!=0);
1642         }
1643         w->leaf = NULL;
1644         return 0;
1645 }
1646
1647 static void fib6_dump_end(struct netlink_callback *cb)
1648 {
1649         struct fib6_walker_t *w = (void*)cb->args[0];
1650
1651         if (w) {
1652                 cb->args[0] = 0;
1653                 fib6_walker_unlink(w);
1654                 kfree(w);
1655         }
1656         cb->done = (void*)cb->args[1];
1657         cb->args[1] = 0;
1658 }
1659
1660 static int fib6_dump_done(struct netlink_callback *cb)
1661 {
1662         fib6_dump_end(cb);
1663         return cb->done ? cb->done(cb) : 0;
1664 }
1665
1666 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1667 {
1668         struct rt6_rtnl_dump_arg arg;
1669         struct fib6_walker_t *w;
1670         int res;
1671
1672         arg.skb = skb;
1673         arg.cb = cb;
1674
1675         w = (void*)cb->args[0];
1676         if (w == NULL) {
1677                 /* New dump:
1678                  * 
1679                  * 1. hook callback destructor.
1680                  */
1681                 cb->args[1] = (long)cb->done;
1682                 cb->done = fib6_dump_done;
1683
1684                 /*
1685                  * 2. allocate and initialize walker.
1686                  */
1687                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1688                 if (w == NULL)
1689                         return -ENOMEM;
1690                 RT6_TRACE("dump<%p", w);
1691                 memset(w, 0, sizeof(*w));
1692                 w->root = &ip6_routing_table;
1693                 w->func = fib6_dump_node;
1694                 w->args = &arg;
1695                 cb->args[0] = (long)w;
1696                 read_lock_bh(&rt6_lock);
1697                 res = fib6_walk(w);
1698                 read_unlock_bh(&rt6_lock);
1699         } else {
1700                 w->args = &arg;
1701                 read_lock_bh(&rt6_lock);
1702                 res = fib6_walk_continue(w);
1703                 read_unlock_bh(&rt6_lock);
1704         }
1705 #if RT6_DEBUG >= 3
1706         if (res <= 0 && skb->len == 0)
1707                 RT6_TRACE("%p>dump end\n", w);
1708 #endif
1709         res = res < 0 ? res : skb->len;
1710         /* res < 0 is an error. (really, impossible)
1711            res == 0 means that dump is complete, but skb still can contain data.
1712            res > 0 dump is not complete, but frame is full.
1713          */
1714         /* Destroy walker, if dump of this table is complete. */
1715         if (res <= 0)
1716                 fib6_dump_end(cb);
1717         return res;
1718 }
1719
1720 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1721 {
1722         struct rtattr **rta = arg;
1723         int iif = 0;
1724         int err = -ENOBUFS;
1725         struct sk_buff *skb;
1726         struct flowi fl;
1727         struct rt6_info *rt;
1728
1729         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1730         if (skb == NULL)
1731                 goto out;
1732
1733         /* Reserve room for dummy headers, this skb can pass
1734            through good chunk of routing engine.
1735          */
1736         skb->mac.raw = skb->data;
1737         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1738
1739         memset(&fl, 0, sizeof(fl));
1740         if (rta[RTA_SRC-1])
1741                 ipv6_addr_copy(&fl.fl6_src,
1742                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1743         if (rta[RTA_DST-1])
1744                 ipv6_addr_copy(&fl.fl6_dst,
1745                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1746
1747         if (rta[RTA_IIF-1])
1748                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1749
1750         if (iif) {
1751                 struct net_device *dev;
1752                 dev = __dev_get_by_index(iif);
1753                 if (!dev) {
1754                         err = -ENODEV;
1755                         goto out_free;
1756                 }
1757         }
1758
1759         fl.oif = 0;
1760         if (rta[RTA_OIF-1])
1761                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1762
1763         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1764
1765         skb->dst = &rt->u.dst;
1766
1767         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1768         err = rt6_fill_node(skb, rt, 
1769                             &fl.fl6_dst, &fl.fl6_src,
1770                             iif,
1771                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1772                             nlh->nlmsg_seq, 0, 0);
1773         if (err < 0) {
1774                 err = -EMSGSIZE;
1775                 goto out_free;
1776         }
1777
1778         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1779         if (err > 0)
1780                 err = 0;
1781 out:
1782         return err;
1783 out_free:
1784         kfree_skb(skb);
1785         goto out;       
1786 }
1787
1788 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1789                         struct netlink_skb_parms *req)
1790 {
1791         struct sk_buff *skb;
1792         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1793         u32 pid = current->pid;
1794         u32 seq = 0;
1795
1796         if (req)
1797                 pid = req->pid;
1798         if (nlh)
1799                 seq = nlh->nlmsg_seq;
1800         
1801         skb = alloc_skb(size, gfp_any());
1802         if (!skb) {
1803                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1804                 return;
1805         }
1806         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1807                 kfree_skb(skb);
1808                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1809                 return;
1810         }
1811         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1812         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1813 }
1814
1815 /*
1816  *      /proc
1817  */
1818
1819 #ifdef CONFIG_PROC_FS
1820
/*
 * Length in bytes that rt6_info_route() assumes for one formatted route
 * record: the record offset arithmetic below divides and takes remainders
 * by this value.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and the rt6_info_route() callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer being filled */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* whole records skipped so far */
	int len;	/* bytes written into buffer so far */
};
1831
1832 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1833 {
1834         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1835         int i;
1836
1837         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1838                 arg->skip++;
1839                 return 0;
1840         }
1841
1842         if (arg->len >= arg->length)
1843                 return 0;
1844
1845         for (i=0; i<16; i++) {
1846                 sprintf(arg->buffer + arg->len, "%02x",
1847                         rt->rt6i_dst.addr.s6_addr[i]);
1848                 arg->len += 2;
1849         }
1850         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1851                             rt->rt6i_dst.plen);
1852
1853 #ifdef CONFIG_IPV6_SUBTREES
1854         for (i=0; i<16; i++) {
1855                 sprintf(arg->buffer + arg->len, "%02x",
1856                         rt->rt6i_src.addr.s6_addr[i]);
1857                 arg->len += 2;
1858         }
1859         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1860                             rt->rt6i_src.plen);
1861 #else
1862         sprintf(arg->buffer + arg->len,
1863                 "00000000000000000000000000000000 00 ");
1864         arg->len += 36;
1865 #endif
1866
1867         if (rt->rt6i_nexthop) {
1868                 for (i=0; i<16; i++) {
1869                         sprintf(arg->buffer + arg->len, "%02x",
1870                                 rt->rt6i_nexthop->primary_key[i]);
1871                         arg->len += 2;
1872                 }
1873         } else {
1874                 sprintf(arg->buffer + arg->len,
1875                         "00000000000000000000000000000000");
1876                 arg->len += 32;
1877         }
1878         arg->len += sprintf(arg->buffer + arg->len,
1879                             " %08x %08x %08x %08x %8s\n",
1880                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1881                             rt->u.dst.__use, rt->rt6i_flags, 
1882                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1883         return 0;
1884 }
1885
1886 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1887 {
1888         struct rt6_proc_arg arg;
1889         arg.buffer = buffer;
1890         arg.offset = offset;
1891         arg.length = length;
1892         arg.skip = 0;
1893         arg.len = 0;
1894
1895         read_lock_bh(&rt6_lock);
1896         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1897         read_unlock_bh(&rt6_lock);
1898
1899         *start = buffer;
1900         if (offset)
1901                 *start += offset % RT6_INFO_LEN;
1902
1903         arg.len -= offset % RT6_INFO_LEN;
1904
1905         if (arg.len > length)
1906                 arg.len = length;
1907         if (arg.len < 0)
1908                 arg.len = 0;
1909
1910         return arg.len;
1911 }
1912
1913 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1914 {
1915         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1916                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1917                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1918                       rt6_stats.fib_rt_cache,
1919                       atomic_read(&ip6_dst_ops.entries),
1920                       rt6_stats.fib_discarded_routes);
1921
1922         return 0;
1923 }
1924
1925 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1926 {
1927         return single_open(file, rt6_stats_seq_show, NULL);
1928 }
1929
1930 static struct file_operations rt6_stats_seq_fops = {
1931         .owner   = THIS_MODULE,
1932         .open    = rt6_stats_seq_open,
1933         .read    = seq_read,
1934         .llseek  = seq_lseek,
1935         .release = single_release,
1936 };
1937 #endif  /* CONFIG_PROC_FS */
1938
1939 #ifdef CONFIG_SYSCTL
1940
1941 static int flush_delay;
1942
1943 static
1944 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1945                               void __user *buffer, size_t *lenp, loff_t *ppos)
1946 {
1947         if (write) {
1948                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1949                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1950                 return 0;
1951         } else
1952                 return -EINVAL;
1953 }
1954
1955 ctl_table ipv6_route_table[] = {
1956         {
1957                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1958                 .procname       =       "flush",
1959                 .data           =       &flush_delay,
1960                 .maxlen         =       sizeof(int),
1961                 .mode           =       0200,
1962                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1963         },
1964         {
1965                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1966                 .procname       =       "gc_thresh",
1967                 .data           =       &ip6_dst_ops.gc_thresh,
1968                 .maxlen         =       sizeof(int),
1969                 .mode           =       0644,
1970                 .proc_handler   =       &proc_dointvec,
1971         },
1972         {
1973                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1974                 .procname       =       "max_size",
1975                 .data           =       &ip6_rt_max_size,
1976                 .maxlen         =       sizeof(int),
1977                 .mode           =       0644,
1978                 .proc_handler   =       &proc_dointvec,
1979         },
1980         {
1981                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1982                 .procname       =       "gc_min_interval",
1983                 .data           =       &ip6_rt_gc_min_interval,
1984                 .maxlen         =       sizeof(int),
1985                 .mode           =       0644,
1986                 .proc_handler   =       &proc_dointvec_jiffies,
1987                 .strategy       =       &sysctl_jiffies,
1988         },
1989         {
1990                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
1991                 .procname       =       "gc_timeout",
1992                 .data           =       &ip6_rt_gc_timeout,
1993                 .maxlen         =       sizeof(int),
1994                 .mode           =       0644,
1995                 .proc_handler   =       &proc_dointvec_jiffies,
1996                 .strategy       =       &sysctl_jiffies,
1997         },
1998         {
1999                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2000                 .procname       =       "gc_interval",
2001                 .data           =       &ip6_rt_gc_interval,
2002                 .maxlen         =       sizeof(int),
2003                 .mode           =       0644,
2004                 .proc_handler   =       &proc_dointvec_jiffies,
2005                 .strategy       =       &sysctl_jiffies,
2006         },
2007         {
2008                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2009                 .procname       =       "gc_elasticity",
2010                 .data           =       &ip6_rt_gc_elasticity,
2011                 .maxlen         =       sizeof(int),
2012                 .mode           =       0644,
2013                 .proc_handler   =       &proc_dointvec_jiffies,
2014                 .strategy       =       &sysctl_jiffies,
2015         },
2016         {
2017                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2018                 .procname       =       "mtu_expires",
2019                 .data           =       &ip6_rt_mtu_expires,
2020                 .maxlen         =       sizeof(int),
2021                 .mode           =       0644,
2022                 .proc_handler   =       &proc_dointvec_jiffies,
2023                 .strategy       =       &sysctl_jiffies,
2024         },
2025         {
2026                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2027                 .procname       =       "min_adv_mss",
2028                 .data           =       &ip6_rt_min_advmss,
2029                 .maxlen         =       sizeof(int),
2030                 .mode           =       0644,
2031                 .proc_handler   =       &proc_dointvec_jiffies,
2032                 .strategy       =       &sysctl_jiffies,
2033         },
2034         {
2035                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2036                 .procname       =       "gc_min_interval_ms",
2037                 .data           =       &ip6_rt_gc_min_interval,
2038                 .maxlen         =       sizeof(int),
2039                 .mode           =       0644,
2040                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2041                 .strategy       =       &sysctl_ms_jiffies,
2042         },
2043         { .ctl_name = 0 }
2044 };
2045
2046 #endif
2047
2048 void __init ip6_route_init(void)
2049 {
2050         struct proc_dir_entry *p;
2051
2052         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053                                                      sizeof(struct rt6_info),
2054                                                      0, SLAB_HWCACHE_ALIGN,
2055                                                      NULL, NULL);
2056         if (!ip6_dst_ops.kmem_cachep)
2057                 panic("cannot create ip6_dst_cache");
2058
2059         fib6_init();
2060 #ifdef  CONFIG_PROC_FS
2061         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2062         if (p)
2063                 p->owner = THIS_MODULE;
2064
2065         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2066 #endif
2067 #ifdef CONFIG_XFRM
2068         xfrm6_init();
2069 #endif
2070 }
2071
2072 void ip6_route_cleanup(void)
2073 {
2074 #ifdef CONFIG_PROC_FS
2075         proc_net_remove("ipv6_route");
2076         proc_net_remove("rt6_stats");
2077 #endif
2078 #ifdef CONFIG_XFRM
2079         xfrm6_fini();
2080 #endif
2081         rt6_ifdown(NULL);
2082         fib6_gc_cleanup();
2083         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2084 }