[IPV6]: ROUTE: More strict check for default routers in rt6_get_dflt_router().
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
/*
 * Debug helpers, active only when RT6_DEBUG >= 3:
 *   RDBG(x)       expands to "printk x", so x must be a fully
 *                 parenthesised printk argument list.
 *   RT6_TRACE()   expands to printk() with KERN_DEBUG prepended to the
 *                 format string.
 * At lower debug levels RDBG() expands to nothing and RT6_TRACE() to an
 * empty do/while statement.
 */
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that do
 * not need a copy-on-write entry (via rt6_alloc_clone()); when zero such
 * routes are returned directly.
 */
75 #define CLONE_OFFLINK_ROUTE 0
76
/*
 * Bits of the "strict" argument taken by rt6_score_route()/rt6_select():
 *   RT6_SELECT_F_IFACE      reject routes whose device does not match oif
 *   RT6_SELECT_F_REACHABLE  reject routes whose nexthop neighbour is not
 *                           in a NUD_VALID state
 */
77 #define RT6_SELECT_F_IFACE      0x1
78 #define RT6_SELECT_F_REACHABLE  0x2
79
/*
 * Tunables.  Values written in terms of HZ are in jiffies.
 *   ip6_rt_max_size         entry count above which ip6_dst_gc() always
 *                           runs and reports pressure to its caller
 *   ip6_rt_gc_min_interval  minimum spacing between two GC runs in
 *                           ip6_dst_gc() while below ip6_rt_max_size
 *   ip6_rt_gc_timeout       half of this is the expiry handed to the GC
 *                           once the cache is below gc_thresh
 *   ip6_rt_gc_elasticity    shift used to decay the GC expiry on each call
 *   ip6_rt_min_advmss       lower bound applied by ipv6_advmss()
 * ip6_rt_gc_interval and ip6_rt_mtu_expires are not used in the part of
 * the file shown here.
 */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
/*
 * dst_ops used for every IPv6 route allocated through ip6_dst_alloc():
 * binds the generic destination-cache callbacks to the ip6_* handlers in
 * this file and sizes each entry as a struct rt6_info.
 */
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
/*
 * Sentinel "no route" entry.  Lookup helpers in this file
 * (rt6_device_match(), rt6_select()) return it when nothing usable is
 * found.  It is a reject route bound to the loopback device: both its
 * input and output handlers discard the packet and its dst error is
 * -ENETUNREACH.  It starts with a reference count of 1 and carries the
 * largest possible metric.
 */
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
/*
 * Root node of the IPv6 FIB tree.  Its initial leaf is ip6_null_entry,
 * so a lookup in an otherwise empty table yields the reject route.
 */
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254         if (rt6_check_neigh(rt))
255                 m |= 4;
256         else if (strict & RT6_SELECT_F_REACHABLE)
257                 return -1;
258         return m;
259 }
260
261 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262                                    int strict)
263 {
264         struct rt6_info *match = NULL, *last = NULL;
265         struct rt6_info *rt, *rt0 = *head;
266         u32 metric;
267         int mpri = -1;
268
269         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270                   __FUNCTION__, head, head ? *head : NULL, oif);
271
272         for (rt = rt0, metric = rt0->rt6i_metric;
273              rt && rt->rt6i_metric == metric;
274              rt = rt->u.next) {
275                 int m;
276
277                 if (rt6_check_expired(rt))
278                         continue;
279
280                 last = rt;
281
282                 m = rt6_score_route(rt, oif, strict);
283                 if (m < 0)
284                         continue;
285
286                 if (m > mpri) {
287                         match = rt;
288                         mpri = m;
289                 }
290         }
291
292         if (!match &&
293             (strict & RT6_SELECT_F_REACHABLE) &&
294             last && last != rt0) {
295                 /* no entries matched; do round-robin */
296                 *head = rt0->u.next;
297                 rt0->u.next = last->u.next;
298                 last->u.next = rt0;
299         }
300
301         RT6_TRACE("%s() => %p, score=%d\n",
302                   __FUNCTION__, match, mpri);
303
304         return (match ? match : &ip6_null_entry);
305 }
306
307 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308                             int oif, int strict)
309 {
310         struct fib6_node *fn;
311         struct rt6_info *rt;
312
313         read_lock_bh(&rt6_lock);
314         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315         rt = rt6_device_match(fn->leaf, oif, strict);
316         dst_hold(&rt->u.dst);
317         rt->u.dst.__use++;
318         read_unlock_bh(&rt6_lock);
319
320         rt->u.dst.lastuse = jiffies;
321         if (rt->u.dst.error == 0)
322                 return rt;
323         dst_release(&rt->u.dst);
324         return NULL;
325 }
326
327 /* ip6_ins_rt is called with FREE rt6_lock.
328    It takes new route entry, the addition fails by any reason the
329    route is freed. In any case, if caller does not hold it, it may
330    be destroyed.
331  */
332
333 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334                 void *_rtattr, struct netlink_skb_parms *req)
335 {
336         int err;
337
338         write_lock_bh(&rt6_lock);
339         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
340         write_unlock_bh(&rt6_lock);
341
342         return err;
343 }
344
345 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346                                       struct in6_addr *saddr)
347 {
348         struct rt6_info *rt;
349
350         /*
351          *      Clone the route.
352          */
353
354         rt = ip6_rt_copy(ort);
355
356         if (rt) {
357                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358                         if (rt->rt6i_dst.plen != 128 &&
359                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360                                 rt->rt6i_flags |= RTF_ANYCAST;
361                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
362                 }
363
364                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
365                 rt->rt6i_dst.plen = 128;
366                 rt->rt6i_flags |= RTF_CACHE;
367                 rt->u.dst.flags |= DST_HOST;
368
369 #ifdef CONFIG_IPV6_SUBTREES
370                 if (rt->rt6i_src.plen && saddr) {
371                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372                         rt->rt6i_src.plen = 128;
373                 }
374 #endif
375
376                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
378         }
379
380         return rt;
381 }
382
383 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384 {
385         struct rt6_info *rt = ip6_rt_copy(ort);
386         if (rt) {
387                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388                 rt->rt6i_dst.plen = 128;
389                 rt->rt6i_flags |= RTF_CACHE;
390                 if (rt->rt6i_flags & RTF_REJECT)
391                         rt->u.dst.error = ort->u.dst.error;
392                 rt->u.dst.flags |= DST_HOST;
393                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394         }
395         return rt;
396 }
397
/*
 * BACKTRACK() - retry a failed strict lookup further up the FIB tree.
 *
 * Expects the local variables rt, fn and strict and the labels "out" and
 * "restart" to exist in the calling function.  When the lookup produced
 * ip6_null_entry and strict is non-zero, walk towards the root through
 * fn->parent:
 *   - on reaching a node flagged RTN_ROOT, give up and jump to "out";
 *   - on reaching a node that carries routes (RTN_RTINFO), jump to
 *     "restart" to repeat the match on that node.
 * If the parent chain ends without hitting either, execution continues
 * after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following else.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
408
409
/*
 * Route a received packet: find the route for skb's IPv6 destination and
 * source address and store it, with a reference held, in skb->dst.
 *
 * The lookup is interface-strict (against skb->dev) when the destination
 * is multicast or link-local.  If the matched route has no nexthop and is
 * not RTF_NONEXTHOP, a per-destination copy is created with
 * rt6_alloc_cow() and inserted into the table; because rt6_lock is
 * dropped in between, a failed insert triggers a fresh lookup, at most
 * three attempts in total.  If the copy cannot be allocated the packet
 * ends up with ip6_null_entry.
 */
410 void ip6_route_input(struct sk_buff *skb)
411 {
412         struct fib6_node *fn;
413         struct rt6_info *rt, *nrt;
414         int strict;
415         int attempts = 3;
416         int err;
417
            /* Non-zero for multicast or link-local destinations. */
418         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
419
420 relookup:
421         read_lock_bh(&rt6_lock);
422
423         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424                          &skb->nh.ipv6h->saddr);
425
            /* BACKTRACK() jumps back here after moving fn to a parent node. */
426 restart:
427         rt = fn->leaf;
428
            /* Cached entry: use it as is; "out" takes the reference and unlocks. */
429         if ((rt->rt6i_flags & RTF_CACHE)) {
430                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
431                 BACKTRACK();
432                 goto out;
433         }
434
435         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
436         BACKTRACK();
437
438         dst_hold(&rt->u.dst);
439         read_unlock_bh(&rt6_lock);
440
            /* A route lacking a nexthop needs a per-destination copy. */
441         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
442                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
443         else {
444 #if CLONE_OFFLINK_ROUTE
445                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
446 #else
447                 goto out2;
448 #endif
449         }
450
            /* Swap the reference from the original route to the copy
             * (or to ip6_null_entry when the allocation failed). */
451         dst_release(&rt->u.dst);
452         rt = nrt ? : &ip6_null_entry;
453
454         dst_hold(&rt->u.dst);
455         if (nrt) {
456                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
457                 if (!err)
458                         goto out2;
459         }
460
461         if (--attempts <= 0)
462                 goto out2;
463
464         /*
465          * Race condition! In the gap, when rt6_lock was
466          * released someone could insert this route.  Relookup.
467          */
468         dst_release(&rt->u.dst);
469         goto relookup;
470
            /* Reached with rt6_lock still read-held and no reference on rt yet. */
471 out:
472         dst_hold(&rt->u.dst);
473         read_unlock_bh(&rt6_lock);
            /* Reached with the lock released and a reference on rt held. */
474 out2:
475         rt->u.dst.lastuse = jiffies;
476         rt->u.dst.__use++;
477         skb->dst = (struct dst_entry *) rt;
478         return;
479 }
480
/*
 * Find the route for an outgoing flow described by @fl (destination,
 * source and output interface) and return its dst entry with a reference
 * held.  @sk is not used in this function.
 *
 * The lookup is interface-strict when the destination is multicast or
 * link-local.  Default routes go through rt6_select(): first restricted
 * to routers whose neighbour entry is valid, then, if that yields
 * ip6_null_entry, without that restriction.  As in ip6_route_input(), a
 * route lacking a nexthop is copied with rt6_alloc_cow() and inserted,
 * retrying the whole lookup (three attempts in total) when the insert
 * fails.  The function never returns NULL; failures surface as
 * ip6_null_entry.
 */
481 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
482 {
483         struct fib6_node *fn;
484         struct rt6_info *rt, *nrt;
485         int strict;
486         int attempts = 3;
487         int err;
488
            /* RT6_SELECT_F_IFACE for multicast/link-local destinations, else 0. */
489         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
490
491 relookup:
492         read_lock_bh(&rt6_lock);
493
494         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
495
            /* BACKTRACK() jumps back here after moving fn to a parent node. */
496 restart:
497         rt = fn->leaf;
498
            /* Cached entry: use it as is; "out" takes the reference and unlocks. */
499         if ((rt->rt6i_flags & RTF_CACHE)) {
500                 rt = rt6_device_match(rt, fl->oif, strict);
501                 BACKTRACK();
502                 goto out;
503         }
            /* Default routers: prefer reachable ones, then accept any. */
504         if (rt->rt6i_flags & RTF_DEFAULT) {
505                 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
506                 if (rt == &ip6_null_entry)
507                         rt = rt6_select(&fn->leaf, fl->oif, strict);
508         } else {
509                 rt = rt6_device_match(rt, fl->oif, strict);
510                 BACKTRACK();
511         }
512
513         dst_hold(&rt->u.dst);
514         read_unlock_bh(&rt6_lock);
515
            /* A route lacking a nexthop needs a per-destination copy. */
516         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
517                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
518         else {
519 #if CLONE_OFFLINK_ROUTE
520                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
521 #else
522                 goto out2;
523 #endif
524         }
525
            /* Swap the reference from the original route to the copy
             * (or to ip6_null_entry when the allocation failed). */
526         dst_release(&rt->u.dst);
527         rt = nrt ? : &ip6_null_entry;
528
529         dst_hold(&rt->u.dst);
530         if (nrt) {
531                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
532                 if (!err)
533                         goto out2;
534         }
535
536         if (--attempts <= 0)
537                 goto out2;
538
539         /*
540          * Race condition! In the gap, when rt6_lock was
541          * released someone could insert this route.  Relookup.
542          */
543         dst_release(&rt->u.dst);
544         goto relookup;
545
            /* Reached with rt6_lock still read-held and no reference on rt yet. */
546 out:
547         dst_hold(&rt->u.dst);
548         read_unlock_bh(&rt6_lock);
            /* Reached with the lock released and a reference on rt held. */
549 out2:
550         rt->u.dst.lastuse = jiffies;
551         rt->u.dst.__use++;
552         return &rt->u.dst;
553 }
554
555
556 /*
557  *      Destination cache support functions
558  */
559
560 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
561 {
562         struct rt6_info *rt;
563
564         rt = (struct rt6_info *) dst;
565
566         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
567                 return dst;
568
569         return NULL;
570 }
571
572 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
573 {
574         struct rt6_info *rt = (struct rt6_info *) dst;
575
576         if (rt) {
577                 if (rt->rt6i_flags & RTF_CACHE)
578                         ip6_del_rt(rt, NULL, NULL, NULL);
579                 else
580                         dst_release(dst);
581         }
582         return NULL;
583 }
584
585 static void ip6_link_failure(struct sk_buff *skb)
586 {
587         struct rt6_info *rt;
588
589         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
590
591         rt = (struct rt6_info *) skb->dst;
592         if (rt) {
593                 if (rt->rt6i_flags&RTF_CACHE) {
594                         dst_set_expires(&rt->u.dst, 0);
595                         rt->rt6i_flags |= RTF_EXPIRES;
596                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
597                         rt->rt6i_node->fn_sernum = -1;
598         }
599 }
600
601 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
602 {
603         struct rt6_info *rt6 = (struct rt6_info*)dst;
604
605         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
606                 rt6->rt6i_flags |= RTF_MODIFIED;
607                 if (mtu < IPV6_MIN_MTU) {
608                         mtu = IPV6_MIN_MTU;
609                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
610                 }
611                 dst->metrics[RTAX_MTU-1] = mtu;
612         }
613 }
614
615 /* Protected by rt6_lock.  */
616 static struct dst_entry *ndisc_dst_gc_list;
617 static int ipv6_get_mtu(struct net_device *dev);
618
619 static inline unsigned int ipv6_advmss(unsigned int mtu)
620 {
621         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
622
623         if (mtu < ip6_rt_min_advmss)
624                 mtu = ip6_rt_min_advmss;
625
626         /*
627          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
628          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
629          * IPV6_MAXPLEN is also valid and means: "any MSS, 
630          * rely only on pmtu discovery"
631          */
632         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
633                 mtu = IPV6_MAXPLEN;
634         return mtu;
635 }
636
637 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
638                                   struct neighbour *neigh,
639                                   struct in6_addr *addr,
640                                   int (*output)(struct sk_buff *))
641 {
642         struct rt6_info *rt;
643         struct inet6_dev *idev = in6_dev_get(dev);
644
645         if (unlikely(idev == NULL))
646                 return NULL;
647
648         rt = ip6_dst_alloc();
649         if (unlikely(rt == NULL)) {
650                 in6_dev_put(idev);
651                 goto out;
652         }
653
654         dev_hold(dev);
655         if (neigh)
656                 neigh_hold(neigh);
657         else
658                 neigh = ndisc_get_neigh(dev, addr);
659
660         rt->rt6i_dev      = dev;
661         rt->rt6i_idev     = idev;
662         rt->rt6i_nexthop  = neigh;
663         atomic_set(&rt->u.dst.__refcnt, 1);
664         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
665         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
666         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
667         rt->u.dst.output  = output;
668
669 #if 0   /* there's no chance to use these for ndisc */
670         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
671                                 ? DST_HOST 
672                                 : 0;
673         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
674         rt->rt6i_dst.plen = 128;
675 #endif
676
677         write_lock_bh(&rt6_lock);
678         rt->u.dst.next = ndisc_dst_gc_list;
679         ndisc_dst_gc_list = &rt->u.dst;
680         write_unlock_bh(&rt6_lock);
681
682         fib6_force_start_gc();
683
684 out:
685         return (struct dst_entry *)rt;
686 }
687
688 int ndisc_dst_gc(int *more)
689 {
690         struct dst_entry *dst, *next, **pprev;
691         int freed;
692
693         next = NULL;
694         pprev = &ndisc_dst_gc_list;
695         freed = 0;
696         while ((dst = *pprev) != NULL) {
697                 if (!atomic_read(&dst->__refcnt)) {
698                         *pprev = dst->next;
699                         dst_free(dst);
700                         freed++;
701                 } else {
702                         pprev = &dst->next;
703                         (*more)++;
704                 }
705         }
706
707         return freed;
708 }
709
710 static int ip6_dst_gc(void)
711 {
712         static unsigned expire = 30*HZ;
713         static unsigned long last_gc;
714         unsigned long now = jiffies;
715
716         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
717             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
718                 goto out;
719
720         expire++;
721         fib6_run_gc(expire);
722         last_gc = now;
723         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
724                 expire = ip6_rt_gc_timeout>>1;
725
726 out:
727         expire -= expire>>ip6_rt_gc_elasticity;
728         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
729 }
730
731 /* Clean host part of a prefix. Not necessary in radix tree,
732    but results in cleaner routing tables.
733
734    Remove it only when all the things will work!
735  */
736
737 static int ipv6_get_mtu(struct net_device *dev)
738 {
739         int mtu = IPV6_MIN_MTU;
740         struct inet6_dev *idev;
741
742         idev = in6_dev_get(dev);
743         if (idev) {
744                 mtu = idev->cnf.mtu6;
745                 in6_dev_put(idev);
746         }
747         return mtu;
748 }
749
750 int ipv6_get_hoplimit(struct net_device *dev)
751 {
752         int hoplimit = ipv6_devconf.hop_limit;
753         struct inet6_dev *idev;
754
755         idev = in6_dev_get(dev);
756         if (idev) {
757                 hoplimit = idev->cnf.hop_limit;
758                 in6_dev_put(idev);
759         }
760         return hoplimit;
761 }
762
763 /*
764  *
765  */
766
/*
 * Create a route from the request in @rtmsg and insert it into the FIB.
 *
 * @rtmsg:   route description (prefixes, gateway, ifindex, metric, flags).
 *           A zero rtmsg_metric is rewritten to IP6_RT_PRIO_USER.
 * @nlh:     originating netlink header, or NULL; when present its
 *           rtm_protocol is recorded on the route, otherwise RTPROT_BOOT.
 * @_rtattr: array of struct rtattr pointers, or NULL; only the
 *           RTA_METRICS entry is read here.
 * @req:     passed through to ip6_ins_rt().
 *
 * Returns the result of ip6_ins_rt() once the route has been built, or a
 * negative errno set in this function:
 *   -EINVAL        prefix length above 128, source prefix without
 *                  CONFIG_IPV6_SUBTREES, unusable gateway, gateway route
 *                  on a loopback or missing device, or a metric type
 *                  above RTAX_MAX
 *   -ENODEV        interface or its inet6_dev not found
 *   -ENOMEM        route allocation failed
 *   -EHOSTUNREACH  gateway not reachable through a suitable route
 * or the error returned by the neighbour lookup.
 *
 * The device and inet6_dev references taken here are stored in the route
 * before it is inserted; on the error path ("out") they are dropped and
 * the route is freed.
 */
767 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
768                 void *_rtattr, struct netlink_skb_parms *req)
769 {
770         int err;
771         struct rtmsg *r;
772         struct rtattr **rta;
773         struct rt6_info *rt = NULL;
774         struct net_device *dev = NULL;
775         struct inet6_dev *idev = NULL;
776         int addr_type;
777
778         rta = (struct rtattr **) _rtattr;
779
780         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
781                 return -EINVAL;
782 #ifndef CONFIG_IPV6_SUBTREES
783         if (rtmsg->rtmsg_src_len)
784                 return -EINVAL;
785 #endif
            /* Resolve the requested interface, taking dev and idev references. */
786         if (rtmsg->rtmsg_ifindex) {
787                 err = -ENODEV;
788                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
789                 if (!dev)
790                         goto out;
791                 idev = in6_dev_get(dev);
792                 if (!idev)
793                         goto out;
794         }
795
796         if (rtmsg->rtmsg_metric == 0)
797                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
798
799         rt = ip6_dst_alloc();
800
801         if (rt == NULL) {
802                 err = -ENOMEM;
803                 goto out;
804         }
805
806         rt->u.dst.obsolete = -1;
807         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
808         if (nlh && (r = NLMSG_DATA(nlh))) {
809                 rt->rt6i_protocol = r->rtm_protocol;
810         } else {
811                 rt->rt6i_protocol = RTPROT_BOOT;
812         }
813
814         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
815
816         if (addr_type & IPV6_ADDR_MULTICAST)
817                 rt->u.dst.input = ip6_mc_input;
818         else
819                 rt->u.dst.input = ip6_forward;
820
821         rt->u.dst.output = ip6_output;
822
            /* Store the destination masked to its prefix length. */
823         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
824                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
825         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
826         if (rt->rt6i_dst.plen == 128)
827                rt->u.dst.flags = DST_HOST;
828
829 #ifdef CONFIG_IPV6_SUBTREES
830         ipv6_addr_prefix(&rt->rt6i_src.addr, 
831                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
832         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
833 #endif
834
835         rt->rt6i_metric = rtmsg->rtmsg_metric;
836
837         /* We cannot add true routes via loopback here,
838            they would result in kernel looping; promote them to reject routes
839          */
840         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
841             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
842                 /* hold loopback dev/idev if we haven't done so. */
843                 if (dev != &loopback_dev) {
844                         if (dev) {
845                                 dev_put(dev);
846                                 in6_dev_put(idev);
847                         }
848                         dev = &loopback_dev;
849                         dev_hold(dev);
850                         idev = in6_dev_get(dev);
851                         if (!idev) {
852                                 err = -ENODEV;
853                                 goto out;
854                         }
855                 }
856                 rt->u.dst.output = ip6_pkt_discard_out;
857                 rt->u.dst.input = ip6_pkt_discard;
858                 rt->u.dst.error = -ENETUNREACH;
859                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
860                 goto install_route;
861         }
862
863         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
864                 struct in6_addr *gw_addr;
865                 int gwa_type;
866
867                 gw_addr = &rtmsg->rtmsg_gateway;
868                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
869                 gwa_type = ipv6_addr_type(gw_addr);
870
871                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
872                         struct rt6_info *grt;
873
874                         /* IPv6 strictly inhibits using not link-local
875                            addresses as nexthop address.
876                            Otherwise, router will not able to send redirects.
877                            It is very good, but in some (rare!) circumstances
878                            (SIT, PtP, NBMA NOARP links) it is handy to allow
879                            some exceptions. --ANK
880                          */
881                         err = -EINVAL;
882                         if (!(gwa_type&IPV6_ADDR_UNICAST))
883                                 goto out;
884
                            /* Strict lookup of the route towards the gateway itself. */
885                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
886
887                         err = -EHOSTUNREACH;
888                         if (grt == NULL)
889                                 goto out;
890                         if (dev) {
891                                 if (dev != grt->rt6i_dev) {
892                                         dst_release(&grt->u.dst);
893                                         goto out;
894                                 }
895                         } else {
                                    /* No interface given: adopt the gateway route's
                                     * device, taking our own references. */
896                                 dev = grt->rt6i_dev;
897                                 idev = grt->rt6i_idev;
898                                 dev_hold(dev);
899                                 in6_dev_hold(grt->rt6i_idev);
900                         }
                            /* Accepted only if the gateway is not itself reached
                             * through another gateway. */
901                         if (!(grt->rt6i_flags&RTF_GATEWAY))
902                                 err = 0;
903                         dst_release(&grt->u.dst);
904
905                         if (err)
906                                 goto out;
907                 }
908                 err = -EINVAL;
909                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
910                         goto out;
911         }
912
913         err = -ENODEV;
914         if (dev == NULL)
915                 goto out;
916
917         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
918                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
919                 if (IS_ERR(rt->rt6i_nexthop)) {
920                         err = PTR_ERR(rt->rt6i_nexthop);
921                         rt->rt6i_nexthop = NULL;
922                         goto out;
923                 }
924         }
925
926         rt->rt6i_flags = rtmsg->rtmsg_flags;
927
928 install_route:
            /* Copy caller-supplied metrics; type 0 entries are skipped. */
929         if (rta && rta[RTA_METRICS-1]) {
930                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
931                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
932
933                 while (RTA_OK(attr, attrlen)) {
934                         unsigned flavor = attr->rta_type;
935                         if (flavor) {
936                                 if (flavor > RTAX_MAX) {
937                                         err = -EINVAL;
938                                         goto out;
939                                 }
940                                 rt->u.dst.metrics[flavor-1] =
941                                         *(u32 *)RTA_DATA(attr);
942                         }
943                         attr = RTA_NEXT(attr, attrlen);
944                 }
945         }
946
            /* Fill in defaults for metrics the caller left at zero. */
947         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
948                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
949         if (!rt->u.dst.metrics[RTAX_MTU-1])
950                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
951         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
952                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
            /* The dev/idev references now belong to the route. */
953         rt->u.dst.dev = dev;
954         rt->rt6i_idev = idev;
955         return ip6_ins_rt(rt, nlh, _rtattr, req);
956
            /* Error exit: undo whatever was acquired so far. */
957 out:
958         if (dev)
959                 dev_put(dev);
960         if (idev)
961                 in6_dev_put(idev);
962         if (rt)
963                 dst_free((struct dst_entry *) rt);
964         return err;
965 }
966
967 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
968 {
969         int err;
970
971         write_lock_bh(&rt6_lock);
972
973         err = fib6_del(rt, nlh, _rtattr, req);
974         dst_release(&rt->u.dst);
975
976         write_unlock_bh(&rt6_lock);
977
978         return err;
979 }
980
981 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
982 {
983         struct fib6_node *fn;
984         struct rt6_info *rt;
985         int err = -ESRCH;
986
987         read_lock_bh(&rt6_lock);
988
989         fn = fib6_locate(&ip6_routing_table,
990                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
991                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
992         
993         if (fn) {
994                 for (rt = fn->leaf; rt; rt = rt->u.next) {
995                         if (rtmsg->rtmsg_ifindex &&
996                             (rt->rt6i_dev == NULL ||
997                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
998                                 continue;
999                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1000                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1001                                 continue;
1002                         if (rtmsg->rtmsg_metric &&
1003                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1004                                 continue;
1005                         dst_hold(&rt->u.dst);
1006                         read_unlock_bh(&rt6_lock);
1007
1008                         return ip6_del_rt(rt, nlh, _rtattr, req);
1009                 }
1010         }
1011         read_unlock_bh(&rt6_lock);
1012
1013         return err;
1014 }
1015
1016 /*
1017  *      Handle redirects
1018  */
1019 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1020                   struct neighbour *neigh, u8 *lladdr, int on_link)
1021 {
1022         struct rt6_info *rt, *nrt;
1023
1024         /* Locate old route to this destination. */
1025         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1026
1027         if (rt == NULL)
1028                 return;
1029
1030         if (neigh->dev != rt->rt6i_dev)
1031                 goto out;
1032
1033         /*
1034          * Current route is on-link; redirect is always invalid.
1035          * 
1036          * Seems, previous statement is not true. It could
1037          * be node, which looks for us as on-link (f.e. proxy ndisc)
1038          * But then router serving it might decide, that we should
1039          * know truth 8)8) --ANK (980726).
1040          */
1041         if (!(rt->rt6i_flags&RTF_GATEWAY))
1042                 goto out;
1043
1044         /*
1045          *      RFC 2461 specifies that redirects should only be
1046          *      accepted if they come from the nexthop to the target.
1047          *      Due to the way default routers are chosen, this notion
1048          *      is a bit fuzzy and one might need to check all default
1049          *      routers.
1050          */
1051         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1052                 if (rt->rt6i_flags & RTF_DEFAULT) {
1053                         struct rt6_info *rt1;
1054
1055                         read_lock(&rt6_lock);
1056                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1057                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1058                                         dst_hold(&rt1->u.dst);
1059                                         dst_release(&rt->u.dst);
1060                                         read_unlock(&rt6_lock);
1061                                         rt = rt1;
1062                                         goto source_ok;
1063                                 }
1064                         }
1065                         read_unlock(&rt6_lock);
1066                 }
1067                 if (net_ratelimit())
1068                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1069                                "for redirect target\n");
1070                 goto out;
1071         }
1072
1073 source_ok:
1074
1075         /*
1076          *      We have finally decided to accept it.
1077          */
1078
1079         neigh_update(neigh, lladdr, NUD_STALE, 
1080                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1081                      NEIGH_UPDATE_F_OVERRIDE|
1082                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1083                                      NEIGH_UPDATE_F_ISROUTER))
1084                      );
1085
1086         /*
1087          * Redirect received -> path was valid.
1088          * Look, redirects are sent only in response to data packets,
1089          * so that this nexthop apparently is reachable. --ANK
1090          */
1091         dst_confirm(&rt->u.dst);
1092
1093         /* Duplicate redirect: silently ignore. */
1094         if (neigh == rt->u.dst.neighbour)
1095                 goto out;
1096
1097         nrt = ip6_rt_copy(rt);
1098         if (nrt == NULL)
1099                 goto out;
1100
1101         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1102         if (on_link)
1103                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1104
1105         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1106         nrt->rt6i_dst.plen = 128;
1107         nrt->u.dst.flags |= DST_HOST;
1108
1109         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1110         nrt->rt6i_nexthop = neigh_clone(neigh);
1111         /* Reset pmtu, it may be better */
1112         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1113         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1114
1115         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1116                 goto out;
1117
1118         if (rt->rt6i_flags&RTF_CACHE) {
1119                 ip6_del_rt(rt, NULL, NULL, NULL);
1120                 return;
1121         }
1122
1123 out:
1124         dst_release(&rt->u.dst);
1125         return;
1126 }
1127
1128 /*
1129  *      Handle ICMP "packet too big" messages
1130  *      i.e. Path MTU discovery
1131  */
1132
1133 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1134                         struct net_device *dev, u32 pmtu)
1135 {
1136         struct rt6_info *rt, *nrt;
1137         int allfrag = 0;
1138
1139         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1140         if (rt == NULL)
1141                 return;
1142
1143         if (pmtu >= dst_mtu(&rt->u.dst))
1144                 goto out;
1145
1146         if (pmtu < IPV6_MIN_MTU) {
1147                 /*
1148                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1149                  * MTU (1280) and a fragment header should always be included
1150                  * after a node receiving Too Big message reporting PMTU is
1151                  * less than the IPv6 Minimum Link MTU.
1152                  */
1153                 pmtu = IPV6_MIN_MTU;
1154                 allfrag = 1;
1155         }
1156
1157         /* New mtu received -> path was valid.
1158            They are sent only in response to data packets,
1159            so that this nexthop apparently is reachable. --ANK
1160          */
1161         dst_confirm(&rt->u.dst);
1162
1163         /* Host route. If it is static, it would be better
1164            not to override it, but add new one, so that
1165            when cache entry will expire old pmtu
1166            would return automatically.
1167          */
1168         if (rt->rt6i_flags & RTF_CACHE) {
1169                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1170                 if (allfrag)
1171                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1172                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1173                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1174                 goto out;
1175         }
1176
1177         /* Network route.
1178            Two cases are possible:
1179            1. It is connected route. Action: COW
1180            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1181          */
1182         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1183                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1184         else
1185                 nrt = rt6_alloc_clone(rt, daddr);
1186
1187         if (nrt) {
1188                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1189                 if (allfrag)
1190                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1191
1192                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1193                  * happened within 5 mins, the recommended timer is 10 mins.
1194                  * Here this route expiration time is set to ip6_rt_mtu_expires
1195                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1196                  * and detecting PMTU increase will be automatically happened.
1197                  */
1198                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1199                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1200
1201                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1202         }
1203 out:
1204         dst_release(&rt->u.dst);
1205 }
1206
1207 /*
1208  *      Misc support functions
1209  */
1210
1211 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1212 {
1213         struct rt6_info *rt = ip6_dst_alloc();
1214
1215         if (rt) {
1216                 rt->u.dst.input = ort->u.dst.input;
1217                 rt->u.dst.output = ort->u.dst.output;
1218
1219                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1220                 rt->u.dst.dev = ort->u.dst.dev;
1221                 if (rt->u.dst.dev)
1222                         dev_hold(rt->u.dst.dev);
1223                 rt->rt6i_idev = ort->rt6i_idev;
1224                 if (rt->rt6i_idev)
1225                         in6_dev_hold(rt->rt6i_idev);
1226                 rt->u.dst.lastuse = jiffies;
1227                 rt->rt6i_expires = 0;
1228
1229                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1230                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1231                 rt->rt6i_metric = 0;
1232
1233                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1234 #ifdef CONFIG_IPV6_SUBTREES
1235                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1236 #endif
1237         }
1238         return rt;
1239 }
1240
1241 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1242 {       
1243         struct rt6_info *rt;
1244         struct fib6_node *fn;
1245
1246         fn = &ip6_routing_table;
1247
1248         write_lock_bh(&rt6_lock);
1249         for (rt = fn->leaf; rt; rt=rt->u.next) {
1250                 if (dev == rt->rt6i_dev &&
1251                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1252                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1253                         break;
1254         }
1255         if (rt)
1256                 dst_hold(&rt->u.dst);
1257         write_unlock_bh(&rt6_lock);
1258         return rt;
1259 }
1260
1261 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1262                                      struct net_device *dev)
1263 {
1264         struct in6_rtmsg rtmsg;
1265
1266         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1267         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1268         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1269         rtmsg.rtmsg_metric = 1024;
1270         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1271
1272         rtmsg.rtmsg_ifindex = dev->ifindex;
1273
1274         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1275         return rt6_get_dflt_router(gwaddr, dev);
1276 }
1277
1278 void rt6_purge_dflt_routers(void)
1279 {
1280         struct rt6_info *rt;
1281
1282 restart:
1283         read_lock_bh(&rt6_lock);
1284         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1285                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1286                         dst_hold(&rt->u.dst);
1287
1288                         read_unlock_bh(&rt6_lock);
1289
1290                         ip6_del_rt(rt, NULL, NULL, NULL);
1291
1292                         goto restart;
1293                 }
1294         }
1295         read_unlock_bh(&rt6_lock);
1296 }
1297
1298 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1299 {
1300         struct in6_rtmsg rtmsg;
1301         int err;
1302
1303         switch(cmd) {
1304         case SIOCADDRT:         /* Add a route */
1305         case SIOCDELRT:         /* Delete a route */
1306                 if (!capable(CAP_NET_ADMIN))
1307                         return -EPERM;
1308                 err = copy_from_user(&rtmsg, arg,
1309                                      sizeof(struct in6_rtmsg));
1310                 if (err)
1311                         return -EFAULT;
1312                         
1313                 rtnl_lock();
1314                 switch (cmd) {
1315                 case SIOCADDRT:
1316                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1317                         break;
1318                 case SIOCDELRT:
1319                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1320                         break;
1321                 default:
1322                         err = -EINVAL;
1323                 }
1324                 rtnl_unlock();
1325
1326                 return err;
1327         };
1328
1329         return -EINVAL;
1330 }
1331
1332 /*
1333  *      Drop the packet on the floor
1334  */
1335
1336 static int ip6_pkt_discard(struct sk_buff *skb)
1337 {
1338         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1339         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1340         kfree_skb(skb);
1341         return 0;
1342 }
1343
1344 static int ip6_pkt_discard_out(struct sk_buff *skb)
1345 {
1346         skb->dev = skb->dst->dev;
1347         return ip6_pkt_discard(skb);
1348 }
1349
1350 /*
1351  *      Allocate a dst for local (unicast / anycast) address.
1352  */
1353
1354 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1355                                     const struct in6_addr *addr,
1356                                     int anycast)
1357 {
1358         struct rt6_info *rt = ip6_dst_alloc();
1359
1360         if (rt == NULL)
1361                 return ERR_PTR(-ENOMEM);
1362
1363         dev_hold(&loopback_dev);
1364         in6_dev_hold(idev);
1365
1366         rt->u.dst.flags = DST_HOST;
1367         rt->u.dst.input = ip6_input;
1368         rt->u.dst.output = ip6_output;
1369         rt->rt6i_dev = &loopback_dev;
1370         rt->rt6i_idev = idev;
1371         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1372         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1373         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1374         rt->u.dst.obsolete = -1;
1375
1376         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1377         if (anycast)
1378                 rt->rt6i_flags |= RTF_ANYCAST;
1379         else
1380                 rt->rt6i_flags |= RTF_LOCAL;
1381         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1382         if (rt->rt6i_nexthop == NULL) {
1383                 dst_free((struct dst_entry *) rt);
1384                 return ERR_PTR(-ENOMEM);
1385         }
1386
1387         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1388         rt->rt6i_dst.plen = 128;
1389
1390         atomic_set(&rt->u.dst.__refcnt, 1);
1391
1392         return rt;
1393 }
1394
1395 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1396 {
1397         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1398             rt != &ip6_null_entry) {
1399                 RT6_TRACE("deleted by ifdown %p\n", rt);
1400                 return -1;
1401         }
1402         return 0;
1403 }
1404
1405 void rt6_ifdown(struct net_device *dev)
1406 {
1407         write_lock_bh(&rt6_lock);
1408         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1409         write_unlock_bh(&rt6_lock);
1410 }
1411
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
1417
1418 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1419 {
1420         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1421         struct inet6_dev *idev;
1422
1423         /* In IPv6 pmtu discovery is not optional,
1424            so that RTAX_MTU lock cannot disable it.
1425            We still use this lock to block changes
1426            caused by addrconf/ndisc.
1427         */
1428
1429         idev = __in6_dev_get(arg->dev);
1430         if (idev == NULL)
1431                 return 0;
1432
1433         /* For administrative MTU increase, there is no way to discover
1434            IPv6 PMTU increase, so PMTU increase should be updated here.
1435            Since RFC 1981 doesn't include administrative MTU increase
1436            update PMTU increase is a MUST. (i.e. jumbo frame)
1437          */
1438         /*
1439            If new MTU is less than route PMTU, this new MTU will be the
1440            lowest MTU in the path, update the route PMTU to reflect PMTU
1441            decreases; if new MTU is greater than route PMTU, and the
1442            old MTU is the lowest MTU in the path, update the route PMTU
1443            to reflect the increase. In this case if the other nodes' MTU
1444            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1445            PMTU discouvery.
1446          */
1447         if (rt->rt6i_dev == arg->dev &&
1448             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1449             (dst_mtu(&rt->u.dst) > arg->mtu ||
1450              (dst_mtu(&rt->u.dst) < arg->mtu &&
1451               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1452                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1453         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1454         return 0;
1455 }
1456
1457 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1458 {
1459         struct rt6_mtu_change_arg arg;
1460
1461         arg.dev = dev;
1462         arg.mtu = mtu;
1463         read_lock_bh(&rt6_lock);
1464         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1465         read_unlock_bh(&rt6_lock);
1466 }
1467
1468 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1469                               struct in6_rtmsg *rtmsg)
1470 {
1471         memset(rtmsg, 0, sizeof(*rtmsg));
1472
1473         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1474         rtmsg->rtmsg_src_len = r->rtm_src_len;
1475         rtmsg->rtmsg_flags = RTF_UP;
1476         if (r->rtm_type == RTN_UNREACHABLE)
1477                 rtmsg->rtmsg_flags |= RTF_REJECT;
1478
1479         if (rta[RTA_GATEWAY-1]) {
1480                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1481                         return -EINVAL;
1482                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1483                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1484         }
1485         if (rta[RTA_DST-1]) {
1486                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1487                         return -EINVAL;
1488                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1489         }
1490         if (rta[RTA_SRC-1]) {
1491                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1492                         return -EINVAL;
1493                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1494         }
1495         if (rta[RTA_OIF-1]) {
1496                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1497                         return -EINVAL;
1498                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1499         }
1500         if (rta[RTA_PRIORITY-1]) {
1501                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1502                         return -EINVAL;
1503                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1504         }
1505         return 0;
1506 }
1507
1508 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1509 {
1510         struct rtmsg *r = NLMSG_DATA(nlh);
1511         struct in6_rtmsg rtmsg;
1512
1513         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1514                 return -EINVAL;
1515         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1516 }
1517
1518 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1519 {
1520         struct rtmsg *r = NLMSG_DATA(nlh);
1521         struct in6_rtmsg rtmsg;
1522
1523         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1524                 return -EINVAL;
1525         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1526 }
1527
/* Per-call state shared between inet6_dump_fib() and rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written to */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1533
1534 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1535                          struct in6_addr *dst, struct in6_addr *src,
1536                          int iif, int type, u32 pid, u32 seq,
1537                          int prefix, unsigned int flags)
1538 {
1539         struct rtmsg *rtm;
1540         struct nlmsghdr  *nlh;
1541         unsigned char    *b = skb->tail;
1542         struct rta_cacheinfo ci;
1543
1544         if (prefix) {   /* user wants prefix routes only */
1545                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1546                         /* success since this is not a prefix route */
1547                         return 1;
1548                 }
1549         }
1550
1551         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1552         rtm = NLMSG_DATA(nlh);
1553         rtm->rtm_family = AF_INET6;
1554         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1555         rtm->rtm_src_len = rt->rt6i_src.plen;
1556         rtm->rtm_tos = 0;
1557         rtm->rtm_table = RT_TABLE_MAIN;
1558         if (rt->rt6i_flags&RTF_REJECT)
1559                 rtm->rtm_type = RTN_UNREACHABLE;
1560         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1561                 rtm->rtm_type = RTN_LOCAL;
1562         else
1563                 rtm->rtm_type = RTN_UNICAST;
1564         rtm->rtm_flags = 0;
1565         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1566         rtm->rtm_protocol = rt->rt6i_protocol;
1567         if (rt->rt6i_flags&RTF_DYNAMIC)
1568                 rtm->rtm_protocol = RTPROT_REDIRECT;
1569         else if (rt->rt6i_flags & RTF_ADDRCONF)
1570                 rtm->rtm_protocol = RTPROT_KERNEL;
1571         else if (rt->rt6i_flags&RTF_DEFAULT)
1572                 rtm->rtm_protocol = RTPROT_RA;
1573
1574         if (rt->rt6i_flags&RTF_CACHE)
1575                 rtm->rtm_flags |= RTM_F_CLONED;
1576
1577         if (dst) {
1578                 RTA_PUT(skb, RTA_DST, 16, dst);
1579                 rtm->rtm_dst_len = 128;
1580         } else if (rtm->rtm_dst_len)
1581                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1582 #ifdef CONFIG_IPV6_SUBTREES
1583         if (src) {
1584                 RTA_PUT(skb, RTA_SRC, 16, src);
1585                 rtm->rtm_src_len = 128;
1586         } else if (rtm->rtm_src_len)
1587                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1588 #endif
1589         if (iif)
1590                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1591         else if (dst) {
1592                 struct in6_addr saddr_buf;
1593                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1594                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1595         }
1596         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1597                 goto rtattr_failure;
1598         if (rt->u.dst.neighbour)
1599                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1600         if (rt->u.dst.dev)
1601                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1602         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1603         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1604         if (rt->rt6i_expires)
1605                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1606         else
1607                 ci.rta_expires = 0;
1608         ci.rta_used = rt->u.dst.__use;
1609         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1610         ci.rta_error = rt->u.dst.error;
1611         ci.rta_id = 0;
1612         ci.rta_ts = 0;
1613         ci.rta_tsage = 0;
1614         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1615         nlh->nlmsg_len = skb->tail - b;
1616         return skb->len;
1617
1618 nlmsg_failure:
1619 rtattr_failure:
1620         skb_trim(skb, b - skb->data);
1621         return -1;
1622 }
1623
1624 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1625 {
1626         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1627         int prefix;
1628
1629         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1630                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1631                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1632         } else
1633                 prefix = 0;
1634
1635         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1636                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1637                      prefix, NLM_F_MULTI);
1638 }
1639
1640 static int fib6_dump_node(struct fib6_walker_t *w)
1641 {
1642         int res;
1643         struct rt6_info *rt;
1644
1645         for (rt = w->leaf; rt; rt = rt->u.next) {
1646                 res = rt6_dump_route(rt, w->args);
1647                 if (res < 0) {
1648                         /* Frame is full, suspend walking */
1649                         w->leaf = rt;
1650                         return 1;
1651                 }
1652                 BUG_TRAP(res!=0);
1653         }
1654         w->leaf = NULL;
1655         return 0;
1656 }
1657
1658 static void fib6_dump_end(struct netlink_callback *cb)
1659 {
1660         struct fib6_walker_t *w = (void*)cb->args[0];
1661
1662         if (w) {
1663                 cb->args[0] = 0;
1664                 fib6_walker_unlink(w);
1665                 kfree(w);
1666         }
1667         cb->done = (void*)cb->args[1];
1668         cb->args[1] = 0;
1669 }
1670
1671 static int fib6_dump_done(struct netlink_callback *cb)
1672 {
1673         fib6_dump_end(cb);
1674         return cb->done ? cb->done(cb) : 0;
1675 }
1676
1677 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1678 {
1679         struct rt6_rtnl_dump_arg arg;
1680         struct fib6_walker_t *w;
1681         int res;
1682
1683         arg.skb = skb;
1684         arg.cb = cb;
1685
1686         w = (void*)cb->args[0];
1687         if (w == NULL) {
1688                 /* New dump:
1689                  * 
1690                  * 1. hook callback destructor.
1691                  */
1692                 cb->args[1] = (long)cb->done;
1693                 cb->done = fib6_dump_done;
1694
1695                 /*
1696                  * 2. allocate and initialize walker.
1697                  */
1698                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1699                 if (w == NULL)
1700                         return -ENOMEM;
1701                 RT6_TRACE("dump<%p", w);
1702                 memset(w, 0, sizeof(*w));
1703                 w->root = &ip6_routing_table;
1704                 w->func = fib6_dump_node;
1705                 w->args = &arg;
1706                 cb->args[0] = (long)w;
1707                 read_lock_bh(&rt6_lock);
1708                 res = fib6_walk(w);
1709                 read_unlock_bh(&rt6_lock);
1710         } else {
1711                 w->args = &arg;
1712                 read_lock_bh(&rt6_lock);
1713                 res = fib6_walk_continue(w);
1714                 read_unlock_bh(&rt6_lock);
1715         }
1716 #if RT6_DEBUG >= 3
1717         if (res <= 0 && skb->len == 0)
1718                 RT6_TRACE("%p>dump end\n", w);
1719 #endif
1720         res = res < 0 ? res : skb->len;
1721         /* res < 0 is an error. (really, impossible)
1722            res == 0 means that dump is complete, but skb still can contain data.
1723            res > 0 dump is not complete, but frame is full.
1724          */
1725         /* Destroy walker, if dump of this table is complete. */
1726         if (res <= 0)
1727                 fib6_dump_end(cb);
1728         return res;
1729 }
1730
1731 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1732 {
1733         struct rtattr **rta = arg;
1734         int iif = 0;
1735         int err = -ENOBUFS;
1736         struct sk_buff *skb;
1737         struct flowi fl;
1738         struct rt6_info *rt;
1739
1740         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1741         if (skb == NULL)
1742                 goto out;
1743
1744         /* Reserve room for dummy headers, this skb can pass
1745            through good chunk of routing engine.
1746          */
1747         skb->mac.raw = skb->data;
1748         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1749
1750         memset(&fl, 0, sizeof(fl));
1751         if (rta[RTA_SRC-1])
1752                 ipv6_addr_copy(&fl.fl6_src,
1753                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1754         if (rta[RTA_DST-1])
1755                 ipv6_addr_copy(&fl.fl6_dst,
1756                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1757
1758         if (rta[RTA_IIF-1])
1759                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1760
1761         if (iif) {
1762                 struct net_device *dev;
1763                 dev = __dev_get_by_index(iif);
1764                 if (!dev) {
1765                         err = -ENODEV;
1766                         goto out_free;
1767                 }
1768         }
1769
1770         fl.oif = 0;
1771         if (rta[RTA_OIF-1])
1772                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1773
1774         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1775
1776         skb->dst = &rt->u.dst;
1777
1778         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1779         err = rt6_fill_node(skb, rt, 
1780                             &fl.fl6_dst, &fl.fl6_src,
1781                             iif,
1782                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1783                             nlh->nlmsg_seq, 0, 0);
1784         if (err < 0) {
1785                 err = -EMSGSIZE;
1786                 goto out_free;
1787         }
1788
1789         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1790         if (err > 0)
1791                 err = 0;
1792 out:
1793         return err;
1794 out_free:
1795         kfree_skb(skb);
1796         goto out;       
1797 }
1798
1799 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1800                         struct netlink_skb_parms *req)
1801 {
1802         struct sk_buff *skb;
1803         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1804         u32 pid = current->pid;
1805         u32 seq = 0;
1806
1807         if (req)
1808                 pid = req->pid;
1809         if (nlh)
1810                 seq = nlh->nlmsg_seq;
1811         
1812         skb = alloc_skb(size, gfp_any());
1813         if (!skb) {
1814                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1815                 return;
1816         }
1817         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1818                 kfree_skb(skb);
1819                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1820                 return;
1821         }
1822         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1823         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1824 }
1825
1826 /*
1827  *      /proc
1828  */
1829
1830 #ifdef CONFIG_PROC_FS
1831
/*
 * Size in bytes of one record as formatted by rt6_info_route(): 32 hex
 * digits for each of the destination, source and next-hop addresses, 4 bytes
 * for each of the two prefix-length fields, and the remaining 46 bytes for
 * the trailing metric/refcnt/use/flags/device fields plus the newline
 * (this assumes the device name fits the 8-character field).
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State carried through the tree walk that produces the route listing. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer records are formatted into */
	int offset;	/* byte offset the reader asked to start from */
	int length;	/* number of bytes the reader asked for */
	int skip;	/* whole records skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1842
1843 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1844 {
1845         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1846         int i;
1847
1848         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1849                 arg->skip++;
1850                 return 0;
1851         }
1852
1853         if (arg->len >= arg->length)
1854                 return 0;
1855
1856         for (i=0; i<16; i++) {
1857                 sprintf(arg->buffer + arg->len, "%02x",
1858                         rt->rt6i_dst.addr.s6_addr[i]);
1859                 arg->len += 2;
1860         }
1861         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1862                             rt->rt6i_dst.plen);
1863
1864 #ifdef CONFIG_IPV6_SUBTREES
1865         for (i=0; i<16; i++) {
1866                 sprintf(arg->buffer + arg->len, "%02x",
1867                         rt->rt6i_src.addr.s6_addr[i]);
1868                 arg->len += 2;
1869         }
1870         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1871                             rt->rt6i_src.plen);
1872 #else
1873         sprintf(arg->buffer + arg->len,
1874                 "00000000000000000000000000000000 00 ");
1875         arg->len += 36;
1876 #endif
1877
1878         if (rt->rt6i_nexthop) {
1879                 for (i=0; i<16; i++) {
1880                         sprintf(arg->buffer + arg->len, "%02x",
1881                                 rt->rt6i_nexthop->primary_key[i]);
1882                         arg->len += 2;
1883                 }
1884         } else {
1885                 sprintf(arg->buffer + arg->len,
1886                         "00000000000000000000000000000000");
1887                 arg->len += 32;
1888         }
1889         arg->len += sprintf(arg->buffer + arg->len,
1890                             " %08x %08x %08x %08x %8s\n",
1891                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1892                             rt->u.dst.__use, rt->rt6i_flags, 
1893                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1894         return 0;
1895 }
1896
1897 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1898 {
1899         struct rt6_proc_arg arg;
1900         arg.buffer = buffer;
1901         arg.offset = offset;
1902         arg.length = length;
1903         arg.skip = 0;
1904         arg.len = 0;
1905
1906         read_lock_bh(&rt6_lock);
1907         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1908         read_unlock_bh(&rt6_lock);
1909
1910         *start = buffer;
1911         if (offset)
1912                 *start += offset % RT6_INFO_LEN;
1913
1914         arg.len -= offset % RT6_INFO_LEN;
1915
1916         if (arg.len > length)
1917                 arg.len = length;
1918         if (arg.len < 0)
1919                 arg.len = 0;
1920
1921         return arg.len;
1922 }
1923
1924 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1925 {
1926         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1927                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1928                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1929                       rt6_stats.fib_rt_cache,
1930                       atomic_read(&ip6_dst_ops.entries),
1931                       rt6_stats.fib_discarded_routes);
1932
1933         return 0;
1934 }
1935
1936 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1937 {
1938         return single_open(file, rt6_stats_seq_show, NULL);
1939 }
1940
1941 static struct file_operations rt6_stats_seq_fops = {
1942         .owner   = THIS_MODULE,
1943         .open    = rt6_stats_seq_open,
1944         .read    = seq_read,
1945         .llseek  = seq_lseek,
1946         .release = single_release,
1947 };
1948 #endif  /* CONFIG_PROC_FS */
1949
1950 #ifdef CONFIG_SYSCTL
1951
1952 static int flush_delay;
1953
1954 static
1955 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1956                               void __user *buffer, size_t *lenp, loff_t *ppos)
1957 {
1958         if (write) {
1959                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1960                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1961                 return 0;
1962         } else
1963                 return -EINVAL;
1964 }
1965
1966 ctl_table ipv6_route_table[] = {
1967         {
1968                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1969                 .procname       =       "flush",
1970                 .data           =       &flush_delay,
1971                 .maxlen         =       sizeof(int),
1972                 .mode           =       0200,
1973                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1974         },
1975         {
1976                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1977                 .procname       =       "gc_thresh",
1978                 .data           =       &ip6_dst_ops.gc_thresh,
1979                 .maxlen         =       sizeof(int),
1980                 .mode           =       0644,
1981                 .proc_handler   =       &proc_dointvec,
1982         },
1983         {
1984                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1985                 .procname       =       "max_size",
1986                 .data           =       &ip6_rt_max_size,
1987                 .maxlen         =       sizeof(int),
1988                 .mode           =       0644,
1989                 .proc_handler   =       &proc_dointvec,
1990         },
1991         {
1992                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1993                 .procname       =       "gc_min_interval",
1994                 .data           =       &ip6_rt_gc_min_interval,
1995                 .maxlen         =       sizeof(int),
1996                 .mode           =       0644,
1997                 .proc_handler   =       &proc_dointvec_jiffies,
1998                 .strategy       =       &sysctl_jiffies,
1999         },
2000         {
2001                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2002                 .procname       =       "gc_timeout",
2003                 .data           =       &ip6_rt_gc_timeout,
2004                 .maxlen         =       sizeof(int),
2005                 .mode           =       0644,
2006                 .proc_handler   =       &proc_dointvec_jiffies,
2007                 .strategy       =       &sysctl_jiffies,
2008         },
2009         {
2010                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2011                 .procname       =       "gc_interval",
2012                 .data           =       &ip6_rt_gc_interval,
2013                 .maxlen         =       sizeof(int),
2014                 .mode           =       0644,
2015                 .proc_handler   =       &proc_dointvec_jiffies,
2016                 .strategy       =       &sysctl_jiffies,
2017         },
2018         {
2019                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2020                 .procname       =       "gc_elasticity",
2021                 .data           =       &ip6_rt_gc_elasticity,
2022                 .maxlen         =       sizeof(int),
2023                 .mode           =       0644,
2024                 .proc_handler   =       &proc_dointvec_jiffies,
2025                 .strategy       =       &sysctl_jiffies,
2026         },
2027         {
2028                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2029                 .procname       =       "mtu_expires",
2030                 .data           =       &ip6_rt_mtu_expires,
2031                 .maxlen         =       sizeof(int),
2032                 .mode           =       0644,
2033                 .proc_handler   =       &proc_dointvec_jiffies,
2034                 .strategy       =       &sysctl_jiffies,
2035         },
2036         {
2037                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2038                 .procname       =       "min_adv_mss",
2039                 .data           =       &ip6_rt_min_advmss,
2040                 .maxlen         =       sizeof(int),
2041                 .mode           =       0644,
2042                 .proc_handler   =       &proc_dointvec_jiffies,
2043                 .strategy       =       &sysctl_jiffies,
2044         },
2045         {
2046                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2047                 .procname       =       "gc_min_interval_ms",
2048                 .data           =       &ip6_rt_gc_min_interval,
2049                 .maxlen         =       sizeof(int),
2050                 .mode           =       0644,
2051                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2052                 .strategy       =       &sysctl_ms_jiffies,
2053         },
2054         { .ctl_name = 0 }
2055 };
2056
2057 #endif
2058
2059 void __init ip6_route_init(void)
2060 {
2061         struct proc_dir_entry *p;
2062
2063         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2064                                                      sizeof(struct rt6_info),
2065                                                      0, SLAB_HWCACHE_ALIGN,
2066                                                      NULL, NULL);
2067         if (!ip6_dst_ops.kmem_cachep)
2068                 panic("cannot create ip6_dst_cache");
2069
2070         fib6_init();
2071 #ifdef  CONFIG_PROC_FS
2072         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2073         if (p)
2074                 p->owner = THIS_MODULE;
2075
2076         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2077 #endif
2078 #ifdef CONFIG_XFRM
2079         xfrm6_init();
2080 #endif
2081 }
2082
2083 void ip6_route_cleanup(void)
2084 {
2085 #ifdef CONFIG_PROC_FS
2086         proc_net_remove("ipv6_route");
2087         proc_net_remove("rt6_stats");
2088 #endif
2089 #ifdef CONFIG_XFRM
2090         xfrm6_fini();
2091 #endif
2092         rt6_ifdown(NULL);
2093         fib6_gc_cleanup();
2094         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2095 }