Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 (or more) to compile
 * in the printk-based RDBG()/RT6_TRACE() tracing; below that level both
 * macros expand to no-ops.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *,
88                                        struct net_device *dev, int how);
89 static int               ip6_dst_gc(void);
90
91 static int              ip6_pkt_discard(struct sk_buff *skb);
92 static int              ip6_pkt_discard_out(struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96 static struct dst_ops ip6_dst_ops = {
97         .family                 =       AF_INET6,
98         .protocol               =       __constant_htons(ETH_P_IPV6),
99         .gc                     =       ip6_dst_gc,
100         .gc_thresh              =       1024,
101         .check                  =       ip6_dst_check,
102         .destroy                =       ip6_dst_destroy,
103         .ifdown                 =       ip6_dst_ifdown,
104         .negative_advice        =       ip6_negative_advice,
105         .link_failure           =       ip6_link_failure,
106         .update_pmtu            =       ip6_rt_update_pmtu,
107         .entry_size             =       sizeof(struct rt6_info),
108 };
109
110 struct rt6_info ip6_null_entry = {
111         .u = {
112                 .dst = {
113                         .__refcnt       = ATOMIC_INIT(1),
114                         .__use          = 1,
115                         .dev            = &loopback_dev,
116                         .obsolete       = -1,
117                         .error          = -ENETUNREACH,
118                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
119                         .input          = ip6_pkt_discard,
120                         .output         = ip6_pkt_discard_out,
121                         .ops            = &ip6_dst_ops,
122                         .path           = (struct dst_entry*)&ip6_null_entry,
123                 }
124         },
125         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
126         .rt6i_metric    = ~(u32) 0,
127         .rt6i_ref       = ATOMIC_INIT(1),
128 };
129
130 struct fib6_node ip6_routing_table = {
131         .leaf           = &ip6_null_entry,
132         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134
/*
 * Reader/writer lock guarding the whole IPv6 FIB.  Lookups in this file
 * take it for reading, insertions for writing, always with the _bh
 * variants.
 */
DEFINE_RWLOCK(rt6_lock);
138
139
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148         struct rt6_info *rt = (struct rt6_info *)dst;
149         struct inet6_dev *idev = rt->rt6i_idev;
150
151         if (idev != NULL) {
152                 rt->rt6i_idev = NULL;
153                 in6_dev_put(idev);
154         }       
155 }
156
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158                            int how)
159 {
160         struct rt6_info *rt = (struct rt6_info *)dst;
161         struct inet6_dev *idev = rt->rt6i_idev;
162
163         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165                 if (loopback_idev != NULL) {
166                         rt->rt6i_idev = loopback_idev;
167                         in6_dev_put(idev);
168                 }
169         }
170 }
171
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174         return (rt->rt6i_flags & RTF_EXPIRES &&
175                 time_after(jiffies, rt->rt6i_expires));
176 }
177
178 /*
179  *      Route lookup. Any rt6_lock is implied.
180  */
181
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183                                                     int oif,
184                                                     int strict)
185 {
186         struct rt6_info *local = NULL;
187         struct rt6_info *sprt;
188
189         if (oif) {
190                 for (sprt = rt; sprt; sprt = sprt->u.next) {
191                         struct net_device *dev = sprt->rt6i_dev;
192                         if (dev->ifindex == oif)
193                                 return sprt;
194                         if (dev->flags & IFF_LOOPBACK) {
195                                 if (sprt->rt6i_idev == NULL ||
196                                     sprt->rt6i_idev->dev->ifindex != oif) {
197                                         if (strict && oif)
198                                                 continue;
199                                         if (local && (!oif || 
200                                                       local->rt6i_idev->dev->ifindex == oif))
201                                                 continue;
202                                 }
203                                 local = sprt;
204                         }
205                 }
206
207                 if (local)
208                         return local;
209
210                 if (strict)
211                         return &ip6_null_entry;
212         }
213         return rt;
214 }
215
/*
 *      The default router chosen most recently, guarded by rt6_dflt_lock.
 *      rt6_best_dflt() takes the lock without the _bh suffix because its
 *      callers already run with BH disabled.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224         spin_lock_bh(&rt6_dflt_lock);
225         if (rt == NULL || rt == rt6_dflt_pointer) {
226                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227                 rt6_dflt_pointer = NULL;
228         }
229         spin_unlock_bh(&rt6_dflt_lock);
230 }
231
232 /* Default Router Selection (RFC 2461 6.3.6) */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235         struct rt6_info *match = NULL;
236         struct rt6_info *sprt;
237         int mpri = 0;
238
239         for (sprt = rt; sprt; sprt = sprt->u.next) {
240                 struct neighbour *neigh;
241                 int m = 0;
242
243                 if (!oif ||
244                     (sprt->rt6i_dev &&
245                      sprt->rt6i_dev->ifindex == oif))
246                         m += 8;
247
248                 if (rt6_check_expired(sprt))
249                         continue;
250
251                 if (sprt == rt6_dflt_pointer)
252                         m += 4;
253
254                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255                         read_lock_bh(&neigh->lock);
256                         switch (neigh->nud_state) {
257                         case NUD_REACHABLE:
258                                 m += 3;
259                                 break;
260
261                         case NUD_STALE:
262                         case NUD_DELAY:
263                         case NUD_PROBE:
264                                 m += 2;
265                                 break;
266
267                         case NUD_NOARP:
268                         case NUD_PERMANENT:
269                                 m += 1;
270                                 break;
271
272                         case NUD_INCOMPLETE:
273                         default:
274                                 read_unlock_bh(&neigh->lock);
275                                 continue;
276                         }
277                         read_unlock_bh(&neigh->lock);
278                 } else {
279                         continue;
280                 }
281
282                 if (m > mpri || m >= 12) {
283                         match = sprt;
284                         mpri = m;
285                         if (m >= 12) {
286                                 /* we choose the last default router if it
287                                  * is in (probably) reachable state.
288                                  * If route changed, we should do pmtu
289                                  * discovery. --yoshfuji
290                                  */
291                                 break;
292                         }
293                 }
294         }
295
296         spin_lock(&rt6_dflt_lock);
297         if (!match) {
298                 /*
299                  *      No default routers are known to be reachable.
300                  *      SHOULD round robin
301                  */
302                 if (rt6_dflt_pointer) {
303                         for (sprt = rt6_dflt_pointer->u.next;
304                              sprt; sprt = sprt->u.next) {
305                                 if (sprt->u.dst.obsolete <= 0 &&
306                                     sprt->u.dst.error == 0 &&
307                                     !rt6_check_expired(sprt)) {
308                                         match = sprt;
309                                         break;
310                                 }
311                         }
312                         for (sprt = rt;
313                              !match && sprt;
314                              sprt = sprt->u.next) {
315                                 if (sprt->u.dst.obsolete <= 0 &&
316                                     sprt->u.dst.error == 0 &&
317                                     !rt6_check_expired(sprt)) {
318                                         match = sprt;
319                                         break;
320                                 }
321                                 if (sprt == rt6_dflt_pointer)
322                                         break;
323                         }
324                 }
325         }
326
327         if (match) {
328                 if (rt6_dflt_pointer != match)
329                         RT6_TRACE("changed default router: %p->%p\n",
330                                   rt6_dflt_pointer, match);
331                 rt6_dflt_pointer = match;
332         }
333         spin_unlock(&rt6_dflt_lock);
334
335         if (!match) {
336                 /*
337                  * Last Resort: if no default routers found, 
338                  * use addrconf default route.
339                  * We don't record this route.
340                  */
341                 for (sprt = ip6_routing_table.leaf;
342                      sprt; sprt = sprt->u.next) {
343                         if (!rt6_check_expired(sprt) &&
344                             (sprt->rt6i_flags & RTF_DEFAULT) &&
345                             (!oif ||
346                              (sprt->rt6i_dev &&
347                               sprt->rt6i_dev->ifindex == oif))) {
348                                 match = sprt;
349                                 break;
350                         }
351                 }
352                 if (!match) {
353                         /* no default route.  give up. */
354                         match = &ip6_null_entry;
355                 }
356         }
357
358         return match;
359 }
360
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362                             int oif, int strict)
363 {
364         struct fib6_node *fn;
365         struct rt6_info *rt;
366
367         read_lock_bh(&rt6_lock);
368         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369         rt = rt6_device_match(fn->leaf, oif, strict);
370         dst_hold(&rt->u.dst);
371         rt->u.dst.__use++;
372         read_unlock_bh(&rt6_lock);
373
374         rt->u.dst.lastuse = jiffies;
375         if (rt->u.dst.error == 0)
376                 return rt;
377         dst_release(&rt->u.dst);
378         return NULL;
379 }
380
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388                 void *_rtattr, struct netlink_skb_parms *req)
389 {
390         int err;
391
392         write_lock_bh(&rt6_lock);
393         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394         write_unlock_bh(&rt6_lock);
395
396         return err;
397 }
398
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400    with dst->error set to errno value.
401  */
402
403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
404                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
405 {
406         int err;
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417                         if (rt->rt6i_dst.plen != 128 &&
418                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419                                 rt->rt6i_flags |= RTF_ANYCAST;
420                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
421                 }
422
423                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424                 rt->rt6i_dst.plen = 128;
425                 rt->rt6i_flags |= RTF_CACHE;
426                 rt->u.dst.flags |= DST_HOST;
427
428 #ifdef CONFIG_IPV6_SUBTREES
429                 if (rt->rt6i_src.plen && saddr) {
430                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431                         rt->rt6i_src.plen = 128;
432                 }
433 #endif
434
435                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437                 dst_hold(&rt->u.dst);
438
439                 err = ip6_ins_rt(rt, NULL, NULL, req);
440                 if (err == 0)
441                         return rt;
442
443                 rt->u.dst.error = err;
444
445                 return rt;
446         }
447         dst_hold(&ip6_null_entry.u.dst);
448         return &ip6_null_entry;
449 }
450
/*
 * Used by the route lookup functions after a device match.  If a strict
 * lookup ended on the null entry, climb towards the root of the tree: on
 * reaching a root node, take a reference on the current route and jump to
 * the caller's "out" label; on reaching a node that carries route info,
 * jump to the caller's "restart" label.  Expects "rt", "fn" and "strict"
 * in the enclosing scope.
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry && strict) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) { \
                                dst_hold(&rt->u.dst); \
                                goto out; \
                        } \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
462
463
464 void ip6_route_input(struct sk_buff *skb)
465 {
466         struct fib6_node *fn;
467         struct rt6_info *rt;
468         int strict;
469         int attempts = 3;
470
471         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
472
473 relookup:
474         read_lock_bh(&rt6_lock);
475
476         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
477                          &skb->nh.ipv6h->saddr);
478
479 restart:
480         rt = fn->leaf;
481
482         if ((rt->rt6i_flags & RTF_CACHE)) {
483                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
484                 BACKTRACK();
485                 dst_hold(&rt->u.dst);
486                 goto out;
487         }
488
489         rt = rt6_device_match(rt, skb->dev->ifindex, strict);
490         BACKTRACK();
491
492         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
493                 struct rt6_info *nrt;
494                 dst_hold(&rt->u.dst);
495                 read_unlock_bh(&rt6_lock);
496
497                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
498                               &skb->nh.ipv6h->saddr,
499                               &NETLINK_CB(skb));
500
501                 dst_release(&rt->u.dst);
502                 rt = nrt;
503
504                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
505                         goto out2;
506
507                 /* Race condition! In the gap, when rt6_lock was
508                    released someone could insert this route.  Relookup.
509                 */
510                 dst_release(&rt->u.dst);
511                 goto relookup;
512         }
513         dst_hold(&rt->u.dst);
514
515 out:
516         read_unlock_bh(&rt6_lock);
517 out2:
518         rt->u.dst.lastuse = jiffies;
519         rt->u.dst.__use++;
520         skb->dst = (struct dst_entry *) rt;
521 }
522
523 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
524 {
525         struct fib6_node *fn;
526         struct rt6_info *rt;
527         int strict;
528         int attempts = 3;
529
530         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
531
532 relookup:
533         read_lock_bh(&rt6_lock);
534
535         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
536
537 restart:
538         rt = fn->leaf;
539
540         if ((rt->rt6i_flags & RTF_CACHE)) {
541                 rt = rt6_device_match(rt, fl->oif, strict);
542                 BACKTRACK();
543                 dst_hold(&rt->u.dst);
544                 goto out;
545         }
546         if (rt->rt6i_flags & RTF_DEFAULT) {
547                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
548                         rt = rt6_best_dflt(rt, fl->oif);
549         } else {
550                 rt = rt6_device_match(rt, fl->oif, strict);
551                 BACKTRACK();
552         }
553
554         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
555                 struct rt6_info *nrt;
556                 dst_hold(&rt->u.dst);
557                 read_unlock_bh(&rt6_lock);
558
559                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
560
561                 dst_release(&rt->u.dst);
562                 rt = nrt;
563
564                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
565                         goto out2;
566
567                 /* Race condition! In the gap, when rt6_lock was
568                    released someone could insert this route.  Relookup.
569                 */
570                 dst_release(&rt->u.dst);
571                 goto relookup;
572         }
573         dst_hold(&rt->u.dst);
574
575 out:
576         read_unlock_bh(&rt6_lock);
577 out2:
578         rt->u.dst.lastuse = jiffies;
579         rt->u.dst.__use++;
580         return &rt->u.dst;
581 }
582
583
584 /*
585  *      Destination cache support functions
586  */
587
588 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
589 {
590         struct rt6_info *rt;
591
592         rt = (struct rt6_info *) dst;
593
594         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
595                 return dst;
596
597         return NULL;
598 }
599
600 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
601 {
602         struct rt6_info *rt = (struct rt6_info *) dst;
603
604         if (rt) {
605                 if (rt->rt6i_flags & RTF_CACHE)
606                         ip6_del_rt(rt, NULL, NULL, NULL);
607                 else
608                         dst_release(dst);
609         }
610         return NULL;
611 }
612
613 static void ip6_link_failure(struct sk_buff *skb)
614 {
615         struct rt6_info *rt;
616
617         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
618
619         rt = (struct rt6_info *) skb->dst;
620         if (rt) {
621                 if (rt->rt6i_flags&RTF_CACHE) {
622                         dst_set_expires(&rt->u.dst, 0);
623                         rt->rt6i_flags |= RTF_EXPIRES;
624                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
625                         rt->rt6i_node->fn_sernum = -1;
626         }
627 }
628
629 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
630 {
631         struct rt6_info *rt6 = (struct rt6_info*)dst;
632
633         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
634                 rt6->rt6i_flags |= RTF_MODIFIED;
635                 if (mtu < IPV6_MIN_MTU) {
636                         mtu = IPV6_MIN_MTU;
637                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
638                 }
639                 dst->metrics[RTAX_MTU-1] = mtu;
640         }
641 }
642
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(), waiting to be reaped by ndisc_dst_gc().
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
646
647 static inline unsigned int ipv6_advmss(unsigned int mtu)
648 {
649         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
650
651         if (mtu < ip6_rt_min_advmss)
652                 mtu = ip6_rt_min_advmss;
653
654         /*
655          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
656          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
657          * IPV6_MAXPLEN is also valid and means: "any MSS, 
658          * rely only on pmtu discovery"
659          */
660         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
661                 mtu = IPV6_MAXPLEN;
662         return mtu;
663 }
664
665 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
666                                   struct neighbour *neigh,
667                                   struct in6_addr *addr,
668                                   int (*output)(struct sk_buff *))
669 {
670         struct rt6_info *rt;
671         struct inet6_dev *idev = in6_dev_get(dev);
672
673         if (unlikely(idev == NULL))
674                 return NULL;
675
676         rt = ip6_dst_alloc();
677         if (unlikely(rt == NULL)) {
678                 in6_dev_put(idev);
679                 goto out;
680         }
681
682         dev_hold(dev);
683         if (neigh)
684                 neigh_hold(neigh);
685         else
686                 neigh = ndisc_get_neigh(dev, addr);
687
688         rt->rt6i_dev      = dev;
689         rt->rt6i_idev     = idev;
690         rt->rt6i_nexthop  = neigh;
691         atomic_set(&rt->u.dst.__refcnt, 1);
692         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
693         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
694         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
695         rt->u.dst.output  = output;
696
697 #if 0   /* there's no chance to use these for ndisc */
698         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
699                                 ? DST_HOST 
700                                 : 0;
701         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
702         rt->rt6i_dst.plen = 128;
703 #endif
704
705         write_lock_bh(&rt6_lock);
706         rt->u.dst.next = ndisc_dst_gc_list;
707         ndisc_dst_gc_list = &rt->u.dst;
708         write_unlock_bh(&rt6_lock);
709
710         fib6_force_start_gc();
711
712 out:
713         return (struct dst_entry *)rt;
714 }
715
716 int ndisc_dst_gc(int *more)
717 {
718         struct dst_entry *dst, *next, **pprev;
719         int freed;
720
721         next = NULL;
722         pprev = &ndisc_dst_gc_list;
723         freed = 0;
724         while ((dst = *pprev) != NULL) {
725                 if (!atomic_read(&dst->__refcnt)) {
726                         *pprev = dst->next;
727                         dst_free(dst);
728                         freed++;
729                 } else {
730                         pprev = &dst->next;
731                         (*more)++;
732                 }
733         }
734
735         return freed;
736 }
737
738 static int ip6_dst_gc(void)
739 {
740         static unsigned expire = 30*HZ;
741         static unsigned long last_gc;
742         unsigned long now = jiffies;
743
744         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
745             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
746                 goto out;
747
748         expire++;
749         fib6_run_gc(expire);
750         last_gc = now;
751         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
752                 expire = ip6_rt_gc_timeout>>1;
753
754 out:
755         expire -= expire>>ip6_rt_gc_elasticity;
756         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
757 }
758
759 /* Clean host part of a prefix. Not necessary in radix tree,
760    but results in cleaner routing tables.
761
762    Remove it only when all the things will work!
763  */
764
765 static int ipv6_get_mtu(struct net_device *dev)
766 {
767         int mtu = IPV6_MIN_MTU;
768         struct inet6_dev *idev;
769
770         idev = in6_dev_get(dev);
771         if (idev) {
772                 mtu = idev->cnf.mtu6;
773                 in6_dev_put(idev);
774         }
775         return mtu;
776 }
777
778 int ipv6_get_hoplimit(struct net_device *dev)
779 {
780         int hoplimit = ipv6_devconf.hop_limit;
781         struct inet6_dev *idev;
782
783         idev = in6_dev_get(dev);
784         if (idev) {
785                 hoplimit = idev->cnf.hop_limit;
786                 in6_dev_put(idev);
787         }
788         return hoplimit;
789 }
790
791 /*
792  *
793  */
794
795 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
796                 void *_rtattr, struct netlink_skb_parms *req)
797 {
798         int err;
799         struct rtmsg *r;
800         struct rtattr **rta;
801         struct rt6_info *rt = NULL;
802         struct net_device *dev = NULL;
803         struct inet6_dev *idev = NULL;
804         int addr_type;
805
806         rta = (struct rtattr **) _rtattr;
807
808         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
809                 return -EINVAL;
810 #ifndef CONFIG_IPV6_SUBTREES
811         if (rtmsg->rtmsg_src_len)
812                 return -EINVAL;
813 #endif
814         if (rtmsg->rtmsg_ifindex) {
815                 err = -ENODEV;
816                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
817                 if (!dev)
818                         goto out;
819                 idev = in6_dev_get(dev);
820                 if (!idev)
821                         goto out;
822         }
823
824         if (rtmsg->rtmsg_metric == 0)
825                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
826
827         rt = ip6_dst_alloc();
828
829         if (rt == NULL) {
830                 err = -ENOMEM;
831                 goto out;
832         }
833
834         rt->u.dst.obsolete = -1;
835         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
836         if (nlh && (r = NLMSG_DATA(nlh))) {
837                 rt->rt6i_protocol = r->rtm_protocol;
838         } else {
839                 rt->rt6i_protocol = RTPROT_BOOT;
840         }
841
842         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
843
844         if (addr_type & IPV6_ADDR_MULTICAST)
845                 rt->u.dst.input = ip6_mc_input;
846         else
847                 rt->u.dst.input = ip6_forward;
848
849         rt->u.dst.output = ip6_output;
850
851         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
852                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
853         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
854         if (rt->rt6i_dst.plen == 128)
855                rt->u.dst.flags = DST_HOST;
856
857 #ifdef CONFIG_IPV6_SUBTREES
858         ipv6_addr_prefix(&rt->rt6i_src.addr, 
859                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
860         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
861 #endif
862
863         rt->rt6i_metric = rtmsg->rtmsg_metric;
864
865         /* We cannot add true routes via loopback here,
866            they would result in kernel looping; promote them to reject routes
867          */
868         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
869             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
870                 /* hold loopback dev/idev if we haven't done so. */
871                 if (dev != &loopback_dev) {
872                         if (dev) {
873                                 dev_put(dev);
874                                 in6_dev_put(idev);
875                         }
876                         dev = &loopback_dev;
877                         dev_hold(dev);
878                         idev = in6_dev_get(dev);
879                         if (!idev) {
880                                 err = -ENODEV;
881                                 goto out;
882                         }
883                 }
884                 rt->u.dst.output = ip6_pkt_discard_out;
885                 rt->u.dst.input = ip6_pkt_discard;
886                 rt->u.dst.error = -ENETUNREACH;
887                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
888                 goto install_route;
889         }
890
891         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
892                 struct in6_addr *gw_addr;
893                 int gwa_type;
894
895                 gw_addr = &rtmsg->rtmsg_gateway;
896                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
897                 gwa_type = ipv6_addr_type(gw_addr);
898
899                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
900                         struct rt6_info *grt;
901
902                         /* IPv6 strictly inhibits using not link-local
903                            addresses as nexthop address.
904                            Otherwise, router will not able to send redirects.
905                            It is very good, but in some (rare!) circumstances
906                            (SIT, PtP, NBMA NOARP links) it is handy to allow
907                            some exceptions. --ANK
908                          */
909                         err = -EINVAL;
910                         if (!(gwa_type&IPV6_ADDR_UNICAST))
911                                 goto out;
912
913                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
914
915                         err = -EHOSTUNREACH;
916                         if (grt == NULL)
917                                 goto out;
918                         if (dev) {
919                                 if (dev != grt->rt6i_dev) {
920                                         dst_release(&grt->u.dst);
921                                         goto out;
922                                 }
923                         } else {
924                                 dev = grt->rt6i_dev;
925                                 idev = grt->rt6i_idev;
926                                 dev_hold(dev);
927                                 in6_dev_hold(grt->rt6i_idev);
928                         }
929                         if (!(grt->rt6i_flags&RTF_GATEWAY))
930                                 err = 0;
931                         dst_release(&grt->u.dst);
932
933                         if (err)
934                                 goto out;
935                 }
936                 err = -EINVAL;
937                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
938                         goto out;
939         }
940
941         err = -ENODEV;
942         if (dev == NULL)
943                 goto out;
944
945         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
946                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
947                 if (IS_ERR(rt->rt6i_nexthop)) {
948                         err = PTR_ERR(rt->rt6i_nexthop);
949                         rt->rt6i_nexthop = NULL;
950                         goto out;
951                 }
952         }
953
954         rt->rt6i_flags = rtmsg->rtmsg_flags;
955
956 install_route:
957         if (rta && rta[RTA_METRICS-1]) {
958                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
959                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
960
961                 while (RTA_OK(attr, attrlen)) {
962                         unsigned flavor = attr->rta_type;
963                         if (flavor) {
964                                 if (flavor > RTAX_MAX) {
965                                         err = -EINVAL;
966                                         goto out;
967                                 }
968                                 rt->u.dst.metrics[flavor-1] =
969                                         *(u32 *)RTA_DATA(attr);
970                         }
971                         attr = RTA_NEXT(attr, attrlen);
972                 }
973         }
974
975         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
976                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
977         if (!rt->u.dst.metrics[RTAX_MTU-1])
978                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
979         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
980                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
981         rt->u.dst.dev = dev;
982         rt->rt6i_idev = idev;
983         return ip6_ins_rt(rt, nlh, _rtattr, req);
984
985 out:
986         if (dev)
987                 dev_put(dev);
988         if (idev)
989                 in6_dev_put(idev);
990         if (rt)
991                 dst_free((struct dst_entry *) rt);
992         return err;
993 }
994
995 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
996 {
997         int err;
998
999         write_lock_bh(&rt6_lock);
1000
1001         rt6_reset_dflt_pointer(NULL);
1002
1003         err = fib6_del(rt, nlh, _rtattr, req);
1004         dst_release(&rt->u.dst);
1005
1006         write_unlock_bh(&rt6_lock);
1007
1008         return err;
1009 }
1010
1011 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1012 {
1013         struct fib6_node *fn;
1014         struct rt6_info *rt;
1015         int err = -ESRCH;
1016
1017         read_lock_bh(&rt6_lock);
1018
1019         fn = fib6_locate(&ip6_routing_table,
1020                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1022         
1023         if (fn) {
1024                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1025                         if (rtmsg->rtmsg_ifindex &&
1026                             (rt->rt6i_dev == NULL ||
1027                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1028                                 continue;
1029                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1031                                 continue;
1032                         if (rtmsg->rtmsg_metric &&
1033                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1034                                 continue;
1035                         dst_hold(&rt->u.dst);
1036                         read_unlock_bh(&rt6_lock);
1037
1038                         return ip6_del_rt(rt, nlh, _rtattr, req);
1039                 }
1040         }
1041         read_unlock_bh(&rt6_lock);
1042
1043         return err;
1044 }
1045
1046 /*
1047  *      Handle redirects
1048  */
1049 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1050                   struct neighbour *neigh, u8 *lladdr, int on_link)
1051 {
1052         struct rt6_info *rt, *nrt;
1053
1054         /* Locate old route to this destination. */
1055         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1056
1057         if (rt == NULL)
1058                 return;
1059
1060         if (neigh->dev != rt->rt6i_dev)
1061                 goto out;
1062
1063         /*
1064          * Current route is on-link; redirect is always invalid.
1065          * 
1066          * Seems, previous statement is not true. It could
1067          * be node, which looks for us as on-link (f.e. proxy ndisc)
1068          * But then router serving it might decide, that we should
1069          * know truth 8)8) --ANK (980726).
1070          */
1071         if (!(rt->rt6i_flags&RTF_GATEWAY))
1072                 goto out;
1073
1074         /*
1075          *      RFC 2461 specifies that redirects should only be
1076          *      accepted if they come from the nexthop to the target.
1077          *      Due to the way default routers are chosen, this notion
1078          *      is a bit fuzzy and one might need to check all default
1079          *      routers.
1080          */
1081         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1082                 if (rt->rt6i_flags & RTF_DEFAULT) {
1083                         struct rt6_info *rt1;
1084
1085                         read_lock(&rt6_lock);
1086                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1087                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1088                                         dst_hold(&rt1->u.dst);
1089                                         dst_release(&rt->u.dst);
1090                                         read_unlock(&rt6_lock);
1091                                         rt = rt1;
1092                                         goto source_ok;
1093                                 }
1094                         }
1095                         read_unlock(&rt6_lock);
1096                 }
1097                 if (net_ratelimit())
1098                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1099                                "for redirect target\n");
1100                 goto out;
1101         }
1102
1103 source_ok:
1104
1105         /*
1106          *      We have finally decided to accept it.
1107          */
1108
1109         neigh_update(neigh, lladdr, NUD_STALE, 
1110                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1111                      NEIGH_UPDATE_F_OVERRIDE|
1112                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1113                                      NEIGH_UPDATE_F_ISROUTER))
1114                      );
1115
1116         /*
1117          * Redirect received -> path was valid.
1118          * Look, redirects are sent only in response to data packets,
1119          * so that this nexthop apparently is reachable. --ANK
1120          */
1121         dst_confirm(&rt->u.dst);
1122
1123         /* Duplicate redirect: silently ignore. */
1124         if (neigh == rt->u.dst.neighbour)
1125                 goto out;
1126
1127         nrt = ip6_rt_copy(rt);
1128         if (nrt == NULL)
1129                 goto out;
1130
1131         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1132         if (on_link)
1133                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1134
1135         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1136         nrt->rt6i_dst.plen = 128;
1137         nrt->u.dst.flags |= DST_HOST;
1138
1139         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1140         nrt->rt6i_nexthop = neigh_clone(neigh);
1141         /* Reset pmtu, it may be better */
1142         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1143         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1144
1145         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1146                 goto out;
1147
1148         if (rt->rt6i_flags&RTF_CACHE) {
1149                 ip6_del_rt(rt, NULL, NULL, NULL);
1150                 return;
1151         }
1152
1153 out:
1154         dst_release(&rt->u.dst);
1155         return;
1156 }
1157
1158 /*
1159  *      Handle ICMP "packet too big" messages
1160  *      i.e. Path MTU discovery
1161  */
1162
1163 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164                         struct net_device *dev, u32 pmtu)
1165 {
1166         struct rt6_info *rt, *nrt;
1167         int allfrag = 0;
1168
1169         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1170         if (rt == NULL)
1171                 return;
1172
1173         if (pmtu >= dst_mtu(&rt->u.dst))
1174                 goto out;
1175
1176         if (pmtu < IPV6_MIN_MTU) {
1177                 /*
1178                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1179                  * MTU (1280) and a fragment header should always be included
1180                  * after a node receiving Too Big message reporting PMTU is
1181                  * less than the IPv6 Minimum Link MTU.
1182                  */
1183                 pmtu = IPV6_MIN_MTU;
1184                 allfrag = 1;
1185         }
1186
1187         /* New mtu received -> path was valid.
1188            They are sent only in response to data packets,
1189            so that this nexthop apparently is reachable. --ANK
1190          */
1191         dst_confirm(&rt->u.dst);
1192
1193         /* Host route. If it is static, it would be better
1194            not to override it, but add new one, so that
1195            when cache entry will expire old pmtu
1196            would return automatically.
1197          */
1198         if (rt->rt6i_flags & RTF_CACHE) {
1199                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1200                 if (allfrag)
1201                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1202                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1203                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1204                 goto out;
1205         }
1206
1207         /* Network route.
1208            Two cases are possible:
1209            1. It is connected route. Action: COW
1210            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1211          */
1212         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1213                 nrt = rt6_cow(rt, daddr, saddr, NULL);
1214                 if (!nrt->u.dst.error) {
1215                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1216                         if (allfrag)
1217                                 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1218                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1219                            happened within 5 mins, the recommended timer is 10 mins.
1220                            Here this route expiration time is set to ip6_rt_mtu_expires
1221                            which is 10 mins. After 10 mins the decreased pmtu is expired
1222                            and detecting PMTU increase will be automatically happened.
1223                          */
1224                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1225                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1226                 }
1227                 dst_release(&nrt->u.dst);
1228         } else {
1229                 nrt = ip6_rt_copy(rt);
1230                 if (nrt == NULL)
1231                         goto out;
1232                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1233                 nrt->rt6i_dst.plen = 128;
1234                 nrt->u.dst.flags |= DST_HOST;
1235                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1236                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1237                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1238                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1239                 if (allfrag)
1240                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1241                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1242         }
1243
1244 out:
1245         dst_release(&rt->u.dst);
1246 }
1247
1248 /*
1249  *      Misc support functions
1250  */
1251
1252 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1253 {
1254         struct rt6_info *rt = ip6_dst_alloc();
1255
1256         if (rt) {
1257                 rt->u.dst.input = ort->u.dst.input;
1258                 rt->u.dst.output = ort->u.dst.output;
1259
1260                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1261                 rt->u.dst.dev = ort->u.dst.dev;
1262                 if (rt->u.dst.dev)
1263                         dev_hold(rt->u.dst.dev);
1264                 rt->rt6i_idev = ort->rt6i_idev;
1265                 if (rt->rt6i_idev)
1266                         in6_dev_hold(rt->rt6i_idev);
1267                 rt->u.dst.lastuse = jiffies;
1268                 rt->rt6i_expires = 0;
1269
1270                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1271                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1272                 rt->rt6i_metric = 0;
1273
1274                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1275 #ifdef CONFIG_IPV6_SUBTREES
1276                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1277 #endif
1278         }
1279         return rt;
1280 }
1281
1282 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1283 {       
1284         struct rt6_info *rt;
1285         struct fib6_node *fn;
1286
1287         fn = &ip6_routing_table;
1288
1289         write_lock_bh(&rt6_lock);
1290         for (rt = fn->leaf; rt; rt=rt->u.next) {
1291                 if (dev == rt->rt6i_dev &&
1292                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1293                         break;
1294         }
1295         if (rt)
1296                 dst_hold(&rt->u.dst);
1297         write_unlock_bh(&rt6_lock);
1298         return rt;
1299 }
1300
1301 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1302                                      struct net_device *dev)
1303 {
1304         struct in6_rtmsg rtmsg;
1305
1306         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309         rtmsg.rtmsg_metric = 1024;
1310         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1311
1312         rtmsg.rtmsg_ifindex = dev->ifindex;
1313
1314         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1315         return rt6_get_dflt_router(gwaddr, dev);
1316 }
1317
1318 void rt6_purge_dflt_routers(void)
1319 {
1320         struct rt6_info *rt;
1321
1322 restart:
1323         read_lock_bh(&rt6_lock);
1324         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1325                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1326                         dst_hold(&rt->u.dst);
1327
1328                         rt6_reset_dflt_pointer(NULL);
1329
1330                         read_unlock_bh(&rt6_lock);
1331
1332                         ip6_del_rt(rt, NULL, NULL, NULL);
1333
1334                         goto restart;
1335                 }
1336         }
1337         read_unlock_bh(&rt6_lock);
1338 }
1339
1340 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1341 {
1342         struct in6_rtmsg rtmsg;
1343         int err;
1344
1345         switch(cmd) {
1346         case SIOCADDRT:         /* Add a route */
1347         case SIOCDELRT:         /* Delete a route */
1348                 if (!capable(CAP_NET_ADMIN))
1349                         return -EPERM;
1350                 err = copy_from_user(&rtmsg, arg,
1351                                      sizeof(struct in6_rtmsg));
1352                 if (err)
1353                         return -EFAULT;
1354                         
1355                 rtnl_lock();
1356                 switch (cmd) {
1357                 case SIOCADDRT:
1358                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1359                         break;
1360                 case SIOCDELRT:
1361                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1362                         break;
1363                 default:
1364                         err = -EINVAL;
1365                 }
1366                 rtnl_unlock();
1367
1368                 return err;
1369         };
1370
1371         return -EINVAL;
1372 }
1373
1374 /*
1375  *      Drop the packet on the floor
1376  */
1377
1378 static int ip6_pkt_discard(struct sk_buff *skb)
1379 {
1380         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1381         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1382         kfree_skb(skb);
1383         return 0;
1384 }
1385
1386 static int ip6_pkt_discard_out(struct sk_buff *skb)
1387 {
1388         skb->dev = skb->dst->dev;
1389         return ip6_pkt_discard(skb);
1390 }
1391
1392 /*
1393  *      Allocate a dst for local (unicast / anycast) address.
1394  */
1395
1396 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1397                                     const struct in6_addr *addr,
1398                                     int anycast)
1399 {
1400         struct rt6_info *rt = ip6_dst_alloc();
1401
1402         if (rt == NULL)
1403                 return ERR_PTR(-ENOMEM);
1404
1405         dev_hold(&loopback_dev);
1406         in6_dev_hold(idev);
1407
1408         rt->u.dst.flags = DST_HOST;
1409         rt->u.dst.input = ip6_input;
1410         rt->u.dst.output = ip6_output;
1411         rt->rt6i_dev = &loopback_dev;
1412         rt->rt6i_idev = idev;
1413         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1414         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1415         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1416         rt->u.dst.obsolete = -1;
1417
1418         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1419         if (anycast)
1420                 rt->rt6i_flags |= RTF_ANYCAST;
1421         else
1422                 rt->rt6i_flags |= RTF_LOCAL;
1423         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1424         if (rt->rt6i_nexthop == NULL) {
1425                 dst_free((struct dst_entry *) rt);
1426                 return ERR_PTR(-ENOMEM);
1427         }
1428
1429         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1430         rt->rt6i_dst.plen = 128;
1431
1432         atomic_set(&rt->u.dst.__refcnt, 1);
1433
1434         return rt;
1435 }
1436
1437 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1438 {
1439         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1440             rt != &ip6_null_entry) {
1441                 RT6_TRACE("deleted by ifdown %p\n", rt);
1442                 return -1;
1443         }
1444         return 0;
1445 }
1446
1447 void rt6_ifdown(struct net_device *dev)
1448 {
1449         write_lock_bh(&rt6_lock);
1450         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1451         write_unlock_bh(&rt6_lock);
1452 }
1453
/* Argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1459
1460 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1461 {
1462         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1463         struct inet6_dev *idev;
1464
1465         /* In IPv6 pmtu discovery is not optional,
1466            so that RTAX_MTU lock cannot disable it.
1467            We still use this lock to block changes
1468            caused by addrconf/ndisc.
1469         */
1470
1471         idev = __in6_dev_get(arg->dev);
1472         if (idev == NULL)
1473                 return 0;
1474
1475         /* For administrative MTU increase, there is no way to discover
1476            IPv6 PMTU increase, so PMTU increase should be updated here.
1477            Since RFC 1981 doesn't include administrative MTU increase
1478            update PMTU increase is a MUST. (i.e. jumbo frame)
1479          */
1480         /*
1481            If new MTU is less than route PMTU, this new MTU will be the
1482            lowest MTU in the path, update the route PMTU to reflect PMTU
1483            decreases; if new MTU is greater than route PMTU, and the
1484            old MTU is the lowest MTU in the path, update the route PMTU
1485            to reflect the increase. In this case if the other nodes' MTU
1486            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1487            PMTU discouvery.
1488          */
1489         if (rt->rt6i_dev == arg->dev &&
1490             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1491             (dst_mtu(&rt->u.dst) > arg->mtu ||
1492              (dst_mtu(&rt->u.dst) < arg->mtu &&
1493               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1494                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1495         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1496         return 0;
1497 }
1498
1499 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1500 {
1501         struct rt6_mtu_change_arg arg;
1502
1503         arg.dev = dev;
1504         arg.mtu = mtu;
1505         read_lock_bh(&rt6_lock);
1506         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1507         read_unlock_bh(&rt6_lock);
1508 }
1509
1510 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1511                               struct in6_rtmsg *rtmsg)
1512 {
1513         memset(rtmsg, 0, sizeof(*rtmsg));
1514
1515         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1516         rtmsg->rtmsg_src_len = r->rtm_src_len;
1517         rtmsg->rtmsg_flags = RTF_UP;
1518         if (r->rtm_type == RTN_UNREACHABLE)
1519                 rtmsg->rtmsg_flags |= RTF_REJECT;
1520
1521         if (rta[RTA_GATEWAY-1]) {
1522                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1523                         return -EINVAL;
1524                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1525                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1526         }
1527         if (rta[RTA_DST-1]) {
1528                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1529                         return -EINVAL;
1530                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1531         }
1532         if (rta[RTA_SRC-1]) {
1533                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1534                         return -EINVAL;
1535                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1536         }
1537         if (rta[RTA_OIF-1]) {
1538                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1539                         return -EINVAL;
1540                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1541         }
1542         if (rta[RTA_PRIORITY-1]) {
1543                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1544                         return -EINVAL;
1545                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1546         }
1547         return 0;
1548 }
1549
1550 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1551 {
1552         struct rtmsg *r = NLMSG_DATA(nlh);
1553         struct in6_rtmsg rtmsg;
1554
1555         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1556                 return -EINVAL;
1557         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1558 }
1559
1560 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1561 {
1562         struct rtmsg *r = NLMSG_DATA(nlh);
1563         struct in6_rtmsg rtmsg;
1564
1565         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1566                 return -EINVAL;
1567         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1568 }
1569
/* Per-call state handed to the dump walker by inet6_dump_fib(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the routes are written to */
	struct netlink_callback *cb;	/* netlink dump callback in progress */
};
1575
1576 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1577                          struct in6_addr *dst, struct in6_addr *src,
1578                          int iif, int type, u32 pid, u32 seq,
1579                          int prefix, unsigned int flags)
1580 {
1581         struct rtmsg *rtm;
1582         struct nlmsghdr  *nlh;
1583         unsigned char    *b = skb->tail;
1584         struct rta_cacheinfo ci;
1585
1586         if (prefix) {   /* user wants prefix routes only */
1587                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1588                         /* success since this is not a prefix route */
1589                         return 1;
1590                 }
1591         }
1592
1593         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1594         rtm = NLMSG_DATA(nlh);
1595         rtm->rtm_family = AF_INET6;
1596         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1597         rtm->rtm_src_len = rt->rt6i_src.plen;
1598         rtm->rtm_tos = 0;
1599         rtm->rtm_table = RT_TABLE_MAIN;
1600         if (rt->rt6i_flags&RTF_REJECT)
1601                 rtm->rtm_type = RTN_UNREACHABLE;
1602         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1603                 rtm->rtm_type = RTN_LOCAL;
1604         else
1605                 rtm->rtm_type = RTN_UNICAST;
1606         rtm->rtm_flags = 0;
1607         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1608         rtm->rtm_protocol = rt->rt6i_protocol;
1609         if (rt->rt6i_flags&RTF_DYNAMIC)
1610                 rtm->rtm_protocol = RTPROT_REDIRECT;
1611         else if (rt->rt6i_flags & RTF_ADDRCONF)
1612                 rtm->rtm_protocol = RTPROT_KERNEL;
1613         else if (rt->rt6i_flags&RTF_DEFAULT)
1614                 rtm->rtm_protocol = RTPROT_RA;
1615
1616         if (rt->rt6i_flags&RTF_CACHE)
1617                 rtm->rtm_flags |= RTM_F_CLONED;
1618
1619         if (dst) {
1620                 RTA_PUT(skb, RTA_DST, 16, dst);
1621                 rtm->rtm_dst_len = 128;
1622         } else if (rtm->rtm_dst_len)
1623                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1624 #ifdef CONFIG_IPV6_SUBTREES
1625         if (src) {
1626                 RTA_PUT(skb, RTA_SRC, 16, src);
1627                 rtm->rtm_src_len = 128;
1628         } else if (rtm->rtm_src_len)
1629                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1630 #endif
1631         if (iif)
1632                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1633         else if (dst) {
1634                 struct in6_addr saddr_buf;
1635                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1636                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1637         }
1638         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1639                 goto rtattr_failure;
1640         if (rt->u.dst.neighbour)
1641                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1642         if (rt->u.dst.dev)
1643                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1644         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1645         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1646         if (rt->rt6i_expires)
1647                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1648         else
1649                 ci.rta_expires = 0;
1650         ci.rta_used = rt->u.dst.__use;
1651         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1652         ci.rta_error = rt->u.dst.error;
1653         ci.rta_id = 0;
1654         ci.rta_ts = 0;
1655         ci.rta_tsage = 0;
1656         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1657         nlh->nlmsg_len = skb->tail - b;
1658         return skb->len;
1659
1660 nlmsg_failure:
1661 rtattr_failure:
1662         skb_trim(skb, b - skb->data);
1663         return -1;
1664 }
1665
1666 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1667 {
1668         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1669         int prefix;
1670
1671         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1672                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1673                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1674         } else
1675                 prefix = 0;
1676
1677         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1678                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1679                      prefix, NLM_F_MULTI);
1680 }
1681
1682 static int fib6_dump_node(struct fib6_walker_t *w)
1683 {
1684         int res;
1685         struct rt6_info *rt;
1686
1687         for (rt = w->leaf; rt; rt = rt->u.next) {
1688                 res = rt6_dump_route(rt, w->args);
1689                 if (res < 0) {
1690                         /* Frame is full, suspend walking */
1691                         w->leaf = rt;
1692                         return 1;
1693                 }
1694                 BUG_TRAP(res!=0);
1695         }
1696         w->leaf = NULL;
1697         return 0;
1698 }
1699
1700 static void fib6_dump_end(struct netlink_callback *cb)
1701 {
1702         struct fib6_walker_t *w = (void*)cb->args[0];
1703
1704         if (w) {
1705                 cb->args[0] = 0;
1706                 fib6_walker_unlink(w);
1707                 kfree(w);
1708         }
1709         cb->done = (void*)cb->args[1];
1710         cb->args[1] = 0;
1711 }
1712
1713 static int fib6_dump_done(struct netlink_callback *cb)
1714 {
1715         fib6_dump_end(cb);
1716         return cb->done ? cb->done(cb) : 0;
1717 }
1718
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720 {
1721         struct rt6_rtnl_dump_arg arg;
1722         struct fib6_walker_t *w;
1723         int res;
1724
1725         arg.skb = skb;
1726         arg.cb = cb;
1727
1728         w = (void*)cb->args[0];
1729         if (w == NULL) {
1730                 /* New dump:
1731                  * 
1732                  * 1. hook callback destructor.
1733                  */
1734                 cb->args[1] = (long)cb->done;
1735                 cb->done = fib6_dump_done;
1736
1737                 /*
1738                  * 2. allocate and initialize walker.
1739                  */
1740                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1741                 if (w == NULL)
1742                         return -ENOMEM;
1743                 RT6_TRACE("dump<%p", w);
1744                 memset(w, 0, sizeof(*w));
1745                 w->root = &ip6_routing_table;
1746                 w->func = fib6_dump_node;
1747                 w->args = &arg;
1748                 cb->args[0] = (long)w;
1749                 read_lock_bh(&rt6_lock);
1750                 res = fib6_walk(w);
1751                 read_unlock_bh(&rt6_lock);
1752         } else {
1753                 w->args = &arg;
1754                 read_lock_bh(&rt6_lock);
1755                 res = fib6_walk_continue(w);
1756                 read_unlock_bh(&rt6_lock);
1757         }
1758 #if RT6_DEBUG >= 3
1759         if (res <= 0 && skb->len == 0)
1760                 RT6_TRACE("%p>dump end\n", w);
1761 #endif
1762         res = res < 0 ? res : skb->len;
1763         /* res < 0 is an error. (really, impossible)
1764            res == 0 means that dump is complete, but skb still can contain data.
1765            res > 0 dump is not complete, but frame is full.
1766          */
1767         /* Destroy walker, if dump of this table is complete. */
1768         if (res <= 0)
1769                 fib6_dump_end(cb);
1770         return res;
1771 }
1772
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774 {
1775         struct rtattr **rta = arg;
1776         int iif = 0;
1777         int err = -ENOBUFS;
1778         struct sk_buff *skb;
1779         struct flowi fl;
1780         struct rt6_info *rt;
1781
1782         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783         if (skb == NULL)
1784                 goto out;
1785
1786         /* Reserve room for dummy headers, this skb can pass
1787            through good chunk of routing engine.
1788          */
1789         skb->mac.raw = skb->data;
1790         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791
1792         memset(&fl, 0, sizeof(fl));
1793         if (rta[RTA_SRC-1])
1794                 ipv6_addr_copy(&fl.fl6_src,
1795                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796         if (rta[RTA_DST-1])
1797                 ipv6_addr_copy(&fl.fl6_dst,
1798                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799
1800         if (rta[RTA_IIF-1])
1801                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802
1803         if (iif) {
1804                 struct net_device *dev;
1805                 dev = __dev_get_by_index(iif);
1806                 if (!dev) {
1807                         err = -ENODEV;
1808                         goto out_free;
1809                 }
1810         }
1811
1812         fl.oif = 0;
1813         if (rta[RTA_OIF-1])
1814                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815
1816         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817
1818         skb->dst = &rt->u.dst;
1819
1820         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821         err = rt6_fill_node(skb, rt, 
1822                             &fl.fl6_dst, &fl.fl6_src,
1823                             iif,
1824                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825                             nlh->nlmsg_seq, 0, 0);
1826         if (err < 0) {
1827                 err = -EMSGSIZE;
1828                 goto out_free;
1829         }
1830
1831         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832         if (err > 0)
1833                 err = 0;
1834 out:
1835         return err;
1836 out_free:
1837         kfree_skb(skb);
1838         goto out;       
1839 }
1840
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1842                         struct netlink_skb_parms *req)
1843 {
1844         struct sk_buff *skb;
1845         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846         u32 pid = current->pid;
1847         u32 seq = 0;
1848
1849         if (req)
1850                 pid = req->pid;
1851         if (nlh)
1852                 seq = nlh->nlmsg_seq;
1853         
1854         skb = alloc_skb(size, gfp_any());
1855         if (!skb) {
1856                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1857                 return;
1858         }
1859         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1860                 kfree_skb(skb);
1861                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1862                 return;
1863         }
1864         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1865         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1866 }
1867
1868 /*
1869  *      /proc
1870  */
1871
1872 #ifdef CONFIG_PROC_FS
1873
/*
 * Size in bytes assumed for one formatted route record.  It is used to
 * convert the read offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and the per-route formatter. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* no new record is started once len reaches this */
	int skip;	/* records skipped so far to honour offset */
	int len;	/* bytes written into buffer so far */
};
1884
1885 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1886 {
1887         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1888         int i;
1889
1890         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1891                 arg->skip++;
1892                 return 0;
1893         }
1894
1895         if (arg->len >= arg->length)
1896                 return 0;
1897
1898         for (i=0; i<16; i++) {
1899                 sprintf(arg->buffer + arg->len, "%02x",
1900                         rt->rt6i_dst.addr.s6_addr[i]);
1901                 arg->len += 2;
1902         }
1903         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1904                             rt->rt6i_dst.plen);
1905
1906 #ifdef CONFIG_IPV6_SUBTREES
1907         for (i=0; i<16; i++) {
1908                 sprintf(arg->buffer + arg->len, "%02x",
1909                         rt->rt6i_src.addr.s6_addr[i]);
1910                 arg->len += 2;
1911         }
1912         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913                             rt->rt6i_src.plen);
1914 #else
1915         sprintf(arg->buffer + arg->len,
1916                 "00000000000000000000000000000000 00 ");
1917         arg->len += 36;
1918 #endif
1919
1920         if (rt->rt6i_nexthop) {
1921                 for (i=0; i<16; i++) {
1922                         sprintf(arg->buffer + arg->len, "%02x",
1923                                 rt->rt6i_nexthop->primary_key[i]);
1924                         arg->len += 2;
1925                 }
1926         } else {
1927                 sprintf(arg->buffer + arg->len,
1928                         "00000000000000000000000000000000");
1929                 arg->len += 32;
1930         }
1931         arg->len += sprintf(arg->buffer + arg->len,
1932                             " %08x %08x %08x %08x %8s\n",
1933                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1934                             rt->u.dst.__use, rt->rt6i_flags, 
1935                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1936         return 0;
1937 }
1938
1939 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1940 {
1941         struct rt6_proc_arg arg;
1942         arg.buffer = buffer;
1943         arg.offset = offset;
1944         arg.length = length;
1945         arg.skip = 0;
1946         arg.len = 0;
1947
1948         read_lock_bh(&rt6_lock);
1949         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1950         read_unlock_bh(&rt6_lock);
1951
1952         *start = buffer;
1953         if (offset)
1954                 *start += offset % RT6_INFO_LEN;
1955
1956         arg.len -= offset % RT6_INFO_LEN;
1957
1958         if (arg.len > length)
1959                 arg.len = length;
1960         if (arg.len < 0)
1961                 arg.len = 0;
1962
1963         return arg.len;
1964 }
1965
1966 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1967 {
1968         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1969                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1970                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1971                       rt6_stats.fib_rt_cache,
1972                       atomic_read(&ip6_dst_ops.entries),
1973                       rt6_stats.fib_discarded_routes);
1974
1975         return 0;
1976 }
1977
1978 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1979 {
1980         return single_open(file, rt6_stats_seq_show, NULL);
1981 }
1982
1983 static struct file_operations rt6_stats_seq_fops = {
1984         .owner   = THIS_MODULE,
1985         .open    = rt6_stats_seq_open,
1986         .read    = seq_read,
1987         .llseek  = seq_lseek,
1988         .release = single_release,
1989 };
1990 #endif  /* CONFIG_PROC_FS */
1991
1992 #ifdef CONFIG_SYSCTL
1993
1994 static int flush_delay;
1995
1996 static
1997 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1998                               void __user *buffer, size_t *lenp, loff_t *ppos)
1999 {
2000         if (write) {
2001                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2002                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2003                 return 0;
2004         } else
2005                 return -EINVAL;
2006 }
2007
2008 ctl_table ipv6_route_table[] = {
2009         {
2010                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2011                 .procname       =       "flush",
2012                 .data           =       &flush_delay,
2013                 .maxlen         =       sizeof(int),
2014                 .mode           =       0200,
2015                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2016         },
2017         {
2018                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2019                 .procname       =       "gc_thresh",
2020                 .data           =       &ip6_dst_ops.gc_thresh,
2021                 .maxlen         =       sizeof(int),
2022                 .mode           =       0644,
2023                 .proc_handler   =       &proc_dointvec,
2024         },
2025         {
2026                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2027                 .procname       =       "max_size",
2028                 .data           =       &ip6_rt_max_size,
2029                 .maxlen         =       sizeof(int),
2030                 .mode           =       0644,
2031                 .proc_handler   =       &proc_dointvec,
2032         },
2033         {
2034                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2035                 .procname       =       "gc_min_interval",
2036                 .data           =       &ip6_rt_gc_min_interval,
2037                 .maxlen         =       sizeof(int),
2038                 .mode           =       0644,
2039                 .proc_handler   =       &proc_dointvec_jiffies,
2040                 .strategy       =       &sysctl_jiffies,
2041         },
2042         {
2043                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2044                 .procname       =       "gc_timeout",
2045                 .data           =       &ip6_rt_gc_timeout,
2046                 .maxlen         =       sizeof(int),
2047                 .mode           =       0644,
2048                 .proc_handler   =       &proc_dointvec_jiffies,
2049                 .strategy       =       &sysctl_jiffies,
2050         },
2051         {
2052                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2053                 .procname       =       "gc_interval",
2054                 .data           =       &ip6_rt_gc_interval,
2055                 .maxlen         =       sizeof(int),
2056                 .mode           =       0644,
2057                 .proc_handler   =       &proc_dointvec_jiffies,
2058                 .strategy       =       &sysctl_jiffies,
2059         },
2060         {
2061                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2062                 .procname       =       "gc_elasticity",
2063                 .data           =       &ip6_rt_gc_elasticity,
2064                 .maxlen         =       sizeof(int),
2065                 .mode           =       0644,
2066                 .proc_handler   =       &proc_dointvec_jiffies,
2067                 .strategy       =       &sysctl_jiffies,
2068         },
2069         {
2070                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2071                 .procname       =       "mtu_expires",
2072                 .data           =       &ip6_rt_mtu_expires,
2073                 .maxlen         =       sizeof(int),
2074                 .mode           =       0644,
2075                 .proc_handler   =       &proc_dointvec_jiffies,
2076                 .strategy       =       &sysctl_jiffies,
2077         },
2078         {
2079                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2080                 .procname       =       "min_adv_mss",
2081                 .data           =       &ip6_rt_min_advmss,
2082                 .maxlen         =       sizeof(int),
2083                 .mode           =       0644,
2084                 .proc_handler   =       &proc_dointvec_jiffies,
2085                 .strategy       =       &sysctl_jiffies,
2086         },
2087         {
2088                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2089                 .procname       =       "gc_min_interval_ms",
2090                 .data           =       &ip6_rt_gc_min_interval,
2091                 .maxlen         =       sizeof(int),
2092                 .mode           =       0644,
2093                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2094                 .strategy       =       &sysctl_ms_jiffies,
2095         },
2096         { .ctl_name = 0 }
2097 };
2098
2099 #endif
2100
2101 void __init ip6_route_init(void)
2102 {
2103         struct proc_dir_entry *p;
2104
2105         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2106                                                      sizeof(struct rt6_info),
2107                                                      0, SLAB_HWCACHE_ALIGN,
2108                                                      NULL, NULL);
2109         if (!ip6_dst_ops.kmem_cachep)
2110                 panic("cannot create ip6_dst_cache");
2111
2112         fib6_init();
2113 #ifdef  CONFIG_PROC_FS
2114         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2115         if (p)
2116                 p->owner = THIS_MODULE;
2117
2118         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2119 #endif
2120 #ifdef CONFIG_XFRM
2121         xfrm6_init();
2122 #endif
2123 }
2124
2125 void ip6_route_cleanup(void)
2126 {
2127 #ifdef CONFIG_PROC_FS
2128         proc_net_remove("ipv6_route");
2129         proc_net_remove("rt6_stats");
2130 #endif
2131 #ifdef CONFIG_XFRM
2132         xfrm6_fini();
2133 #endif
2134         rt6_ifdown(NULL);
2135         fib6_gc_cleanup();
2136         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2137 }